Commit 374297e8 authored by Jakub Kicinski
Browse files

Merge branch 'tcp_metrics-series-of-fixes'

Eric Dumazet says:

====================
tcp_metrics: series of fixes

This series contains a fix for addr_same() and various
data-race annotations.

We still have to address races over tm->tcpm_saddr and
tm->tcpm_daddr later.
====================

Link: https://lore.kernel.org/r/20230802131500.1478140-1-edumazet@google.com


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents b755c25f ddf251fa
Loading
Loading
Loading
Loading
+44 −26
Original line number Diff line number Diff line
@@ -40,7 +40,7 @@ struct tcp_fastopen_metrics {

struct tcp_metrics_block {
	struct tcp_metrics_block __rcu	*tcpm_next;
	possible_net_t			tcpm_net;
	struct net			*tcpm_net;
	struct inetpeer_addr		tcpm_saddr;
	struct inetpeer_addr		tcpm_daddr;
	unsigned long			tcpm_stamp;
@@ -51,34 +51,38 @@ struct tcp_metrics_block {
	struct rcu_head			rcu_head;
};

static inline struct net *tm_net(struct tcp_metrics_block *tm)
static inline struct net *tm_net(const struct tcp_metrics_block *tm)
{
	return read_pnet(&tm->tcpm_net);
	/* Paired with the WRITE_ONCE() in tcpm_new() */
	return READ_ONCE(tm->tcpm_net);
}

static bool tcp_metric_locked(struct tcp_metrics_block *tm,
			      enum tcp_metric_index idx)
{
	return tm->tcpm_lock & (1 << idx);
	/* Paired with WRITE_ONCE() in tcpm_suck_dst() */
	return READ_ONCE(tm->tcpm_lock) & (1 << idx);
}

static u32 tcp_metric_get(struct tcp_metrics_block *tm,
static u32 tcp_metric_get(const struct tcp_metrics_block *tm,
			  enum tcp_metric_index idx)
{
	return tm->tcpm_vals[idx];
	/* Paired with WRITE_ONCE() in tcp_metric_set() */
	return READ_ONCE(tm->tcpm_vals[idx]);
}

static void tcp_metric_set(struct tcp_metrics_block *tm,
			   enum tcp_metric_index idx,
			   u32 val)
{
	tm->tcpm_vals[idx] = val;
	/* Paired with READ_ONCE() in tcp_metric_get() */
	WRITE_ONCE(tm->tcpm_vals[idx], val);
}

static bool addr_same(const struct inetpeer_addr *a,
		      const struct inetpeer_addr *b)
{
	return inetpeer_addr_cmp(a, b) == 0;
	return (a->family == b->family) && !inetpeer_addr_cmp(a, b);
}

struct tcpm_hash_bucket {
@@ -89,6 +93,7 @@ static struct tcpm_hash_bucket *tcp_metrics_hash __read_mostly;
static unsigned int		tcp_metrics_hash_log __read_mostly;

static DEFINE_SPINLOCK(tcp_metrics_lock);
static DEFINE_SEQLOCK(fastopen_seqlock);

static void tcpm_suck_dst(struct tcp_metrics_block *tm,
			  const struct dst_entry *dst,
@@ -97,7 +102,7 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm,
	u32 msval;
	u32 val;

	tm->tcpm_stamp = jiffies;
	WRITE_ONCE(tm->tcpm_stamp, jiffies);

	val = 0;
	if (dst_metric_locked(dst, RTAX_RTT))
@@ -110,30 +115,42 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm,
		val |= 1 << TCP_METRIC_CWND;
	if (dst_metric_locked(dst, RTAX_REORDERING))
		val |= 1 << TCP_METRIC_REORDERING;
	tm->tcpm_lock = val;
	/* Paired with READ_ONCE() in tcp_metric_locked() */
	WRITE_ONCE(tm->tcpm_lock, val);

	msval = dst_metric_raw(dst, RTAX_RTT);
	tm->tcpm_vals[TCP_METRIC_RTT] = msval * USEC_PER_MSEC;
	tcp_metric_set(tm, TCP_METRIC_RTT, msval * USEC_PER_MSEC);

	msval = dst_metric_raw(dst, RTAX_RTTVAR);
	tm->tcpm_vals[TCP_METRIC_RTTVAR] = msval * USEC_PER_MSEC;
	tm->tcpm_vals[TCP_METRIC_SSTHRESH] = dst_metric_raw(dst, RTAX_SSTHRESH);
	tm->tcpm_vals[TCP_METRIC_CWND] = dst_metric_raw(dst, RTAX_CWND);
	tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING);
	tcp_metric_set(tm, TCP_METRIC_RTTVAR, msval * USEC_PER_MSEC);
	tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
		       dst_metric_raw(dst, RTAX_SSTHRESH));
	tcp_metric_set(tm, TCP_METRIC_CWND,
		       dst_metric_raw(dst, RTAX_CWND));
	tcp_metric_set(tm, TCP_METRIC_REORDERING,
		       dst_metric_raw(dst, RTAX_REORDERING));
	if (fastopen_clear) {
		write_seqlock(&fastopen_seqlock);
		tm->tcpm_fastopen.mss = 0;
		tm->tcpm_fastopen.syn_loss = 0;
		tm->tcpm_fastopen.try_exp = 0;
		tm->tcpm_fastopen.cookie.exp = false;
		tm->tcpm_fastopen.cookie.len = 0;
		write_sequnlock(&fastopen_seqlock);
	}
}

#define TCP_METRICS_TIMEOUT		(60 * 60 * HZ)

static void tcpm_check_stamp(struct tcp_metrics_block *tm, struct dst_entry *dst)
static void tcpm_check_stamp(struct tcp_metrics_block *tm,
			     const struct dst_entry *dst)
{
	if (tm && unlikely(time_after(jiffies, tm->tcpm_stamp + TCP_METRICS_TIMEOUT)))
	unsigned long limit;

	if (!tm)
		return;
	limit = READ_ONCE(tm->tcpm_stamp) + TCP_METRICS_TIMEOUT;
	if (unlikely(time_after(jiffies, limit)))
		tcpm_suck_dst(tm, dst, false);
}

@@ -174,20 +191,23 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
		oldest = deref_locked(tcp_metrics_hash[hash].chain);
		for (tm = deref_locked(oldest->tcpm_next); tm;
		     tm = deref_locked(tm->tcpm_next)) {
			if (time_before(tm->tcpm_stamp, oldest->tcpm_stamp))
			if (time_before(READ_ONCE(tm->tcpm_stamp),
					READ_ONCE(oldest->tcpm_stamp)))
				oldest = tm;
		}
		tm = oldest;
	} else {
		tm = kmalloc(sizeof(*tm), GFP_ATOMIC);
		tm = kzalloc(sizeof(*tm), GFP_ATOMIC);
		if (!tm)
			goto out_unlock;
	}
	write_pnet(&tm->tcpm_net, net);
	/* Paired with the READ_ONCE() in tm_net() */
	WRITE_ONCE(tm->tcpm_net, net);

	tm->tcpm_saddr = *saddr;
	tm->tcpm_daddr = *daddr;

	tcpm_suck_dst(tm, dst, true);
	tcpm_suck_dst(tm, dst, reclaim);

	if (likely(!reclaim)) {
		tm->tcpm_next = tcp_metrics_hash[hash].chain;
@@ -434,7 +454,7 @@ void tcp_update_metrics(struct sock *sk)
					       tp->reordering);
		}
	}
	tm->tcpm_stamp = jiffies;
	WRITE_ONCE(tm->tcpm_stamp, jiffies);
out_unlock:
	rcu_read_unlock();
}
@@ -539,8 +559,6 @@ bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst)
	return ret;
}

static DEFINE_SEQLOCK(fastopen_seqlock);

void tcp_fastopen_cache_get(struct sock *sk, u16 *mss,
			    struct tcp_fastopen_cookie *cookie)
{
@@ -647,7 +665,7 @@ static int tcp_metrics_fill_info(struct sk_buff *msg,
	}

	if (nla_put_msecs(msg, TCP_METRICS_ATTR_AGE,
			  jiffies - tm->tcpm_stamp,
			  jiffies - READ_ONCE(tm->tcpm_stamp),
			  TCP_METRICS_ATTR_PAD) < 0)
		goto nla_put_failure;

@@ -658,7 +676,7 @@ static int tcp_metrics_fill_info(struct sk_buff *msg,
		if (!nest)
			goto nla_put_failure;
		for (i = 0; i < TCP_METRIC_MAX_KERNEL + 1; i++) {
			u32 val = tm->tcpm_vals[i];
			u32 val = tcp_metric_get(tm, i);

			if (!val)
				continue;