Commit 32b3ad14 authored by David S. Miller
Browse files

Merge branch 'sysctl-data-races'

Kuniyuki Iwashima says:

====================
sysctl: Fix data-races around ipv4_table.

A sysctl variable can be read and written concurrently, so there is always
a chance of a data race.  All readers and writers therefore need some basic
protection to avoid load/store tearing.

The first half of this series changes some proc handlers used in ipv4_table
to use READ_ONCE() and WRITE_ONCE() internally to fix data-races on the
sysctl side.  Then, the second half adds READ_ONCE() to the other readers
of ipv4_table.

Changes:
  v2:
    * Drop some changes that make backporting difficult
      * First cleanup patch
      * Lockless helpers and .proc_handler changes
    * Drop the tracing part for .sysctl_mem
      * Steve already posted a fix
    * Drop int-to-bool change for cipso
      * Should be posted to net-next later
    * Drop proc_dobool() change
      * Can be included in another series

  v1: https://lore.kernel.org/netdev/20220706052130.16368-1-kuniyu@amazon.com/


====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 820b8963 73318c4b
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -1085,7 +1085,7 @@ cipso_cache_enable - BOOLEAN
cipso_cache_bucket_size - INTEGER
	The CIPSO label cache consists of a fixed size hash table with each
	hash bucket containing a number of cache entries.  This variable limits
	the number of entries in each hash bucket; the larger the value the
	the number of entries in each hash bucket; the larger the value is, the
	more CIPSO label mappings that can be cached.  When the number of
	entries in a given hash bucket reaches this limit adding new entries
	causes the oldest entry in the bucket to be removed to make room.
+1 −1
Original line number Diff line number Diff line
@@ -1529,7 +1529,7 @@ void __sk_mem_reclaim(struct sock *sk, int amount);
/* sysctl_mem values are in pages, we convert them in SK_MEM_QUANTUM units */
static inline long sk_prot_mem_limits(const struct sock *sk, int index)
{
	long val = sk->sk_prot->sysctl_mem[index];
	long val = READ_ONCE(sk->sk_prot->sysctl_mem[index]);

#if PAGE_SIZE > SK_MEM_QUANTUM
	val <<= PAGE_SHIFT - SK_MEM_QUANTUM_SHIFT;
+14 −11
Original line number Diff line number Diff line
@@ -446,14 +446,14 @@ static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
		if (*negp) {
			if (*lvalp > (unsigned long) INT_MAX + 1)
				return -EINVAL;
			*valp = -*lvalp;
			WRITE_ONCE(*valp, -*lvalp);
		} else {
			if (*lvalp > (unsigned long) INT_MAX)
				return -EINVAL;
			*valp = *lvalp;
			WRITE_ONCE(*valp, *lvalp);
		}
	} else {
		int val = *valp;
		int val = READ_ONCE(*valp);
		if (val < 0) {
			*negp = true;
			*lvalp = -(unsigned long)val;
@@ -472,9 +472,9 @@ static int do_proc_douintvec_conv(unsigned long *lvalp,
	if (write) {
		if (*lvalp > UINT_MAX)
			return -EINVAL;
		*valp = *lvalp;
		WRITE_ONCE(*valp, *lvalp);
	} else {
		unsigned int val = *valp;
		unsigned int val = READ_ONCE(*valp);
		*lvalp = (unsigned long)val;
	}
	return 0;
@@ -857,7 +857,7 @@ static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
		if ((param->min && *param->min > tmp) ||
		    (param->max && *param->max < tmp))
			return -EINVAL;
		*valp = tmp;
		WRITE_ONCE(*valp, tmp);
	}

	return 0;
@@ -923,7 +923,7 @@ static int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
		    (param->max && *param->max < tmp))
			return -ERANGE;

		*valp = tmp;
		WRITE_ONCE(*valp, tmp);
	}

	return 0;
@@ -1090,9 +1090,9 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table,
				err = -EINVAL;
				break;
			}
			*i = val;
			WRITE_ONCE(*i, val);
		} else {
			val = convdiv * (*i) / convmul;
			val = convdiv * READ_ONCE(*i) / convmul;
			if (!first)
				proc_put_char(&buffer, &left, '\t');
			proc_put_long(&buffer, &left, val, false);
@@ -1173,9 +1173,12 @@ static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp,
	if (write) {
		if (*lvalp > INT_MAX / HZ)
			return 1;
		*valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
		if (*negp)
			WRITE_ONCE(*valp, -*lvalp * HZ);
		else
			WRITE_ONCE(*valp, *lvalp * HZ);
	} else {
		int val = *valp;
		int val = READ_ONCE(*valp);
		unsigned long lval;
		if (val < 0) {
			*negp = true;
+7 −5
Original line number Diff line number Diff line
@@ -239,7 +239,7 @@ static int cipso_v4_cache_check(const unsigned char *key,
	struct cipso_v4_map_cache_entry *prev_entry = NULL;
	u32 hash;

	if (!cipso_v4_cache_enabled)
	if (!READ_ONCE(cipso_v4_cache_enabled))
		return -ENOENT;

	hash = cipso_v4_map_cache_hash(key, key_len);
@@ -296,13 +296,14 @@ static int cipso_v4_cache_check(const unsigned char *key,
int cipso_v4_cache_add(const unsigned char *cipso_ptr,
		       const struct netlbl_lsm_secattr *secattr)
{
	int bkt_size = READ_ONCE(cipso_v4_cache_bucketsize);
	int ret_val = -EPERM;
	u32 bkt;
	struct cipso_v4_map_cache_entry *entry = NULL;
	struct cipso_v4_map_cache_entry *old_entry = NULL;
	u32 cipso_ptr_len;

	if (!cipso_v4_cache_enabled || cipso_v4_cache_bucketsize <= 0)
	if (!READ_ONCE(cipso_v4_cache_enabled) || bkt_size <= 0)
		return 0;

	cipso_ptr_len = cipso_ptr[1];
@@ -322,7 +323,7 @@ int cipso_v4_cache_add(const unsigned char *cipso_ptr,

	bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETS - 1);
	spin_lock_bh(&cipso_v4_cache[bkt].lock);
	if (cipso_v4_cache[bkt].size < cipso_v4_cache_bucketsize) {
	if (cipso_v4_cache[bkt].size < bkt_size) {
		list_add(&entry->list, &cipso_v4_cache[bkt].list);
		cipso_v4_cache[bkt].size += 1;
	} else {
@@ -1199,7 +1200,8 @@ static int cipso_v4_gentag_rbm(const struct cipso_v4_doi *doi_def,
		/* This will send packets using the "optimized" format when
		 * possible as specified in  section 3.4.2.6 of the
		 * CIPSO draft. */
		if (cipso_v4_rbm_optfmt && ret_val > 0 && ret_val <= 10)
		if (READ_ONCE(cipso_v4_rbm_optfmt) && ret_val > 0 &&
		    ret_val <= 10)
			tag_len = 14;
		else
			tag_len = 4 + ret_val;
@@ -1603,7 +1605,7 @@ int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option)
			 * all the CIPSO validations here but it doesn't
			 * really specify _exactly_ what we need to validate
			 * ... so, just make it a sysctl tunable. */
			if (cipso_v4_rbm_strictvalid) {
			if (READ_ONCE(cipso_v4_rbm_strictvalid)) {
				if (cipso_v4_map_lvl_valid(doi_def,
							   tag[3]) < 0) {
					err_offset = opt_iter + 3;
+1 −1
Original line number Diff line number Diff line
@@ -498,7 +498,7 @@ static void tnode_free(struct key_vector *tn)
		tn = container_of(head, struct tnode, rcu)->kv;
	}

	if (tnode_free_size >= sysctl_fib_sync_mem) {
	if (tnode_free_size >= READ_ONCE(sysctl_fib_sync_mem)) {
		tnode_free_size = 0;
		synchronize_rcu();
	}
Loading