Commit 35b72573 authored by David Howells's avatar David Howells
Browse files

fscache: Fix cookie key hashing



The current hash algorithm used for hashing cookie keys is really bad,
producing almost no dispersion (after a test kernel build, ~30000 files
were split over just 18 out of the 32768 hash buckets).

Borrow the full_name_hash() hash function into fscache to do the hashing
for cookie keys and, in the future, volume keys.

I don't want to use full_name_hash() as-is because I want the hash value to
be consistent across arches and over time as the hash value produced may
get used on disk.

I can also optimise parts of it away as the key will always be a padded
array of aligned 32-bit words.

Fixes: ec0328e4 ("fscache: Maintain a catalogue of allocated cookies")
Signed-off-by: default avatarDavid Howells <dhowells@redhat.com>
Reviewed-by: default avatarJeff Layton <jlayton@redhat.com>
cc: linux-cachefs@redhat.com
Link: https://lore.kernel.org/r/162431201844.2908479.8293647220901514696.stgit@warthog.procyon.org.uk/
parent 8beabdde
Loading
Loading
Loading
Loading
+1 −13
Original line number Diff line number Diff line
@@ -87,10 +87,8 @@ void fscache_free_cookie(struct fscache_cookie *cookie)
static int fscache_set_key(struct fscache_cookie *cookie,
			   const void *index_key, size_t index_key_len)
{
	unsigned long long h;
	u32 *buf;
	int bufs;
	int i;

	bufs = DIV_ROUND_UP(index_key_len, sizeof(*buf));

@@ -104,17 +102,7 @@ static int fscache_set_key(struct fscache_cookie *cookie,
	}

	memcpy(buf, index_key, index_key_len);

	/* Calculate a hash and combine this with the length in the first word
	 * or first half word
	 */
	h = (unsigned long)cookie->parent;
	h += index_key_len + cookie->type;

	for (i = 0; i < bufs; i++)
		h += buf[i];

	cookie->key_hash = h ^ (h >> 32);
	cookie->key_hash = fscache_hash(0, buf, bufs);
	return 0;
}

+2 −0
Original line number Diff line number Diff line
@@ -74,6 +74,8 @@ extern struct workqueue_struct *fscache_object_wq;
extern struct workqueue_struct *fscache_op_wq;
DECLARE_PER_CPU(wait_queue_head_t, fscache_object_cong_wait);

extern unsigned int fscache_hash(unsigned int salt, unsigned int *data, unsigned int n);

static inline bool fscache_object_congested(void)
{
	return workqueue_congested(WORK_CPU_UNBOUND, fscache_object_wq);
+39 −0
Original line number Diff line number Diff line
@@ -93,6 +93,45 @@ static struct ctl_table fscache_sysctls_root[] = {
};
#endif

/*
 * Mixing scores (in bits) for (7,20):
 * Input delta: 1-bit      2-bit
 * 1 round:     330.3     9201.6
 * 2 rounds:   1246.4    25475.4
 * 3 rounds:   1907.1    31295.1
 * 4 rounds:   2042.3    31718.6
 * Perfect:    2048      31744
 *            (32*64)   (32*31/2 * 64)
 */
#define HASH_MIX(x, y, a)	\
	(	x ^= (a),	\
	y ^= x,	x = rol32(x, 7),\
	x += y,	y = rol32(y,20),\
	y *= 9			)

static inline unsigned int fold_hash(unsigned long x, unsigned long y)
{
	/* Use arch-optimized multiply if one exists */
	return __hash_32(y ^ __hash_32(x));
}

/*
 * Generate a hash.  This is derived from full_name_hash(), but we want to be
 * sure it is arch independent and that it doesn't change as bits of the
 * computed hash value might appear on disk.  The caller also guarantees that
 * the hashed data will be a series of aligned 32-bit words.
 */
unsigned int fscache_hash(unsigned int salt, unsigned int *data, unsigned int n)
{
	unsigned int a, x = 0, y = salt;

	for (; n; n--) {
		a = *data++;
		HASH_MIX(x, y, a);
	}
	return fold_hash(x, y);
}

/*
 * initialise the fs caching module
 */