drivers/md/bcache/Kconfig  +0 −8

@@ -24,11 +24,3 @@ config BCACHE_CLOSURES_DEBUG
 	  Keeps all active closures in a linked list and provides a debugfs
 	  interface to list them, which makes it possible to see asynchronous
 	  operations that get stuck.
-
-# cgroup code needs to be updated:
-#
-#config CGROUP_BCACHE
-#	bool "Cgroup controls for bcache"
-#	depends on BCACHE && BLK_CGROUP
-#	---help---
-#	TODO
drivers/md/bcache/alloc.c  +68 −105

@@ -78,12 +78,6 @@ uint8_t bch_inc_gen(struct cache *ca, struct bucket *b)
 	ca->set->need_gc = max(ca->set->need_gc, bucket_gc_gen(b));
 	WARN_ON_ONCE(ca->set->need_gc > BUCKET_GC_GEN_MAX);
 
-	if (CACHE_SYNC(&ca->set->sb)) {
-		ca->need_save_prio = max(ca->need_save_prio,
-					 bucket_disk_gen(b));
-		WARN_ON_ONCE(ca->need_save_prio > BUCKET_DISK_GEN_MAX);
-	}
-
 	return ret;
 }
@@ -120,51 +114,45 @@ void bch_rescale_priorities(struct cache_set *c, int sectors)
 	mutex_unlock(&c->bucket_lock);
 }
 
-/* Allocation */
+/*
+ * Background allocation thread: scans for buckets to be invalidated,
+ * invalidates them, rewrites prios/gens (marking them as invalidated on disk),
+ * then optionally issues discard commands to the newly free buckets, then puts
+ * them on the various freelists.
+ */
 
 static inline bool can_inc_bucket_gen(struct bucket *b)
 {
-	return bucket_gc_gen(b) < BUCKET_GC_GEN_MAX &&
-		bucket_disk_gen(b) < BUCKET_DISK_GEN_MAX;
+	return bucket_gc_gen(b) < BUCKET_GC_GEN_MAX;
 }
 
-bool bch_bucket_add_unused(struct cache *ca, struct bucket *b)
+bool bch_can_invalidate_bucket(struct cache *ca, struct bucket *b)
 {
-	BUG_ON(GC_MARK(b) || GC_SECTORS_USED(b));
-
-	if (CACHE_REPLACEMENT(&ca->sb) == CACHE_REPLACEMENT_FIFO) {
-		unsigned i;
-
-		for (i = 0; i < RESERVE_NONE; i++)
-			if (!fifo_full(&ca->free[i]))
-				goto add;
-
-		return false;
-	}
-add:
-	b->prio = 0;
-
-	if (can_inc_bucket_gen(b) &&
-	    fifo_push(&ca->unused, b - ca->buckets)) {
-		atomic_inc(&b->pin);
-		return true;
-	}
+	BUG_ON(!ca->set->gc_mark_valid);
 
-	return false;
-}
-
-static bool can_invalidate_bucket(struct cache *ca, struct bucket *b)
-{
-	return GC_MARK(b) == GC_MARK_RECLAIMABLE &&
+	return (!GC_MARK(b) ||
+		GC_MARK(b) == GC_MARK_RECLAIMABLE) &&
 		!atomic_read(&b->pin) &&
 		can_inc_bucket_gen(b);
 }
 
-static void invalidate_one_bucket(struct cache *ca, struct bucket *b)
+void __bch_invalidate_one_bucket(struct cache *ca, struct bucket *b)
 {
+	lockdep_assert_held(&ca->set->bucket_lock);
+	BUG_ON(GC_MARK(b) && GC_MARK(b) != GC_MARK_RECLAIMABLE);
+
+	if (GC_SECTORS_USED(b))
+		trace_bcache_invalidate(ca, b - ca->buckets);
+
 	bch_inc_gen(ca, b);
 	b->prio = INITIAL_PRIO;
 	atomic_inc(&b->pin);
+}
+
+static void bch_invalidate_one_bucket(struct cache *ca, struct bucket *b)
+{
+	__bch_invalidate_one_bucket(ca, b);
+
 	fifo_push(&ca->free_inc, b - ca->buckets);
 }
@@ -195,20 +183,7 @@ static void invalidate_buckets_lru(struct cache *ca)
 	ca->heap.used = 0;
 
 	for_each_bucket(b, ca) {
-		/*
-		 * If we fill up the unused list, if we then return before
-		 * adding anything to the free_inc list we'll skip writing
-		 * prios/gens and just go back to allocating from the unused
-		 * list:
-		 */
-		if (fifo_full(&ca->unused))
-			return;
-
-		if (!can_invalidate_bucket(ca, b))
-			continue;
-
-		if (!GC_SECTORS_USED(b) &&
-		    bch_bucket_add_unused(ca, b))
+		if (!bch_can_invalidate_bucket(ca, b))
 			continue;
 
 		if (!heap_full(&ca->heap))
@@ -233,7 +208,7 @@
 			return;
 		}
 
-		invalidate_one_bucket(ca, b);
+		bch_invalidate_one_bucket(ca, b);
 	}
 }
@@ -249,8 +224,8 @@ static void invalidate_buckets_fifo(struct cache *ca)
 
 		b = ca->buckets + ca->fifo_last_bucket++;
 
-		if (can_invalidate_bucket(ca, b))
-			invalidate_one_bucket(ca, b);
+		if (bch_can_invalidate_bucket(ca, b))
+			bch_invalidate_one_bucket(ca, b);
 
 		if (++checked >= ca->sb.nbuckets) {
 			ca->invalidate_needs_gc = 1;
@@ -274,8 +249,8 @@ static void invalidate_buckets_random(struct cache *ca)
 
 		b = ca->buckets + n;
 
-		if (can_invalidate_bucket(ca, b))
-			invalidate_one_bucket(ca, b);
+		if (bch_can_invalidate_bucket(ca, b))
+			bch_invalidate_one_bucket(ca, b);
 
 		if (++checked >= ca->sb.nbuckets / 2) {
 			ca->invalidate_needs_gc = 1;
@@ -287,8 +262,7 @@
 
 static void invalidate_buckets(struct cache *ca)
 {
-	if (ca->invalidate_needs_gc)
-		return;
+	BUG_ON(ca->invalidate_needs_gc);
 
 	switch (CACHE_REPLACEMENT(&ca->sb)) {
 	case CACHE_REPLACEMENT_LRU:
@@ -301,8 +275,6 @@ static void invalidate_buckets(struct cache *ca)
 		invalidate_buckets_random(ca);
 		break;
 	}
-
-	trace_bcache_alloc_invalidate(ca);
 }
 
 #define allocator_wait(ca, cond)					\
@@ -350,17 +322,10 @@ static int bch_allocator_thread(void *arg)
 		 * possibly issue discards to them, then we add the bucket to
 		 * the free list:
 		 */
-		while (1) {
+		while (!fifo_empty(&ca->free_inc)) {
 			long bucket;
 
-			if ((!atomic_read(&ca->set->prio_blocked) ||
-			     !CACHE_SYNC(&ca->set->sb)) &&
-			    !fifo_empty(&ca->unused))
-				fifo_pop(&ca->unused, bucket);
-			else if (!fifo_empty(&ca->free_inc))
-				fifo_pop(&ca->free_inc, bucket);
-			else
-				break;
+			fifo_pop(&ca->free_inc, bucket);
 
 			if (ca->discard) {
 				mutex_unlock(&ca->set->bucket_lock);
@@ -371,6 +336,7 @@ static int bch_allocator_thread(void *arg)
 			}
 
 			allocator_wait(ca, bch_allocator_push(ca, bucket));
+			wake_up(&ca->set->btree_cache_wait);
 			wake_up(&ca->set->bucket_wait);
 		}
@@ -380,9 +346,9 @@ static int bch_allocator_thread(void *arg)
 		 * them to the free_inc list:
 		 */
+retry_invalidate:
 		allocator_wait(ca, ca->set->gc_mark_valid &&
-			       (ca->need_save_prio > 64 ||
-				!ca->invalidate_needs_gc));
+			       !ca->invalidate_needs_gc);
 		invalidate_buckets(ca);
 
 		/*
@@ -390,12 +356,27 @@ static int bch_allocator_thread(void *arg)
 		 * new stuff to them:
 		 */
 		allocator_wait(ca, !atomic_read(&ca->set->prio_blocked));
-		if (CACHE_SYNC(&ca->set->sb) &&
-		    (!fifo_empty(&ca->free_inc) ||
-		     ca->need_save_prio > 64))
+		if (CACHE_SYNC(&ca->set->sb)) {
+			/*
+			 * This could deadlock if an allocation with a btree
+			 * node locked ever blocked - having the btree node
+			 * locked would block garbage collection, but here we're
+			 * waiting on garbage collection before we invalidate
+			 * and free anything.
+			 *
+			 * But this should be safe since the btree code always
+			 * uses btree_check_reserve() before allocating now, and
+			 * if it fails it blocks without btree nodes locked.
+			 */
+			if (!fifo_full(&ca->free_inc))
+				goto retry_invalidate;
+
 			bch_prio_write(ca);
+		}
 	}
 }
 
+/* Allocation */
+
 long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait)
 {
@@ -408,8 +389,10 @@ long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait)
 	    fifo_pop(&ca->free[reserve], r))
 		goto out;
 
-	if (!wait)
+	if (!wait) {
+		trace_bcache_alloc_fail(ca, reserve);
 		return -1;
+	}
 
 	do {
 		prepare_to_wait(&ca->set->bucket_wait, &w,
@@ -425,6 +408,8 @@ long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait)
 out:
 	wake_up_process(ca->alloc_thread);
 
+	trace_bcache_alloc(ca, reserve);
+
 	if (expensive_debug_checks(ca->set)) {
 		size_t iter;
 		long i;
@@ -438,8 +423,6 @@ long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait)
 			BUG_ON(i == r);
 		fifo_for_each(i, &ca->free_inc, iter)
 			BUG_ON(i == r);
-		fifo_for_each(i, &ca->unused, iter)
-			BUG_ON(i == r);
 	}
 
 	b = ca->buckets + r;
@@ -461,17 +444,19 @@ long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait)
 	return r;
 }
 
+void __bch_bucket_free(struct cache *ca, struct bucket *b)
+{
+	SET_GC_MARK(b, 0);
+	SET_GC_SECTORS_USED(b, 0);
+}
+
 void bch_bucket_free(struct cache_set *c, struct bkey *k)
 {
 	unsigned i;
 
-	for (i = 0; i < KEY_PTRS(k); i++) {
-		struct bucket *b = PTR_BUCKET(c, k, i);
-
-		SET_GC_MARK(b, GC_MARK_RECLAIMABLE);
-		SET_GC_SECTORS_USED(b, 0);
-		bch_bucket_add_unused(PTR_CACHE(c, k, i), b);
-	}
+	for (i = 0; i < KEY_PTRS(k); i++)
+		__bch_bucket_free(PTR_CACHE(c, k, i),
+				  PTR_BUCKET(c, k, i));
 }
 
 int __bch_bucket_alloc_set(struct cache_set *c, unsigned reserve,
@@ -709,25 +694,3 @@ int bch_cache_allocator_start(struct cache *ca)
 	ca->alloc_thread = k;
 	return 0;
 }
-
-int bch_cache_allocator_init(struct cache *ca)
-{
-	/*
-	 * Reserve:
-	 * Prio/gen writes first
-	 * Then 8 for btree allocations
-	 * Then half for the moving garbage collector
-	 */
-#if 0
-	ca->watermark[WATERMARK_PRIO] = 0;
-
-	ca->watermark[WATERMARK_METADATA] = prio_buckets(ca);
-
-	ca->watermark[WATERMARK_MOVINGGC] = 8 +
-		ca->watermark[WATERMARK_METADATA];
-
-	ca->watermark[WATERMARK_NONE] = ca->free.size / 2 +
-		ca->watermark[WATERMARK_MOVINGGC];
-#endif
-	return 0;
-}
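With the unused freelist and need_save_prio gone, bch_allocator_thread() reduces to a two-phase loop: drain free_inc onto the reserve freelists (issuing discards first when enabled), then invalidate more buckets and, on a CACHE_SYNC cache set, rewrite prios/gens before those buckets can be handed out again. The user-space sketch below models only that control flow; the struct and helper names (model_cache, fifo_pop_model(), push_to_freelist(), prio_write() and so on) are stand-ins rather than the kernel APIs, and all locking and waiting is left out.

#include <stdbool.h>
#include <stddef.h>

/* Stand-in allocator state: just enough to model the loop structure. */
struct model_cache {
	long	free_inc[8];	/* buckets already invalidated, not yet usable */
	size_t	free_inc_nr;
	bool	sync;		/* models CACHE_SYNC(&ca->set->sb) */
	bool	discard;	/* models ca->discard */
};

static bool fifo_pop_model(struct model_cache *ca, long *bucket)
{
	if (!ca->free_inc_nr)
		return false;
	*bucket = ca->free_inc[--ca->free_inc_nr];
	return true;
}

/* Hypothetical stand-ins for blkdev_issue_discard(), bch_allocator_push(),
 * invalidate_buckets() and bch_prio_write() in the real code. */
static void discard_bucket(long bucket)			{ (void)bucket; }
static void push_to_freelist(long bucket)		{ (void)bucket; }
static void invalidate_buckets(struct model_cache *ca)	{ (void)ca; }
static void prio_write(struct model_cache *ca)		{ (void)ca; }

/* One iteration of the allocator's outer loop, in the same order as the
 * new code: drain free_inc, then invalidate, then persist prios/gens. */
static void allocator_pass(struct model_cache *ca)
{
	long bucket;

	while (fifo_pop_model(ca, &bucket)) {
		if (ca->discard)
			discard_bucket(bucket);	/* kernel drops bucket_lock here */
		push_to_freelist(bucket);	/* may have to wait for space */
	}

	invalidate_buckets(ca);			/* refills free_inc */

	if (ca->sync)
		prio_write(ca);			/* the real code first retries
						 * invalidation via the
						 * retry_invalidate label if
						 * free_inc isn't full yet */
}

int main(void)
{
	struct model_cache ca = { .free_inc = { 3, 5, 7 }, .free_inc_nr = 3,
				  .sync = true, .discard = false };

	allocator_pass(&ca);
	return 0;
}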
drivers/md/bcache/bcache.h  +16 −40

@@ -195,9 +195,7 @@
 struct bucket {
 	atomic_t	pin;
 	uint16_t	prio;
 	uint8_t		gen;
-	uint8_t		disk_gen;
 	uint8_t		last_gc; /* Most out of date gen in the btree */
-	uint8_t		gc_gen;
 	uint16_t	gc_mark; /* Bitfield used by GC. See below for field */
 };
@@ -207,9 +205,9 @@ struct bucket {
  */
 
 BITMASK(GC_MARK,	 struct bucket, gc_mark, 0, 2);
-#define GC_MARK_RECLAIMABLE	0
-#define GC_MARK_DIRTY		1
-#define GC_MARK_METADATA	2
+#define GC_MARK_RECLAIMABLE	1
+#define GC_MARK_DIRTY		2
+#define GC_MARK_METADATA	3
 #define GC_SECTORS_USED_SIZE	13
 #define MAX_GC_SECTORS_USED	(~(~0ULL << GC_SECTORS_USED_SIZE))
 BITMASK(GC_SECTORS_USED, struct bucket, gc_mark, 2, GC_SECTORS_USED_SIZE);
@@ -426,14 +424,9 @@ struct cache {
 	 * their new gen to disk. After prio_write() finishes writing the new
 	 * gens/prios, they'll be moved to the free list (and possibly discarded
 	 * in the process)
-	 *
-	 * unused: GC found nothing pointing into these buckets (possibly
-	 * because all the data they contained was overwritten), so we only
-	 * need to discard them before they can be moved to the free list.
 	 */
 	DECLARE_FIFO(long, free)[RESERVE_NR];
 	DECLARE_FIFO(long, free_inc);
-	DECLARE_FIFO(long, unused);
 
 	size_t			fifo_last_bucket;
@@ -442,12 +435,6 @@ struct cache {
 	DECLARE_HEAP(struct bucket *, heap);
 
-	/*
-	 * max(gen - disk_gen) for all buckets. When it gets too big we have to
-	 * call prio_write() to keep gens from wrapping.
-	 */
-	uint8_t			need_save_prio;
-
 	/*
 	 * If nonzero, we know we aren't going to find any buckets to invalidate
 	 * until a gc finishes - otherwise we could pointlessly burn a ton of
@@ -562,19 +549,16 @@ struct cache_set {
 	struct list_head	btree_cache_freed;
 
 	/* Number of elements in btree_cache + btree_cache_freeable lists */
-	unsigned		bucket_cache_used;
+	unsigned		btree_cache_used;
 
 	/*
 	 * If we need to allocate memory for a new btree node and that
 	 * allocation fails, we can cannibalize another node in the btree cache
-	 * to satisfy the allocation. However, only one thread can be doing this
-	 * at a time, for obvious reasons - try_harder and try_wait are
-	 * basically a lock for this that we can wait on asynchronously. The
-	 * btree_root() macro releases the lock when it returns.
+	 * to satisfy the allocation - lock to guarantee only one thread does
+	 * this at a time:
 	 */
-	struct task_struct	*try_harder;
-	wait_queue_head_t	try_wait;
-	uint64_t		try_harder_start;
+	wait_queue_head_t	btree_cache_wait;
+	struct task_struct	*btree_cache_alloc_lock;
 
 	/*
 	 * When we free a btree node, we increment the gen of the bucket the
@@ -603,7 +587,7 @@ struct cache_set {
 	uint16_t		min_prio;
 
 	/*
-	 * max(gen - gc_gen) for all buckets. When it gets too big we have to gc
+	 * max(gen - last_gc) for all buckets. When it gets too big we have to gc
 	 * to keep gens from wrapping around.
 	 */
 	uint8_t			need_gc;
@@ -628,6 +612,8 @@ struct cache_set {
 	/* Number of moving GC bios in flight */
 	struct semaphore	moving_in_flight;
 
+	struct workqueue_struct	*moving_gc_wq;
+
 	struct btree		*root;
 
 #ifdef CONFIG_BCACHE_DEBUG
@@ -667,7 +653,6 @@ struct cache_set {
 	struct time_stats	btree_gc_time;
 	struct time_stats	btree_split_time;
 	struct time_stats	btree_read_time;
-	struct time_stats	try_harder_time;
 
 	atomic_long_t		cache_read_races;
 	atomic_long_t		writeback_keys_done;
@@ -850,9 +835,6 @@ static inline bool cached_dev_get(struct cached_dev *dc)
 /*
  * bucket_gc_gen() returns the difference between the bucket's current gen and
  * the oldest gen of any pointer into that bucket in the btree (last_gc).
- *
- * bucket_disk_gen() returns the difference between the current gen and the gen
- * on disk; they're both used to make sure gens don't wrap around.
  */
 
 static inline uint8_t bucket_gc_gen(struct bucket *b)
@@ -860,13 +842,7 @@ static inline uint8_t bucket_gc_gen(struct bucket *b)
 	return b->gen - b->last_gc;
 }
 
-static inline uint8_t bucket_disk_gen(struct bucket *b)
-{
-	return b->gen - b->disk_gen;
-}
-
 #define BUCKET_GC_GEN_MAX	96U
-#define BUCKET_DISK_GEN_MAX	64U
 
 #define kobj_attribute_write(n, fn)					\
 	static struct kobj_attribute ksysfs_##n = __ATTR(n, S_IWUSR, NULL, fn)
@@ -899,11 +875,14 @@
 void bch_submit_bbio(struct bio *, struct cache_set *, struct bkey *, unsigned);
 
 uint8_t bch_inc_gen(struct cache *, struct bucket *);
 void bch_rescale_priorities(struct cache_set *, int);
-bool bch_bucket_add_unused(struct cache *, struct bucket *);
 
-long bch_bucket_alloc(struct cache *, unsigned, bool);
+bool bch_can_invalidate_bucket(struct cache *, struct bucket *);
+void __bch_invalidate_one_bucket(struct cache *, struct bucket *);
+
+void __bch_bucket_free(struct cache *, struct bucket *);
 void bch_bucket_free(struct cache_set *, struct bkey *);
 
+long bch_bucket_alloc(struct cache *, unsigned, bool);
 int __bch_bucket_alloc_set(struct cache_set *, unsigned,
 			   struct bkey *, int, bool);
 int bch_bucket_alloc_set(struct cache_set *, unsigned,
@@ -954,13 +933,10 @@
 int bch_open_buckets_alloc(struct cache_set *);
 void bch_open_buckets_free(struct cache_set *);
 
 int bch_cache_allocator_start(struct cache *ca);
-int bch_cache_allocator_init(struct cache *ca);
 
 void bch_debug_exit(void);
 int bch_debug_init(struct kobject *);
 void bch_request_exit(void);
 int bch_request_init(void);
-void bch_btree_exit(void);
-int bch_btree_init(void);
 
 #endif /* _BCACHE_H */
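A side effect of the bcache.h hunks is that GC_MARK_RECLAIMABLE moves from 0 to 1, so a bucket whose mark was cleared by __bch_bucket_free() is now distinct from one that garbage collection explicitly marked reclaimable. The snippet below only illustrates the 16-bit gc_mark layout those definitions imply (2 bits of mark, 13 bits of GC_SECTORS_USED); the real accessors are generated by bcache's BITMASK() macro, and the open-coded shifts here are just a stand-alone model.

#include <assert.h>
#include <stdint.h>

/* Model of the gc_mark layout implied above: bits 0-1 hold the GC mark,
 * bits 2-14 hold GC_SECTORS_USED. Illustration only, not the kernel code. */
#define GC_MARK_RECLAIMABLE	1	/* 0 now means "not marked by GC" */
#define GC_MARK_DIRTY		2
#define GC_MARK_METADATA	3

#define GC_SECTORS_USED_SIZE	13
#define MAX_GC_SECTORS_USED	(~(~0U << GC_SECTORS_USED_SIZE))

static inline unsigned gc_mark(uint16_t gc_word)
{
	return gc_word & 3;
}

static inline unsigned gc_sectors_used(uint16_t gc_word)
{
	return (gc_word >> 2) & MAX_GC_SECTORS_USED;
}

int main(void)
{
	uint16_t w = (uint16_t)((100 << 2) | GC_MARK_DIRTY);

	assert(gc_mark(w) == GC_MARK_DIRTY);
	assert(gc_sectors_used(w) == 100);
	return 0;
}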
drivers/md/bcache/bset.c  +2 −2

@@ -23,8 +23,8 @@ void bch_dump_bset(struct btree_keys *b, struct bset *i, unsigned set)
 	for (k = i->start; k < bset_bkey_last(i); k = next) {
 		next = bkey_next(k);
 
-		printk(KERN_ERR "block %u key %li/%u: ", set,
-		       (uint64_t *) k - i->d, i->keys);
+		printk(KERN_ERR "block %u key %u/%u: ", set,
+		       (unsigned) ((u64 *) k - i->d), i->keys);
 
 		if (b->ops->key_dump)
 			b->ops->key_dump(b, k);
drivers/md/bcache/bset.h  +6 −0

@@ -478,6 +478,12 @@ static inline void bch_keylist_init(struct keylist *l)
 	l->top_p = l->keys_p = l->inline_keys;
 }
 
+static inline void bch_keylist_init_single(struct keylist *l, struct bkey *k)
+{
+	l->keys = k;
+	l->top = bkey_next(k);
+}
+
 static inline void bch_keylist_push(struct keylist *l)
 {
 	l->top = bkey_next(l->top);
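bch_keylist_init_single() builds a keylist that holds exactly one pre-existing key: keys points at the key and top points just past it, so a consumer walking from keys to top sees a single element. The self-contained model below demonstrates that invariant; the struct definitions and the fixed-size bkey_next() are simplified stand-ins, not the real bcache types, which encode variable-length keys.

#include <assert.h>
#include <stdint.h>

/* Minimal stand-ins for the bcache types involved; the real struct bkey and
 * struct keylist carry more fields and use unions of pointers. */
struct bkey {
	uint64_t	high;
	uint64_t	low;
	uint64_t	ptr[1];
};

struct keylist {
	struct bkey	*keys;
	struct bkey	*top;
};

/* bkey_next() in bcache advances by the key's encoded length; stepping to
 * the next array slot is enough for this fixed-size stand-in. */
static struct bkey *bkey_next(struct bkey *k)
{
	return k + 1;
}

/* Mirrors the new helper from the hunk above: the list holds exactly k. */
static void bch_keylist_init_single(struct keylist *l, struct bkey *k)
{
	l->keys = k;
	l->top = bkey_next(k);
}

int main(void)
{
	struct bkey k = { .low = 42 };
	struct keylist l;
	unsigned n = 0;

	bch_keylist_init_single(&l, &k);

	for (struct bkey *i = l.keys; i != l.top; i = bkey_next(i))
		n++;			/* consumers walk from keys to top */

	assert(n == 1);
	return 0;
}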