drivers/md/bcache/bcache.h (+0 −8)

```diff
@@ -717,14 +717,6 @@ struct bbio {
 #define bucket_bytes(c)		((c)->sb.bucket_size << 9)
 #define block_bytes(c)		((c)->sb.block_size << 9)
 
-#define __set_bytes(i, k)	(sizeof(*(i)) + (k) * sizeof(uint64_t))
-#define set_bytes(i)		__set_bytes(i, i->keys)
-
-#define __set_blocks(i, k, c)	DIV_ROUND_UP(__set_bytes(i, k), block_bytes(c))
-#define set_blocks(i, c)	__set_blocks(i, (i)->keys, c)
-
-#define btree_data_space(b)	(PAGE_SIZE << (b)->page_order)
-
 #define prios_per_bucket(c)				\
 	((bucket_bytes(c) - sizeof(struct prio_set)) /	\
 	 sizeof(struct bucket_disk))
```

drivers/md/bcache/bset.c (+149 −23)

```diff
@@ -302,6 +302,115 @@ bool bch_bkey_try_merge(struct btree *b, struct bkey *l, struct bkey *r)
 	return true;
 }
 
+/* Auxiliary search trees */
+
+/* 32 bits total: */
+#define BKEY_MID_BITS		3
+#define BKEY_EXPONENT_BITS	7
+#define BKEY_MANTISSA_BITS	(32 - BKEY_MID_BITS - BKEY_EXPONENT_BITS)
+#define BKEY_MANTISSA_MASK	((1 << BKEY_MANTISSA_BITS) - 1)
+
+struct bkey_float {
+	unsigned	exponent:BKEY_EXPONENT_BITS;
+	unsigned	m:BKEY_MID_BITS;
+	unsigned	mantissa:BKEY_MANTISSA_BITS;
+} __packed;
+
+/*
+ * BSET_CACHELINE was originally intended to match the hardware cacheline size -
+ * it used to be 64, but I realized the lookup code would touch slightly less
+ * memory if it was 128.
+ *
+ * It defines the number of bytes (in struct bset) per struct bkey_float in
+ * the auxiliary search tree - when we're done searching the bset_float tree we
+ * have this many bytes left that we do a linear search over.
+ *
+ * Since (after level 5) every level of the bset_tree is on a new cacheline,
+ * we're touching one fewer cacheline in the bset tree in exchange for one more
+ * cacheline in the linear search - but the linear search might stop before it
+ * gets to the second cacheline.
+ */
+#define BSET_CACHELINE		128
+
+/* Space required for the btree node keys */
+static inline size_t btree_keys_bytes(struct btree *b)
+{
+	return PAGE_SIZE << b->page_order;
+}
+
+static inline size_t btree_keys_cachelines(struct btree *b)
+{
+	return btree_keys_bytes(b) / BSET_CACHELINE;
+}
+
+/* Space required for the auxiliary search trees */
+static inline size_t bset_tree_bytes(struct btree *b)
+{
+	return btree_keys_cachelines(b) * sizeof(struct bkey_float);
+}
+
+/* Space required for the prev pointers */
+static inline size_t bset_prev_bytes(struct btree *b)
+{
+	return btree_keys_cachelines(b) * sizeof(uint8_t);
+}
+
+/* Memory allocation */
+
+void bch_btree_keys_free(struct btree *b)
+{
+	struct bset_tree *t = b->sets;
+
+	if (bset_prev_bytes(b) < PAGE_SIZE)
+		kfree(t->prev);
+	else
+		free_pages((unsigned long) t->prev,
+			   get_order(bset_prev_bytes(b)));
+
+	if (bset_tree_bytes(b) < PAGE_SIZE)
+		kfree(t->tree);
+	else
+		free_pages((unsigned long) t->tree,
+			   get_order(bset_tree_bytes(b)));
+
+	free_pages((unsigned long) t->data, b->page_order);
+
+	t->prev = NULL;
+	t->tree = NULL;
+	t->data = NULL;
+}
+
+int bch_btree_keys_alloc(struct btree *b, unsigned page_order, gfp_t gfp)
+{
+	struct bset_tree *t = b->sets;
+
+	BUG_ON(t->data);
+
+	b->page_order = page_order;
+
+	t->data = (void *) __get_free_pages(gfp, b->page_order);
+	if (!t->data)
+		goto err;
+
+	t->tree = bset_tree_bytes(b) < PAGE_SIZE ?
+		kmalloc(bset_tree_bytes(b), gfp) :
+		(void *) __get_free_pages(gfp, get_order(bset_tree_bytes(b)));
+	if (!t->tree)
+		goto err;
+
+	t->prev = bset_prev_bytes(b) < PAGE_SIZE ?
+		kmalloc(bset_prev_bytes(b), gfp) :
+		(void *) __get_free_pages(gfp, get_order(bset_prev_bytes(b)));
+	if (!t->prev)
+		goto err;
+
+	return 0;
+err:
+	bch_btree_keys_free(b);
+	return -ENOMEM;
+}
+
 /* Binary tree stuff for auxiliary search trees */
 
 static unsigned inorder_next(unsigned j, unsigned size)
```
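For a sense of scale, here is a minimal userspace sketch of the sizing arithmetic above. The 4 KiB PAGE_SIZE and the order range are assumptions for illustration; struct bkey_float is copied from the patch and is 4 bytes packed, so the auxiliary tree costs ~3% of the node:

```c
/* Standalone sketch (not kernel code) of btree_keys_bytes(),
 * btree_keys_cachelines(), bset_tree_bytes() and bset_prev_bytes(),
 * assuming PAGE_SIZE is 4096. */
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE	4096
#define BSET_CACHELINE	128

struct bkey_float {
	unsigned	exponent:7;
	unsigned	m:3;
	unsigned	mantissa:22;
} __attribute__((packed));

int main(void)
{
	for (unsigned page_order = 0; page_order <= 3; page_order++) {
		size_t keys_bytes = (size_t) PAGE_SIZE << page_order;
		size_t cachelines = keys_bytes / BSET_CACHELINE;
		size_t tree_bytes = cachelines * sizeof(struct bkey_float);
		size_t prev_bytes = cachelines * sizeof(uint8_t);

		printf("order %u: %zu key bytes -> %zu bkey_floats (%zu + %zu aux bytes)\n",
		       page_order, keys_bytes, cachelines,
		       tree_bytes, prev_bytes);
	}
	return 0;
}
```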
drivers/md/bcache/bset.c, continued:

```diff
@@ -538,21 +647,36 @@ static void bset_alloc_tree(struct btree *b, struct bset_tree *t)
 	t++->size = 0;
 }
 
-static void bset_build_unwritten_tree(struct btree *b)
+static void bch_bset_build_unwritten_tree(struct btree *b)
 {
-	struct bset_tree *t = b->sets + b->nsets;
+	struct bset_tree *t = bset_tree_last(b);
 
 	bset_alloc_tree(b, t);
 
-	if (t->tree != b->sets->tree + bset_tree_space(b)) {
+	if (t->tree != b->sets->tree + btree_keys_cachelines(b)) {
 		t->prev[0] = bkey_to_cacheline_offset(t->data->start);
 		t->size = 1;
 	}
 }
 
+void bch_bset_init_next(struct btree *b, struct bset *i, uint64_t magic)
+{
+	if (i != b->sets->data) {
+		b->sets[++b->nsets].data = i;
+		i->seq = b->sets->data->seq;
+	} else
+		get_random_bytes(&i->seq, sizeof(uint64_t));
+
+	i->magic	= magic;
+	i->version	= 0;
+	i->keys		= 0;
+
+	bch_bset_build_unwritten_tree(b);
+}
+
 static void bset_build_written_tree(struct btree *b)
 {
-	struct bset_tree *t = b->sets + b->nsets;
+	struct bset_tree *t = bset_tree_last(b);
 	struct bkey *k = t->data->start;
 	unsigned j, cacheline = 1;
@@ -560,7 +684,7 @@ static void bset_build_written_tree(struct btree *b)
 	t->size = min_t(unsigned,
 			bkey_to_cacheline(t, bset_bkey_last(t->data)),
-			b->sets->tree + bset_tree_space(b) - t->tree);
+			b->sets->tree + btree_keys_cachelines(b) - t->tree);
 
 	if (t->size < 2) {
 		t->size = 0;
@@ -599,7 +723,7 @@ void bch_bset_fix_invalidated_key(struct btree *b, struct bkey *k)
 	struct bset_tree *t;
 	unsigned inorder, j = 1;
 
-	for (t = b->sets; t <= &b->sets[b->nsets]; t++)
+	for (t = b->sets; t <= bset_tree_last(b); t++)
 		if (k < bset_bkey_last(t->data))
 			goto found_set;
@@ -639,9 +763,10 @@ fix_right:	do {
 	} while (j < t->size);
 }
 
-void bch_bset_fix_lookup_table(struct btree *b, struct bkey *k)
+static void bch_bset_fix_lookup_table(struct btree *b,
+				      struct bset_tree *t,
+				      struct bkey *k)
 {
-	struct bset_tree *t = &b->sets[b->nsets];
 	unsigned shift = bkey_u64s(k);
 	unsigned j = bkey_to_cacheline(t, k);
@@ -673,7 +798,7 @@ static void bch_bset_fix_lookup_table(struct btree *b,
 		}
 	}
 
-	if (t->size == b->sets->tree + bset_tree_space(b) - t->tree)
+	if (t->size == b->sets->tree + btree_keys_cachelines(b) - t->tree)
 		return;
 
 	/* Possibly add a new entry to the end of the lookup table */
@@ -687,21 +812,23 @@ static void bch_bset_fix_lookup_table(struct btree *b,
 	}
 }
 
-void bch_bset_init_next(struct btree *b)
+void bch_bset_insert(struct btree *b, struct bkey *where,
+		     struct bkey *insert)
 {
-	struct bset *i = write_block(b);
+	struct bset_tree *t = bset_tree_last(b);
 
-	if (i != b->sets[0].data) {
-		b->sets[++b->nsets].data = i;
-		i->seq = b->sets[0].data->seq;
-	} else
-		get_random_bytes(&i->seq, sizeof(uint64_t));
+	BUG_ON(t->data != write_block(b));
+	BUG_ON(bset_byte_offset(b, t->data) +
+	       __set_bytes(t->data, t->data->keys + bkey_u64s(insert)) >
+	       PAGE_SIZE << b->page_order);
 
-	i->magic	= bset_magic(&b->c->sb);
-	i->version	= 0;
-	i->keys		= 0;
+	memmove((uint64_t *) where + bkey_u64s(insert),
+		where,
+		(void *) bset_bkey_last(t->data) - (void *) where);
 
-	bset_build_unwritten_tree(b);
+	t->data->keys += bkey_u64s(insert);
+	bkey_copy(where, insert);
+	bch_bset_fix_lookup_table(b, t, where);
 }
 
 struct bset_search_iter {
@@ -1154,9 +1281,8 @@ void bch_btree_sort_partial(struct btree *b, unsigned start,
 	__bch_btree_iter_init(b, &iter, NULL, &b->sets[start]);
 
-	BUG_ON(b->sets[b->nsets].data == write_block(b) &&
-	       (b->sets[b->nsets].size || b->nsets));
+	BUG_ON(!bset_written(b, bset_tree_last(b)) &&
+	       (bset_tree_last(b)->size || b->nsets));
 
 	if (start) {
 		unsigned i;
```
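The new bch_bset_insert() is shift_keys() from btree.c, moved here and taught to take the bset_tree explicitly. A userspace sketch of the underlying pattern (fixed-size entries for simplicity; the real code shifts variable-size keys measured in u64s via bkey_u64s()):

```c
/* Userspace sketch of the bch_bset_insert() pattern: open a gap with
 * memmove(), copy the new entry in, then bump the element count. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void insert_at(uint64_t *arr, size_t *nr, size_t pos, uint64_t v)
{
	/* Shift everything from pos onward up by one slot */
	memmove(&arr[pos + 1], &arr[pos], (*nr - pos) * sizeof(*arr));
	arr[pos] = v;	/* bkey_copy(where, insert) analogue */
	(*nr)++;	/* t->data->keys += bkey_u64s(insert) analogue */
}

int main(void)
{
	uint64_t arr[8] = { 10, 20, 40 };
	size_t nr = 3;

	insert_at(arr, &nr, 2, 30);	/* keep the array sorted */

	for (size_t i = 0; i < nr; i++)
		printf("%llu ", (unsigned long long) arr[i]);
	printf("\n");
	return 0;
}
```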
drivers/md/bcache/bset.h (+114 −133)

```diff
@@ -144,22 +144,11 @@
  * first key in that range of bytes again.
  */
 
-struct cache_set;
+/* Btree key comparison/iteration */
+
+struct btree;
+struct bkey_float;
 
 #define MAX_BSETS		4U
 
-struct btree_iter {
-	size_t size, used;
-#ifdef CONFIG_BCACHE_DEBUG
-	struct btree *b;
-#endif
-	struct btree_iter_set {
-		struct bkey *k, *end;
-	} data[MAX_BSETS];
-};
-
 struct bset_tree {
 	/*
 	 * We construct a binary tree in an array as if the array
@@ -192,6 +181,55 @@ struct bset_tree {
 	struct bset	*data;
 };
 
+#define __set_bytes(i, k)	(sizeof(*(i)) + (k) * sizeof(uint64_t))
+#define set_bytes(i)		__set_bytes(i, i->keys)
+
+#define __set_blocks(i, k, block_bytes)				\
+	DIV_ROUND_UP(__set_bytes(i, k), block_bytes)
+#define set_blocks(i, block_bytes)				\
+	__set_blocks(i, (i)->keys, block_bytes)
+
+void bch_btree_keys_free(struct btree *);
+int bch_btree_keys_alloc(struct btree *, unsigned, gfp_t);
+
+void bch_bset_fix_invalidated_key(struct btree *, struct bkey *);
+void bch_bset_init_next(struct btree *, struct bset *, uint64_t);
+void bch_bset_insert(struct btree *, struct bkey *, struct bkey *);
+
+/* Btree key iteration */
+
+struct btree_iter {
+	size_t size, used;
+#ifdef CONFIG_BCACHE_DEBUG
+	struct btree *b;
+#endif
+	struct btree_iter_set {
+		struct bkey *k, *end;
+	} data[MAX_BSETS];
+};
+
+typedef bool (*ptr_filter_fn)(struct btree *, const struct bkey *);
+
+struct bkey *bch_btree_iter_next(struct btree_iter *);
+struct bkey *bch_btree_iter_next_filter(struct btree_iter *,
+					struct btree *, ptr_filter_fn);
+
+void bch_btree_iter_push(struct btree_iter *, struct bkey *, struct bkey *);
+struct bkey *bch_btree_iter_init(struct btree *, struct btree_iter *,
+				 struct bkey *);
+
+struct bkey *__bch_bset_search(struct btree *, struct bset_tree *,
+			       const struct bkey *);
+
+/*
+ * Returns the first key that is strictly greater than search
+ */
+static inline struct bkey *bch_bset_search(struct btree *b, struct bset_tree *t,
+					   const struct bkey *search)
+{
+	return search ? __bch_bset_search(b, t, search) : t->data->start;
+}
+
 /* Sorting */
 
 struct bset_sort_state {
@@ -219,6 +257,60 @@ static inline void bch_btree_sort(struct btree *b,
 	bch_btree_sort_partial(b, 0, state);
 }
 
+/* Bkey utility code */
+
+#define bset_bkey_last(i)	bkey_idx((struct bkey *) (i)->d, (i)->keys)
+
+static inline struct bkey *bset_bkey_idx(struct bset *i, unsigned idx)
+{
+	return bkey_idx(i->start, idx);
+}
+
+static inline void bkey_init(struct bkey *k)
+{
+	*k = ZERO_KEY;
+}
+
+static __always_inline int64_t bkey_cmp(const struct bkey *l,
+					const struct bkey *r)
+{
+	return unlikely(KEY_INODE(l) != KEY_INODE(r))
+		? (int64_t) KEY_INODE(l) - (int64_t) KEY_INODE(r)
+		: (int64_t) KEY_OFFSET(l) - (int64_t) KEY_OFFSET(r);
+}
+
+void bch_bkey_copy_single_ptr(struct bkey *, const struct bkey *,
+			      unsigned);
+bool __bch_cut_front(const struct bkey *, struct bkey *);
+bool __bch_cut_back(const struct bkey *, struct bkey *);
+
+static inline bool bch_cut_front(const struct bkey *where, struct bkey *k)
+{
+	BUG_ON(bkey_cmp(where, k) > 0);
+	return __bch_cut_front(where, k);
+}
+
+static inline bool bch_cut_back(const struct bkey *where, struct bkey *k)
+{
+	BUG_ON(bkey_cmp(where, &START_KEY(k)) < 0);
+	return __bch_cut_back(where, k);
+}
+
+#define PRECEDING_KEY(_k)					\
+({								\
+	struct bkey *_ret = NULL;				\
+								\
+	if (KEY_INODE(_k) || KEY_OFFSET(_k)) {			\
+		_ret = &KEY(KEY_INODE(_k), KEY_OFFSET(_k), 0);	\
+								\
+		if (!_ret->low)					\
+			_ret->high--;				\
+		_ret->low--;					\
+	}							\
+								\
+	_ret;							\
+})
+
 /* Keylists */
 
 struct keylist {
```
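PRECEDING_KEY() builds the key immediately before _k by decrementing the key's (high, low) word pair as one wide integer: when low is zero, the decrement borrows from high. A simplified sketch of just that borrow arithmetic, using a plain integer pair rather than the real struct bkey:

```c
/* Sketch of the borrow logic in PRECEDING_KEY(); the field layout of
 * the real struct bkey is not reproduced here. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

struct pair { uint64_t high, low; };

static struct pair preceding(struct pair k)
{
	if (!k.low)
		k.high--;	/* borrow from the high word */
	k.low--;
	return k;
}

int main(void)
{
	struct pair k = { 1, 0 };
	struct pair p = preceding(k);

	/* prints (0, 18446744073709551615), i.e. (0, 2^64 - 1) */
	printf("(%" PRIu64 ", %" PRIu64 ") precedes (1, 0)\n",
	       p.high, p.low);
	return 0;
}
```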
drivers/md/bcache/bset.h, continued:

```diff
@@ -282,126 +374,15 @@ struct bkey *bch_keylist_pop(struct keylist *);
 void bch_keylist_pop_front(struct keylist *);
 int __bch_keylist_realloc(struct keylist *, unsigned);
 
-/* Bkey utility code */
-
-#define bset_bkey_last(i)	bkey_idx((struct bkey *) (i)->d, (i)->keys)
-
-static inline struct bkey *bset_bkey_idx(struct bset *i, unsigned idx)
-{
-	return bkey_idx(i->start, idx);
-}
-
-static inline void bkey_init(struct bkey *k)
-{
-	*k = ZERO_KEY;
-}
-
-static __always_inline int64_t bkey_cmp(const struct bkey *l,
-					const struct bkey *r)
-{
-	return unlikely(KEY_INODE(l) != KEY_INODE(r))
-		? (int64_t) KEY_INODE(l) - (int64_t) KEY_INODE(r)
-		: (int64_t) KEY_OFFSET(l) - (int64_t) KEY_OFFSET(r);
-}
-
-void bch_bkey_copy_single_ptr(struct bkey *, const struct bkey *,
-			      unsigned);
-bool __bch_cut_front(const struct bkey *, struct bkey *);
-bool __bch_cut_back(const struct bkey *, struct bkey *);
-
-static inline bool bch_cut_front(const struct bkey *where, struct bkey *k)
-{
-	BUG_ON(bkey_cmp(where, k) > 0);
-	return __bch_cut_front(where, k);
-}
-
-static inline bool bch_cut_back(const struct bkey *where, struct bkey *k)
-{
-	BUG_ON(bkey_cmp(where, &START_KEY(k)) < 0);
-	return __bch_cut_back(where, k);
-}
-
+struct cache_set;
 const char *bch_ptr_status(struct cache_set *, const struct bkey *);
 bool bch_btree_ptr_invalid(struct cache_set *, const struct bkey *);
 bool bch_extent_ptr_invalid(struct cache_set *, const struct bkey *);
 
 bool bch_btree_ptr_bad(struct btree *, const struct bkey *);
 bool bch_extent_ptr_bad(struct btree *, const struct bkey *);
 bool bch_ptr_bad(struct btree *, const struct bkey *);
 
-typedef bool (*ptr_filter_fn)(struct btree *, const struct bkey *);
-
-struct bkey *bch_btree_iter_next(struct btree_iter *);
-struct bkey *bch_btree_iter_next_filter(struct btree_iter *,
-					struct btree *, ptr_filter_fn);
-
-void bch_btree_iter_push(struct btree_iter *, struct bkey *, struct bkey *);
-struct bkey *bch_btree_iter_init(struct btree *, struct btree_iter *,
-				 struct bkey *);
-
-/* 32 bits total: */
-#define BKEY_MID_BITS		3
-#define BKEY_EXPONENT_BITS	7
-#define BKEY_MANTISSA_BITS	22
-#define BKEY_MANTISSA_MASK	((1 << BKEY_MANTISSA_BITS) - 1)
-
-struct bkey_float {
-	unsigned	exponent:BKEY_EXPONENT_BITS;
-	unsigned	m:BKEY_MID_BITS;
-	unsigned	mantissa:BKEY_MANTISSA_BITS;
-} __packed;
-
-/*
- * BSET_CACHELINE was originally intended to match the hardware cacheline size -
- * it used to be 64, but I realized the lookup code would touch slightly less
- * memory if it was 128.
- *
- * It defines the number of bytes (in struct bset) per struct bkey_float in
- * the auxiliary search tree - when we're done searching the bset_float tree we
- * have this many bytes left that we do a linear search over.
- *
- * Since (after level 5) every level of the bset_tree is on a new cacheline,
- * we're touching one fewer cacheline in the bset tree in exchange for one more
- * cacheline in the linear search - but the linear search might stop before it
- * gets to the second cacheline.
- */
-#define BSET_CACHELINE		128
-#define bset_tree_space(b)	(btree_data_space(b) / BSET_CACHELINE)
-
-#define bset_tree_bytes(b)	(bset_tree_space(b) * sizeof(struct bkey_float))
-#define bset_prev_bytes(b)	(bset_tree_space(b) * sizeof(uint8_t))
-
-void bch_bset_init_next(struct btree *);
-
-void bch_bset_fix_invalidated_key(struct btree *, struct bkey *);
-void bch_bset_fix_lookup_table(struct btree *, struct bkey *);
-
-struct bkey *__bch_bset_search(struct btree *, struct bset_tree *,
-			       const struct bkey *);
-
-/*
- * Returns the first key that is strictly greater than search
- */
-static inline struct bkey *bch_bset_search(struct btree *b, struct bset_tree *t,
-					   const struct bkey *search)
-{
-	return search ? __bch_bset_search(b, t, search) : t->data->start;
-}
-
-#define PRECEDING_KEY(_k)					\
-({								\
-	struct bkey *_ret = NULL;				\
-								\
-	if (KEY_INODE(_k) || KEY_OFFSET(_k)) {			\
-		_ret = &KEY(KEY_INODE(_k), KEY_OFFSET(_k), 0);	\
-								\
-		if (!_ret->low)					\
-			_ret->high--;				\
-		_ret->low--;					\
-	}							\
-								\
-	_ret;							\
-})
-
 bool bch_bkey_try_merge(struct btree *, struct bkey *, struct bkey *);
 
 int bch_bset_print_stats(struct cache_set *, char *);
```
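The relocated __set_blocks() now takes block_bytes directly instead of a cache_set, which is why bset.h can drop its dependency on struct cache_set. A sketch of the arithmetic with a stand-in header struct (the real struct bset has a different layout; the values are illustrative):

```c
/* Sketch of __set_bytes()/__set_blocks() with the new explicit
 * block_bytes parameter. bset_hdr is a hypothetical stand-in. */
#include <stdint.h>
#include <stdio.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

struct bset_hdr { uint64_t seq, magic; uint32_t version, keys; };

#define __set_bytes(i, k)	(sizeof(*(i)) + (k) * sizeof(uint64_t))
#define __set_blocks(i, k, block_bytes)	\
	DIV_ROUND_UP(__set_bytes(i, k), block_bytes)

int main(void)
{
	struct bset_hdr hdr = { .keys = 100 };
	unsigned block_bytes = 4096;	/* i.e. sb.block_size << 9 */

	/* 24-byte header + 100 u64s = 824 bytes -> 1 block of 4096 */
	printf("%u key u64s -> %zu bytes -> %zu blocks\n",
	       hdr.keys, __set_bytes(&hdr, hdr.keys),
	       __set_blocks(&hdr, hdr.keys, block_bytes));
	return 0;
}
```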
drivers/md/bcache/btree.c (+63 −104)

```diff
@@ -110,7 +110,7 @@ static inline bool should_split(struct btree *b)
 {
 	struct bset *i = write_block(b);
 	return b->written >= btree_blocks(b) ||
-	       (b->written + __set_blocks(i, i->keys + 15, b->c)
+	       (b->written + __set_blocks(i, i->keys + 15, block_bytes(b->c))
 		> btree_blocks(b));
 }
@@ -206,7 +206,7 @@ static uint64_t btree_csum_set(struct btree *b, struct bset *i)
 void bch_btree_node_read_done(struct btree *b)
 {
 	const char *err = "bad btree header";
-	struct bset *i = b->sets[0].data;
+	struct bset *i = btree_bset_first(b);
 	struct btree_iter *iter;
 
 	iter = mempool_alloc(b->c->fill_iter, GFP_NOWAIT);
@@ -228,7 +228,8 @@ void bch_btree_node_read_done(struct btree *b)
 			goto err;
 
 		err = "bad btree header";
-		if (b->written + set_blocks(i, b->c) > btree_blocks(b))
+		if (b->written + set_blocks(i, block_bytes(b->c)) >
+		    btree_blocks(b))
 			goto err;
 
 		err = "bad magic";
@@ -253,7 +254,7 @@ void bch_btree_node_read_done(struct btree *b)
 		bch_btree_iter_push(iter, i->start, bset_bkey_last(i));
 
-		b->written += set_blocks(i, b->c);
+		b->written += set_blocks(i, block_bytes(b->c));
 	}
 
 	err = "corrupted btree";
@@ -272,7 +273,7 @@ void bch_btree_node_read_done(struct btree *b)
 		goto err;
 
 	if (b->written < btree_blocks(b))
-		bch_bset_init_next(b);
+		bch_bset_init_next(b, write_block(b), bset_magic(&b->c->sb));
 out:
 	mempool_free(iter, b->c->fill_iter);
 	return;
@@ -393,7 +394,7 @@ static void btree_node_write_endio(struct bio *bio, int error)
 static void do_btree_node_write(struct btree *b)
 {
 	struct closure *cl = &b->io;
-	struct bset *i = b->sets[b->nsets].data;
+	struct bset *i = btree_bset_last(b);
 	BKEY_PADDED(key) k;
 
 	i->version	= BCACHE_BSET_VERSION;
@@ -405,7 +406,7 @@ static void do_btree_node_write(struct btree *b)
 	b->bio->bi_end_io	= btree_node_write_endio;
 	b->bio->bi_private	= cl;
 	b->bio->bi_rw		= REQ_META|WRITE_SYNC|REQ_FUA;
-	b->bio->bi_iter.bi_size	= set_blocks(i, b->c) * block_bytes(b->c);
+	b->bio->bi_iter.bi_size	= roundup(set_bytes(i), block_bytes(b->c));
 	bch_bio_map(b->bio, i);
 
 	/*
@@ -424,7 +425,8 @@ static void do_btree_node_write(struct btree *b)
 	 */
 
 	bkey_copy(&k.key, &b->key);
-	SET_PTR_OFFSET(&k.key, 0, PTR_OFFSET(&k.key, 0) + bset_offset(b, i));
+	SET_PTR_OFFSET(&k.key, 0, PTR_OFFSET(&k.key, 0) +
+		       bset_sector_offset(b, i));
 
 	if (!bio_alloc_pages(b->bio, GFP_NOIO)) {
 		int j;
@@ -451,14 +453,14 @@ static void do_btree_node_write(struct btree *b)
 void bch_btree_node_write(struct btree *b, struct closure *parent)
 {
-	struct bset *i = b->sets[b->nsets].data;
+	struct bset *i = btree_bset_last(b);
 
 	trace_bcache_btree_write(b);
 
 	BUG_ON(current->bio_list);
 	BUG_ON(b->written >= btree_blocks(b));
 	BUG_ON(b->written && !i->keys);
-	BUG_ON(b->sets->data->seq != i->seq);
+	BUG_ON(btree_bset_first(b)->seq != i->seq);
 	bch_check_keys(b, "writing");
 
 	cancel_delayed_work(&b->work);
@@ -472,8 +474,8 @@ void bch_btree_node_write(struct btree *b, struct closure *parent)
 	do_btree_node_write(b);
 
-	b->written += set_blocks(i, b->c);
-	atomic_long_add(set_blocks(i, b->c) * b->c->sb.block_size,
+	b->written += set_blocks(i, block_bytes(b->c));
+	atomic_long_add(set_blocks(i, block_bytes(b->c)) * b->c->sb.block_size,
 			&PTR_CACHE(b->c, &b->key, 0)->btree_sectors_written);
 
 	/* If not a leaf node, always sort */
@@ -490,7 +492,7 @@ void bch_btree_node_write(struct btree *b, struct closure *parent)
 	bch_btree_verify(b);
 
 	if (b->written < btree_blocks(b))
-		bch_bset_init_next(b);
+		bch_bset_init_next(b, write_block(b), bset_magic(&b->c->sb));
 }
 
 static void bch_btree_node_write_sync(struct btree *b)
@@ -515,7 +517,7 @@ static void btree_node_write_work(struct work_struct *w)
 static void bch_btree_leaf_dirty(struct btree *b, atomic_t *journal_ref)
 {
-	struct bset *i = b->sets[b->nsets].data;
+	struct bset *i = btree_bset_last(b);
 	struct btree_write *w = btree_current_write(b);
 
 	BUG_ON(!b->written);
@@ -575,29 +577,12 @@ static void mca_reinit(struct btree *b)
 static void mca_data_free(struct btree *b)
 {
-	struct bset_tree *t = b->sets;
-
 	BUG_ON(b->io_mutex.count != 1);
 
-	if (bset_prev_bytes(b) < PAGE_SIZE)
-		kfree(t->prev);
-	else
-		free_pages((unsigned long) t->prev,
-			   get_order(bset_prev_bytes(b)));
-
-	if (bset_tree_bytes(b) < PAGE_SIZE)
-		kfree(t->tree);
-	else
-		free_pages((unsigned long) t->tree,
-			   get_order(bset_tree_bytes(b)));
+	bch_btree_keys_free(b);
 
-	free_pages((unsigned long) t->data, b->page_order);
-
-	t->prev = NULL;
-	t->tree = NULL;
-	t->data = NULL;
-
-	list_move(&b->list, &b->c->btree_cache_freed);
 	b->c->bucket_cache_used--;
+	list_move(&b->list, &b->c->btree_cache_freed);
 }
 
 static void mca_bucket_free(struct btree *b)
@@ -616,34 +601,16 @@ static unsigned btree_order(struct bkey *k)
 static void mca_data_alloc(struct btree *b, struct bkey *k, gfp_t gfp)
 {
-	struct bset_tree *t = b->sets;
-	BUG_ON(t->data);
-
-	b->page_order = max_t(unsigned,
-			      ilog2(b->c->btree_pages),
-			      btree_order(k));
-
-	t->data = (void *) __get_free_pages(gfp, b->page_order);
-	if (!t->data)
-		goto err;
-
-	t->tree = bset_tree_bytes(b) < PAGE_SIZE ?
-		kmalloc(bset_tree_bytes(b), gfp) :
-		(void *) __get_free_pages(gfp, get_order(bset_tree_bytes(b)));
-	if (!t->tree)
-		goto err;
-
-	t->prev = bset_prev_bytes(b) < PAGE_SIZE ?
-		kmalloc(bset_prev_bytes(b), gfp) :
-		(void *) __get_free_pages(gfp, get_order(bset_prev_bytes(b)));
-	if (!t->prev)
-		goto err;
-
-	list_move(&b->list, &b->c->btree_cache);
-	b->c->bucket_cache_used++;
-	return;
-err:
-	mca_data_free(b);
+	if (!bch_btree_keys_alloc(b,
+				  max_t(unsigned,
+					ilog2(b->c->btree_pages),
+					btree_order(k)),
+				  gfp)) {
+		b->c->bucket_cache_used++;
+		list_move(&b->list, &b->c->btree_cache);
+	} else {
+		list_move(&b->list, &b->c->btree_cache_freed);
+	}
 }
 
 static struct btree *mca_bucket_alloc(struct cache_set *c,
@@ -1111,7 +1078,7 @@ struct btree *bch_btree_node_alloc(struct cache_set *c, int level, bool wait)
 	}
 
 	b->accessed = 1;
-	bch_bset_init_next(b);
+	bch_bset_init_next(b, b->sets->data, bset_magic(&b->c->sb));
 
 	mutex_unlock(&c->bucket_lock);
```
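The bi_size change in do_btree_node_write() swaps set_blocks(i, ...) * block_bytes(...) for roundup(set_bytes(i), block_bytes(...)); both round the set size up to a block boundary, so the I/O size is unchanged. A quick userspace check of that equivalence, assuming block_bytes is a power of two (it is when sb.block_size is, which lets the mask form stand in for the division form):

```c
/* Verify: DIV_ROUND_UP(n, bb) * bb == (n + bb - 1) & ~(bb - 1)
 * for power-of-two bb, i.e. the old and new bi_size formulas agree. */
#include <assert.h>
#include <stdio.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	unsigned block_bytes = 512;

	for (unsigned set_bytes = 1; set_bytes <= 64 * 1024; set_bytes++) {
		unsigned old_way = DIV_ROUND_UP(set_bytes, block_bytes) *
				   block_bytes;
		unsigned new_way = (set_bytes + block_bytes - 1) &
				   ~(block_bytes - 1);
		assert(old_way == new_way);
	}

	printf("formulas agree for all sizes tested\n");
	return 0;
}
```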
drivers/md/bcache/btree.c, continued:

```diff
@@ -1298,7 +1265,8 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
 	blocks = btree_default_blocks(b->c) * 2 / 3;
 
 	if (nodes < 2 ||
-	    __set_blocks(b->sets[0].data, keys, b->c) > blocks * (nodes - 1))
+	    __set_blocks(b->sets[0].data, keys,
+			 block_bytes(b->c)) > blocks * (nodes - 1))
 		return 0;
 
 	for (i = 0; i < nodes; i++) {
@@ -1308,8 +1276,8 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
 	}
 
 	for (i = nodes - 1; i > 0; --i) {
-		struct bset *n1 = new_nodes[i]->sets->data;
-		struct bset *n2 = new_nodes[i - 1]->sets->data;
+		struct bset *n1 = btree_bset_first(new_nodes[i]);
+		struct bset *n2 = btree_bset_first(new_nodes[i - 1]);
 		struct bkey *k, *last = NULL;
 
 		keys = 0;
@@ -1319,7 +1287,8 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
 			     k < bset_bkey_last(n2);
 			     k = bkey_next(k)) {
 				if (__set_blocks(n1, n1->keys + keys +
-						 bkey_u64s(k), b->c) > blocks)
+						 bkey_u64s(k),
+						 block_bytes(b->c)) > blocks)
 					break;
 
 				last = k;
@@ -1335,7 +1304,8 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
 			 * though)
 			 */
 			if (__set_blocks(n1, n1->keys + n2->keys,
-					 b->c) > btree_blocks(new_nodes[i]))
+					 block_bytes(b->c)) >
+			    btree_blocks(new_nodes[i]))
 				goto out_nocoalesce;
 
 			keys = n2->keys;
@@ -1343,8 +1313,8 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
 			last = &r->b->key;
 		}
 
-		BUG_ON(__set_blocks(n1, n1->keys + keys,
-				    b->c) > btree_blocks(new_nodes[i]));
+		BUG_ON(__set_blocks(n1, n1->keys + keys, block_bytes(b->c)) >
+		       btree_blocks(new_nodes[i]));
 
 		if (last)
 			bkey_copy_key(&new_nodes[i]->key, last);
@@ -1380,7 +1350,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
 	}
 
 	/* We emptied out this node */
-	BUG_ON(new_nodes[0]->sets->data->keys);
+	BUG_ON(btree_bset_first(new_nodes[0])->keys);
 	btree_node_free(new_nodes[0]);
 	rw_unlock(true, new_nodes[0]);
@@ -1831,19 +1801,6 @@ int bch_btree_check(struct cache_set *c)
 
 /* Btree insertion */
 
-static void shift_keys(struct btree *b, struct bkey *where, struct bkey *insert)
-{
-	struct bset *i = b->sets[b->nsets].data;
-
-	memmove((uint64_t *) where + bkey_u64s(insert),
-		where,
-		(void *) bset_bkey_last(i) - (void *) where);
-
-	i->keys += bkey_u64s(insert);
-	bkey_copy(where, insert);
-	bch_bset_fix_lookup_table(b, where);
-}
-
 static bool fix_overlapping_extents(struct btree *b, struct bkey *insert,
 				    struct btree_iter *iter,
 				    struct bkey *replace_key)
@@ -1944,13 +1901,13 @@ static bool fix_overlapping_extents(struct btree *b, struct bkey *insert,
 				 * depends on us inserting a new key for the top
 				 * here.
 				 */
-				top = bch_bset_search(b, &b->sets[b->nsets],
+				top = bch_bset_search(b, bset_tree_last(b),
 						      insert);
-				shift_keys(b, top, k);
+				bch_bset_insert(b, top, k);
 			} else {
 				BKEY_PADDED(key) temp;
 				bkey_copy(&temp.key, k);
-				shift_keys(b, k, &temp.key);
+				bch_bset_insert(b, k, &temp.key);
 				top = bkey_next(k);
 			}
@@ -1999,7 +1956,7 @@ static bool fix_overlapping_extents(struct btree *b, struct bkey *insert,
 static bool btree_insert_key(struct btree *b, struct btree_op *op,
 			     struct bkey *k, struct bkey *replace_key)
 {
-	struct bset *i = b->sets[b->nsets].data;
+	struct bset *i = btree_bset_last(b);
 	struct bkey *m, *prev;
 	unsigned status = BTREE_INSERT_STATUS_INSERT;
@@ -2051,10 +2008,10 @@ static bool btree_insert_key(struct btree *b, struct btree_op *op,
 			goto copy;
 	} else {
 		BUG_ON(replace_key);
-		m = bch_bset_search(b, &b->sets[b->nsets], k);
+		m = bch_bset_search(b, bset_tree_last(b), k);
 	}
 
-insert:	shift_keys(b, m, k);
+insert:	bch_bset_insert(b, m, k);
 copy:	bkey_copy(m, k);
 merged:
 	bch_check_keys(b, "%u for %s", status,
@@ -2079,8 +2036,9 @@ static bool bch_btree_insert_keys(struct btree *b, struct btree_op *op,
 		struct bset *i = write_block(b);
 		struct bkey *k = insert_keys->keys;
 
-		if (b->written + __set_blocks(i, i->keys + bkey_u64s(k), b->c)
-		    > btree_blocks(b))
+		if (b->written +
+		    __set_blocks(i, i->keys + bkey_u64s(k),
+				 block_bytes(b->c)) > btree_blocks(b))
 			break;
 
 		if (bkey_cmp(k, &b->key) <= 0) {
@@ -2130,12 +2088,13 @@ static int btree_split(struct btree *b, struct btree_op *op,
 	if (IS_ERR(n1))
 		goto err;
 
-	split = set_blocks(n1->sets[0].data, n1->c) > (btree_blocks(b) * 4) / 5;
+	split = set_blocks(btree_bset_first(n1),
+			   block_bytes(n1->c)) > (btree_blocks(b) * 4) / 5;
 
 	if (split) {
 		unsigned keys = 0;
 
-		trace_bcache_btree_node_split(b, n1->sets[0].data->keys);
+		trace_bcache_btree_node_split(b, btree_bset_first(n1)->keys);
 
 		n2 = bch_btree_node_alloc(b->c, b->level, true);
 		if (IS_ERR(n2))
@@ -2154,20 +2113,20 @@ static int btree_split(struct btree *b, struct btree_op *op,
 		 * search tree yet
 		 */
 
-		while (keys < (n1->sets[0].data->keys * 3) / 5)
-			keys += bkey_u64s(bset_bkey_idx(n1->sets[0].data,
+		while (keys < (btree_bset_first(n1)->keys * 3) / 5)
+			keys += bkey_u64s(bset_bkey_idx(btree_bset_first(n1),
 							keys));
 
 		bkey_copy_key(&n1->key,
-			      bset_bkey_idx(n1->sets[0].data, keys));
-		keys += bkey_u64s(bset_bkey_idx(n1->sets[0].data, keys));
+			      bset_bkey_idx(btree_bset_first(n1), keys));
+		keys += bkey_u64s(bset_bkey_idx(btree_bset_first(n1), keys));
 
-		n2->sets[0].data->keys = n1->sets[0].data->keys - keys;
-		n1->sets[0].data->keys = keys;
+		btree_bset_first(n2)->keys = btree_bset_first(n1)->keys - keys;
+		btree_bset_first(n1)->keys = keys;
 
-		memcpy(n2->sets[0].data->start,
-		       bset_bkey_last(n1->sets[0].data),
-		       n2->sets[0].data->keys * sizeof(uint64_t));
+		memcpy(btree_bset_first(n2)->start,
+		       bset_bkey_last(btree_bset_first(n1)),
+		       btree_bset_first(n2)->keys * sizeof(uint64_t));
 
 		bkey_copy_key(&n2->key, &b->key);
@@ -2175,7 +2134,7 @@ static int btree_split(struct btree *b, struct btree_op *op,
 		bch_btree_node_write(n2, &cl);
 		rw_unlock(true, n2);
 	} else {
-		trace_bcache_btree_node_compact(b, n1->sets[0].data->keys);
+		trace_bcache_btree_node_compact(b, btree_bset_first(n1)->keys);
 
 		bch_btree_insert_keys(n1, op, insert_keys, replace_key);
 	}
@@ -2256,7 +2215,7 @@ static int bch_btree_insert_node(struct btree *b, struct btree_op *op,
 			-EINTR;
 		}
 	} else {
-		BUG_ON(write_block(b) != b->sets[b->nsets].data);
+		BUG_ON(write_block(b) != btree_bset_last(b));
 
 		if (bch_btree_insert_keys(b, op, insert_keys,
 					  replace_key)) {
 			if (!b->level)
```
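In btree_split(), the while loop above walks forward until roughly 3/5 of the set is in n1; since keys are variable-size, it counts u64s rather than keys. A toy run of that walk with made-up key sizes:

```c
/* Toy run of the btree_split() split-point walk; key sizes here are
 * invented stand-ins for bkey_u64s() of each key. */
#include <stdio.h>

int main(void)
{
	unsigned sizes[] = { 3, 2, 3, 3, 2, 3, 2 };	/* bkey_u64s() per key */
	unsigned total = 0, keys = 0, i = 0;

	for (unsigned j = 0; j < 7; j++)
		total += sizes[j];

	/* while (keys < (btree_bset_first(n1)->keys * 3) / 5) ... */
	while (keys < (total * 3) / 5)
		keys += sizes[i++];

	printf("split after %u keys (%u of %u u64s stay in n1)\n",
	       i, keys, total);	/* 4 keys, 11 of 18 u64s */
	return 0;
}
```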
drivers/md/bcache/btree.h (+7 −2)

```diff
@@ -180,9 +180,9 @@ static inline struct btree_write *btree_prev_write(struct btree *b)
 	return b->writes + (btree_node_write_idx(b) ^ 1);
 }
 
-static inline unsigned bset_offset(struct btree *b, struct bset *i)
+static inline struct bset_tree *bset_tree_last(struct btree *b)
 {
-	return (((size_t) i) - ((size_t) b->sets->data)) >> 9;
+	return b->sets + b->nsets;
 }
 
 static inline struct bset *btree_bset_first(struct btree *b)
@@ -190,6 +190,11 @@ static inline struct bset *btree_bset_first(struct btree *b)
 	return b->sets->data;
 }
 
+static inline struct bset *btree_bset_last(struct btree *b)
+{
+	return bset_tree_last(b)->data;
+}
+
 static inline unsigned bset_byte_offset(struct btree *b, struct bset *i)
 {
 	return ((size_t) i) - ((size_t) b->sets->data);
```
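bset_tree_last() and btree_bset_last() replace the open-coded b->sets[b->nsets] lookups throughout btree.c and bset.c, while bset_offset() is dropped in favor of bset_sector_offset() (presumably defined alongside bset_byte_offset() in a hunk not shown here). A minimal userspace mock of the accessor pattern:

```c
/* Mock (not the kernel structs) of the new accessors: the last
 * bset_tree is the one still being written to. */
#include <stdio.h>

struct bset { unsigned keys; };
struct bset_tree { struct bset *data; };
struct btree {
	struct bset_tree sets[4];
	unsigned nsets;
};

static inline struct bset_tree *bset_tree_last(struct btree *b)
{
	return b->sets + b->nsets;
}

static inline struct bset *btree_bset_last(struct btree *b)
{
	return bset_tree_last(b)->data;
}

int main(void)
{
	struct bset s0 = { 10 }, s1 = { 3 };
	struct btree b = { .sets = { { &s0 }, { &s1 } }, .nsets = 1 };

	printf("last bset has %u keys\n", btree_bset_last(&b)->keys);
	return 0;
}
```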