fs/btrfs/extent-tree.c  +17 −2

@@ -3684,11 +3684,21 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 		return -ENOMEM;
 
 	/*
-	 * We don't need the lock here since we are protected by the transaction
-	 * commit. We want to do the cache_save_setup first and then run the
+	 * Even though we are in the critical section of the transaction commit,
+	 * we can still have concurrent tasks adding elements to this
+	 * transaction's list of dirty block groups. These tasks correspond to
+	 * endio free space workers started when writeback finishes for a
+	 * space cache, which run inode.c:btrfs_finish_ordered_io(), and can
+	 * allocate new block groups as a result of COWing nodes of the root
+	 * tree when updating the free space inode. The writeback for the space
+	 * caches is triggered by an earlier call to
+	 * btrfs_start_dirty_block_groups() and iterations of the following
+	 * loop.
+	 * Also we want to do the cache_save_setup first and then run the
 	 * delayed refs to make sure we have the best chance at doing this all
 	 * in one shot.
 	 */
+	spin_lock(&cur_trans->dirty_bgs_lock);
 	while (!list_empty(&cur_trans->dirty_bgs)) {
 		cache = list_first_entry(&cur_trans->dirty_bgs,
 					 struct btrfs_block_group_cache,
@@ -3700,11 +3710,13 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 		 * finish and then do it all again
 		 */
 		if (!list_empty(&cache->io_list)) {
+			spin_unlock(&cur_trans->dirty_bgs_lock);
 			list_del_init(&cache->io_list);
 			btrfs_wait_cache_io(root, trans, cache,
 					    &cache->io_ctl, path,
 					    cache->key.objectid);
 			btrfs_put_block_group(cache);
+			spin_lock(&cur_trans->dirty_bgs_lock);
 		}
 
 		/*
@@ -3712,6 +3724,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 		 * on any pending IO
 		 */
 		list_del_init(&cache->dirty_list);
+		spin_unlock(&cur_trans->dirty_bgs_lock);
 		should_put = 1;
 
 		cache_save_setup(cache, trans, path);
@@ -3743,7 +3756,9 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 		/* if its not on the io list, we need to put the block group */
 		if (should_put)
 			btrfs_put_block_group(cache);
+		spin_lock(&cur_trans->dirty_bgs_lock);
 	}
+	spin_unlock(&cur_trans->dirty_bgs_lock);
 
 	while (!list_empty(io)) {
 		cache = list_first_entry(io, struct btrfs_block_group_cache,
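Note: the change above makes the loop safe against concurrent list insertions by holding dirty_bgs_lock only around the list manipulation, dropping it across every call that can block or re-enter the allocator, and retaking it before the next iteration. As a rough illustration of that lock-drop-relock drain pattern, here is a minimal userspace sketch (pthreads and a hand-rolled list, not the kernel's types or API):

#include <pthread.h>
#include <stdlib.h>

struct node {
	struct node *next;
	int payload;
};

static struct node *dirty_list;       /* list other threads may append to */
static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

static void process(struct node *n)   /* may block; must run unlocked */
{
	(void)n;
}

/* Drain the list while concurrent producers keep adding entries. */
static void drain_dirty_list(void)
{
	pthread_mutex_lock(&list_lock);
	while (dirty_list) {
		struct node *n = dirty_list;

		dirty_list = n->next;              /* detach under the lock */
		pthread_mutex_unlock(&list_lock);  /* never block while locked */
		process(n);
		free(n);
		pthread_mutex_lock(&list_lock);    /* re-check list state */
	}
	pthread_mutex_unlock(&list_lock);
}

int main(void)
{
	for (int i = 0; i < 3; i++) {
		struct node *n = malloc(sizeof(*n));

		n->payload = i;
		n->next = dirty_list;
		dirty_list = n;
	}
	drain_dirty_list();
	return 0;
}

The empty-check at the top of each iteration is what keeps this correct: because the lock was dropped, the list head must be re-read after relocking rather than cached across the blocking call.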
fs/btrfs/inode.c  +78 −49

@@ -66,6 +66,13 @@ struct btrfs_iget_args {
 	struct btrfs_root *root;
 };
 
+struct btrfs_dio_data {
+	u64 outstanding_extents;
+	u64 reserve;
+	u64 unsubmitted_oe_range_start;
+	u64 unsubmitted_oe_range_end;
+};
+
 static const struct inode_operations btrfs_dir_inode_operations;
 static const struct inode_operations btrfs_symlink_inode_operations;
 static const struct inode_operations btrfs_dir_ro_inode_operations;

@@ -7408,24 +7415,20 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
 			btrfs_start_ordered_extent(inode, ordered, 1);
 			btrfs_put_ordered_extent(ordered);
 		} else {
-			/* Screw you mmap */
-			ret = btrfs_fdatawrite_range(inode, lockstart, lockend);
-			if (ret)
-				break;
-			ret = filemap_fdatawait_range(inode->i_mapping,
-						      lockstart,
-						      lockend);
-			if (ret)
-				break;
-
 			/*
-			 * If we found a page that couldn't be invalidated just
-			 * fall back to buffered.
+			 * We could trigger writeback for this range (and wait
+			 * for it to complete) and then invalidate the pages for
+			 * this range (through invalidate_inode_pages2_range()),
+			 * but that can lead us to a deadlock with a concurrent
+			 * call to readpages() (a buffered read or a defrag call
+			 * triggered a readahead) on a page lock due to an
+			 * ordered dio extent we created before but did not have
+			 * yet a corresponding bio submitted (whence it can not
+			 * complete), which makes readpages() wait for that
+			 * ordered extent to complete while holding a lock on
+			 * that page.
 			 */
 			ret = invalidate_inode_pages2_range(inode->i_mapping,
 					lockstart >> PAGE_CACHE_SHIFT,
 					lockend >> PAGE_CACHE_SHIFT);
 			if (ret)
 				ret = -ENOTBLK;
 			break;
 		}

@@ -7482,11 +7485,6 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
 	return em;
 }
 
-struct btrfs_dio_data {
-	u64 outstanding_extents;
-	u64 reserve;
-};
-
 static void adjust_dio_outstanding_extents(struct inode *inode,
 					   struct btrfs_dio_data *dio_data,
 					   const u64 len)

@@ -7670,6 +7668,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
 		btrfs_free_reserved_data_space(inode, start, len);
 		WARN_ON(dio_data->reserve < len);
 		dio_data->reserve -= len;
+		dio_data->unsubmitted_oe_range_end = start + len;
 		current->journal_info = dio_data;
 	}

@@ -7992,22 +7991,22 @@ static void btrfs_endio_direct_read(struct bio *bio)
 	bio_put(bio);
 }
 
-static void btrfs_endio_direct_write(struct bio *bio)
+static void btrfs_endio_direct_write_update_ordered(struct inode *inode,
+						    const u64 offset,
+						    const u64 bytes,
+						    const int uptodate)
 {
-	struct btrfs_dio_private *dip = bio->bi_private;
-	struct inode *inode = dip->inode;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_ordered_extent *ordered = NULL;
-	u64 ordered_offset = dip->logical_offset;
-	u64 ordered_bytes = dip->bytes;
-	struct bio *dio_bio;
+	u64 ordered_offset = offset;
+	u64 ordered_bytes = bytes;
 	int ret;
 
 again:
 	ret = btrfs_dec_test_first_ordered_pending(inode, &ordered,
 						   &ordered_offset,
 						   ordered_bytes,
-						   !bio->bi_error);
+						   uptodate);
 	if (!ret)
 		goto out_test;

@@ -8020,13 +8019,22 @@ static void btrfs_endio_direct_write(struct bio *bio)
 	 * our bio might span multiple ordered extents. If we haven't
 	 * completed the accounting for the whole dio, go back and try again
 	 */
-	if (ordered_offset < dip->logical_offset + dip->bytes) {
-		ordered_bytes = dip->logical_offset + dip->bytes -
-			ordered_offset;
+	if (ordered_offset < offset + bytes) {
+		ordered_bytes = offset + bytes - ordered_offset;
 		ordered = NULL;
 		goto again;
 	}
-
-	dio_bio = dip->dio_bio;
+}
+
+static void btrfs_endio_direct_write(struct bio *bio)
+{
+	struct btrfs_dio_private *dip = bio->bi_private;
+	struct bio *dio_bio = dip->dio_bio;
+
+	btrfs_endio_direct_write_update_ordered(dip->inode,
+						dip->logical_offset,
+						dip->bytes,
+						!bio->bi_error);
 
 	kfree(dip);

@@ -8334,6 +8342,21 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio,
 		dip->subio_endio = btrfs_subio_endio_read;
 	}
 
+	/*
+	 * Reset the range for unsubmitted ordered extents (to a 0 length range)
+	 * even if we fail to submit a bio, because in such case we do the
+	 * corresponding error handling below and it must not be done a second
+	 * time by btrfs_direct_IO().
+	 */
+	if (write) {
+		struct btrfs_dio_data *dio_data = current->journal_info;
+
+		dio_data->unsubmitted_oe_range_end = dip->logical_offset +
+			dip->bytes;
+		dio_data->unsubmitted_oe_range_start =
+			dio_data->unsubmitted_oe_range_end;
+	}
+
 	ret = btrfs_submit_direct_hook(rw, dip, skip_sum);
 	if (!ret)
 		return;

@@ -8362,24 +8385,15 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio,
 		dip = NULL;
 		io_bio = NULL;
 	} else {
-		if (write) {
-			struct btrfs_ordered_extent *ordered;
-
-			ordered = btrfs_lookup_ordered_extent(inode,
-							      file_offset);
-			set_bit(BTRFS_ORDERED_IOERR, &ordered->flags);
-			/*
-			 * Decrements our ref on the ordered extent and removes
-			 * the ordered extent from the inode's ordered tree,
-			 * doing all the proper resource cleanup such as for the
-			 * reserved space and waking up any waiters for this
-			 * ordered extent (through btrfs_remove_ordered_extent).
-			 */
-			btrfs_finish_ordered_io(ordered);
-		} else {
+		if (write)
+			btrfs_endio_direct_write_update_ordered(inode,
+						file_offset,
+						dio_bio->bi_iter.bi_size,
+						0);
+		else
 			unlock_extent(&BTRFS_I(inode)->io_tree, file_offset,
 				      file_offset + dio_bio->bi_iter.bi_size - 1);
-		}
+
 		dio_bio->bi_error = -EIO;
 		/*
 		 * Releases and cleans up our dio_bio, no need to bio_put()

@@ -8479,6 +8493,8 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 		 * originally calculated. Abuse current->journal_info for this.
 		 */
 		dio_data.reserve = round_up(count, root->sectorsize);
+		dio_data.unsubmitted_oe_range_start = (u64)offset;
+		dio_data.unsubmitted_oe_range_end = (u64)offset;
 		current->journal_info = &dio_data;
 	} else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK,
 			    &BTRFS_I(inode)->runtime_flags)) {

@@ -8497,6 +8513,19 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 			if (dio_data.reserve)
 				btrfs_delalloc_release_space(inode, offset,
 							     dio_data.reserve);
+			/*
+			 * On error we might have left some ordered extents
+			 * without submitting corresponding bios for them, so
+			 * cleanup them up to avoid other tasks getting them
+			 * and waiting for them to complete forever.
+			 */
+			if (dio_data.unsubmitted_oe_range_start <
+			    dio_data.unsubmitted_oe_range_end)
+				btrfs_endio_direct_write_update_ordered(inode,
+					dio_data.unsubmitted_oe_range_start,
+					dio_data.unsubmitted_oe_range_end -
+					dio_data.unsubmitted_oe_range_start,
+					0);
 		} else if (ret >= 0 && (size_t)ret < count)
 			btrfs_delalloc_release_space(inode, offset,
 						     count - (size_t)ret);
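Note: the leak fix is bookkeeping over a half-open byte range. unsubmitted_oe_range_end advances whenever an ordered extent is created, unsubmitted_oe_range_start catches up once the bios are handed off (or once btrfs_submit_direct() has already done its own error handling), and btrfs_direct_IO() fails whatever is left in between. A minimal standalone sketch of that invariant (illustrative names, not the btrfs API):

#include <stdint.h>
#include <stdio.h>

/* Tracks ordered extents created but with no bios submitted yet. */
struct dio_range {
	uint64_t unsubmitted_start;
	uint64_t unsubmitted_end;
};

/* Called when an ordered extent for [start, start + len) is created. */
static void on_ordered_extent_created(struct dio_range *r,
				      uint64_t start, uint64_t len)
{
	r->unsubmitted_end = start + len;
}

/* Called once bios covering the whole dio were submitted (or their
 * error handling was already performed): nothing left to clean up. */
static void on_bios_submitted(struct dio_range *r)
{
	r->unsubmitted_start = r->unsubmitted_end;
}

/* Error path: complete any leftover range with an error status so no
 * other task blocks forever waiting on those ordered extents. */
static void cleanup_unsubmitted(const struct dio_range *r)
{
	if (r->unsubmitted_start < r->unsubmitted_end)
		printf("failing ordered extents in [%llu, %llu)\n",
		       (unsigned long long)r->unsubmitted_start,
		       (unsigned long long)r->unsubmitted_end);
}

int main(void)
{
	struct dio_range r = { .unsubmitted_start = 0, .unsubmitted_end = 0 };

	on_ordered_extent_created(&r, 0, 4096);	/* extent created */
	/* bio submission fails here: on_bios_submitted() never runs */
	cleanup_unsubmitted(&r);		/* range [0, 4096) is failed */
	return 0;
}

Initializing both ends to the same offset (as btrfs_direct_IO() does) makes "nothing pending" and "everything handled" the same state, so the cleanup test stays a single comparison.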
fs/btrfs/transaction.c  +17 −0

@@ -75,6 +75,23 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction)
 			list_del_init(&em->list);
 			free_extent_map(em);
 		}
+		/*
+		 * If any block groups are found in ->deleted_bgs then it's
+		 * because the transaction was aborted and a commit did not
+		 * happen (things failed before writing the new superblock
+		 * and calling btrfs_finish_extent_commit()), so we can not
+		 * discard the physical locations of the block groups.
+		 */
+		while (!list_empty(&transaction->deleted_bgs)) {
+			struct btrfs_block_group_cache *cache;
+
+			cache = list_first_entry(&transaction->deleted_bgs,
+						 struct btrfs_block_group_cache,
+						 bg_list);
+			list_del_init(&cache->bg_list);
+			btrfs_put_block_group_trimming(cache);
+			btrfs_put_block_group(cache);
+		}
 		kmem_cache_free(btrfs_transaction_cachep, transaction);
 	}
 }
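Note: this is the usual destructor-side drain: any entry still queued on ->deleted_bgs means the commit never consumed it, so btrfs_put_transaction() must drop the references the list holds or the objects leak. A minimal sketch of the pattern with a toy refcount (hypothetical names, not kernel code):

#include <stdio.h>
#include <stdlib.h>

/* A minimal refcounted object queued on a singly linked list. */
struct bg {
	struct bg *next;
	int refcount;
};

static void put_bg(struct bg *b)
{
	if (--b->refcount == 0) {
		printf("freeing block group %p\n", (void *)b);
		free(b);
	}
}

/*
 * Destructor-side drain: any element still queued means the normal
 * completion path (the commit) never ran, so the reference the list
 * holds must be dropped here or the object leaks.
 */
static void drain_deleted_bgs(struct bg **head)
{
	while (*head) {
		struct bg *b = *head;

		*head = b->next;	/* unlink the first entry */
		b->next = NULL;
		put_bg(b);		/* drop the list's reference */
	}
}

int main(void)
{
	struct bg *head = NULL;

	for (int i = 0; i < 2; i++) {
		struct bg *b = malloc(sizeof(*b));

		b->refcount = 1;	/* the list's reference */
		b->next = head;
		head = b;
	}
	drain_deleted_bgs(&head);	/* nothing leaks on the abort path */
	return 0;
}

The kernel version drops two references per entry (trimming plus the list's own); the sketch collapses that to one for brevity.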
fs/btrfs/tree-defrag.c  +24 −3

@@ -89,6 +89,12 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
 		goto out;
 	}
 	btrfs_release_path(path);
+	/*
+	 * We don't need a lock on a leaf. btrfs_realloc_node() will lock all
+	 * leafs from path->nodes[1], so set lowest_level to 1 to avoid later
+	 * a deadlock (attempting to write lock an already write locked leaf).
+	 */
+	path->lowest_level = 1;
 	wret = btrfs_search_slot(trans, root, &key, path, 0, 1);
 
 	if (wret < 0) {
@@ -99,9 +105,12 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
 		ret = 0;
 		goto out;
 	}
-	path->slots[1] = btrfs_header_nritems(path->nodes[1]);
-	next_key_ret = btrfs_find_next_key(root, path, &key, 1,
-					   min_trans);
+	/*
+	 * The node at level 1 must always be locked when our path has
+	 * keep_locks set and lowest_level is 1, regardless of the value of
+	 * path->slots[1].
+	 */
+	BUG_ON(path->locks[1] == 0);
 	ret = btrfs_realloc_node(trans, root,
 				 path->nodes[1], 0,
 				 &last_ret,
@@ -110,6 +119,18 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
 		WARN_ON(ret == -EAGAIN);
 		goto out;
 	}
+	/*
+	 * Now that we reallocated the node we can find the next key. Note that
+	 * btrfs_find_next_key() can release our path and do another search
+	 * without COWing, this is because even with path->keep_locks = 1,
+	 * btrfs_search_slot() / ctree.c:unlock_up() does not keeps a lock on a
+	 * node when path->slots[node_level - 1] does not point to the last
+	 * item or a slot beyond the last item (ctree.c:unlock_up()). Therefore
+	 * we search for the next key after reallocating our node.
+	 */
+	path->slots[1] = btrfs_header_nritems(path->nodes[1]);
+	next_key_ret = btrfs_find_next_key(root, path, &key, 1,
+					   min_trans);
 	if (next_key_ret == 0) {
 		memcpy(&root->defrag_progress, &key, sizeof(key));
 		ret = -EAGAIN;
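Note: the lowest_level = 1 trick exists because btrfs tree locks are not recursive: if the search left the leaf write-locked, btrfs_realloc_node() would try to write lock that same leaf again and self-deadlock. A minimal userspace sketch of that deadlock class (pthread rwlocks standing in for the kernel's tree locks; names are illustrative):

#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t leaf_lock = PTHREAD_RWLOCK_INITIALIZER;

/* Models btrfs_realloc_node(): takes the write lock on each leaf. */
static void realloc_node(void)
{
	/* Hangs (or fails with EDEADLK) if the caller already holds it,
	 * because the write lock is not recursive. */
	pthread_rwlock_wrlock(&leaf_lock);
	/* ... rewrite the leaf ... */
	pthread_rwlock_unlock(&leaf_lock);
}

int main(void)
{
	/*
	 * Wrong: searching down to the leaf (level 0) and keeping its
	 * write lock before calling realloc_node() would self-deadlock.
	 * The fix mirrors path->lowest_level = 1: stop the search one
	 * level up, so the callee is free to lock the leaves itself.
	 */
	realloc_node();		/* caller holds no leaf lock: safe */
	printf("done\n");
	return 0;
}

The same reasoning explains why the next-key search moved after btrfs_realloc_node(): the earlier search could drop the level 1 lock, and the key must be sampled from the node as it exists after reallocation.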
fs/btrfs/volumes.c  +15 −2

@@ -4825,19 +4825,31 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
 		goto out;
 	}
 
+	/*
+	 * Take the device list mutex to prevent races with the final phase of
+	 * a device replace operation that replaces the device object associated
+	 * with the map's stripes, because the device object's id can change
+	 * at any time during that final phase of the device replace operation
+	 * (dev-replace.c:btrfs_dev_replace_finishing()).
+	 */
+	mutex_lock(&chunk_root->fs_info->fs_devices->device_list_mutex);
 	for (i = 0; i < map->num_stripes; i++) {
 		device = map->stripes[i].dev;
 		dev_offset = map->stripes[i].physical;
 
 		ret = btrfs_update_device(trans, device);
 		if (ret)
-			goto out;
+			break;
 		ret = btrfs_alloc_dev_extent(trans, device,
 					     chunk_root->root_key.objectid,
 					     BTRFS_FIRST_CHUNK_TREE_OBJECTID,
 					     chunk_offset, dev_offset,
 					     stripe_size);
 		if (ret)
-			goto out;
+			break;
+	}
+	if (ret) {
+		mutex_unlock(&chunk_root->fs_info->fs_devices->device_list_mutex);
+		goto out;
 	}

@@ -4851,6 +4863,7 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
 		memcpy(stripe->dev_uuid, device->uuid, BTRFS_UUID_SIZE);
 		stripe++;
 	}
+	mutex_unlock(&chunk_root->fs_info->fs_devices->device_list_mutex);
 
 	btrfs_set_stack_chunk_length(chunk, chunk_size);
 	btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid);
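Note: the mutex gives the stripe loop a consistent snapshot. Device replace can swap the device objects behind map->stripes at any moment, so every stripe must be read under one device_list_mutex critical section (which is also why the error paths switch from goto to break: the lock must be released before leaving). A standalone sketch of the race being closed (pthreads, illustrative names):

#include <pthread.h>
#include <stdio.h>

struct device {
	unsigned long long devid;
};

static struct device dev_a = { .devid = 1 };
static struct device dev_b = { .devid = 100 };

/* Stripe -> device mapping that a "replace" operation may rewrite. */
static struct device *stripes[2] = { &dev_a, &dev_a };
static pthread_mutex_t device_list_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Models btrfs_dev_replace_finishing(): swaps the device objects. */
static void *replace_device(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&device_list_mutex);
	stripes[0] = &dev_b;
	stripes[1] = &dev_b;
	pthread_mutex_unlock(&device_list_mutex);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, replace_device, NULL);

	/*
	 * Hold the mutex across the whole walk so every stripe is read
	 * against the same device objects; without it, stripe 0 could be
	 * recorded with the old devid and stripe 1 with the new one.
	 */
	pthread_mutex_lock(&device_list_mutex);
	for (int i = 0; i < 2; i++)
		printf("stripe %d -> devid %llu\n", i, stripes[i]->devid);
	pthread_mutex_unlock(&device_list_mutex);

	pthread_join(t, NULL);
	return 0;
}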