Commit 122fa8c5 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull btrfs updates from David Sterba:
 "A normal mix of improvements, core changes and features that user have
  been missing or complaining about.

  User visible changes:

   - new sysfs exports:
      - add sysfs knob to limit scrub IO bandwidth per device
      - device stats are also available in
           /sys/fs/btrfs/FSID/devinfo/DEVID/error_stats

   - support cancellable resize and device delete ioctls

   - change how the empty value is interpreted when setting a property,
     so far we have only 'btrfs.compression' and we need to distinguish
     a reset to defaults and setting "do not compress", in general the
     empty value will always mean 'reset to defaults' for any other
     property, for compression it's either 'no' or 'none' to forbid
     compression

  Performance improvements:

   - no need for full sync when truncation does not touch extents,
     reported run time change is -12%

   - avoid unnecessary logging of xattrs during fast fsyncs (+17%
     throughput, -17% runtime on xattr stress workload)

  Core:

   - preemptive flushing improvements and fixes
      - adjust clamping logic on multi-threaded workloads to avoid
        flushing too soon
      - take into account global block reserve, may help on almost full
        filesystems
      - continue flushing when there are enough pending delalloc and
        ordered bytes

   - simplify logic around conditional transaction commit, a workaround
     used in the past for throttling that's been superseded by ticket
     reservations that manage the throttling in a better way

   - subpage blocksize preparation:
      - submit read time repair only for each corrupted sector
      - scrub repair now works with sectors and not pages
      - free space cache (v1) works with sectors and not pages
      - more fine grained bio tracking for extents
      - subpage support in page callbacks, extent callbacks, end io
        callbacks

   - simplify transaction abort logic and always abort and don't check
     various potentially unreliable stats tracked by the transaction

   - exclusive operations can do more checks when started and allow eg.
     cancellation of the same running operation

   - ensure relocation never runs while we have send operations running,
     e.g. when zoned background auto reclaim starts

  Fixes:

   - zoned: more sanity checks of write pointer

   - improve error handling in delayed inodes

   - send:
      - fix invalid path for unlink operations after parent
        orphanization
      - fix crash when memory allocations trigger reclaim

   - skip compression if we have only one page (can't make things
     better)

   - empty value of a property newly means reset to default

  Other:

   - lots of cleanups, comment updates, yearly typo fixing

   - disable build on platforms having page size 256K"

* tag 'for-5.14-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (101 commits)
  btrfs: remove unused btrfs_fs_info::total_pinned
  btrfs: rip out btrfs_space_info::total_bytes_pinned
  btrfs: rip the first_ticket_bytes logic from fail_all_tickets
  btrfs: remove FLUSH_DELAYED_REFS from data ENOSPC flushing
  btrfs: rip out may_commit_transaction
  btrfs: send: fix crash when memory allocations trigger reclaim
  btrfs: ensure relocation never runs while we have send operations running
  btrfs: shorten integrity checker extent data mount option
  btrfs: switch mount option bits to enums and use wider type
  btrfs: props: change how empty value is interpreted
  btrfs: compression: don't try to compress if we don't have enough pages
  btrfs: fix unbalanced unlock in qgroup_account_snapshot()
  btrfs: sysfs: export dev stats in devinfo directory
  btrfs: fix typos in comments
  btrfs: remove a stale comment for btrfs_decompress_bio()
  btrfs: send: use list_move_tail instead of list_del/list_add_tail
  btrfs: disable build on platforms having page size 256K
  btrfs: send: fix invalid path for unlink operations after parent orphanization
  btrfs: inline wait_current_trans_commit_start in its caller
  btrfs: sink wait_for_unblock parameter to async commit
  ...
parents 7aed4d57 629e33a1
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -18,6 +18,8 @@ config BTRFS_FS
	select RAID6_PQ
	select XOR_BLOCKS
	select SRCU
	depends on !PPC_256K_PAGES	# powerpc
	depends on !PAGE_SIZE_256KB	# hexagon

	help
	  Btrfs is a general purpose copy-on-write filesystem with extents,
+17 −14
Original line number Diff line number Diff line
@@ -1399,7 +1399,6 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
		btrfs_space_info_update_bytes_pinned(fs_info, space_info,
						     -block_group->pinned);
		space_info->bytes_readonly += block_group->pinned;
		__btrfs_mod_total_bytes_pinned(space_info, -block_group->pinned);
		block_group->pinned = 0;

		spin_unlock(&block_group->lock);
@@ -1491,7 +1490,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
		container_of(work, struct btrfs_fs_info, reclaim_bgs_work);
	struct btrfs_block_group *bg;
	struct btrfs_space_info *space_info;
	int ret;
	LIST_HEAD(again_list);

	if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags))
		return;
@@ -1502,6 +1501,8 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
	mutex_lock(&fs_info->reclaim_bgs_lock);
	spin_lock(&fs_info->unused_bgs_lock);
	while (!list_empty(&fs_info->reclaim_bgs)) {
		int ret = 0;

		bg = list_first_entry(&fs_info->reclaim_bgs,
				      struct btrfs_block_group,
				      bg_list);
@@ -1547,9 +1548,13 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
				  bg->start);

next:
		btrfs_put_block_group(bg);
		spin_lock(&fs_info->unused_bgs_lock);
		if (ret == -EAGAIN && list_empty(&bg->bg_list))
			list_add_tail(&bg->bg_list, &again_list);
		else
			btrfs_put_block_group(bg);
	}
	list_splice_tail(&again_list, &fs_info->reclaim_bgs);
	spin_unlock(&fs_info->unused_bgs_lock);
	mutex_unlock(&fs_info->reclaim_bgs_lock);
	btrfs_exclop_finish(fs_info);
@@ -2505,7 +2510,7 @@ static int cache_save_setup(struct btrfs_block_group *block_group,
	struct extent_changeset *data_reserved = NULL;
	u64 alloc_hint = 0;
	int dcs = BTRFS_DC_ERROR;
	u64 num_pages = 0;
	u64 cache_size = 0;
	int retries = 0;
	int ret = 0;

@@ -2617,20 +2622,20 @@ static int cache_save_setup(struct btrfs_block_group *block_group,
	 * taking up quite a bit since it's not folded into the other space
	 * cache.
	 */
	num_pages = div_u64(block_group->length, SZ_256M);
	if (!num_pages)
		num_pages = 1;
	cache_size = div_u64(block_group->length, SZ_256M);
	if (!cache_size)
		cache_size = 1;

	num_pages *= 16;
	num_pages *= PAGE_SIZE;
	cache_size *= 16;
	cache_size *= fs_info->sectorsize;

	ret = btrfs_check_data_free_space(BTRFS_I(inode), &data_reserved, 0,
					  num_pages);
					  cache_size);
	if (ret)
		goto out_put;

	ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages,
					      num_pages, num_pages,
	ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, cache_size,
					      cache_size, cache_size,
					      &alloc_hint);
	/*
	 * Our cache requires contiguous chunks so that we don't modify a bunch
@@ -3062,8 +3067,6 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
			spin_unlock(&cache->lock);
			spin_unlock(&cache->space_info->lock);

			__btrfs_mod_total_bytes_pinned(cache->space_info,
						       num_bytes);
			set_extent_dirty(&trans->transaction->pinned_extents,
					 bytenr, bytenr + num_bytes - 1,
					 GFP_NOFS | __GFP_NOFAIL);
+15 −42
Original line number Diff line number Diff line
@@ -149,7 +149,7 @@ static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio,
	const u32 csum_size = fs_info->csum_size;
	const u32 sectorsize = fs_info->sectorsize;
	struct page *page;
	unsigned long i;
	unsigned int i;
	char *kaddr;
	u8 csum[BTRFS_CSUM_SIZE];
	struct compressed_bio *cb = bio->bi_private;
@@ -208,7 +208,7 @@ static void end_compressed_bio_read(struct bio *bio)
	struct compressed_bio *cb = bio->bi_private;
	struct inode *inode;
	struct page *page;
	unsigned long index;
	unsigned int index;
	unsigned int mirror = btrfs_io_bio(bio)->mirror_num;
	int ret = 0;

@@ -334,7 +334,7 @@ static void end_compressed_bio_write(struct bio *bio)
	struct compressed_bio *cb = bio->bi_private;
	struct inode *inode;
	struct page *page;
	unsigned long index;
	unsigned int index;

	if (bio->bi_status)
		cb->errors = 1;
@@ -349,12 +349,10 @@ static void end_compressed_bio_write(struct bio *bio)
	 * call back into the FS and do all the end_io operations
	 */
	inode = cb->inode;
	cb->compressed_pages[0]->mapping = cb->inode->i_mapping;
	btrfs_record_physical_zoned(inode, cb->start, bio);
	btrfs_writepage_endio_finish_ordered(cb->compressed_pages[0],
	btrfs_writepage_endio_finish_ordered(BTRFS_I(inode), NULL,
			cb->start, cb->start + cb->len - 1,
			bio->bi_status == BLK_STS_OK);
	cb->compressed_pages[0]->mapping = NULL;

	end_compressed_writeback(inode, cb);
	/* note, our inode could be gone now */
@@ -387,10 +385,10 @@ static void end_compressed_bio_write(struct bio *bio)
 * the end io hooks.
 */
blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
				 unsigned long len, u64 disk_start,
				 unsigned long compressed_len,
				 unsigned int len, u64 disk_start,
				 unsigned int compressed_len,
				 struct page **compressed_pages,
				 unsigned long nr_pages,
				 unsigned int nr_pages,
				 unsigned int write_flags,
				 struct cgroup_subsys_state *blkcg_css)
{
@@ -427,24 +425,16 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
	bio->bi_end_io = end_compressed_bio_write;

	if (use_append) {
		struct extent_map *em;
		struct map_lookup *map;
		struct block_device *bdev;
		struct btrfs_device *device;

		em = btrfs_get_chunk_map(fs_info, disk_start, PAGE_SIZE);
		if (IS_ERR(em)) {
		device = btrfs_zoned_get_device(fs_info, disk_start, PAGE_SIZE);
		if (IS_ERR(device)) {
			kfree(cb);
			bio_put(bio);
			return BLK_STS_NOTSUPP;
		}

		map = em->map_lookup;
		/* We only support single profile for now */
		ASSERT(map->num_stripes == 1);
		bdev = map->stripes[0].dev->bdev;

		bio_set_dev(bio, bdev);
		free_extent_map(em);
		bio_set_dev(bio, device->bdev);
	}

	if (blkcg_css) {
@@ -515,7 +505,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
		}
		if (bytes_left < PAGE_SIZE) {
			btrfs_info(fs_info,
					"bytes left %lu compress len %lu nr %lu",
					"bytes left %lu compress len %u nr %u",
			       bytes_left, cb->compressed_len, cb->nr_pages);
		}
		bytes_left -= PAGE_SIZE;
@@ -677,9 +667,9 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct extent_map_tree *em_tree;
	struct compressed_bio *cb;
	unsigned long compressed_len;
	unsigned long nr_pages;
	unsigned long pg_index;
	unsigned int compressed_len;
	unsigned int nr_pages;
	unsigned int pg_index;
	struct page *page;
	struct bio *comp_bio;
	u64 cur_disk_byte = bio->bi_iter.bi_sector << 9;
@@ -1202,9 +1192,6 @@ static unsigned int btrfs_compress_set_level(int type, unsigned level)
 *
 * @total_out is an in/out parameter, must be set to the input length and will
 * be also used to return the total number of compressed bytes
 *
 * @max_out tells us the max number of bytes that we're allowed to
 * stuff into pages
 */
int btrfs_compress_pages(unsigned int type_level, struct address_space *mapping,
			 u64 start, struct page **pages,
@@ -1225,20 +1212,6 @@ int btrfs_compress_pages(unsigned int type_level, struct address_space *mapping,
	return ret;
}

/*
 * pages_in is an array of pages with compressed data.
 *
 * disk_start is the starting logical offset of this array in the file
 *
 * orig_bio contains the pages from the file that we want to decompress into
 *
 * srclen is the number of bytes in pages_in
 *
 * The basic idea is that we have a bio that was created by readpages.
 * The pages in the bio are for the uncompressed data, and they may not
 * be contiguous.  They all correspond to the range of bytes covered by
 * the compressed extent.
 */
static int btrfs_decompress_bio(struct compressed_bio *cb)
{
	struct list_head *workspace;
+13 −13
Original line number Diff line number Diff line
@@ -31,6 +31,9 @@ struct compressed_bio {
	/* number of bios pending for this compressed extent */
	refcount_t pending_bios;

	/* Number of compressed pages in the array */
	unsigned int nr_pages;

	/* the pages with the compressed data on them */
	struct page **compressed_pages;

@@ -40,20 +43,17 @@ struct compressed_bio {
	/* starting offset in the inode for our pages */
	u64 start;

	/* number of bytes in the inode we're working on */
	unsigned long len;

	/* number of bytes on disk */
	unsigned long compressed_len;
	/* Number of bytes in the inode we're working on */
	unsigned int len;

	/* the compression algorithm for this bio */
	int compress_type;
	/* Number of bytes on disk */
	unsigned int compressed_len;

	/* number of compressed pages in the array */
	unsigned long nr_pages;
	/* The compression algorithm for this bio */
	u8 compress_type;

	/* IO errors */
	int errors;
	u8 errors;
	int mirror_num;

	/* for reads, this is the bio we are copying the data into */
@@ -91,10 +91,10 @@ int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start,
			      struct bio *bio);

blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
				  unsigned long len, u64 disk_start,
				  unsigned long compressed_len,
				  unsigned int len, u64 disk_start,
				  unsigned int compressed_len,
				  struct page **compressed_pages,
				  unsigned long nr_pages,
				  unsigned int nr_pages,
				  unsigned int write_flags,
				  struct cgroup_subsys_state *blkcg_css);
blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
+1 −4
Original line number Diff line number Diff line
@@ -596,7 +596,6 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
		       trans->transid, fs_info->generation);

	if (!should_cow_block(trans, root, buf)) {
		trans->dirty = true;
		*cow_ret = buf;
		return 0;
	}
@@ -1788,10 +1787,8 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
			 * then we don't want to set the path blocking,
			 * so we test it here
			 */
			if (!should_cow_block(trans, root, b)) {
				trans->dirty = true;
			if (!should_cow_block(trans, root, b))
				goto cow_done;
			}

			/*
			 * must have write locks on this node and the
Loading