Commit 619104ba authored by Filipe Manana, committed by David Sterba
Browse files

btrfs: move common NOCOW checks against a file extent into a helper



Verifying if we can do a NOCOW write against a range fully or partially
covered by a file extent item requires verifying several constraints, and
these are currently duplicated at two different places: can_nocow_extent()
and run_delalloc_nocow().

This change moves those checks into a common helper function to avoid
duplication. It adds some comments and also preserves all existing
behaviour like for example can_nocow_extent() treating errors from the
calls to btrfs_cross_ref_exist() and csum_exist_in_range() as meaning
we can not NOCOW, instead of propagating the error back to the caller.
That specific behaviour is questionable but also reasonable to some
degree.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
parent 395cb57e
Loading
Loading
Loading
Loading
+215 −212
Original line number Diff line number Diff line
@@ -1617,6 +1617,141 @@ static int fallback_to_cow(struct btrfs_inode *inode, struct page *locked_page,
			      nr_written, 1);
}

/*
 * Arguments for can_nocow_file_extent(). The caller fills in the input fields
 * and, when the function returns 1, reads the results from the output fields.
 */
struct can_nocow_file_extent_args {
	/* Input fields. */

	/* Start file offset of the range we want to NOCOW. */
	u64 start;
	/* End file offset (inclusive) of the range we want to NOCOW. */
	u64 end;
	/*
	 * Set when the check is done from the writeback path. In that case, for
	 * inodes other than the free space inode, a non-zero
	 * root->snapshot_force_cow makes can_nocow_file_extent() report that
	 * we must COW.
	 */
	bool writeback_path;
	/*
	 * When false, an extent whose generation is <= the generation of the
	 * last snapshot of the root is assumed to be shared and is not NOCOWed
	 * (this shortcut is not applied to free space inodes). When true, that
	 * generation based shortcut is skipped.
	 */
	bool strict;
	/*
	 * Free the path passed to can_nocow_file_extent() once it's not needed
	 * anymore.
	 */
	bool free_path;

	/* Output fields. Only set when can_nocow_file_extent() returns 1. */

	/*
	 * Disk location matching the 'start' file offset, that is, the extent's
	 * disk_bytenr adjusted by the extent offset and by how far 'start' is
	 * into the file extent item.
	 */
	u64 disk_bytenr;
	/* The disk_num_bytes field of the file extent item. */
	u64 disk_num_bytes;
	/* The offset field of the file extent item. */
	u64 extent_offset;
	/* Number of bytes that can be written to in NOCOW mode. */
	u64 num_bytes;
};

/*
 * Check if we can NOCOW the file extent that the path points to.
 * This function may return with the path released, so the caller should check
 * if path->nodes[0] is NULL or not if it needs to use the path afterwards.
 * If args->free_path is set, the path is always freed before returning and
 * the caller must not use it (nor free it again) afterwards.
 *
 * @path:  Path whose nodes[0]/slots[0] point to the file extent item to check.
 * @key:   Key of that file extent item, key->offset is the file offset where
 *         the file extent item starts.
 * @inode: The inode the file extent item belongs to.
 * @args:  Input and output arguments, see struct can_nocow_file_extent_args.
 *         The output fields are only meaningful when 1 is returned.
 *
 * Returns: < 0 on error
 *            0 if we can not NOCOW
 *            1 if we can NOCOW
 */
static int can_nocow_file_extent(struct btrfs_path *path,
				 struct btrfs_key *key,
				 struct btrfs_inode *inode,
				 struct can_nocow_file_extent_args *args)
{
	const bool is_freespace_inode = btrfs_is_free_space_inode(inode);
	struct extent_buffer *leaf = path->nodes[0];
	struct btrfs_root *root = inode->root;
	struct btrfs_file_extent_item *fi;
	u64 extent_end;
	u8 extent_type;
	int can_nocow = 0;
	int ret = 0;

	fi = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item);
	extent_type = btrfs_file_extent_type(leaf, fi);

	if (extent_type == BTRFS_FILE_EXTENT_INLINE)
		goto out;

	/* Can't access these fields unless we know it's not an inline extent. */
	args->disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
	args->disk_num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
	args->extent_offset = btrfs_file_extent_offset(leaf, fi);

	/*
	 * Without the NODATACOW inode flag only non regular (e.g. prealloc)
	 * extents are candidates for NOCOW.
	 */
	if (!(inode->flags & BTRFS_INODE_NODATACOW) &&
	    extent_type == BTRFS_FILE_EXTENT_REG)
		goto out;

	/*
	 * If the extent was created before the generation where the last snapshot
	 * for its subvolume was created, then this implies the extent is shared,
	 * hence we must COW.
	 */
	if (!args->strict && !is_freespace_inode &&
	    btrfs_file_extent_generation(leaf, fi) <=
	    btrfs_root_last_snapshot(&root->root_item))
		goto out;

	/* An explicit hole, must COW. */
	if (args->disk_bytenr == 0)
		goto out;

	/* Compressed/encrypted/encoded extents must be COWed. */
	if (btrfs_file_extent_compression(leaf, fi) ||
	    btrfs_file_extent_encryption(leaf, fi) ||
	    btrfs_file_extent_other_encoding(leaf, fi))
		goto out;

	/* Must be computed before the path is released below. */
	extent_end = btrfs_file_extent_end(path);

	/*
	 * The following checks can be expensive, as they need to take other
	 * locks and do btree or rbtree searches, so release the path to avoid
	 * blocking other tasks for too long.
	 */
	btrfs_release_path(path);

	/*
	 * Forward the caller's strictness to the cross reference check instead
	 * of hard-coding false, so that callers asking for a strict check get
	 * it here too and not only for the generation shortcut above. Callers
	 * that leave args->strict unset keep getting the non strict check.
	 */
	ret = btrfs_cross_ref_exist(root, btrfs_ino(inode),
				    key->offset - args->extent_offset,
				    args->disk_bytenr, args->strict, path);
	WARN_ON_ONCE(ret > 0 && is_freespace_inode);
	if (ret != 0)
		goto out;

	if (args->free_path) {
		/*
		 * We don't need the path anymore, plus through the
		 * csum_exist_in_range() call below we will end up allocating
		 * another path. So free the path to avoid unnecessary extra
		 * memory usage.
		 */
		btrfs_free_path(path);
		path = NULL;
	}

	/* If there are pending snapshots for this root, we must COW. */
	if (args->writeback_path && !is_freespace_inode &&
	    atomic_read(&root->snapshot_force_cow))
		goto out;

	/*
	 * Adjust the disk location and length so that they cover only the part
	 * of the extent that overlaps the range [args->start, args->end].
	 */
	args->disk_bytenr += args->extent_offset;
	args->disk_bytenr += args->start - key->offset;
	args->num_bytes = min(args->end + 1, extent_end) - args->start;

	/*
	 * Force COW if csums exist in the range. This ensures that csums for a
	 * given extent are either valid or do not exist.
	 */
	ret = csum_exist_in_range(root->fs_info, args->disk_bytenr, args->num_bytes);
	WARN_ON_ONCE(ret > 0 && is_freespace_inode);
	if (ret != 0)
		goto out;

	can_nocow = 1;
 out:
	/* Path not freed yet (early exit), honour the caller's request. */
	if (args->free_path && path)
		btrfs_free_path(path);

	/* A positive ret from the checks above means "must COW", not an error. */
	return ret < 0 ? ret : can_nocow;
}

/*
 * when nocow writeback calls back.  This checks for snapshots or COW copies
 * of the extents that exist in the file, and COWs the file as required.
@@ -1637,11 +1772,9 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
	u64 cur_offset = start;
	int ret;
	bool check_prev = true;
	const bool freespace_inode = btrfs_is_free_space_inode(inode);
	u64 ino = btrfs_ino(inode);
	bool nocow = false;
	u64 disk_bytenr = 0;
	const bool force = inode->flags & BTRFS_INODE_NODATACOW;
	struct can_nocow_file_extent_args nocow_args = { 0 };

	path = btrfs_alloc_path();
	if (!path) {
@@ -1654,15 +1787,16 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
		return -ENOMEM;
	}

	nocow_args.end = end;
	nocow_args.writeback_path = true;

	while (1) {
		struct btrfs_key found_key;
		struct btrfs_file_extent_item *fi;
		struct extent_buffer *leaf;
		u64 extent_end;
		u64 extent_offset;
		u64 num_bytes = 0;
		u64 disk_num_bytes;
		u64 ram_bytes;
		u64 nocow_end;
		int extent_type;

		nocow = false;
@@ -1738,117 +1872,37 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		extent_type = btrfs_file_extent_type(leaf, fi);

		/* If this is triggered then we have a memory corruption. */
		ASSERT(extent_type < BTRFS_NR_FILE_EXTENT_TYPES);
		if (WARN_ON(extent_type >= BTRFS_NR_FILE_EXTENT_TYPES)) {
			ret = -EUCLEAN;
			goto error;
		}
		ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
		if (extent_type == BTRFS_FILE_EXTENT_REG ||
		    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
			disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
			extent_offset = btrfs_file_extent_offset(leaf, fi);
			extent_end = found_key.offset +
				btrfs_file_extent_num_bytes(leaf, fi);
			disk_num_bytes =
				btrfs_file_extent_disk_num_bytes(leaf, fi);
		extent_end = btrfs_file_extent_end(path);

		/*
			 * If the extent we got ends before our current offset,
			 * skip to the next extent.
		 * If the extent we got ends before our current offset, skip to
		 * the next extent.
		 */
		if (extent_end <= cur_offset) {
			path->slots[0]++;
			goto next_slot;
		}
			/* Skip holes */
			if (disk_bytenr == 0)
				goto out_check;
			/* Skip compressed/encrypted/encoded extents */
			if (btrfs_file_extent_compression(leaf, fi) ||
			    btrfs_file_extent_encryption(leaf, fi) ||
			    btrfs_file_extent_other_encoding(leaf, fi))
				goto out_check;
			/*
			 * If extent is created before the last volume's snapshot
			 * this implies the extent is shared, hence we can't do
			 * nocow. This is the same check as in
			 * btrfs_cross_ref_exist but without calling
			 * btrfs_search_slot.
			 */
			if (!freespace_inode &&
			    btrfs_file_extent_generation(leaf, fi) <=
			    btrfs_root_last_snapshot(&root->root_item))
				goto out_check;
			if (extent_type == BTRFS_FILE_EXTENT_REG && !force)
				goto out_check;

			/*
			 * The following checks can be expensive, as they need to
			 * take other locks and do btree or rbtree searches, so
			 * release the path to avoid blocking other tasks for too
			 * long.
			 */
			btrfs_release_path(path);

			ret = btrfs_cross_ref_exist(root, ino,
						    found_key.offset -
						    extent_offset, disk_bytenr,
						    false, path);
			if (ret) {
				/*
				 * ret could be -EIO if the above fails to read
				 * metadata.
				 */
		nocow_args.start = cur_offset;
		ret = can_nocow_file_extent(path, &found_key, inode, &nocow_args);
		if (ret < 0) {
			if (cow_start != (u64)-1)
				cur_offset = cow_start;
			goto error;
				}

				WARN_ON_ONCE(freespace_inode);
		} else if (ret == 0) {
			goto out_check;
		}
			disk_bytenr += extent_offset;
			disk_bytenr += cur_offset - found_key.offset;
			num_bytes = min(end + 1, extent_end) - cur_offset;
			/*
			 * If there are pending snapshots for this root, we
			 * fall into common COW way
			 */
			if (!freespace_inode && atomic_read(&root->snapshot_force_cow))
				goto out_check;
			/*
			 * force cow if csum exists in the range.
			 * this ensure that csum for a given extent are
			 * either valid or do not exist.
			 */
			ret = csum_exist_in_range(fs_info, disk_bytenr,
						  num_bytes);
			if (ret) {
				/*
				 * ret could be -EIO if the above fails to read
				 * metadata.
				 */
				if (ret < 0) {
					if (cow_start != (u64)-1)
						cur_offset = cow_start;
					goto error;
				}
				WARN_ON_ONCE(freespace_inode);
				goto out_check;
			}
			/* If the extent's block group is RO, we must COW */
			if (!btrfs_inc_nocow_writers(fs_info, disk_bytenr))
				goto out_check;

		ret = 0;
		if (btrfs_inc_nocow_writers(fs_info, nocow_args.disk_bytenr))
			nocow = true;
		} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
			extent_end = found_key.offset + ram_bytes;
			extent_end = ALIGN(extent_end, fs_info->sectorsize);
			/* Skip extents outside of our requested range */
			if (extent_end <= start) {
				path->slots[0]++;
				goto next_slot;
			}
		} else {
			/* If this triggers then we have a memory corruption */
			BUG();
		}
out_check:
		/*
		 * If nocow is false then record the beginning of the range
@@ -1880,15 +1934,17 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
			cow_start = (u64)-1;
		}

		nocow_end = cur_offset + nocow_args.num_bytes - 1;

		if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
			u64 orig_start = found_key.offset - extent_offset;
			u64 orig_start = found_key.offset - nocow_args.extent_offset;
			struct extent_map *em;

			em = create_io_em(inode, cur_offset, num_bytes,
			em = create_io_em(inode, cur_offset, nocow_args.num_bytes,
					  orig_start,
					  disk_bytenr, /* block_start */
					  num_bytes, /* block_len */
					  disk_num_bytes, /* orig_block_len */
					  nocow_args.disk_bytenr, /* block_start */
					  nocow_args.num_bytes, /* block_len */
					  nocow_args.disk_num_bytes, /* orig_block_len */
					  ram_bytes, BTRFS_COMPRESS_NONE,
					  BTRFS_ORDERED_PREALLOC);
			if (IS_ERR(em)) {
@@ -1897,20 +1953,23 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
			}
			free_extent_map(em);
			ret = btrfs_add_ordered_extent(inode,
					cur_offset, num_bytes, num_bytes,
					disk_bytenr, num_bytes, 0,
					cur_offset, nocow_args.num_bytes,
					nocow_args.num_bytes,
					nocow_args.disk_bytenr,
					nocow_args.num_bytes, 0,
					1 << BTRFS_ORDERED_PREALLOC,
					BTRFS_COMPRESS_NONE);
			if (ret) {
				btrfs_drop_extent_cache(inode, cur_offset,
							cur_offset + num_bytes - 1,
							0);
							nocow_end, 0);
				goto error;
			}
		} else {
			ret = btrfs_add_ordered_extent(inode, cur_offset,
						       num_bytes, num_bytes,
						       disk_bytenr, num_bytes,
						       nocow_args.num_bytes,
						       nocow_args.num_bytes,
						       nocow_args.disk_bytenr,
						       nocow_args.num_bytes,
						       0,
						       1 << BTRFS_ORDERED_NOCOW,
						       BTRFS_COMPRESS_NONE);
@@ -1919,7 +1978,7 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
		}

		if (nocow)
			btrfs_dec_nocow_writers(fs_info, disk_bytenr);
			btrfs_dec_nocow_writers(fs_info, nocow_args.disk_bytenr);
		nocow = false;

		if (btrfs_is_data_reloc_root(root))
@@ -1929,10 +1988,9 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
			 * from freeing metadata of created ordered extent.
			 */
			ret = btrfs_reloc_clone_csums(inode, cur_offset,
						      num_bytes);
						      nocow_args.num_bytes);

		extent_clear_unlock_delalloc(inode, cur_offset,
					     cur_offset + num_bytes - 1,
		extent_clear_unlock_delalloc(inode, cur_offset, nocow_end,
					     locked_page, EXTENT_LOCKED |
					     EXTENT_DELALLOC |
					     EXTENT_CLEAR_DATA_RESV,
@@ -1965,7 +2023,7 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,

error:
	if (nocow)
		btrfs_dec_nocow_writers(fs_info, disk_bytenr);
		btrfs_dec_nocow_writers(fs_info, nocow_args.disk_bytenr);

	if (ret && cur_offset < end)
		extent_clear_unlock_delalloc(inode, cur_offset, end,
@@ -7107,6 +7165,7 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
			      u64 *ram_bytes, bool strict)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct can_nocow_file_extent_args nocow_args = { 0 };
	struct btrfs_path *path;
	int ret;
	struct extent_buffer *leaf;
@@ -7114,13 +7173,7 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct btrfs_file_extent_item *fi;
	struct btrfs_key key;
	u64 disk_bytenr;
	u64 backref_offset;
	u64 extent_end;
	u64 num_bytes;
	int slot;
	int found_type;
	bool nocow = (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW);

	path = btrfs_alloc_path();
	if (!path)
@@ -7131,18 +7184,17 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
	if (ret < 0)
		goto out;

	slot = path->slots[0];
	if (ret == 1) {
		if (slot == 0) {
		if (path->slots[0] == 0) {
			/* can't find the item, must cow */
			ret = 0;
			goto out;
		}
		slot--;
		path->slots[0]--;
	}
	ret = 0;
	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, slot);
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	if (key.objectid != btrfs_ino(BTRFS_I(inode)) ||
	    key.type != BTRFS_EXTENT_DATA_KEY) {
		/* not our file or wrong item type, must cow */
@@ -7154,57 +7206,38 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
		goto out;
	}

	fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
	found_type = btrfs_file_extent_type(leaf, fi);
	if (found_type != BTRFS_FILE_EXTENT_REG &&
	    found_type != BTRFS_FILE_EXTENT_PREALLOC) {
		/* not a regular extent, must cow */
	if (btrfs_file_extent_end(path) <= offset)
		goto out;
	}

	if (!nocow && found_type == BTRFS_FILE_EXTENT_REG)
		goto out;

	extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
	if (extent_end <= offset)
		goto out;
	fi = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item);
	found_type = btrfs_file_extent_type(leaf, fi);
	if (ram_bytes)
		*ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);

	disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
	if (disk_bytenr == 0)
		goto out;
	nocow_args.start = offset;
	nocow_args.end = offset + *len - 1;
	nocow_args.strict = strict;
	nocow_args.free_path = true;

	if (btrfs_file_extent_compression(leaf, fi) ||
	    btrfs_file_extent_encryption(leaf, fi) ||
	    btrfs_file_extent_other_encoding(leaf, fi))
		goto out;
	ret = can_nocow_file_extent(path, &key, BTRFS_I(inode), &nocow_args);
	/* can_nocow_file_extent() has freed the path. */
	path = NULL;

	/*
	 * Do the same check as in btrfs_cross_ref_exist but without the
	 * unnecessary search.
	 */
	if (!strict &&
	    (btrfs_file_extent_generation(leaf, fi) <=
	     btrfs_root_last_snapshot(&root->root_item)))
	if (ret != 1) {
		/* Treat errors as not being able to NOCOW. */
		ret = 0;
		goto out;

	backref_offset = btrfs_file_extent_offset(leaf, fi);

	if (orig_start) {
		*orig_start = key.offset - backref_offset;
		*orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi);
		*ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
	}

	btrfs_release_path(path);

	if (btrfs_extent_readonly(fs_info, disk_bytenr))
	ret = 0;
	if (btrfs_extent_readonly(fs_info, nocow_args.disk_bytenr))
		goto out;

	num_bytes = min(offset + *len, extent_end) - offset;
	if (!nocow && found_type == BTRFS_FILE_EXTENT_PREALLOC) {
	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) &&
	    found_type == BTRFS_FILE_EXTENT_PREALLOC) {
		u64 range_end;

		range_end = round_up(offset + num_bytes,
		range_end = round_up(offset + nocow_args.num_bytes,
				     root->fs_info->sectorsize) - 1;
		ret = test_range_bit(io_tree, offset, range_end,
				     EXTENT_DELALLOC, 0, NULL);
@@ -7214,42 +7247,12 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
		}
	}

	/*
	 * look for other files referencing this extent, if we
	 * find any we must cow
	 */

	ret = btrfs_cross_ref_exist(root, btrfs_ino(BTRFS_I(inode)),
				    key.offset - backref_offset, disk_bytenr,
				    strict, path);
	if (ret) {
		ret = 0;
		goto out;
	}
	if (orig_start)
		*orig_start = key.offset - nocow_args.extent_offset;
	if (orig_block_len)
		*orig_block_len = nocow_args.disk_num_bytes;

	/*
	 * We don't need the path anymore, plus through the csum_exist_in_range()
	 * call below we will end up allocating another path. So free the path
	 * to avoid unnecessary extra memory usage.
	 */
	btrfs_free_path(path);
	path = NULL;

	/*
	 * adjust disk_bytenr and num_bytes to cover just the bytes
	 * in this extent we are about to write.  If there
	 * are any csums in that range we have to cow in order
	 * to keep the csums correct
	 */
	disk_bytenr += backref_offset;
	disk_bytenr += offset - key.offset;
	if (csum_exist_in_range(fs_info, disk_bytenr, num_bytes))
		goto out;
	/*
	 * all of the above have passed, it is safe to overwrite this extent
	 * without cow
	 */
	*len = num_bytes;
	*len = nocow_args.num_bytes;
	ret = 1;
out:
	btrfs_free_path(path);