Merge branch 'integration-4.4' of... (a408365c) · Commits · EulixOS / Software / Kernel

fs/btrfs/extent_io.c

+6 −2

Original line number	Diff line number	Diff line
		@@ -3070,7 +3070,11 @@ static int __do_readpage(struct extent_io_tree *tree,

		set_extent_uptodate(tree, cur, cur + iosize - 1,
		&cached, GFP_NOFS);
		unlock_extent_cached(tree, cur, cur + iosize - 1,
		if (parent_locked)
		free_extent_state(cached);
		else
		unlock_extent_cached(tree, cur,
		cur + iosize - 1,
		&cached, GFP_NOFS);
		cur = cur + iosize;
		pg_offset += iosize;

fs/btrfs/inode.c

+68 −14

Original line number	Diff line number	Diff line
		@@ -4216,6 +4216,47 @@ static int truncate_space_check(struct btrfs_trans_handle *trans,

		}

		/*
		 * Shrink the inline file extent item found at path->slots[0] of
		 * path->nodes[0] so that the file ends at new_size.
		 *
		 * @inode:     inode being truncated
		 * @path:      path pointing at the inline extent item; released here
		 *             when the extent is compressed
		 * @found_key: key of the inline extent item
		 * @item_end:  last file offset covered by the item
		 * @new_size:  size the file is being truncated to
		 *
		 * Returns 0 after resizing an uncompressed extent, otherwise whatever
		 * btrfs_truncate_page() returns.
		 */
		static int truncate_inline_extent(struct inode *inode,
						  struct btrfs_path *path,
						  struct btrfs_key *found_key,
						  const u64 item_end,
						  const u64 new_size)
		{
			struct btrfs_root *root = BTRFS_I(inode)->root;
			struct extent_buffer *eb = path->nodes[0];
			struct btrfs_file_extent_item *extent;
			loff_t zero_from;
			loff_t zero_to;
			u32 inline_len;

			extent = btrfs_item_ptr(eb, path->slots[0],
						struct btrfs_file_extent_item);

			if (btrfs_file_extent_compression(eb, extent) == BTRFS_COMPRESS_NONE) {
				/*
				 * Plain data: record the new ram bytes, then cut the item
				 * down to the matching on-disk inline size.
				 */
				inline_len = (u32)(new_size - found_key->offset);
				btrfs_set_file_extent_ram_bytes(eb, extent, inline_len);
				inline_len = btrfs_file_extent_calc_inline_size(inline_len);
				btrfs_truncate_item(root, path, inline_len, 1);

				if (test_bit(BTRFS_ROOT_REF_COWS, &root->state))
					inode_sub_bytes(inode, item_end + 1 - new_size);

				return 0;
			}

			/*
			 * Compressed data: truncating the item itself would mean
			 * decompressing everything, recompressing the shorter data (which
			 * may end up larger than the inline extent), resizing the item,
			 * and so on. Zero the tail of the extent's last page instead.
			 * The path is dropped first because fetching the page may require
			 * reading the extent item from disk when the data is not in the
			 * page cache.
			 */
			zero_from = new_size;
			zero_to = ALIGN(zero_from, PAGE_CACHE_SIZE);
			btrfs_release_path(path);

			return btrfs_truncate_page(inode, zero_from, zero_to - zero_from, 0);
		}

		/*
		* this can truncate away extent items, csum items and directory items.
		* It starts at a high offset and removes keys until it can't find
		@@ -4410,27 +4451,40 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
		* special encodings
		*/
		if (!del_item &&
		btrfs_file_extent_compression(leaf, fi) == 0 &&
		btrfs_file_extent_encryption(leaf, fi) == 0 &&
		btrfs_file_extent_other_encoding(leaf, fi) == 0) {
		u32 size = new_size - found_key.offset;

		if (test_bit(BTRFS_ROOT_REF_COWS, &root->state))
		inode_sub_bytes(inode, item_end + 1 -
		new_size);

		/*
		* update the ram bytes to properly reflect
		* the new size of our item
		* Need to release path in order to truncate a
		* compressed extent. So delete any accumulated
		* extent items so far.
		*/
		btrfs_set_file_extent_ram_bytes(leaf, fi, size);
		size =
		btrfs_file_extent_calc_inline_size(size);
		btrfs_truncate_item(root, path, size, 1);
		if (btrfs_file_extent_compression(leaf, fi) !=
		BTRFS_COMPRESS_NONE && pending_del_nr) {
		err = btrfs_del_items(trans, root, path,
		pending_del_slot,
		pending_del_nr);
		if (err) {
		btrfs_abort_transaction(trans,
		root,
		err);
		goto error;
		}
		pending_del_nr = 0;
		}

		err = truncate_inline_extent(inode, path,
		&found_key,
		item_end,
		new_size);
		if (err) {
		btrfs_abort_transaction(trans,
		root, err);
		goto error;
		}
		} else if (test_bit(BTRFS_ROOT_REF_COWS,
		&root->state)) {
		inode_sub_bytes(inode, item_end + 1 -
		found_key.offset);
		inode_sub_bytes(inode, item_end + 1 - new_size);
		}
		}
		delete:

fs/btrfs/ioctl.c

+152 −43

Original line number	Diff line number	Diff line
		@@ -3327,6 +3327,150 @@ static void clone_update_extent_map(struct inode *inode,
		&BTRFS_I(inode)->runtime_flags);
		}

		/*
		* Make sure we do not end up inserting an inline extent into a file that has
		* already other (non-inline) extents. If a file has an inline extent it can
		* not have any other extents and the (single) inline extent must start at the
		* file offset 0. Failing to respect these rules will lead to file corruption,
		* resulting in EIO errors on read/write operations, hitting BUG_ON's in mm, etc
		*
		* We can have extents that have been already written to disk or we can have
		* dirty ranges still in delalloc, in which case the extent maps and items are
		* created only when we run delalloc, and the delalloc ranges might fall outside
		* the range we are currently locking in the inode's io tree. So we check the
		* inode's i_size because of that (i_size updates are done while holding the
		* i_mutex, which we are holding here).
		* We also check to see if the inode has a size not greater than "datal" but has
		* extents beyond it, due to an fallocate with FALLOC_FL_KEEP_SIZE (and we are
		* protected against such concurrent fallocate calls by the i_mutex).
		*
		* If the file has no extents but a size greater than datal, do not allow the
		* copy because we would need turn the inline extent into a non-inline one (even
		* with NO_HOLES enabled). If we find our destination inode only has one inline
		* extent, just overwrite it with the source inline extent if its size is less
		* than the source extent's size, or we could copy the source inline extent's
		* data into the destination inode's inline extent if the later is greater then
		* the former.
		*/
		static int clone_copy_inline_extent(struct inode *src,
		struct inode *dst,
		struct btrfs_trans_handle *trans,
		struct btrfs_path *path,
		struct btrfs_key *new_key,
		const u64 drop_start,
		const u64 datal,
		const u64 skip,
		const u64 size,
		char *inline_data)
		{
		struct btrfs_root *root = BTRFS_I(dst)->root;
		const u64 aligned_end = ALIGN(new_key->offset + datal,
		root->sectorsize);
		int ret;
		struct btrfs_key key;

		if (new_key->offset > 0)
		return -EOPNOTSUPP;

		key.objectid = btrfs_ino(dst);
		key.type = BTRFS_EXTENT_DATA_KEY;
		key.offset = 0;
		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		if (ret < 0) {
		return ret;
		} else if (ret > 0) {
		if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
		ret = btrfs_next_leaf(root, path);
		if (ret < 0)
		return ret;
		else if (ret > 0)
		goto copy_inline_extent;
		}
		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
		if (key.objectid == btrfs_ino(dst) &&
		key.type == BTRFS_EXTENT_DATA_KEY) {
		ASSERT(key.offset > 0);
		return -EOPNOTSUPP;
		}
		} else if (i_size_read(dst) <= datal) {
		struct btrfs_file_extent_item *ei;
		u64 ext_len;

		/*
		* If the file size is <= datal, make sure there are no other
		* extents following (can happen do to an fallocate call with
		* the flag FALLOC_FL_KEEP_SIZE).
		*/
		ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
		struct btrfs_file_extent_item);
		/*
		* If it's an inline extent, it can not have other extents
		* following it.
		*/
		if (btrfs_file_extent_type(path->nodes[0], ei) ==
		BTRFS_FILE_EXTENT_INLINE)
		goto copy_inline_extent;

		ext_len = btrfs_file_extent_num_bytes(path->nodes[0], ei);
		if (ext_len > aligned_end)
		return -EOPNOTSUPP;

		ret = btrfs_next_item(root, path);
		if (ret < 0) {
		return ret;
		} else if (ret == 0) {
		btrfs_item_key_to_cpu(path->nodes[0], &key,
		path->slots[0]);
		if (key.objectid == btrfs_ino(dst) &&
		key.type == BTRFS_EXTENT_DATA_KEY)
		return -EOPNOTSUPP;
		}
		}

		copy_inline_extent:
		/*
		* We have no extent items, or we have an extent at offset 0 which may
		* or may not be inlined. All these cases are dealt the same way.
		*/
		if (i_size_read(dst) > datal) {
		/*
		* If the destination inode has an inline extent...
		* This would require copying the data from the source inline
		* extent into the beginning of the destination's inline extent.
		* But this is really complex, both extents can be compressed
		* or just one of them, which would require decompressing and
		* re-compressing data (which could increase the new compressed
		* size, not allowing the compressed data to fit anymore in an
		* inline extent).
		* So just don't support this case for now (it should be rare,
		* we are not really saving space when cloning inline extents).
		*/
		return -EOPNOTSUPP;
		}

		btrfs_release_path(path);
		ret = btrfs_drop_extents(trans, root, dst, drop_start, aligned_end, 1);
		if (ret)
		return ret;
		ret = btrfs_insert_empty_item(trans, root, path, new_key, size);
		if (ret)
		return ret;

		if (skip) {
		const u32 start = btrfs_file_extent_calc_inline_size(0);

		memmove(inline_data + start, inline_data + start + skip, datal);
		}

		write_extent_buffer(path->nodes[0], inline_data,
		btrfs_item_ptr_offset(path->nodes[0],
		path->slots[0]),
		size);
		inode_add_bytes(dst, datal);

		return 0;
		}

		/**
		* btrfs_clone() - clone a range from inode file to another
		*
		@@ -3593,21 +3737,6 @@ static int btrfs_clone(struct inode src, struct inode inode,
		} else if (type == BTRFS_FILE_EXTENT_INLINE) {
		u64 skip = 0;
		u64 trim = 0;
		u64 aligned_end = 0;

		/*
		* Don't copy an inline extent into an offset
		* greater than zero. Having an inline extent
		* at such an offset results in chaos as btrfs
		* isn't prepared for such cases. Just skip
		* this case for the same reasons as commented
		* at btrfs_ioctl_clone().
		*/
		if (last_dest_end > 0) {
		ret = -EOPNOTSUPP;
		btrfs_end_transaction(trans, root);
		goto out;
		}

		if (off > key.offset) {
		skip = off - key.offset;
		@@ -3625,42 +3754,22 @@ static int btrfs_clone(struct inode src, struct inode inode,
		size -= skip + trim;
		datal -= skip + trim;

		aligned_end = ALIGN(new_key.offset + datal,
		root->sectorsize);
		ret = btrfs_drop_extents(trans, root, inode,
		ret = clone_copy_inline_extent(src, inode,
		trans, path,
		&new_key,
		drop_start,
		aligned_end,
		1);
		datal,
		skip, size, buf);
		if (ret) {
		if (ret != -EOPNOTSUPP)
		btrfs_abort_transaction(trans,
		root, ret);
		btrfs_end_transaction(trans, root);
		goto out;
		}

		ret = btrfs_insert_empty_item(trans, root, path,
		&new_key, size);
		if (ret) {
		btrfs_abort_transaction(trans, root,
		root,
		ret);
		btrfs_end_transaction(trans, root);
		goto out;
		}

		if (skip) {
		u32 start =
		btrfs_file_extent_calc_inline_size(0);
		memmove(buf+start, buf+start+skip,
		datal);
		}

		leaf = path->nodes[0];
		slot = path->slots[0];
		write_extent_buffer(leaf, buf,
		btrfs_item_ptr_offset(leaf, slot),
		size);
		inode_add_bytes(inode, datal);
		}

		/* If we have an implicit hole (NO_HOLES feature). */

fs/btrfs/send.c

+181 −29

Original line number	Diff line number	Diff line
		@@ -1434,16 +1434,6 @@ verbose_printk(KERN_DEBUG "btrfs: find_extent_clone: data_offset=%llu, "
		}

		if (cur_clone_root) {
		if (compressed != BTRFS_COMPRESS_NONE) {
		/*
		* Offsets given by iterate_extent_inodes() are relative
		* to the start of the extent, we need to add logical
		* offset from the file extent item.
		* (See why at backref.c:check_extent_in_eb())
		*/
		cur_clone_root->offset += btrfs_file_extent_offset(eb,
		fi);
		}
		*found = cur_clone_root;
		ret = 0;
		} else {
		@@ -2353,8 +2343,14 @@ static int send_subvol_begin(struct send_ctx *sctx)
		}

		TLV_PUT_STRING(sctx, BTRFS_SEND_A_PATH, name, namelen);

		if (!btrfs_is_empty_uuid(sctx->send_root->root_item.received_uuid))
		TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID,
		sctx->send_root->root_item.received_uuid);
		else
		TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID,
		sctx->send_root->root_item.uuid);

		TLV_PUT_U64(sctx, BTRFS_SEND_A_CTRANSID,
		le64_to_cpu(sctx->send_root->root_item.ctransid));
		if (parent_root) {
		@@ -4687,6 +4683,171 @@ static int send_hole(struct send_ctx *sctx, u64 end)
		return ret;
		}

		/*
		 * Emit the file range [offset, offset + len) into the send stream.
		 *
		 * When BTRFS_SEND_FLAG_NO_FILE_DATA is set in sctx->flags, the range is
		 * handed to send_update_extent() and its result is returned. Otherwise
		 * the range is passed to send_write() in pieces of at most
		 * BTRFS_SEND_READ_SIZE bytes, advancing by whatever each call returns.
		 *
		 * Returns 0 once the loop finishes (including when send_write() returns
		 * 0 before len bytes were covered), or the negative value returned by
		 * send_write().
		 */
		static int send_extent_data(struct send_ctx *sctx,
					    const u64 offset,
					    const u64 len)
		{
			u64 done;
			int written;

			if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA)
				return send_update_extent(sctx, offset, len);

			for (done = 0; done < len; done += written) {
				const u64 remaining = len - done;
				const u64 chunk = (remaining > BTRFS_SEND_READ_SIZE) ?
						  BTRFS_SEND_READ_SIZE : remaining;

				written = send_write(sctx, offset + done, chunk);
				if (written < 0)
					return written;
				if (written == 0)
					break;
			}

			return 0;
		}

		/*
		 * Send the range [offset, offset + len) of the current file as a mix of
		 * clone operations and regular data, depending on what the clone source
		 * actually contains.
		 *
		 * The extent items of inode clone_root->ino in clone_root->root are walked
		 * starting at clone_root->offset. A piece is sent with send_clone() only
		 * when the source extent item has disk bytenr equal to disk_byte and
		 * extent offset equal to data_offset; every other piece, including holes
		 * between items and whatever remains after the last item, is sent with
		 * send_extent_data().
		 *
		 * @sctx:        send context
		 * @clone_root:  clone source; its ->offset field is advanced as pieces
		 *               are processed
		 * @disk_byte:   disk bytenr of the extent being sent
		 * @data_offset: offset into that extent; advanced locally per piece
		 * @offset:      file offset in the file being sent
		 * @len:         number of bytes to send
		 *
		 * Returns 0 on success, -ENOMEM if the path can not be allocated, or the
		 * negative value returned by one of the helpers.
		 */
		static int clone_range(struct send_ctx *sctx,
				       struct clone_root *clone_root,
				       const u64 disk_byte,
				       u64 data_offset,
				       u64 offset,
				       u64 len)
		{
			struct btrfs_path *path;
			struct btrfs_key key;
			int ret;

			path = alloc_path_for_send();
			if (!path)
				return -ENOMEM;

			/*
			 * We can't send a clone operation for the entire range if we find
			 * extent items in the respective range in the source file that
			 * refer to different extents or if we find holes.
			 * So check for that and do a mix of clone and regular write/copy
			 * operations if needed.
			 *
			 * Example:
			 *
			 * mkfs.btrfs -f /dev/sda
			 * mount /dev/sda /mnt
			 * xfs_io -f -c "pwrite -S 0xaa 0K 100K" /mnt/foo
			 * cp --reflink=always /mnt/foo /mnt/bar
			 * xfs_io -c "pwrite -S 0xbb 50K 50K" /mnt/foo
			 * btrfs subvolume snapshot -r /mnt /mnt/snap
			 *
			 * If when we send the snapshot and we are processing file bar (which
			 * has a higher inode number than foo) we blindly send a clone operation
			 * for the [0, 100K[ range from foo to bar, the receiver ends up getting
			 * a file bar that matches the content of file foo - iow, doesn't match
			 * the content from bar in the original filesystem.
			 */
			key.objectid = clone_root->ino;
			key.type = BTRFS_EXTENT_DATA_KEY;
			key.offset = clone_root->offset;
			ret = btrfs_search_slot(NULL, clone_root->root, &key, path, 0, 0);
			if (ret < 0)
				goto out;
			/*
			 * No exact match: step back one slot if the previous item is an
			 * extent item of the same inode, since it may cover our offset.
			 */
			if (ret > 0 && path->slots[0] > 0) {
				btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0] - 1);
				if (key.objectid == clone_root->ino &&
				    key.type == BTRFS_EXTENT_DATA_KEY)
					path->slots[0]--;
			}

			while (true) {
				struct extent_buffer *leaf = path->nodes[0];
				int slot = path->slots[0];
				struct btrfs_file_extent_item *ei;
				u8 type;
				u64 ext_len;
				u64 clone_len;

				if (slot >= btrfs_header_nritems(leaf)) {
					ret = btrfs_next_leaf(clone_root->root, path);
					if (ret < 0)
						goto out;
					else if (ret > 0)
						break;
					continue;
				}

				btrfs_item_key_to_cpu(leaf, &key, slot);

				/*
				 * We might have an implicit trailing hole (NO_HOLES feature
				 * enabled). We deal with it after leaving this loop.
				 */
				if (key.objectid != clone_root->ino ||
				    key.type != BTRFS_EXTENT_DATA_KEY)
					break;

				ei = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
				type = btrfs_file_extent_type(leaf, ei);
				if (type == BTRFS_FILE_EXTENT_INLINE) {
					ext_len = btrfs_file_extent_inline_len(leaf, slot, ei);
					ext_len = PAGE_CACHE_ALIGN(ext_len);
				} else {
					ext_len = btrfs_file_extent_num_bytes(leaf, ei);
				}

				/* Item ends at or before our current source offset: skip it. */
				if (key.offset + ext_len <= clone_root->offset)
					goto next;

				if (key.offset > clone_root->offset) {
					/* Implicit hole, NO_HOLES feature enabled. */
					u64 hole_len = key.offset - clone_root->offset;

					if (hole_len > len)
						hole_len = len;
					ret = send_extent_data(sctx, offset, hole_len);
					if (ret < 0)
						goto out;

					len -= hole_len;
					if (len == 0)
						break;
					offset += hole_len;
					clone_root->offset += hole_len;
					data_offset += hole_len;
				}

				/* Item starts at or past the end of the range: done here. */
				if (key.offset >= clone_root->offset + len)
					break;

				clone_len = min_t(u64, ext_len, len);

				/* Clone only when the source item points at the same data. */
				if (btrfs_file_extent_disk_bytenr(leaf, ei) == disk_byte &&
				    btrfs_file_extent_offset(leaf, ei) == data_offset)
					ret = send_clone(sctx, offset, clone_len, clone_root);
				else
					ret = send_extent_data(sctx, offset, clone_len);

				if (ret < 0)
					goto out;

				len -= clone_len;
				if (len == 0)
					break;
				offset += clone_len;
				clone_root->offset += clone_len;
				data_offset += clone_len;
		next:
				path->slots[0]++;
			}

			/* Whatever is left was not covered by any source extent item. */
			if (len > 0)
				ret = send_extent_data(sctx, offset, len);
			else
				ret = 0;
		out:
			btrfs_free_path(path);
			return ret;
		}

		static int send_write_or_clone(struct send_ctx *sctx,
		struct btrfs_path *path,
		struct btrfs_key *key,
		@@ -4695,9 +4856,7 @@ static int send_write_or_clone(struct send_ctx *sctx,
		int ret = 0;
		struct btrfs_file_extent_item *ei;
		u64 offset = key->offset;
		u64 pos = 0;
		u64 len;
		u32 l;
		u8 type;
		u64 bs = sctx->send_root->fs_info->sb->s_blocksize;

		@@ -4725,22 +4884,15 @@ static int send_write_or_clone(struct send_ctx *sctx,
		}

		if (clone_root && IS_ALIGNED(offset + len, bs)) {
		ret = send_clone(sctx, offset, len, clone_root);
		} else if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA) {
		ret = send_update_extent(sctx, offset, len);
		u64 disk_byte;
		u64 data_offset;

		disk_byte = btrfs_file_extent_disk_bytenr(path->nodes[0], ei);
		data_offset = btrfs_file_extent_offset(path->nodes[0], ei);
		ret = clone_range(sctx, clone_root, disk_byte, data_offset,
		offset, len);
		} else {
		while (pos < len) {
		l = len - pos;
		if (l > BTRFS_SEND_READ_SIZE)
		l = BTRFS_SEND_READ_SIZE;
		ret = send_write(sctx, pos + offset, l);
		if (ret < 0)
		goto out;
		if (!ret)
		break;
		pos += ret;
		}
		ret = 0;
		ret = send_extent_data(sctx, offset, len);
		}
		out:
		return ret;