Loading fs/btrfs/extent_io.c +6 −2 Original line number Diff line number Diff line Loading @@ -3070,7 +3070,11 @@ static int __do_readpage(struct extent_io_tree *tree, set_extent_uptodate(tree, cur, cur + iosize - 1, &cached, GFP_NOFS); unlock_extent_cached(tree, cur, cur + iosize - 1, if (parent_locked) free_extent_state(cached); else unlock_extent_cached(tree, cur, cur + iosize - 1, &cached, GFP_NOFS); cur = cur + iosize; pg_offset += iosize; Loading fs/btrfs/inode.c +68 −14 Original line number Diff line number Diff line Loading @@ -4216,6 +4216,47 @@ static int truncate_space_check(struct btrfs_trans_handle *trans, } static int truncate_inline_extent(struct inode *inode, struct btrfs_path *path, struct btrfs_key *found_key, const u64 item_end, const u64 new_size) { struct extent_buffer *leaf = path->nodes[0]; int slot = path->slots[0]; struct btrfs_file_extent_item *fi; u32 size = (u32)(new_size - found_key->offset); struct btrfs_root *root = BTRFS_I(inode)->root; fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); if (btrfs_file_extent_compression(leaf, fi) != BTRFS_COMPRESS_NONE) { loff_t offset = new_size; loff_t page_end = ALIGN(offset, PAGE_CACHE_SIZE); /* * Zero out the remaining of the last page of our inline extent, * instead of directly truncating our inline extent here - that * would be much more complex (decompressing all the data, then * compressing the truncated data, which might be bigger than * the size of the inline extent, resize the extent, etc). * We release the path because to get the page we might need to * read the extent item from disk (data not in the page cache). */ btrfs_release_path(path); return btrfs_truncate_page(inode, offset, page_end - offset, 0); } btrfs_set_file_extent_ram_bytes(leaf, fi, size); size = btrfs_file_extent_calc_inline_size(size); btrfs_truncate_item(root, path, size, 1); if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) inode_sub_bytes(inode, item_end + 1 - new_size); return 0; } /* * this can truncate away extent items, csum items and directory items. * It starts at a high offset and removes keys until it can't find Loading Loading @@ -4410,27 +4451,40 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, * special encodings */ if (!del_item && btrfs_file_extent_compression(leaf, fi) == 0 && btrfs_file_extent_encryption(leaf, fi) == 0 && btrfs_file_extent_other_encoding(leaf, fi) == 0) { u32 size = new_size - found_key.offset; if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) inode_sub_bytes(inode, item_end + 1 - new_size); /* * update the ram bytes to properly reflect * the new size of our item * Need to release path in order to truncate a * compressed extent. So delete any accumulated * extent items so far. */ btrfs_set_file_extent_ram_bytes(leaf, fi, size); size = btrfs_file_extent_calc_inline_size(size); btrfs_truncate_item(root, path, size, 1); if (btrfs_file_extent_compression(leaf, fi) != BTRFS_COMPRESS_NONE && pending_del_nr) { err = btrfs_del_items(trans, root, path, pending_del_slot, pending_del_nr); if (err) { btrfs_abort_transaction(trans, root, err); goto error; } pending_del_nr = 0; } err = truncate_inline_extent(inode, path, &found_key, item_end, new_size); if (err) { btrfs_abort_transaction(trans, root, err); goto error; } } else if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) { inode_sub_bytes(inode, item_end + 1 - found_key.offset); inode_sub_bytes(inode, item_end + 1 - new_size); } } delete: Loading fs/btrfs/ioctl.c +152 −43 Original line number Diff line number Diff line Loading @@ -3327,6 +3327,150 @@ static void clone_update_extent_map(struct inode *inode, &BTRFS_I(inode)->runtime_flags); } /* * Make sure we do not end up inserting an inline extent into a file that has * already other (non-inline) extents. If a file has an inline extent it can * not have any other extents and the (single) inline extent must start at the * file offset 0. Failing to respect these rules will lead to file corruption, * resulting in EIO errors on read/write operations, hitting BUG_ON's in mm, etc * * We can have extents that have been already written to disk or we can have * dirty ranges still in delalloc, in which case the extent maps and items are * created only when we run delalloc, and the delalloc ranges might fall outside * the range we are currently locking in the inode's io tree. So we check the * inode's i_size because of that (i_size updates are done while holding the * i_mutex, which we are holding here). * We also check to see if the inode has a size not greater than "datal" but has * extents beyond it, due to an fallocate with FALLOC_FL_KEEP_SIZE (and we are * protected against such concurrent fallocate calls by the i_mutex). * * If the file has no extents but a size greater than datal, do not allow the * copy because we would need turn the inline extent into a non-inline one (even * with NO_HOLES enabled). If we find our destination inode only has one inline * extent, just overwrite it with the source inline extent if its size is less * than the source extent's size, or we could copy the source inline extent's * data into the destination inode's inline extent if the later is greater then * the former. */ static int clone_copy_inline_extent(struct inode *src, struct inode *dst, struct btrfs_trans_handle *trans, struct btrfs_path *path, struct btrfs_key *new_key, const u64 drop_start, const u64 datal, const u64 skip, const u64 size, char *inline_data) { struct btrfs_root *root = BTRFS_I(dst)->root; const u64 aligned_end = ALIGN(new_key->offset + datal, root->sectorsize); int ret; struct btrfs_key key; if (new_key->offset > 0) return -EOPNOTSUPP; key.objectid = btrfs_ino(dst); key.type = BTRFS_EXTENT_DATA_KEY; key.offset = 0; ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) { return ret; } else if (ret > 0) { if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { ret = btrfs_next_leaf(root, path); if (ret < 0) return ret; else if (ret > 0) goto copy_inline_extent; } btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); if (key.objectid == btrfs_ino(dst) && key.type == BTRFS_EXTENT_DATA_KEY) { ASSERT(key.offset > 0); return -EOPNOTSUPP; } } else if (i_size_read(dst) <= datal) { struct btrfs_file_extent_item *ei; u64 ext_len; /* * If the file size is <= datal, make sure there are no other * extents following (can happen do to an fallocate call with * the flag FALLOC_FL_KEEP_SIZE). */ ei = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_file_extent_item); /* * If it's an inline extent, it can not have other extents * following it. */ if (btrfs_file_extent_type(path->nodes[0], ei) == BTRFS_FILE_EXTENT_INLINE) goto copy_inline_extent; ext_len = btrfs_file_extent_num_bytes(path->nodes[0], ei); if (ext_len > aligned_end) return -EOPNOTSUPP; ret = btrfs_next_item(root, path); if (ret < 0) { return ret; } else if (ret == 0) { btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); if (key.objectid == btrfs_ino(dst) && key.type == BTRFS_EXTENT_DATA_KEY) return -EOPNOTSUPP; } } copy_inline_extent: /* * We have no extent items, or we have an extent at offset 0 which may * or may not be inlined. All these cases are dealt the same way. */ if (i_size_read(dst) > datal) { /* * If the destination inode has an inline extent... * This would require copying the data from the source inline * extent into the beginning of the destination's inline extent. * But this is really complex, both extents can be compressed * or just one of them, which would require decompressing and * re-compressing data (which could increase the new compressed * size, not allowing the compressed data to fit anymore in an * inline extent). * So just don't support this case for now (it should be rare, * we are not really saving space when cloning inline extents). */ return -EOPNOTSUPP; } btrfs_release_path(path); ret = btrfs_drop_extents(trans, root, dst, drop_start, aligned_end, 1); if (ret) return ret; ret = btrfs_insert_empty_item(trans, root, path, new_key, size); if (ret) return ret; if (skip) { const u32 start = btrfs_file_extent_calc_inline_size(0); memmove(inline_data + start, inline_data + start + skip, datal); } write_extent_buffer(path->nodes[0], inline_data, btrfs_item_ptr_offset(path->nodes[0], path->slots[0]), size); inode_add_bytes(dst, datal); return 0; } /** * btrfs_clone() - clone a range from inode file to another * Loading Loading @@ -3593,21 +3737,6 @@ static int btrfs_clone(struct inode *src, struct inode *inode, } else if (type == BTRFS_FILE_EXTENT_INLINE) { u64 skip = 0; u64 trim = 0; u64 aligned_end = 0; /* * Don't copy an inline extent into an offset * greater than zero. Having an inline extent * at such an offset results in chaos as btrfs * isn't prepared for such cases. Just skip * this case for the same reasons as commented * at btrfs_ioctl_clone(). */ if (last_dest_end > 0) { ret = -EOPNOTSUPP; btrfs_end_transaction(trans, root); goto out; } if (off > key.offset) { skip = off - key.offset; Loading @@ -3625,42 +3754,22 @@ static int btrfs_clone(struct inode *src, struct inode *inode, size -= skip + trim; datal -= skip + trim; aligned_end = ALIGN(new_key.offset + datal, root->sectorsize); ret = btrfs_drop_extents(trans, root, inode, ret = clone_copy_inline_extent(src, inode, trans, path, &new_key, drop_start, aligned_end, 1); datal, skip, size, buf); if (ret) { if (ret != -EOPNOTSUPP) btrfs_abort_transaction(trans, root, ret); btrfs_end_transaction(trans, root); goto out; } ret = btrfs_insert_empty_item(trans, root, path, &new_key, size); if (ret) { btrfs_abort_transaction(trans, root, root, ret); btrfs_end_transaction(trans, root); goto out; } if (skip) { u32 start = btrfs_file_extent_calc_inline_size(0); memmove(buf+start, buf+start+skip, datal); } leaf = path->nodes[0]; slot = path->slots[0]; write_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf, slot), size); inode_add_bytes(inode, datal); } /* If we have an implicit hole (NO_HOLES feature). */ Loading fs/btrfs/send.c +181 −29 Original line number Diff line number Diff line Loading @@ -1434,16 +1434,6 @@ verbose_printk(KERN_DEBUG "btrfs: find_extent_clone: data_offset=%llu, " } if (cur_clone_root) { if (compressed != BTRFS_COMPRESS_NONE) { /* * Offsets given by iterate_extent_inodes() are relative * to the start of the extent, we need to add logical * offset from the file extent item. * (See why at backref.c:check_extent_in_eb()) */ cur_clone_root->offset += btrfs_file_extent_offset(eb, fi); } *found = cur_clone_root; ret = 0; } else { Loading Loading @@ -2353,8 +2343,14 @@ static int send_subvol_begin(struct send_ctx *sctx) } TLV_PUT_STRING(sctx, BTRFS_SEND_A_PATH, name, namelen); if (!btrfs_is_empty_uuid(sctx->send_root->root_item.received_uuid)) TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID, sctx->send_root->root_item.received_uuid); else TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID, sctx->send_root->root_item.uuid); TLV_PUT_U64(sctx, BTRFS_SEND_A_CTRANSID, le64_to_cpu(sctx->send_root->root_item.ctransid)); if (parent_root) { Loading Loading @@ -4687,6 +4683,171 @@ static int send_hole(struct send_ctx *sctx, u64 end) return ret; } static int send_extent_data(struct send_ctx *sctx, const u64 offset, const u64 len) { u64 sent = 0; if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA) return send_update_extent(sctx, offset, len); while (sent < len) { u64 size = len - sent; int ret; if (size > BTRFS_SEND_READ_SIZE) size = BTRFS_SEND_READ_SIZE; ret = send_write(sctx, offset + sent, size); if (ret < 0) return ret; if (!ret) break; sent += ret; } return 0; } static int clone_range(struct send_ctx *sctx, struct clone_root *clone_root, const u64 disk_byte, u64 data_offset, u64 offset, u64 len) { struct btrfs_path *path; struct btrfs_key key; int ret; path = alloc_path_for_send(); if (!path) return -ENOMEM; /* * We can't send a clone operation for the entire range if we find * extent items in the respective range in the source file that * refer to different extents or if we find holes. * So check for that and do a mix of clone and regular write/copy * operations if needed. * * Example: * * mkfs.btrfs -f /dev/sda * mount /dev/sda /mnt * xfs_io -f -c "pwrite -S 0xaa 0K 100K" /mnt/foo * cp --reflink=always /mnt/foo /mnt/bar * xfs_io -c "pwrite -S 0xbb 50K 50K" /mnt/foo * btrfs subvolume snapshot -r /mnt /mnt/snap * * If when we send the snapshot and we are processing file bar (which * has a higher inode number than foo) we blindly send a clone operation * for the [0, 100K[ range from foo to bar, the receiver ends up getting * a file bar that matches the content of file foo - iow, doesn't match * the content from bar in the original filesystem. */ key.objectid = clone_root->ino; key.type = BTRFS_EXTENT_DATA_KEY; key.offset = clone_root->offset; ret = btrfs_search_slot(NULL, clone_root->root, &key, path, 0, 0); if (ret < 0) goto out; if (ret > 0 && path->slots[0] > 0) { btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0] - 1); if (key.objectid == clone_root->ino && key.type == BTRFS_EXTENT_DATA_KEY) path->slots[0]--; } while (true) { struct extent_buffer *leaf = path->nodes[0]; int slot = path->slots[0]; struct btrfs_file_extent_item *ei; u8 type; u64 ext_len; u64 clone_len; if (slot >= btrfs_header_nritems(leaf)) { ret = btrfs_next_leaf(clone_root->root, path); if (ret < 0) goto out; else if (ret > 0) break; continue; } btrfs_item_key_to_cpu(leaf, &key, slot); /* * We might have an implicit trailing hole (NO_HOLES feature * enabled). We deal with it after leaving this loop. */ if (key.objectid != clone_root->ino || key.type != BTRFS_EXTENT_DATA_KEY) break; ei = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); type = btrfs_file_extent_type(leaf, ei); if (type == BTRFS_FILE_EXTENT_INLINE) { ext_len = btrfs_file_extent_inline_len(leaf, slot, ei); ext_len = PAGE_CACHE_ALIGN(ext_len); } else { ext_len = btrfs_file_extent_num_bytes(leaf, ei); } if (key.offset + ext_len <= clone_root->offset) goto next; if (key.offset > clone_root->offset) { /* Implicit hole, NO_HOLES feature enabled. */ u64 hole_len = key.offset - clone_root->offset; if (hole_len > len) hole_len = len; ret = send_extent_data(sctx, offset, hole_len); if (ret < 0) goto out; len -= hole_len; if (len == 0) break; offset += hole_len; clone_root->offset += hole_len; data_offset += hole_len; } if (key.offset >= clone_root->offset + len) break; clone_len = min_t(u64, ext_len, len); if (btrfs_file_extent_disk_bytenr(leaf, ei) == disk_byte && btrfs_file_extent_offset(leaf, ei) == data_offset) ret = send_clone(sctx, offset, clone_len, clone_root); else ret = send_extent_data(sctx, offset, clone_len); if (ret < 0) goto out; len -= clone_len; if (len == 0) break; offset += clone_len; clone_root->offset += clone_len; data_offset += clone_len; next: path->slots[0]++; } if (len > 0) ret = send_extent_data(sctx, offset, len); else ret = 0; out: btrfs_free_path(path); return ret; } static int send_write_or_clone(struct send_ctx *sctx, struct btrfs_path *path, struct btrfs_key *key, Loading @@ -4695,9 +4856,7 @@ static int send_write_or_clone(struct send_ctx *sctx, int ret = 0; struct btrfs_file_extent_item *ei; u64 offset = key->offset; u64 pos = 0; u64 len; u32 l; u8 type; u64 bs = sctx->send_root->fs_info->sb->s_blocksize; Loading Loading @@ -4725,22 +4884,15 @@ static int send_write_or_clone(struct send_ctx *sctx, } if (clone_root && IS_ALIGNED(offset + len, bs)) { ret = send_clone(sctx, offset, len, clone_root); } else if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA) { ret = send_update_extent(sctx, offset, len); u64 disk_byte; u64 data_offset; disk_byte = btrfs_file_extent_disk_bytenr(path->nodes[0], ei); data_offset = btrfs_file_extent_offset(path->nodes[0], ei); ret = clone_range(sctx, clone_root, disk_byte, data_offset, offset, len); } else { while (pos < len) { l = len - pos; if (l > BTRFS_SEND_READ_SIZE) l = BTRFS_SEND_READ_SIZE; ret = send_write(sctx, pos + offset, l); if (ret < 0) goto out; if (!ret) break; pos += ret; } ret = 0; ret = send_extent_data(sctx, offset, len); } out: return ret; Loading Loading
fs/btrfs/extent_io.c +6 −2 Original line number Diff line number Diff line Loading @@ -3070,7 +3070,11 @@ static int __do_readpage(struct extent_io_tree *tree, set_extent_uptodate(tree, cur, cur + iosize - 1, &cached, GFP_NOFS); unlock_extent_cached(tree, cur, cur + iosize - 1, if (parent_locked) free_extent_state(cached); else unlock_extent_cached(tree, cur, cur + iosize - 1, &cached, GFP_NOFS); cur = cur + iosize; pg_offset += iosize; Loading
fs/btrfs/inode.c +68 −14 Original line number Diff line number Diff line Loading @@ -4216,6 +4216,47 @@ static int truncate_space_check(struct btrfs_trans_handle *trans, } static int truncate_inline_extent(struct inode *inode, struct btrfs_path *path, struct btrfs_key *found_key, const u64 item_end, const u64 new_size) { struct extent_buffer *leaf = path->nodes[0]; int slot = path->slots[0]; struct btrfs_file_extent_item *fi; u32 size = (u32)(new_size - found_key->offset); struct btrfs_root *root = BTRFS_I(inode)->root; fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); if (btrfs_file_extent_compression(leaf, fi) != BTRFS_COMPRESS_NONE) { loff_t offset = new_size; loff_t page_end = ALIGN(offset, PAGE_CACHE_SIZE); /* * Zero out the remaining of the last page of our inline extent, * instead of directly truncating our inline extent here - that * would be much more complex (decompressing all the data, then * compressing the truncated data, which might be bigger than * the size of the inline extent, resize the extent, etc). * We release the path because to get the page we might need to * read the extent item from disk (data not in the page cache). */ btrfs_release_path(path); return btrfs_truncate_page(inode, offset, page_end - offset, 0); } btrfs_set_file_extent_ram_bytes(leaf, fi, size); size = btrfs_file_extent_calc_inline_size(size); btrfs_truncate_item(root, path, size, 1); if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) inode_sub_bytes(inode, item_end + 1 - new_size); return 0; } /* * this can truncate away extent items, csum items and directory items. * It starts at a high offset and removes keys until it can't find Loading Loading @@ -4410,27 +4451,40 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, * special encodings */ if (!del_item && btrfs_file_extent_compression(leaf, fi) == 0 && btrfs_file_extent_encryption(leaf, fi) == 0 && btrfs_file_extent_other_encoding(leaf, fi) == 0) { u32 size = new_size - found_key.offset; if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) inode_sub_bytes(inode, item_end + 1 - new_size); /* * update the ram bytes to properly reflect * the new size of our item * Need to release path in order to truncate a * compressed extent. So delete any accumulated * extent items so far. */ btrfs_set_file_extent_ram_bytes(leaf, fi, size); size = btrfs_file_extent_calc_inline_size(size); btrfs_truncate_item(root, path, size, 1); if (btrfs_file_extent_compression(leaf, fi) != BTRFS_COMPRESS_NONE && pending_del_nr) { err = btrfs_del_items(trans, root, path, pending_del_slot, pending_del_nr); if (err) { btrfs_abort_transaction(trans, root, err); goto error; } pending_del_nr = 0; } err = truncate_inline_extent(inode, path, &found_key, item_end, new_size); if (err) { btrfs_abort_transaction(trans, root, err); goto error; } } else if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) { inode_sub_bytes(inode, item_end + 1 - found_key.offset); inode_sub_bytes(inode, item_end + 1 - new_size); } } delete: Loading
fs/btrfs/ioctl.c +152 −43 Original line number Diff line number Diff line Loading @@ -3327,6 +3327,150 @@ static void clone_update_extent_map(struct inode *inode, &BTRFS_I(inode)->runtime_flags); } /* * Make sure we do not end up inserting an inline extent into a file that has * already other (non-inline) extents. If a file has an inline extent it can * not have any other extents and the (single) inline extent must start at the * file offset 0. Failing to respect these rules will lead to file corruption, * resulting in EIO errors on read/write operations, hitting BUG_ON's in mm, etc * * We can have extents that have been already written to disk or we can have * dirty ranges still in delalloc, in which case the extent maps and items are * created only when we run delalloc, and the delalloc ranges might fall outside * the range we are currently locking in the inode's io tree. So we check the * inode's i_size because of that (i_size updates are done while holding the * i_mutex, which we are holding here). * We also check to see if the inode has a size not greater than "datal" but has * extents beyond it, due to an fallocate with FALLOC_FL_KEEP_SIZE (and we are * protected against such concurrent fallocate calls by the i_mutex). * * If the file has no extents but a size greater than datal, do not allow the * copy because we would need turn the inline extent into a non-inline one (even * with NO_HOLES enabled). If we find our destination inode only has one inline * extent, just overwrite it with the source inline extent if its size is less * than the source extent's size, or we could copy the source inline extent's * data into the destination inode's inline extent if the later is greater then * the former. */ static int clone_copy_inline_extent(struct inode *src, struct inode *dst, struct btrfs_trans_handle *trans, struct btrfs_path *path, struct btrfs_key *new_key, const u64 drop_start, const u64 datal, const u64 skip, const u64 size, char *inline_data) { struct btrfs_root *root = BTRFS_I(dst)->root; const u64 aligned_end = ALIGN(new_key->offset + datal, root->sectorsize); int ret; struct btrfs_key key; if (new_key->offset > 0) return -EOPNOTSUPP; key.objectid = btrfs_ino(dst); key.type = BTRFS_EXTENT_DATA_KEY; key.offset = 0; ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) { return ret; } else if (ret > 0) { if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { ret = btrfs_next_leaf(root, path); if (ret < 0) return ret; else if (ret > 0) goto copy_inline_extent; } btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); if (key.objectid == btrfs_ino(dst) && key.type == BTRFS_EXTENT_DATA_KEY) { ASSERT(key.offset > 0); return -EOPNOTSUPP; } } else if (i_size_read(dst) <= datal) { struct btrfs_file_extent_item *ei; u64 ext_len; /* * If the file size is <= datal, make sure there are no other * extents following (can happen do to an fallocate call with * the flag FALLOC_FL_KEEP_SIZE). */ ei = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_file_extent_item); /* * If it's an inline extent, it can not have other extents * following it. */ if (btrfs_file_extent_type(path->nodes[0], ei) == BTRFS_FILE_EXTENT_INLINE) goto copy_inline_extent; ext_len = btrfs_file_extent_num_bytes(path->nodes[0], ei); if (ext_len > aligned_end) return -EOPNOTSUPP; ret = btrfs_next_item(root, path); if (ret < 0) { return ret; } else if (ret == 0) { btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); if (key.objectid == btrfs_ino(dst) && key.type == BTRFS_EXTENT_DATA_KEY) return -EOPNOTSUPP; } } copy_inline_extent: /* * We have no extent items, or we have an extent at offset 0 which may * or may not be inlined. All these cases are dealt the same way. */ if (i_size_read(dst) > datal) { /* * If the destination inode has an inline extent... * This would require copying the data from the source inline * extent into the beginning of the destination's inline extent. * But this is really complex, both extents can be compressed * or just one of them, which would require decompressing and * re-compressing data (which could increase the new compressed * size, not allowing the compressed data to fit anymore in an * inline extent). * So just don't support this case for now (it should be rare, * we are not really saving space when cloning inline extents). */ return -EOPNOTSUPP; } btrfs_release_path(path); ret = btrfs_drop_extents(trans, root, dst, drop_start, aligned_end, 1); if (ret) return ret; ret = btrfs_insert_empty_item(trans, root, path, new_key, size); if (ret) return ret; if (skip) { const u32 start = btrfs_file_extent_calc_inline_size(0); memmove(inline_data + start, inline_data + start + skip, datal); } write_extent_buffer(path->nodes[0], inline_data, btrfs_item_ptr_offset(path->nodes[0], path->slots[0]), size); inode_add_bytes(dst, datal); return 0; } /** * btrfs_clone() - clone a range from inode file to another * Loading Loading @@ -3593,21 +3737,6 @@ static int btrfs_clone(struct inode *src, struct inode *inode, } else if (type == BTRFS_FILE_EXTENT_INLINE) { u64 skip = 0; u64 trim = 0; u64 aligned_end = 0; /* * Don't copy an inline extent into an offset * greater than zero. Having an inline extent * at such an offset results in chaos as btrfs * isn't prepared for such cases. Just skip * this case for the same reasons as commented * at btrfs_ioctl_clone(). */ if (last_dest_end > 0) { ret = -EOPNOTSUPP; btrfs_end_transaction(trans, root); goto out; } if (off > key.offset) { skip = off - key.offset; Loading @@ -3625,42 +3754,22 @@ static int btrfs_clone(struct inode *src, struct inode *inode, size -= skip + trim; datal -= skip + trim; aligned_end = ALIGN(new_key.offset + datal, root->sectorsize); ret = btrfs_drop_extents(trans, root, inode, ret = clone_copy_inline_extent(src, inode, trans, path, &new_key, drop_start, aligned_end, 1); datal, skip, size, buf); if (ret) { if (ret != -EOPNOTSUPP) btrfs_abort_transaction(trans, root, ret); btrfs_end_transaction(trans, root); goto out; } ret = btrfs_insert_empty_item(trans, root, path, &new_key, size); if (ret) { btrfs_abort_transaction(trans, root, root, ret); btrfs_end_transaction(trans, root); goto out; } if (skip) { u32 start = btrfs_file_extent_calc_inline_size(0); memmove(buf+start, buf+start+skip, datal); } leaf = path->nodes[0]; slot = path->slots[0]; write_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf, slot), size); inode_add_bytes(inode, datal); } /* If we have an implicit hole (NO_HOLES feature). */ Loading
fs/btrfs/send.c +181 −29 Original line number Diff line number Diff line Loading @@ -1434,16 +1434,6 @@ verbose_printk(KERN_DEBUG "btrfs: find_extent_clone: data_offset=%llu, " } if (cur_clone_root) { if (compressed != BTRFS_COMPRESS_NONE) { /* * Offsets given by iterate_extent_inodes() are relative * to the start of the extent, we need to add logical * offset from the file extent item. * (See why at backref.c:check_extent_in_eb()) */ cur_clone_root->offset += btrfs_file_extent_offset(eb, fi); } *found = cur_clone_root; ret = 0; } else { Loading Loading @@ -2353,8 +2343,14 @@ static int send_subvol_begin(struct send_ctx *sctx) } TLV_PUT_STRING(sctx, BTRFS_SEND_A_PATH, name, namelen); if (!btrfs_is_empty_uuid(sctx->send_root->root_item.received_uuid)) TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID, sctx->send_root->root_item.received_uuid); else TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID, sctx->send_root->root_item.uuid); TLV_PUT_U64(sctx, BTRFS_SEND_A_CTRANSID, le64_to_cpu(sctx->send_root->root_item.ctransid)); if (parent_root) { Loading Loading @@ -4687,6 +4683,171 @@ static int send_hole(struct send_ctx *sctx, u64 end) return ret; } static int send_extent_data(struct send_ctx *sctx, const u64 offset, const u64 len) { u64 sent = 0; if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA) return send_update_extent(sctx, offset, len); while (sent < len) { u64 size = len - sent; int ret; if (size > BTRFS_SEND_READ_SIZE) size = BTRFS_SEND_READ_SIZE; ret = send_write(sctx, offset + sent, size); if (ret < 0) return ret; if (!ret) break; sent += ret; } return 0; } static int clone_range(struct send_ctx *sctx, struct clone_root *clone_root, const u64 disk_byte, u64 data_offset, u64 offset, u64 len) { struct btrfs_path *path; struct btrfs_key key; int ret; path = alloc_path_for_send(); if (!path) return -ENOMEM; /* * We can't send a clone operation for the entire range if we find * extent items in the respective range in the source file that * refer to different extents or if we find holes. * So check for that and do a mix of clone and regular write/copy * operations if needed. * * Example: * * mkfs.btrfs -f /dev/sda * mount /dev/sda /mnt * xfs_io -f -c "pwrite -S 0xaa 0K 100K" /mnt/foo * cp --reflink=always /mnt/foo /mnt/bar * xfs_io -c "pwrite -S 0xbb 50K 50K" /mnt/foo * btrfs subvolume snapshot -r /mnt /mnt/snap * * If when we send the snapshot and we are processing file bar (which * has a higher inode number than foo) we blindly send a clone operation * for the [0, 100K[ range from foo to bar, the receiver ends up getting * a file bar that matches the content of file foo - iow, doesn't match * the content from bar in the original filesystem. */ key.objectid = clone_root->ino; key.type = BTRFS_EXTENT_DATA_KEY; key.offset = clone_root->offset; ret = btrfs_search_slot(NULL, clone_root->root, &key, path, 0, 0); if (ret < 0) goto out; if (ret > 0 && path->slots[0] > 0) { btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0] - 1); if (key.objectid == clone_root->ino && key.type == BTRFS_EXTENT_DATA_KEY) path->slots[0]--; } while (true) { struct extent_buffer *leaf = path->nodes[0]; int slot = path->slots[0]; struct btrfs_file_extent_item *ei; u8 type; u64 ext_len; u64 clone_len; if (slot >= btrfs_header_nritems(leaf)) { ret = btrfs_next_leaf(clone_root->root, path); if (ret < 0) goto out; else if (ret > 0) break; continue; } btrfs_item_key_to_cpu(leaf, &key, slot); /* * We might have an implicit trailing hole (NO_HOLES feature * enabled). We deal with it after leaving this loop. */ if (key.objectid != clone_root->ino || key.type != BTRFS_EXTENT_DATA_KEY) break; ei = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); type = btrfs_file_extent_type(leaf, ei); if (type == BTRFS_FILE_EXTENT_INLINE) { ext_len = btrfs_file_extent_inline_len(leaf, slot, ei); ext_len = PAGE_CACHE_ALIGN(ext_len); } else { ext_len = btrfs_file_extent_num_bytes(leaf, ei); } if (key.offset + ext_len <= clone_root->offset) goto next; if (key.offset > clone_root->offset) { /* Implicit hole, NO_HOLES feature enabled. */ u64 hole_len = key.offset - clone_root->offset; if (hole_len > len) hole_len = len; ret = send_extent_data(sctx, offset, hole_len); if (ret < 0) goto out; len -= hole_len; if (len == 0) break; offset += hole_len; clone_root->offset += hole_len; data_offset += hole_len; } if (key.offset >= clone_root->offset + len) break; clone_len = min_t(u64, ext_len, len); if (btrfs_file_extent_disk_bytenr(leaf, ei) == disk_byte && btrfs_file_extent_offset(leaf, ei) == data_offset) ret = send_clone(sctx, offset, clone_len, clone_root); else ret = send_extent_data(sctx, offset, clone_len); if (ret < 0) goto out; len -= clone_len; if (len == 0) break; offset += clone_len; clone_root->offset += clone_len; data_offset += clone_len; next: path->slots[0]++; } if (len > 0) ret = send_extent_data(sctx, offset, len); else ret = 0; out: btrfs_free_path(path); return ret; } static int send_write_or_clone(struct send_ctx *sctx, struct btrfs_path *path, struct btrfs_key *key, Loading @@ -4695,9 +4856,7 @@ static int send_write_or_clone(struct send_ctx *sctx, int ret = 0; struct btrfs_file_extent_item *ei; u64 offset = key->offset; u64 pos = 0; u64 len; u32 l; u8 type; u64 bs = sctx->send_root->fs_info->sb->s_blocksize; Loading Loading @@ -4725,22 +4884,15 @@ static int send_write_or_clone(struct send_ctx *sctx, } if (clone_root && IS_ALIGNED(offset + len, bs)) { ret = send_clone(sctx, offset, len, clone_root); } else if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA) { ret = send_update_extent(sctx, offset, len); u64 disk_byte; u64 data_offset; disk_byte = btrfs_file_extent_disk_bytenr(path->nodes[0], ei); data_offset = btrfs_file_extent_offset(path->nodes[0], ei); ret = clone_range(sctx, clone_root, disk_byte, data_offset, offset, len); } else { while (pos < len) { l = len - pos; if (l > BTRFS_SEND_READ_SIZE) l = BTRFS_SEND_READ_SIZE; ret = send_write(sctx, pos + offset, l); if (ret < 0) goto out; if (!ret) break; pos += ret; } ret = 0; ret = send_extent_data(sctx, offset, len); } out: return ret; Loading