Commit f2f32f8a authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull btrfs fixes from David Sterba:
 "A batch of error handling fixes for resource leaks, fixes for nowait
  mode in combination with direct and buffered IO:

   - direct IO + dsync + nowait could miss a sync of the file after
     write, add handling for this combination

   - buffered IO + nowait should not fail with ENOSPC, only blocking IO
     could determine that

   - error handling fixes:
      - fix inode reserve space leak due to nowait buffered write
      - check the correct variable after allocation (direct IO submit)
      - fix inode list leak during backref walking
      - fix ulist freeing in self tests"

* tag 'for-6.1-rc3-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: fix inode reserve space leak due to nowait buffered write
  btrfs: fix nowait buffered write returning -ENOSPC
  btrfs: remove pointless and double ulist frees in error paths of qgroup tests
  btrfs: fix ulist leaks in error paths of qgroup self tests
  btrfs: fix inode list leak during backref walking at find_parent_nodes()
  btrfs: fix inode list leak during backref walking at resolve_indirect_refs()
  btrfs: fix lost file sync on direct IO write with nowait and dsync iocb
  btrfs: fix a memory allocation failure test in btrfs_submit_direct
parents 5fdf9c45 eb81b682
Loading
Loading
Loading
Loading
+34 −20
Original line number Diff line number Diff line
@@ -289,8 +289,10 @@ static void prelim_release(struct preftree *preftree)
	struct prelim_ref *ref, *next_ref;

	rbtree_postorder_for_each_entry_safe(ref, next_ref,
					     &preftree->root.rb_root, rbnode)
					     &preftree->root.rb_root, rbnode) {
		free_inode_elem_list(ref->inode_list);
		free_pref(ref);
	}

	preftree->root = RB_ROOT_CACHED;
	preftree->count = 0;
@@ -648,6 +650,18 @@ unode_aux_to_inode_list(struct ulist_node *node)
	return (struct extent_inode_elem *)(uintptr_t)node->aux;
}

static void free_leaf_list(struct ulist *ulist)
{
	struct ulist_node *node;
	struct ulist_iterator uiter;

	ULIST_ITER_INIT(&uiter);
	while ((node = ulist_next(ulist, &uiter)))
		free_inode_elem_list(unode_aux_to_inode_list(node));

	ulist_free(ulist);
}

/*
 * We maintain three separate rbtrees: one for direct refs, one for
 * indirect refs which have a key, and one for indirect refs which do not
@@ -762,7 +776,11 @@ static int resolve_indirect_refs(struct btrfs_fs_info *fs_info,
		cond_resched();
	}
out:
	ulist_free(parents);
	/*
	 * We may have inode lists attached to refs in the parents ulist, so we
	 * must free them before freeing the ulist and its refs.
	 */
	free_leaf_list(parents);
	return ret;
}

@@ -1368,6 +1386,12 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
				if (ret < 0)
					goto out;
				ref->inode_list = eie;
				/*
				 * We transferred the list ownership to the ref,
				 * so set to NULL to avoid a double free in case
				 * an error happens after this.
				 */
				eie = NULL;
			}
			ret = ulist_add_merge_ptr(refs, ref->parent,
						  ref->inode_list,
@@ -1393,6 +1417,14 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
				eie->next = ref->inode_list;
			}
			eie = NULL;
			/*
			 * We have transferred the inode list ownership from
			 * this ref to the ref we added to the 'refs' ulist.
			 * So set this ref's inode list to NULL to avoid
			 * use-after-free when our caller uses it or double
			 * frees in case an error happens before we return.
			 */
			ref->inode_list = NULL;
		}
		cond_resched();
	}
@@ -1409,24 +1441,6 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
	return ret;
}

static void free_leaf_list(struct ulist *blocks)
{
	struct ulist_node *node = NULL;
	struct extent_inode_elem *eie;
	struct ulist_iterator uiter;

	ULIST_ITER_INIT(&uiter);
	while ((node = ulist_next(blocks, &uiter))) {
		if (!node->aux)
			continue;
		eie = unode_aux_to_inode_list(node);
		free_inode_elem_list(eie);
		node->aux = 0;
	}

	ulist_free(blocks);
}

/*
 * Finds all leafs with a reference to the specified combination of bytenr and
 * offset. key_list_head will point to a list of corresponding keys (caller must
+4 −1
Original line number Diff line number Diff line
@@ -3462,7 +3462,10 @@ ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
			     const struct btrfs_ioctl_encoded_io_args *encoded);

ssize_t btrfs_dio_rw(struct kiocb *iocb, struct iov_iter *iter, size_t done_before);
ssize_t btrfs_dio_read(struct kiocb *iocb, struct iov_iter *iter,
		       size_t done_before);
struct iomap_dio *btrfs_dio_write(struct kiocb *iocb, struct iov_iter *iter,
				  size_t done_before);

extern const struct dentry_operations btrfs_dentry_operations;

+22 −7
Original line number Diff line number Diff line
@@ -1598,14 +1598,19 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
						write_bytes);
			else
				btrfs_check_nocow_unlock(BTRFS_I(inode));

			if (nowait && ret == -ENOSPC)
				ret = -EAGAIN;
			break;
		}

		release_bytes = reserve_bytes;
again:
		ret = balance_dirty_pages_ratelimited_flags(inode->i_mapping, bdp_flags);
		if (ret)
		if (ret) {
			btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes);
			break;
		}

		/*
		 * This is going to setup the pages array with the number of
@@ -1765,6 +1770,7 @@ static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
	loff_t endbyte;
	ssize_t err;
	unsigned int ilock_flags = 0;
	struct iomap_dio *dio;

	if (iocb->ki_flags & IOCB_NOWAIT)
		ilock_flags |= BTRFS_ILOCK_TRY;
@@ -1825,11 +1831,22 @@ static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
	 * So here we disable page faults in the iov_iter and then retry if we
	 * got -EFAULT, faulting in the pages before the retry.
	 */
again:
	from->nofault = true;
	err = btrfs_dio_rw(iocb, from, written);
	dio = btrfs_dio_write(iocb, from, written);
	from->nofault = false;

	/*
	 * iomap_dio_complete() will call btrfs_sync_file() if we have a dsync
	 * iocb, and that needs to lock the inode. So unlock it before calling
	 * iomap_dio_complete() to avoid a deadlock.
	 */
	btrfs_inode_unlock(inode, ilock_flags);

	if (IS_ERR_OR_NULL(dio))
		err = PTR_ERR_OR_ZERO(dio);
	else
		err = iomap_dio_complete(dio);

	/* No increment (+=) because iomap returns a cumulative value. */
	if (err > 0)
		written = err;
@@ -1855,12 +1872,10 @@ static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
		} else {
			fault_in_iov_iter_readable(from, left);
			prev_left = left;
			goto again;
			goto relock;
		}
	}

	btrfs_inode_unlock(inode, ilock_flags);

	/*
	 * If 'err' is -ENOTBLK or we have not written all data, then it means
	 * we must fallback to buffered IO.
@@ -4035,7 +4050,7 @@ static ssize_t btrfs_direct_read(struct kiocb *iocb, struct iov_iter *to)
	 */
	pagefault_disable();
	to->nofault = true;
	ret = btrfs_dio_rw(iocb, to, read);
	ret = btrfs_dio_read(iocb, to, read);
	to->nofault = false;
	pagefault_enable();

+12 −4
Original line number Diff line number Diff line
@@ -7980,7 +7980,7 @@ static void btrfs_submit_direct(const struct iomap_iter *iter,
		 */
		status = BLK_STS_RESOURCE;
		dip->csums = kcalloc(nr_sectors, fs_info->csum_size, GFP_NOFS);
		if (!dip)
		if (!dip->csums)
			goto out_err;

		status = btrfs_lookup_bio_sums(inode, dio_bio, dip->csums);
@@ -8078,13 +8078,21 @@ static const struct iomap_dio_ops btrfs_dio_ops = {
	.bio_set		= &btrfs_dio_bioset,
};

ssize_t btrfs_dio_rw(struct kiocb *iocb, struct iov_iter *iter, size_t done_before)
ssize_t btrfs_dio_read(struct kiocb *iocb, struct iov_iter *iter, size_t done_before)
{
	struct btrfs_dio_data data;

	return iomap_dio_rw(iocb, iter, &btrfs_dio_iomap_ops, &btrfs_dio_ops,
			    IOMAP_DIO_PARTIAL | IOMAP_DIO_NOSYNC,
			    &data, done_before);
			    IOMAP_DIO_PARTIAL, &data, done_before);
}

struct iomap_dio *btrfs_dio_write(struct kiocb *iocb, struct iov_iter *iter,
				  size_t done_before)
{
	struct btrfs_dio_data data;

	return __iomap_dio_rw(iocb, iter, &btrfs_dio_iomap_ops, &btrfs_dio_ops,
			    IOMAP_DIO_PARTIAL, &data, done_before);
}

static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
+19 −17
Original line number Diff line number Diff line
@@ -225,20 +225,20 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
	 */
	ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false);
	if (ret) {
		ulist_free(old_roots);
		test_err("couldn't find old roots: %d", ret);
		return ret;
	}

	ret = insert_normal_tree_ref(root, nodesize, nodesize, 0,
				BTRFS_FS_TREE_OBJECTID);
	if (ret)
	if (ret) {
		ulist_free(old_roots);
		return ret;
	}

	ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false);
	if (ret) {
		ulist_free(old_roots);
		ulist_free(new_roots);
		test_err("couldn't find old roots: %d", ret);
		return ret;
	}
@@ -250,29 +250,31 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
		return ret;
	}

	/* btrfs_qgroup_account_extent() always frees the ulists passed to it. */
	old_roots = NULL;
	new_roots = NULL;

	if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID,
				nodesize, nodesize)) {
		test_err("qgroup counts didn't match expected values");
		return -EINVAL;
	}
	old_roots = NULL;
	new_roots = NULL;

	ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false);
	if (ret) {
		ulist_free(old_roots);
		test_err("couldn't find old roots: %d", ret);
		return ret;
	}

	ret = remove_extent_item(root, nodesize, nodesize);
	if (ret)
	if (ret) {
		ulist_free(old_roots);
		return -EINVAL;
	}

	ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false);
	if (ret) {
		ulist_free(old_roots);
		ulist_free(new_roots);
		test_err("couldn't find old roots: %d", ret);
		return ret;
	}
@@ -322,20 +324,20 @@ static int test_multiple_refs(struct btrfs_root *root,

	ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false);
	if (ret) {
		ulist_free(old_roots);
		test_err("couldn't find old roots: %d", ret);
		return ret;
	}

	ret = insert_normal_tree_ref(root, nodesize, nodesize, 0,
				BTRFS_FS_TREE_OBJECTID);
	if (ret)
	if (ret) {
		ulist_free(old_roots);
		return ret;
	}

	ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false);
	if (ret) {
		ulist_free(old_roots);
		ulist_free(new_roots);
		test_err("couldn't find old roots: %d", ret);
		return ret;
	}
@@ -355,20 +357,20 @@ static int test_multiple_refs(struct btrfs_root *root,

	ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false);
	if (ret) {
		ulist_free(old_roots);
		test_err("couldn't find old roots: %d", ret);
		return ret;
	}

	ret = add_tree_ref(root, nodesize, nodesize, 0,
			BTRFS_FIRST_FREE_OBJECTID);
	if (ret)
	if (ret) {
		ulist_free(old_roots);
		return ret;
	}

	ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false);
	if (ret) {
		ulist_free(old_roots);
		ulist_free(new_roots);
		test_err("couldn't find old roots: %d", ret);
		return ret;
	}
@@ -394,20 +396,20 @@ static int test_multiple_refs(struct btrfs_root *root,

	ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false);
	if (ret) {
		ulist_free(old_roots);
		test_err("couldn't find old roots: %d", ret);
		return ret;
	}

	ret = remove_extent_ref(root, nodesize, nodesize, 0,
				BTRFS_FIRST_FREE_OBJECTID);
	if (ret)
	if (ret) {
		ulist_free(old_roots);
		return ret;
	}

	ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false);
	if (ret) {
		ulist_free(old_roots);
		ulist_free(new_roots);
		test_err("couldn't find old roots: %d", ret);
		return ret;
	}