Commit 3d697a4a authored by Eric Biggers's avatar Eric Biggers Committed by Jaegeuk Kim
Browse files

f2fs: rework write preallocations



f2fs_write_begin() assumes that all blocks were preallocated by
default unless FI_NO_PREALLOC is explicitly set.  This invites data
corruption, as there are cases in which not all blocks are preallocated.
Commit 47501f87 ("f2fs: preallocate DIO blocks when forcing
buffered_io") fixed one case, but there are others remaining.

Fix up this logic by replacing this flag with FI_PREALLOCATED_ALL, which
only gets set if all blocks for the current write were preallocated.

Also clean up f2fs_preallocate_blocks(), move it to file.c, and make it
handle some of the logic that was previously in write_iter() directly.

Signed-off-by: default avatarEric Biggers <ebiggers@google.com>
Reviewed-by: default avatarChao Yu <chao@kernel.org>
Signed-off-by: default avatarJaegeuk Kim <jaegeuk@kernel.org>
parent 3271d7eb
Loading
Loading
Loading
Loading
+3 −52
Original line number Diff line number Diff line
@@ -1384,53 +1384,6 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
	return 0;
}

int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct f2fs_map_blocks map;
	int flag;
	int err = 0;
	bool direct_io = iocb->ki_flags & IOCB_DIRECT;

	map.m_lblk = F2FS_BLK_ALIGN(iocb->ki_pos);
	map.m_len = F2FS_BYTES_TO_BLK(iocb->ki_pos + iov_iter_count(from));
	if (map.m_len > map.m_lblk)
		map.m_len -= map.m_lblk;
	else
		map.m_len = 0;

	map.m_next_pgofs = NULL;
	map.m_next_extent = NULL;
	map.m_seg_type = NO_CHECK_TYPE;
	map.m_may_create = true;

	if (direct_io) {
		map.m_seg_type = f2fs_rw_hint_to_seg_type(iocb->ki_hint);
		flag = f2fs_force_buffered_io(inode, iocb, from) ?
					F2FS_GET_BLOCK_PRE_AIO :
					F2FS_GET_BLOCK_PRE_DIO;
		goto map_blocks;
	}
	if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA(inode)) {
		err = f2fs_convert_inline_inode(inode);
		if (err)
			return err;
	}
	if (f2fs_has_inline_data(inode))
		return err;

	flag = F2FS_GET_BLOCK_PRE_AIO;

map_blocks:
	err = f2fs_map_blocks(inode, &map, 1, flag);
	if (map.m_len > 0 && err == -ENOSPC) {
		if (!direct_io)
			set_inode_flag(inode, FI_NO_PREALLOC);
		err = 0;
	}
	return err;
}

void f2fs_do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
{
	if (flag == F2FS_GET_BLOCK_PRE_AIO) {
@@ -3340,12 +3293,10 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
	int flag;

	/*
	 * we already allocated all the blocks, so we don't need to get
	 * the block addresses when there is no need to fill the page.
	 * If a whole page is being written and we already preallocated all the
	 * blocks, then there is no need to get a block address now.
	 */
	if (!f2fs_has_inline_data(inode) && len == PAGE_SIZE &&
	    !is_inode_flag_set(inode, FI_NO_PREALLOC) &&
	    !f2fs_verity_in_progress(inode))
	if (len == PAGE_SIZE && is_inode_flag_set(inode, FI_PREALLOCATED_ALL))
		return 0;

	/* f2fs_lock_op avoids race between write CP and convert_inline_page */
+1 −2
Original line number Diff line number Diff line
@@ -715,7 +715,7 @@ enum {
	FI_INLINE_DOTS,		/* indicate inline dot dentries */
	FI_DO_DEFRAG,		/* indicate defragment is running */
	FI_DIRTY_FILE,		/* indicate regular/symlink has dirty pages */
	FI_NO_PREALLOC,		/* indicate skipped preallocated blocks */
	FI_PREALLOCATED_ALL,	/* all blocks for write were preallocated */
	FI_HOT_DATA,		/* indicate file is hot */
	FI_EXTRA_ATTR,		/* indicate file has extra attribute */
	FI_PROJ_INHERIT,	/* indicate file inherits projectid */
@@ -3615,7 +3615,6 @@ void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr);
int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count);
int f2fs_reserve_new_block(struct dnode_of_data *dn);
int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index);
int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from);
int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index);
struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
			int op_flags, bool for_write);
+84 −47
Original line number Diff line number Diff line
@@ -4235,10 +4235,77 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
	return ret;
}

/*
 * Preallocate blocks for a write request, if it is possible and helpful to do
 * so.  Returns a positive number if blocks may have been preallocated, 0 if no
 * blocks were preallocated, or a negative errno value if something went
 * seriously wrong.  Also sets FI_PREALLOCATED_ALL on the inode if *all* the
 * requested blocks (not just some of them) have been allocated.
 */
static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	const loff_t pos = iocb->ki_pos;
	const size_t count = iov_iter_count(iter);
	struct f2fs_map_blocks map = {};
	bool dio = (iocb->ki_flags & IOCB_DIRECT) &&
		   !f2fs_force_buffered_io(inode, iocb, iter);
	int flag;
	int ret;

	/* If it will be an out-of-place direct write, don't bother. */
	if (dio && f2fs_lfs_mode(sbi))
		return 0;

	/* No-wait I/O can't allocate blocks. */
	if (iocb->ki_flags & IOCB_NOWAIT)
		return 0;

	/* If it will be a short write, don't bother. */
	if (fault_in_iov_iter_readable(iter, count))
		return 0;

	if (f2fs_has_inline_data(inode)) {
		/* If the data will fit inline, don't bother. */
		if (pos + count <= MAX_INLINE_DATA(inode))
			return 0;
		ret = f2fs_convert_inline_inode(inode);
		if (ret)
			return ret;
	}

	/* Do not preallocate blocks that will be written partially in 4KB. */
	map.m_lblk = F2FS_BLK_ALIGN(pos);
	map.m_len = F2FS_BYTES_TO_BLK(pos + count);
	if (map.m_len > map.m_lblk)
		map.m_len -= map.m_lblk;
	else
		map.m_len = 0;
	map.m_may_create = true;
	if (dio) {
		map.m_seg_type = f2fs_rw_hint_to_seg_type(inode->i_write_hint);
		flag = F2FS_GET_BLOCK_PRE_DIO;
	} else {
		map.m_seg_type = NO_CHECK_TYPE;
		flag = F2FS_GET_BLOCK_PRE_AIO;
	}

	ret = f2fs_map_blocks(inode, &map, 1, flag);
	/* -ENOSPC is only a fatal error if no blocks could be allocated. */
	if (ret < 0 && !(ret == -ENOSPC && map.m_len > 0))
		return ret;
	if (ret == 0)
		set_inode_flag(inode, FI_PREALLOCATED_ALL);
	return map.m_len;
}

static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file_inode(file);
	loff_t target_size;
	int preallocated;
	ssize_t ret;

	if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) {
@@ -4262,84 +4329,54 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)

	if (unlikely(IS_IMMUTABLE(inode))) {
		ret = -EPERM;
		goto unlock;
		goto out_unlock;
	}

	if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
		ret = -EPERM;
		goto unlock;
		goto out_unlock;
	}

	ret = generic_write_checks(iocb, from);
	if (ret > 0) {
		bool preallocated = false;
		size_t target_size = 0;
		int err;

		if (fault_in_iov_iter_readable(from, iov_iter_count(from)))
			set_inode_flag(inode, FI_NO_PREALLOC);

		if ((iocb->ki_flags & IOCB_NOWAIT)) {
		if (iocb->ki_flags & IOCB_NOWAIT) {
			if (!f2fs_overwrite_io(inode, iocb->ki_pos,
						iov_iter_count(from)) ||
				f2fs_has_inline_data(inode) ||
				f2fs_force_buffered_io(inode, iocb, from)) {
				clear_inode_flag(inode, FI_NO_PREALLOC);
				inode_unlock(inode);
				ret = -EAGAIN;
				goto out;
				goto out_unlock;
			}
			goto write;
		}

		if (is_inode_flag_set(inode, FI_NO_PREALLOC))
			goto write;

		if (iocb->ki_flags & IOCB_DIRECT) {
			/*
			 * Convert inline data for Direct I/O before entering
			 * f2fs_direct_IO().
			 */
			err = f2fs_convert_inline_inode(inode);
			if (err)
				goto out_err;
			/*
			 * If force_buffere_io() is true, we have to allocate
			 * blocks all the time, since f2fs_direct_IO will fall
			 * back to buffered IO.
			 */
			if (!f2fs_force_buffered_io(inode, iocb, from) &&
					f2fs_lfs_mode(F2FS_I_SB(inode)))
				goto write;
			ret = f2fs_convert_inline_inode(inode);
			if (ret)
				goto out_unlock;
		}
		preallocated = true;
		/* Possibly preallocate the blocks for the write. */
		target_size = iocb->ki_pos + iov_iter_count(from);

		err = f2fs_preallocate_blocks(iocb, from);
		if (err) {
out_err:
			clear_inode_flag(inode, FI_NO_PREALLOC);
			inode_unlock(inode);
			ret = err;
			goto out;
		preallocated = f2fs_preallocate_blocks(iocb, from);
		if (preallocated < 0) {
			ret = preallocated;
			goto out_unlock;
		}
write:

		ret = __generic_file_write_iter(iocb, from);
		clear_inode_flag(inode, FI_NO_PREALLOC);

		/* if we couldn't write data, we should deallocate blocks. */
		if (preallocated && i_size_read(inode) < target_size) {
		/* Don't leave any preallocated blocks around past i_size. */
		if (preallocated > 0 && i_size_read(inode) < target_size) {
			down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
			filemap_invalidate_lock(inode->i_mapping);
			f2fs_truncate(inode);
			filemap_invalidate_unlock(inode->i_mapping);
			up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
		}
		clear_inode_flag(inode, FI_PREALLOCATED_ALL);

		if (ret > 0)
			f2fs_update_iostat(F2FS_I_SB(inode), APP_WRITE_IO, ret);
	}
unlock:
out_unlock:
	inode_unlock(inode);
out:
	trace_f2fs_file_write_iter(inode, iocb->ki_pos,