Commit 3a307ffc authored by Mark Fasheh
Browse files

ocfs2: rework ocfs2_buffered_write_cluster()



Use some ideas from the new-aops patch series and turn
ocfs2_buffered_write_cluster() into a two-stage operation, with the caller
copying data in between the stages. The code now understands multi-cluster
writes, as a result of having to deal with full-page writes when pages are
larger than 4k.

This sets us up to easily call into the write path during ->page_mkwrite().

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
parent 2e89b2e4
Loading
Loading
Loading
Loading
+478 −334

File changed.

Preview size limit exceeded, changes collapsed.

+6 −50
Original line number Diff line number Diff line
@@ -42,57 +42,13 @@ int walk_page_buffers( handle_t *handle,
			int (*fn)(	handle_t *handle,
					struct buffer_head *bh));

struct ocfs2_write_ctxt;
typedef int (ocfs2_page_writer)(struct inode *, struct ocfs2_write_ctxt *,
				u64 *, unsigned int *, unsigned int *);
int ocfs2_write_begin(struct file *file, struct address_space *mapping,
		      loff_t pos, unsigned len, unsigned flags,
		      struct page **pagep, void **fsdata);

ssize_t ocfs2_buffered_write_cluster(struct file *file, loff_t pos,
				     size_t count, ocfs2_page_writer *actor,
				     void *priv);

struct ocfs2_write_ctxt {
	size_t				w_count;
	loff_t				w_pos;
	u32				w_cpos;
	unsigned int			w_finished_copy;

	/* This is true if page_size > cluster_size */
	unsigned int			w_large_pages;

	/* Filler callback and private data */
	ocfs2_page_writer		*w_write_data_page;
	void				*w_private;

	/* Only valid for the filler callback */
	struct page			*w_this_page;
	unsigned int			w_this_page_new;
};

struct ocfs2_buffered_write_priv {
	char				*b_src_buf;
	const struct iovec		*b_cur_iov; /* Current iovec */
	size_t				b_cur_off; /* Offset in the
						    * current iovec */
};
int ocfs2_map_and_write_user_data(struct inode *inode,
				  struct ocfs2_write_ctxt *wc,
				  u64 *p_blkno,
				  unsigned int *ret_from,
				  unsigned int *ret_to);

struct ocfs2_splice_write_priv {
	struct splice_desc		*s_sd;
	struct pipe_buffer		*s_buf;
	struct pipe_inode_info		*s_pipe;
	/* Neither offset value is ever larger than one page */
	unsigned int			s_offset;
	unsigned int			s_buf_offset;
};
int ocfs2_map_and_write_splice_data(struct inode *inode,
				    struct ocfs2_write_ctxt *wc,
				    u64 *p_blkno,
				    unsigned int *ret_from,
				    unsigned int *ret_to);
int ocfs2_write_end(struct file *file, struct address_space *mapping,
		    loff_t pos, unsigned len, unsigned copied,
		    struct page *page, void *fsdata);

/* all ocfs2_dio_end_io()'s fault */
#define ocfs2_iocb_is_rw_locked(iocb) \
+67 −54
Original line number Diff line number Diff line
@@ -1335,15 +1335,16 @@ ocfs2_set_next_iovec(const struct iovec **iovp, size_t *basep, size_t bytes)
	*basep = base;
}

static struct page * ocfs2_get_write_source(struct ocfs2_buffered_write_priv *bp,
static struct page * ocfs2_get_write_source(char **ret_src_buf,
					    const struct iovec *cur_iov,
					    size_t iov_offset)
{
	int ret;
	char *buf;
	char *buf = cur_iov->iov_base + iov_offset;
	struct page *src_page = NULL;
	unsigned long off;

	buf = cur_iov->iov_base + iov_offset;
	off = (unsigned long)(buf) & ~PAGE_CACHE_MASK;

	if (!segment_eq(get_fs(), KERNEL_DS)) {
		/*
@@ -1355,18 +1356,17 @@ static struct page * ocfs2_get_write_source(struct ocfs2_buffered_write_priv *bp
				     (unsigned long)buf & PAGE_CACHE_MASK, 1,
				     0, 0, &src_page, NULL);
		if (ret == 1)
			bp->b_src_buf = kmap(src_page);
			*ret_src_buf = kmap(src_page) + off;
		else
			src_page = ERR_PTR(-EFAULT);
	} else {
		bp->b_src_buf = buf;
		*ret_src_buf = buf;
	}

	return src_page;
}

static void ocfs2_put_write_source(struct ocfs2_buffered_write_priv *bp,
				   struct page *page)
static void ocfs2_put_write_source(struct page *page)
{
	if (page) {
		kunmap(page);
@@ -1382,10 +1382,12 @@ static ssize_t ocfs2_file_buffered_write(struct file *file, loff_t *ppos,
{
	int ret = 0;
	ssize_t copied, total = 0;
	size_t iov_offset = 0;
	size_t iov_offset = 0, bytes;
	loff_t pos;
	const struct iovec *cur_iov = iov;
	struct ocfs2_buffered_write_priv bp;
	struct page *page;
	struct page *user_page, *page;
	char *buf, *dst;
	void *fsdata;

	/*
	 * handle partial DIO write.  Adjust cur_iov if needed.
@@ -1393,21 +1395,38 @@ static ssize_t ocfs2_file_buffered_write(struct file *file, loff_t *ppos,
	ocfs2_set_next_iovec(&cur_iov, &iov_offset, o_direct_written);

	do {
		bp.b_cur_off = iov_offset;
		bp.b_cur_iov = cur_iov;
		pos = *ppos;

		page = ocfs2_get_write_source(&bp, cur_iov, iov_offset);
		if (IS_ERR(page)) {
			ret = PTR_ERR(page);
		user_page = ocfs2_get_write_source(&buf, cur_iov, iov_offset);
		if (IS_ERR(user_page)) {
			ret = PTR_ERR(user_page);
			goto out;
		}

		copied = ocfs2_buffered_write_cluster(file, *ppos, count,
						      ocfs2_map_and_write_user_data,
						      &bp);
		/* Stay within our page boundaries */
		bytes = min((PAGE_CACHE_SIZE - ((unsigned long)pos & ~PAGE_CACHE_MASK)),
			    (PAGE_CACHE_SIZE - ((unsigned long)buf & ~PAGE_CACHE_MASK)));
		/* Stay within the vector boundary */
		bytes = min_t(size_t, bytes, cur_iov->iov_len - iov_offset);
		/* Stay within count */
		bytes = min(bytes, count);

		page = NULL;
		ret = ocfs2_write_begin(file, file->f_mapping, pos, bytes, 0,
					&page, &fsdata);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		ocfs2_put_write_source(&bp, page);
		dst = kmap_atomic(page, KM_USER0);
		memcpy(dst + (pos & (PAGE_CACHE_SIZE - 1)), buf, bytes);
		kunmap_atomic(dst, KM_USER0);
		flush_dcache_page(page);
		ocfs2_put_write_source(user_page);

		copied = ocfs2_write_end(file, file->f_mapping, pos, bytes,
					 bytes, page, fsdata);
		if (copied < 0) {
			mlog_errno(copied);
			ret = copied;
@@ -1415,7 +1434,7 @@ static ssize_t ocfs2_file_buffered_write(struct file *file, loff_t *ppos,
		}

		total += copied;
		*ppos = *ppos + copied;
		*ppos = pos + copied;
		count -= copied;

		ocfs2_set_next_iovec(&cur_iov, &iov_offset, copied);
@@ -1585,52 +1604,46 @@ static int ocfs2_splice_write_actor(struct pipe_inode_info *pipe,
				    struct pipe_buffer *buf,
				    struct splice_desc *sd)
{
	int ret, count, total = 0;
	int ret, count;
	ssize_t copied = 0;
	struct ocfs2_splice_write_priv sp;
	struct file *file = sd->u.file;
	unsigned int offset;
	struct page *page = NULL;
	void *fsdata;
	char *src, *dst;

	ret = buf->ops->confirm(pipe, buf);
	if (ret)
		goto out;

	sp.s_sd = sd;
	sp.s_buf = buf;
	sp.s_pipe = pipe;
	sp.s_offset = sd->pos & ~PAGE_CACHE_MASK;
	sp.s_buf_offset = buf->offset;

	offset = sd->pos & ~PAGE_CACHE_MASK;
	count = sd->len;
	if (count + sp.s_offset > PAGE_CACHE_SIZE)
		count = PAGE_CACHE_SIZE - sp.s_offset;
	if (count + offset > PAGE_CACHE_SIZE)
		count = PAGE_CACHE_SIZE - offset;

	do {
		/*
		 * splice wants us to copy up to one page at a
		 * time. For pagesize > cluster size, this means we
		 * might enter ocfs2_buffered_write_cluster() more
		 * than once, so keep track of our progress here.
		 */
		copied = ocfs2_buffered_write_cluster(sd->u.file,
						      (loff_t)sd->pos + total,
						      count,
						      ocfs2_map_and_write_splice_data,
						      &sp);
	ret = ocfs2_write_begin(file, file->f_mapping, sd->pos, count, 0,
				&page, &fsdata);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	src = buf->ops->map(pipe, buf, 1);
	dst = kmap_atomic(page, KM_USER1);
	memcpy(dst + offset, src + buf->offset, count);
	kunmap_atomic(page, KM_USER1);
	buf->ops->unmap(pipe, buf, src);

	copied = ocfs2_write_end(file, file->f_mapping, sd->pos, count, count,
				 page, fsdata);
	if (copied < 0) {
		mlog_errno(copied);
		ret = copied;
		goto out;
	}

		count -= copied;
		sp.s_offset += copied;
		sp.s_buf_offset += copied;
		total += copied;
	} while (count);

	ret = 0;
out:

	return total ? total : ret;
	return copied ? copied : ret;
}

static ssize_t __ocfs2_file_splice_write(struct pipe_inode_info *pipe,