Commit d08089f6 authored by David Howells, committed by Steve French
Browse files

cifs: Change the I/O paths to use an iterator rather than a page list



Currently, the cifs I/O paths hand lists of pages from the VM interface
routines at the top all the way through the intervening layers to the
socket interface at the bottom.

This is a problem, however, for interfacing with netfslib which passes an
iterator through to the ->issue_read() method (and will pass an iterator
through to the ->issue_write() method in future).  Netfslib takes over
bounce buffering for direct I/O, async I/O and encrypted content, so cifs
doesn't need to do that.  Netfslib also converts IOVEC-type iterators into
BVEC-type iterators if necessary.

Further, cifs needs foliating - and folios may come in a variety of sizes,
so a page list pointing to an array of heterogeneous pages may cause
problems in places such as where crypto is done.

Change the cifs I/O paths to hand iov_iter iterators all the way through
instead.

Notes:

 (1) Some old routines are #if'd out, to be removed in a follow-up patch,
     so as to avoid confusing the diff, thereby making the diff output
     easier to follow.  I've removed functions that don't overlap with
     anything added.

 (2) struct smb_rqst loses rq_pages, rq_offset, rq_npages, rq_pagesz and
     rq_tailsz which describe the pages forming the buffer; instead there's
     an rq_iter describing the source buffer and an rq_buffer which is used
     to hold the buffer for encryption.

 (3) struct cifs_readdata and cifs_writedata are modified similarly to
     smb_rqst.  The ->read_into_pages() and ->copy_into_pages() methods are
     then replaced with passing the iterator directly to the socket.

     The iterators are stored in these structs so that they are persistent
     and don't get deallocated when the function returns (unlike if they
     were stack variables).

 (4) Buffered writeback is overhauled, borrowing the code from the afs
     filesystem to gather up contiguous runs of folios.  The XARRAY-type
     iterator is then used to refer directly to the pagecache and can be
     passed to the socket to transmit data directly from there.

     This includes:

	cifs_extend_writeback()
	cifs_write_back_from_locked_folio()
	cifs_writepages_region()
	cifs_writepages()

 (5) Pages are converted to folios.

 (6) Direct I/O uses netfs_extract_user_iter() to create a BVEC-type
     iterator from an IOVEC/UBUF-type source iterator.

 (7) smb2_get_aead_req() uses netfs_extract_iter_to_sg() to extract page
     fragments from the iterator into the scatterlists that the crypto
     layer prefers.

 (8) smb2_init_transform_rq() attaches pages to smb_rqst::rq_buffer, an
     xarray, to use as a bounce buffer for encryption.  An XARRAY-type
     iterator can then be used to pass the bounce buffer to lower layers.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Steve French <sfrench@samba.org>
cc: Shyam Prasad N <nspmangalore@gmail.com>
cc: Rohith Surabattula <rohiths.msft@gmail.com>
cc: Paulo Alcantara <pc@cjr.nz>
cc: Jeff Layton <jlayton@kernel.org>
cc: linux-cifs@vger.kernel.org

Link: https://lore.kernel.org/r/164311907995.2806745.400147335497304099.stgit@warthog.procyon.org.uk/ # rfc
Link: https://lore.kernel.org/r/164928620163.457102.11602306234438271112.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/165211420279.3154751.15923591172438186144.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/165348880385.2106726.3220789453472800240.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/165364827111.3334034.934805882842932881.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/166126396180.708021.271013668175370826.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/166697259595.61150.5982032408321852414.stgit@warthog.procyon.org.uk/ # rfc
Link: https://lore.kernel.org/r/166732031756.3186319.12528413619888902872.stgit@warthog.procyon.org.uk/ # rfc
Signed-off-by: Steve French <stfrench@microsoft.com>
parent 16541195
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -18,6 +18,7 @@ config CIFS
	select DNS_RESOLVER
	select ASN1
	select OID_REGISTRY
	select NETFS_SUPPORT
	help
	  This is the client VFS module for the SMB3 family of network file
	  protocols (including the most recent, most secure dialect SMB3.1.1).
+6 −22
Original line number Diff line number Diff line
@@ -173,7 +173,7 @@ int __cifs_calc_signature(struct smb_rqst *rqst,
			  struct shash_desc *shash)
{
	int i;
	int rc;
	ssize_t rc;
	struct kvec *iov = rqst->rq_iov;
	int n_vec = rqst->rq_nvec;

@@ -205,25 +205,9 @@ int __cifs_calc_signature(struct smb_rqst *rqst,
		}
	}

	/* now hash over the rq_pages array */
	for (i = 0; i < rqst->rq_npages; i++) {
		void *kaddr;
		unsigned int len, offset;

		rqst_page_get_length(rqst, i, &len, &offset);

		kaddr = (char *) kmap(rqst->rq_pages[i]) + offset;

		rc = crypto_shash_update(shash, kaddr, len);
		if (rc) {
			cifs_dbg(VFS, "%s: Could not update with payload\n",
				 __func__);
			kunmap(rqst->rq_pages[i]);
	rc = cifs_shash_iter(&rqst->rq_iter, iov_iter_count(&rqst->rq_iter), shash);
	if (rc < 0)
		return rc;
		}

		kunmap(rqst->rq_pages[i]);
	}

	rc = crypto_shash_final(shash, signature);
	if (rc)
+33 −33
Original line number Diff line number Diff line
@@ -212,11 +212,9 @@ static inline void cifs_free_open_info(struct cifs_open_info_data *data)
struct smb_rqst {
	struct kvec	*rq_iov;	/* array of kvecs */
	unsigned int	rq_nvec;	/* number of kvecs in array */
	struct page	**rq_pages;	/* pointer to array of page ptrs */
	unsigned int	rq_offset;	/* the offset to the 1st page */
	unsigned int	rq_npages;	/* number pages in array */
	unsigned int	rq_pagesz;	/* page size to use */
	unsigned int	rq_tailsz;	/* length of last page */
	size_t		rq_iter_size;	/* Amount of data in ->rq_iter */
	struct iov_iter	rq_iter;	/* Data iterator */
	struct xarray	rq_buffer;	/* Page buffer for encryption */
};

struct mid_q_entry;
@@ -1421,10 +1419,11 @@ struct cifs_aio_ctx {
	struct cifsFileInfo	*cfile;
	struct bio_vec		*bv;
	loff_t			pos;
	unsigned int		npages;
	unsigned int		nr_pinned_pages;
	ssize_t			rc;
	unsigned int		len;
	unsigned int		total_len;
	unsigned int		bv_need_unpin;	/* If ->bv[] needs unpinning */
	bool			should_dirty;
	/*
	 * Indicates if this aio_ctx is for direct_io,
@@ -1442,28 +1441,18 @@ struct cifs_readdata {
	struct address_space		*mapping;
	struct cifs_aio_ctx		*ctx;
	__u64				offset;
	ssize_t				got_bytes;
	unsigned int			bytes;
	unsigned int			got_bytes;
	pid_t				pid;
	int				result;
	struct work_struct		work;
	int (*read_into_pages)(struct TCP_Server_Info *server,
				struct cifs_readdata *rdata,
				unsigned int len);
	int (*copy_into_pages)(struct TCP_Server_Info *server,
				struct cifs_readdata *rdata,
				struct iov_iter *iter);
	struct iov_iter			iter;
	struct kvec			iov[2];
	struct TCP_Server_Info		*server;
#ifdef CONFIG_CIFS_SMB_DIRECT
	struct smbd_mr			*mr;
#endif
	unsigned int			pagesz;
	unsigned int			page_offset;
	unsigned int			tailsz;
	struct cifs_credits		credits;
	unsigned int			nr_pages;
	struct page			**pages;
};

/* asynchronous write support */
@@ -1475,6 +1464,8 @@ struct cifs_writedata {
	struct work_struct		work;
	struct cifsFileInfo		*cfile;
	struct cifs_aio_ctx		*ctx;
	struct iov_iter			iter;
	struct bio_vec			*bv;
	__u64				offset;
	pid_t				pid;
	unsigned int			bytes;
@@ -1483,12 +1474,7 @@ struct cifs_writedata {
#ifdef CONFIG_CIFS_SMB_DIRECT
	struct smbd_mr			*mr;
#endif
	unsigned int			pagesz;
	unsigned int			page_offset;
	unsigned int			tailsz;
	struct cifs_credits		credits;
	unsigned int			nr_pages;
	struct page			**pages;
};

/*
@@ -2148,7 +2134,7 @@ static inline void move_cifs_info_to_smb2(struct smb2_file_all_info *dst, const
	dst->FileNameLength = src->FileNameLength;
}

static inline unsigned int cifs_get_num_sgs(const struct smb_rqst *rqst,
static inline int cifs_get_num_sgs(const struct smb_rqst *rqst,
				   int num_rqst,
				   const u8 *sig)
{
@@ -2170,6 +2156,19 @@ static inline unsigned int cifs_get_num_sgs(const struct smb_rqst *rqst,
	 * rqst[1+].rq_iov[0+] data to be encrypted/decrypted
	 */
	for (i = 0; i < num_rqst; i++) {
		/* We really don't want a mixture of pinned and unpinned pages
		 * in the sglist.  It's hard to keep track of which is what.
		 * Instead, we convert to a BVEC-type iterator higher up.
		 */
		if (WARN_ON_ONCE(user_backed_iter(&rqst[i].rq_iter)))
			return -EIO;

		/* We also don't want to have any extra refs or pins to clean
		 * up in the sglist.
		 */
		if (WARN_ON_ONCE(iov_iter_extract_will_pin(&rqst[i].rq_iter)))
			return -EIO;

		for (j = 0; j < rqst[i].rq_nvec; j++) {
			struct kvec *iov = &rqst[i].rq_iov[j];

@@ -2183,7 +2182,7 @@ static inline unsigned int cifs_get_num_sgs(const struct smb_rqst *rqst,
			}
			skip = 0;
		}
		nents += rqst[i].rq_npages;
		nents += iov_iter_npages(&rqst[i].rq_iter, INT_MAX);
	}
	nents += DIV_ROUND_UP(offset_in_page(sig) + SMB2_SIGNATURE_SIZE, PAGE_SIZE);
	return nents;
@@ -2192,7 +2191,7 @@ static inline unsigned int cifs_get_num_sgs(const struct smb_rqst *rqst,
/* We can not use the normal sg_set_buf() as we will sometimes pass a
 * stack object as buf.
 */
static inline struct scatterlist *cifs_sg_set_buf(struct scatterlist *sg,
static inline void cifs_sg_set_buf(struct sg_table *sgtable,
				   const void *buf,
				   unsigned int buflen)
{
@@ -2204,16 +2203,17 @@ static inline struct scatterlist *cifs_sg_set_buf(struct scatterlist *sg,
		do {
			unsigned int len = min_t(unsigned int, buflen, PAGE_SIZE - off);

			sg_set_page(sg++, vmalloc_to_page((void *)addr), len, off);
			sg_set_page(&sgtable->sgl[sgtable->nents++],
				    vmalloc_to_page((void *)addr), len, off);

			off = 0;
			addr += PAGE_SIZE;
			buflen -= len;
		} while (buflen);
	} else {
		sg_set_page(sg++, virt_to_page(addr), buflen, off);
		sg_set_page(&sgtable->sgl[sgtable->nents++],
			    virt_to_page(addr), buflen, off);
	}
	return sg;
}

#endif	/* _CIFS_GLOB_H */
+1 −7
Original line number Diff line number Diff line
@@ -584,10 +584,7 @@ int cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid);
int cifs_async_writev(struct cifs_writedata *wdata,
		      void (*release)(struct kref *kref));
void cifs_writev_complete(struct work_struct *work);
struct cifs_writedata *cifs_writedata_alloc(unsigned int nr_pages,
						work_func_t complete);
struct cifs_writedata *cifs_writedata_direct_alloc(struct page **pages,
						work_func_t complete);
struct cifs_writedata *cifs_writedata_alloc(work_func_t complete);
void cifs_writedata_release(struct kref *refcount);
int cifs_query_mf_symlink(unsigned int xid, struct cifs_tcon *tcon,
			  struct cifs_sb_info *cifs_sb,
@@ -604,13 +601,10 @@ enum securityEnum cifs_select_sectype(struct TCP_Server_Info *,
					enum securityEnum);
struct cifs_aio_ctx *cifs_aio_ctx_alloc(void);
void cifs_aio_ctx_release(struct kref *refcount);
int setup_aio_ctx_iter(struct cifs_aio_ctx *ctx, struct iov_iter *iter, int rw);

int cifs_alloc_hash(const char *name, struct shash_desc **sdesc);
void cifs_free_hash(struct shash_desc **sdesc);

void rqst_page_get_length(const struct smb_rqst *rqst, unsigned int page,
			  unsigned int *len, unsigned int *offset);
struct cifs_chan *
cifs_ses_find_chan(struct cifs_ses *ses, struct TCP_Server_Info *server);
int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses);
+5 −10
Original line number Diff line number Diff line
@@ -24,6 +24,7 @@
#include <linux/task_io_accounting_ops.h>
#include <linux/uaccess.h>
#include "cifspdu.h"
#include "cifsfs.h"
#include "cifsglob.h"
#include "cifsacl.h"
#include "cifsproto.h"
@@ -1294,11 +1295,8 @@ cifs_readv_callback(struct mid_q_entry *mid)
	struct TCP_Server_Info *server = tcon->ses->server;
	struct smb_rqst rqst = { .rq_iov = rdata->iov,
				 .rq_nvec = 2,
				 .rq_pages = rdata->pages,
				 .rq_offset = rdata->page_offset,
				 .rq_npages = rdata->nr_pages,
				 .rq_pagesz = rdata->pagesz,
				 .rq_tailsz = rdata->tailsz };
				 .rq_iter_size = iov_iter_count(&rdata->iter),
				 .rq_iter = rdata->iter };
	struct cifs_credits credits = { .value = 1, .instance = 0 };

	cifs_dbg(FYI, "%s: mid=%llu state=%d result=%d bytes=%u\n",
@@ -1737,11 +1735,8 @@ cifs_async_writev(struct cifs_writedata *wdata,

	rqst.rq_iov = iov;
	rqst.rq_nvec = 2;
	rqst.rq_pages = wdata->pages;
	rqst.rq_offset = wdata->page_offset;
	rqst.rq_npages = wdata->nr_pages;
	rqst.rq_pagesz = wdata->pagesz;
	rqst.rq_tailsz = wdata->tailsz;
	rqst.rq_iter = wdata->iter;
	rqst.rq_iter_size = iov_iter_count(&wdata->iter);

	cifs_dbg(FYI, "async write at %llu %u bytes\n",
		 wdata->offset, wdata->bytes);
Loading