Commit e5fbdde4 authored by David Howells's avatar David Howells Committed by Steve French
Browse files

cifs: Add a function to build an RDMA SGE list from an iterator



Add a function to add elements onto an RDMA SGE list representing page
fragments extracted from a BVEC-, KVEC- or XARRAY-type iterator and DMA
mapped until the maximum number of elements is reached.

Nothing is done to make sure the pages remain present - that must be done
by the caller.

Signed-off-by: default avatarDavid Howells <dhowells@redhat.com>
cc: Steve French <sfrench@samba.org>
cc: Shyam Prasad N <nspmangalore@gmail.com>
cc: Rohith Surabattula <rohiths.msft@gmail.com>
cc: Tom Talpey <tom@talpey.com>
cc: Jeff Layton <jlayton@kernel.org>
cc: linux-cifs@vger.kernel.org
cc: linux-fsdevel@vger.kernel.org
cc: linux-rdma@vger.kernel.org

Link: https://lore.kernel.org/r/166697256704.61150.17388516338310645808.stgit@warthog.procyon.org.uk/ # rfc
Link: https://lore.kernel.org/r/166732028840.3186319.8512284239779728860.stgit@warthog.procyon.org.uk/

 # rfc
Signed-off-by: default avatarSteve French <stfrench@microsoft.com>
parent 01858469
Loading
Loading
Loading
Loading
+214 −0
Original line number Diff line number Diff line
@@ -44,6 +44,17 @@ static int smbd_post_send_page(struct smbd_connection *info,
static void destroy_mr_list(struct smbd_connection *info);
static int allocate_mr_list(struct smbd_connection *info);

struct smb_extract_to_rdma {
	struct ib_sge		*sge;
	unsigned int		nr_sge;
	unsigned int		max_sge;
	struct ib_device	*device;
	u32			local_dma_lkey;
	enum dma_data_direction	direction;
};
static ssize_t smb_extract_iter_to_rdma(struct iov_iter *iter, size_t len,
					struct smb_extract_to_rdma *rdma);

/* SMBD version number */
#define SMBD_V1	0x0100

@@ -2492,3 +2503,206 @@ int smbd_deregister_mr(struct smbd_mr *smbdirect_mr)

	return rc;
}

static bool smb_set_sge(struct smb_extract_to_rdma *rdma,
			struct page *lowest_page, size_t off, size_t len)
{
	struct ib_sge *sge = &rdma->sge[rdma->nr_sge];
	u64 addr;

	addr = ib_dma_map_page(rdma->device, lowest_page,
			       off, len, rdma->direction);
	if (ib_dma_mapping_error(rdma->device, addr))
		return false;

	sge->addr   = addr;
	sge->length = len;
	sge->lkey   = rdma->local_dma_lkey;
	rdma->nr_sge++;
	return true;
}

/*
 * Extract page fragments from a BVEC-class iterator and add them to an RDMA
 * element list.  The pages are not pinned.
 */
static ssize_t smb_extract_bvec_to_rdma(struct iov_iter *iter,
					struct smb_extract_to_rdma *rdma,
					ssize_t maxsize)
{
	const struct bio_vec *bv = iter->bvec;
	unsigned long start = iter->iov_offset;
	unsigned int i;
	ssize_t ret = 0;

	for (i = 0; i < iter->nr_segs; i++) {
		size_t off, len;

		len = bv[i].bv_len;
		if (start >= len) {
			start -= len;
			continue;
		}

		len = min_t(size_t, maxsize, len - start);
		off = bv[i].bv_offset + start;

		if (!smb_set_sge(rdma, bv[i].bv_page, off, len))
			return -EIO;

		ret += len;
		maxsize -= len;
		if (rdma->nr_sge >= rdma->max_sge || maxsize <= 0)
			break;
		start = 0;
	}

	return ret;
}

/*
 * Extract fragments from a KVEC-class iterator and add them to an RDMA list.
 * This can deal with vmalloc'd buffers as well as kmalloc'd or static buffers.
 * The pages are not pinned.
 */
static ssize_t smb_extract_kvec_to_rdma(struct iov_iter *iter,
					struct smb_extract_to_rdma *rdma,
					ssize_t maxsize)
{
	const struct kvec *kv = iter->kvec;
	unsigned long start = iter->iov_offset;
	unsigned int i;
	ssize_t ret = 0;

	for (i = 0; i < iter->nr_segs; i++) {
		struct page *page;
		unsigned long kaddr;
		size_t off, len, seg;

		len = kv[i].iov_len;
		if (start >= len) {
			start -= len;
			continue;
		}

		kaddr = (unsigned long)kv[i].iov_base + start;
		off = kaddr & ~PAGE_MASK;
		len = min_t(size_t, maxsize, len - start);
		kaddr &= PAGE_MASK;

		maxsize -= len;
		do {
			seg = min_t(size_t, len, PAGE_SIZE - off);

			if (is_vmalloc_or_module_addr((void *)kaddr))
				page = vmalloc_to_page((void *)kaddr);
			else
				page = virt_to_page(kaddr);

			if (!smb_set_sge(rdma, page, off, seg))
				return -EIO;

			ret += seg;
			len -= seg;
			kaddr += PAGE_SIZE;
			off = 0;
		} while (len > 0 && rdma->nr_sge < rdma->max_sge);

		if (rdma->nr_sge >= rdma->max_sge || maxsize <= 0)
			break;
		start = 0;
	}

	return ret;
}

/*
 * Extract folio fragments from an XARRAY-class iterator and add them to an
 * RDMA list.  The folios are not pinned.
 */
static ssize_t smb_extract_xarray_to_rdma(struct iov_iter *iter,
					  struct smb_extract_to_rdma *rdma,
					  ssize_t maxsize)
{
	struct xarray *xa = iter->xarray;
	struct folio *folio;
	loff_t start = iter->xarray_start + iter->iov_offset;
	pgoff_t index = start / PAGE_SIZE;
	ssize_t ret = 0;
	size_t off, len;
	XA_STATE(xas, xa, index);

	rcu_read_lock();

	xas_for_each(&xas, folio, ULONG_MAX) {
		if (xas_retry(&xas, folio))
			continue;
		if (WARN_ON(xa_is_value(folio)))
			break;
		if (WARN_ON(folio_test_hugetlb(folio)))
			break;

		off = offset_in_folio(folio, start);
		len = min_t(size_t, maxsize, folio_size(folio) - off);

		if (!smb_set_sge(rdma, folio_page(folio, 0), off, len)) {
			rcu_read_unlock();
			return -EIO;
		}

		maxsize -= len;
		ret += len;
		if (rdma->nr_sge >= rdma->max_sge || maxsize <= 0)
			break;
	}

	rcu_read_unlock();
	return ret;
}

/*
 * Extract page fragments from up to the given amount of the source iterator
 * and build up an RDMA list that refers to all of those bits.  The RDMA list
 * is appended to, up to the maximum number of elements set in the parameter
 * block.
 *
 * The extracted page fragments are not pinned or ref'd in any way; if an
 * IOVEC/UBUF-type iterator is to be used, it should be converted to a
 * BVEC-type iterator and the pages pinned, ref'd or otherwise held in some
 * way.
 */
static ssize_t smb_extract_iter_to_rdma(struct iov_iter *iter, size_t len,
					struct smb_extract_to_rdma *rdma)
{
	ssize_t ret;
	int before = rdma->nr_sge;

	switch (iov_iter_type(iter)) {
	case ITER_BVEC:
		ret = smb_extract_bvec_to_rdma(iter, rdma, len);
		break;
	case ITER_KVEC:
		ret = smb_extract_kvec_to_rdma(iter, rdma, len);
		break;
	case ITER_XARRAY:
		ret = smb_extract_xarray_to_rdma(iter, rdma, len);
		break;
	default:
		WARN_ON_ONCE(1);
		return -EIO;
	}

	if (ret > 0) {
		iov_iter_advance(iter, ret);
	} else if (ret < 0) {
		while (rdma->nr_sge > before) {
			struct ib_sge *sge = &rdma->sge[rdma->nr_sge--];

			ib_dma_unmap_single(rdma->device, sge->addr, sge->length,
					    rdma->direction);
			sge->addr = 0;
		}
	}

	return ret;
}