Commit 5a7a9e03 authored by Christoph Hellwig, committed by Jason Gunthorpe

RDMA/core: remove use of dma_virt_ops

Use the ib_dma_* helpers to skip the DMA translation instead.  This
removes the last user of dma_virt_ops and keeps the weird layering
violation inside the RDMA core instead of burdening the DMA mapping
subsystems with it.  This also means the software RDMA drivers now don't
have to mess with DMA parameters that are not relevant to them at all, and
that in the future we can use PCI P2P transfers even for software RDMA, as
there is no first fake layer of DMA mapping that the P2P DMA support is
based on.
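
Not visible in the hunks below, the ib_dma_* wrappers in include/rdma/ib_verbs.h
are what make this work: when an ib_device is registered without a dma_device,
they skip the DMA API entirely and hand back kernel virtual addresses. A minimal
sketch of that dispatch (the actual header changes are outside this excerpt):

static inline bool ib_uses_virt_dma(struct ib_device *dev)
{
	/* Software RDMA devices register with a NULL dma_device. */
	return IS_ENABLED(CONFIG_INFINIBAND_VIRT_DMA) && !dev->dma_device;
}

static inline u64 ib_dma_map_single(struct ib_device *dev, void *cpu_addr,
				    size_t size,
				    enum dma_data_direction direction)
{
	/* No real DMA mapping: stash the kernel virtual address instead. */
	if (ib_uses_virt_dma(dev))
		return (uintptr_t)cpu_addr;
	return dma_map_single(dev->dma_device, cpu_addr, size, direction);
}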

Link: https://lore.kernel.org/r/20201106181941.1878556-8-hch@lst.de


Signed-off-by: Christoph Hellwig <hch@lst.de>
Tested-by: Mike Marciniszyn <mike.marciniszyn@cornelisnetworks.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
parent 42f2611c
+23 −20
@@ -1209,25 +1209,6 @@ static int assign_name(struct ib_device *device, const char *name)
 	return ret;
 }
 
-static void setup_dma_device(struct ib_device *device,
-			     struct device *dma_device)
-{
-	/*
-	 * If the caller does not provide a DMA capable device then the IB
-	 * device will be used. In this case the caller should fully setup the
-	 * ibdev for DMA. This usually means using dma_virt_ops.
-	 */
-#ifdef CONFIG_DMA_VIRT_OPS
-	if (!dma_device) {
-		device->dev.dma_ops = &dma_virt_ops;
-		dma_device = &device->dev;
-	}
-#endif
-	WARN_ON(!dma_device);
-	device->dma_device = dma_device;
-	WARN_ON(!device->dma_device->dma_parms);
-}
-
 /*
  * setup_device() allocates memory and sets up data that requires calling the
  * device ops, this is the only reason these actions are not done during
@@ -1373,7 +1354,14 @@ int ib_register_device(struct ib_device *device, const char *name,
 	if (ret)
 		return ret;
 
-	setup_dma_device(device, dma_device);
+	/*
+	 * If the caller does not provide a DMA capable device then the IB core
+	 * will set up ib_sge and scatterlist structures that stash the kernel
+	 * virtual address into the address field.
+	 */
+	WARN_ON(dma_device && !dma_device->dma_parms);
+	device->dma_device = dma_device;
+
 	ret = setup_device(device);
 	if (ret)
 		return ret;
@@ -2708,6 +2696,21 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
 }
 EXPORT_SYMBOL(ib_set_device_ops);
 
+#ifdef CONFIG_INFINIBAND_VIRT_DMA
+int ib_dma_virt_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents)
+{
+	struct scatterlist *s;
+	int i;
+
+	for_each_sg(sg, s, nents, i) {
+		sg_dma_address(s) = (uintptr_t)sg_virt(s);
+		sg_dma_len(s) = s->length;
+	}
+	return nents;
+}
+EXPORT_SYMBOL(ib_dma_virt_map_sg);
+#endif /* CONFIG_INFINIBAND_VIRT_DMA */
+
 static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = {
 	[RDMA_NL_LS_OP_RESOLVE] = {
 		.doit = ib_nl_handle_resolve_resp,
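
The ib_dma_virt_map_sg() added above is only reached through the ib_dma_map_sg()
wrappers; a sketch of that header-side dispatch (assumed shape, the ib_verbs.h
change itself is not part of this excerpt):

static inline int ib_dma_map_sg_attrs(struct ib_device *dev,
				      struct scatterlist *sg, int nents,
				      enum dma_data_direction direction,
				      unsigned long dma_attrs)
{
	/* Software devices get kernel virtual addresses in sg_dma_address(). */
	if (ib_uses_virt_dma(dev))
		return ib_dma_virt_map_sg(dev, sg, nents);
	return dma_map_sg_attrs(dev->dma_device, sg, nents, direction,
				dma_attrs);
}
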
+4 −1
@@ -285,8 +285,11 @@ static void rdma_rw_unmap_sg(struct ib_device *dev, struct scatterlist *sg,
 static int rdma_rw_map_sg(struct ib_device *dev, struct scatterlist *sg,
 			  u32 sg_cnt, enum dma_data_direction dir)
 {
-	if (is_pci_p2pdma_page(sg_page(sg)))
+	if (is_pci_p2pdma_page(sg_page(sg))) {
+		if (WARN_ON_ONCE(ib_uses_virt_dma(dev)))
+			return 0;
 		return pci_p2pdma_map_sg(dev->dma_device, sg, sg_cnt, dir);
+	}
 	return ib_dma_map_sg(dev, sg, sg_cnt, dir);
 }
 
+0 −1
@@ -4,6 +4,5 @@ config INFINIBAND_RDMAVT
 	depends on INFINIBAND_VIRT_DMA
 	depends on X86_64
 	depends on PCI
-	select DMA_VIRT_OPS
 	help
 	This is a common software verbs provider for RDMA networks.
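
With the DMA_VIRT_OPS select gone, the driver only keeps its dependency on the
INFINIBAND_VIRT_DMA symbol, which is assumed to be little more than a highmem
guard in drivers/infiniband/Kconfig (not shown in this excerpt), roughly:

config INFINIBAND_VIRT_DMA
	def_bool !HIGHMEM
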
+2 −4
@@ -324,8 +324,6 @@ static void __rvt_free_mr(struct rvt_mr *mr)
  * @acc: access flags
  *
  * Return: the memory region on success, otherwise returns an errno.
- * Note that all DMA addresses should be created via the functions in
- * struct dma_virt_ops.
  */
 struct ib_mr *rvt_get_dma_mr(struct ib_pd *pd, int acc)
 {
@@ -766,7 +764,7 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
 
 	/*
 	 * We use LKEY == zero for kernel virtual addresses
-	 * (see rvt_get_dma_mr() and dma_virt_ops).
+	 * (see rvt_get_dma_mr()).
 	 */
 	if (sge->lkey == 0) {
 		struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device);
@@ -877,7 +875,7 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
 
 	/*
 	 * We use RKEY == zero for kernel virtual addresses
-	 * (see rvt_get_dma_mr() and dma_virt_ops).
+	 * (see rvt_get_dma_mr()).
 	 */
 	rcu_read_lock();
 	if (rkey == 0) {
+0 −8
@@ -525,7 +525,6 @@ static noinline int check_support(struct rvt_dev_info *rdi, int verb)
 int rvt_register_device(struct rvt_dev_info *rdi)
 {
 	int ret = 0, i;
-	u64 dma_mask;
 
 	if (!rdi)
 		return -EINVAL;
@@ -580,13 +579,6 @@ int rvt_register_device(struct rvt_dev_info *rdi)
 	/* Completion queues */
 	spin_lock_init(&rdi->n_cqs_lock);
 
-	/* DMA Operations */
-	rdi->ibdev.dev.dma_parms = rdi->ibdev.dev.parent->dma_parms;
-	dma_mask = IS_ENABLED(CONFIG_64BIT) ? DMA_BIT_MASK(64) : DMA_BIT_MASK(32);
-	ret = dma_coerce_mask_and_coherent(&rdi->ibdev.dev, dma_mask);
-	if (ret)
-		goto bail_wss;
-
 	/* Protection Domain */
 	spin_lock_init(&rdi->n_pds_lock);
 	rdi->n_pds_allocated = 0;
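
With the dma_parms and DMA-mask setup removed, a software provider no longer
configures anything DMA-related and simply registers without a DMA-capable
device. A hypothetical call site, using the three-argument ib_register_device()
seen in the device.c hunk above:

	/* No DMA device: the IB core stashes kernel virtual addresses instead. */
	ret = ib_register_device(&rdi->ibdev, dev_name(&rdi->ibdev.dev), NULL);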