Commit d6adc251 authored by Suganath Prabu S, committed by Martin K. Petersen

scsi: mpt3sas: Force PCIe scatterlist allocations to be within same 4 GB region

According to the MPI specification, PCIe SGL buffers cannot cross a 4 GB
boundary.

While allocating, if any buffer crosses the 4 GB boundary, then:

 - Release the already allocated memory pools; and

 - Reallocate them by changing the DMA coherent mask to 32-bit (see the sketch below).

Link: https://lore.kernel.org/r/20210305102904.7560-2-suganath-prabu.subramani@broadcom.com
Signed-off-by: Suganath Prabu S <suganath-prabu.subramani@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
parent a1c4d774
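
[Editor's note] The core check this patch relies on: a buffer of size pool_sz starting at address "start" stays inside a single 4 GB region exactly when the upper 32 bits of its first and last byte addresses are equal; this is what the driver's mpt3sas_check_same_4gb_region() (visible as context in the diff below) tests. A minimal standalone sketch of that test, assuming only upper_32_bits() from <linux/kernel.h> and dma_addr_t from <linux/types.h>; the function name here is illustrative, not the driver's code:

	#include <linux/kernel.h>	/* upper_32_bits() */
	#include <linux/types.h>	/* dma_addr_t, u32, bool */

	/*
	 * True iff the pool [start, start + pool_sz) does not cross a
	 * 4 GB boundary, i.e. bits [63:32] of the first and last byte
	 * addresses match.
	 */
	static bool pool_in_one_4gb_region(dma_addr_t start, u32 pool_sz)
	{
		return upper_32_bits(start) == upper_32_bits(start + pool_sz - 1);
	}

When this test fails for any PCIe SGL buffer, the patch releases the pools and retries with a 32-bit coherent DMA mask: every allocation then lands below 4 GB, so no buffer can cross the boundary.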
drivers/scsi/mpt3sas/mpt3sas_base.c  +112 −47
@@ -2905,23 +2905,22 @@ static int
 _base_config_dma_addressing(struct MPT3SAS_ADAPTER *ioc, struct pci_dev *pdev)
 {
 	struct sysinfo s;
-	int dma_mask;
 
 	if (ioc->is_mcpu_endpoint ||
 	    sizeof(dma_addr_t) == 4 || ioc->use_32bit_dma ||
 	    dma_get_required_mask(&pdev->dev) <= 32)
-		dma_mask = 32;
+		ioc->dma_mask = 32;
 	/* Set 63 bit DMA mask for all SAS3 and SAS35 controllers */
 	else if (ioc->hba_mpi_version_belonged > MPI2_VERSION)
-		dma_mask = 63;
+		ioc->dma_mask = 63;
 	else
-		dma_mask = 64;
+		ioc->dma_mask = 64;
 
-	if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(dma_mask)) ||
-	    dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(dma_mask)))
+	if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(ioc->dma_mask)) ||
+	    dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(ioc->dma_mask)))
 		return -ENODEV;
 
-	if (dma_mask > 32) {
+	if (ioc->dma_mask > 32) {
 		ioc->base_add_sg_single = &_base_add_sg_single_64;
 		ioc->sge_size = sizeof(Mpi2SGESimple64_t);
 	} else {
@@ -2931,7 +2930,7 @@ _base_config_dma_addressing(struct MPT3SAS_ADAPTER *ioc, struct pci_dev *pdev)
 
 	si_meminfo(&s);
 	ioc_info(ioc, "%d BIT PCI BUS DMA ADDRESSING SUPPORTED, total mem (%ld kB)\n",
-		dma_mask, convert_to_kb(s.totalram));
+		ioc->dma_mask, convert_to_kb(s.totalram));
 
 	return 0;
 }
@@ -5337,10 +5336,10 @@ _base_release_memory_pools(struct MPT3SAS_ADAPTER *ioc)
 			dma_pool_free(ioc->pcie_sgl_dma_pool,
 					ioc->pcie_sg_lookup[i].pcie_sgl,
 					ioc->pcie_sg_lookup[i].pcie_sgl_dma);
+			ioc->pcie_sg_lookup[i].pcie_sgl = NULL;
 		}
 		dma_pool_destroy(ioc->pcie_sgl_dma_pool);
 	}
-
 	if (ioc->config_page) {
 		dexitprintk(ioc,
 			    ioc_info(ioc, "config_page(0x%p): free\n",
@@ -5398,6 +5397,89 @@ mpt3sas_check_same_4gb_region(long reply_pool_start_address, u32 pool_sz)
 		return 0;
 }
 
+/**
+ * _base_reduce_hba_queue_depth- Retry with reduced queue depth
+ * @ioc: Adapter object
+ *
+ * Return: 0 for success, non-zero for failure.
+ **/
+static inline int
+_base_reduce_hba_queue_depth(struct MPT3SAS_ADAPTER *ioc)
+{
+	int reduce_sz = 64;
+
+	if ((ioc->hba_queue_depth - reduce_sz) >
+	    (ioc->internal_depth + INTERNAL_SCSIIO_CMDS_COUNT)) {
+		ioc->hba_queue_depth -= reduce_sz;
+		return 0;
+	} else
+		return -ENOMEM;
+}
+
+/**
+ * _base_allocate_pcie_sgl_pool - Allocating DMA'able memory
+ *			for pcie sgl pools.
+ * @ioc: Adapter object
+ * @sz: DMA Pool size
+ * @ct: Chain tracker
+ * Return: 0 for success, non-zero for failure.
+ */
+
+static int
+_base_allocate_pcie_sgl_pool(struct MPT3SAS_ADAPTER *ioc, u32 sz)
+{
+	int i = 0, j = 0;
+	struct chain_tracker *ct;
+
+	ioc->pcie_sgl_dma_pool =
+	    dma_pool_create("PCIe SGL pool", &ioc->pdev->dev, sz,
+	    ioc->page_size, 0);
+	if (!ioc->pcie_sgl_dma_pool) {
+		ioc_err(ioc, "PCIe SGL pool: dma_pool_create failed\n");
+		return -ENOMEM;
+	}
+
+	ioc->chains_per_prp_buffer = sz/ioc->chain_segment_sz;
+	ioc->chains_per_prp_buffer =
+	    min(ioc->chains_per_prp_buffer, ioc->chains_needed_per_io);
+	for (i = 0; i < ioc->scsiio_depth; i++) {
+		ioc->pcie_sg_lookup[i].pcie_sgl =
+		    dma_pool_alloc(ioc->pcie_sgl_dma_pool, GFP_KERNEL,
+		    &ioc->pcie_sg_lookup[i].pcie_sgl_dma);
+		if (!ioc->pcie_sg_lookup[i].pcie_sgl) {
+			ioc_err(ioc, "PCIe SGL pool: dma_pool_alloc failed\n");
+			return -EAGAIN;
+		}
+
+		if (!mpt3sas_check_same_4gb_region(
+		    (long)ioc->pcie_sg_lookup[i].pcie_sgl, sz)) {
+			ioc_err(ioc, "PCIE SGLs are not in same 4G !! pcie sgl (0x%p) dma = (0x%llx)\n",
+			    ioc->pcie_sg_lookup[i].pcie_sgl,
+			    (unsigned long long)
+			    ioc->pcie_sg_lookup[i].pcie_sgl_dma);
+			ioc->use_32bit_dma = true;
+			return -EAGAIN;
+		}
+
+		for (j = 0; j < ioc->chains_per_prp_buffer; j++) {
+			ct = &ioc->chain_lookup[i].chains_per_smid[j];
+			ct->chain_buffer =
+			    ioc->pcie_sg_lookup[i].pcie_sgl +
+			    (j * ioc->chain_segment_sz);
+			ct->chain_buffer_dma =
+			    ioc->pcie_sg_lookup[i].pcie_sgl_dma +
+			    (j * ioc->chain_segment_sz);
+		}
+	}
+	dinitprintk(ioc, ioc_info(ioc,
+	    "PCIe sgl pool depth(%d), element_size(%d), pool_size(%d kB)\n",
+	    ioc->scsiio_depth, sz, (sz * ioc->scsiio_depth)/1024));
+	dinitprintk(ioc, ioc_info(ioc,
+	    "Number of chains can fit in a PRP page(%d)\n",
+	    ioc->chains_per_prp_buffer));
+	return 0;
+}
+
 /**
  * base_alloc_rdpq_dma_pool - Allocating DMA'able memory
  *                     for reply queues.
@@ -5496,7 +5578,7 @@ _base_allocate_memory_pools(struct MPT3SAS_ADAPTER *ioc)
 	unsigned short sg_tablesize;
 	u16 sge_size;
 	int i, j;
-	int ret = 0;
+	int ret = 0, rc = 0;
 	struct chain_tracker *ct;
 
 	dinitprintk(ioc, ioc_info(ioc, "%s\n", __func__));
@@ -5801,6 +5883,7 @@ _base_allocate_memory_pools(struct MPT3SAS_ADAPTER *ioc)
 	 * be required for NVMe PRP's, only each set of NVMe blocks will be
 	 * contiguous, so a new set is allocated for each possible I/O.
 	 */
+
 	ioc->chains_per_prp_buffer = 0;
 	if (ioc->facts.ProtocolFlags & MPI2_IOCFACTS_PROTOCOL_NVME_DEVICES) {
 		nvme_blocks_needed =
@@ -5815,43 +5898,11 @@ _base_allocate_memory_pools(struct MPT3SAS_ADAPTER *ioc)
 			goto out;
 		}
 		sz = nvme_blocks_needed * ioc->page_size;
-		ioc->pcie_sgl_dma_pool =
-			dma_pool_create("PCIe SGL pool", &ioc->pdev->dev, sz, 16, 0);
-		if (!ioc->pcie_sgl_dma_pool) {
-			ioc_info(ioc, "PCIe SGL pool: dma_pool_create failed\n");
-			goto out;
-		}
-
-		ioc->chains_per_prp_buffer = sz/ioc->chain_segment_sz;
-		ioc->chains_per_prp_buffer = min(ioc->chains_per_prp_buffer,
-						ioc->chains_needed_per_io);
-
-		for (i = 0; i < ioc->scsiio_depth; i++) {
-			ioc->pcie_sg_lookup[i].pcie_sgl = dma_pool_alloc(
-				ioc->pcie_sgl_dma_pool, GFP_KERNEL,
-				&ioc->pcie_sg_lookup[i].pcie_sgl_dma);
-			if (!ioc->pcie_sg_lookup[i].pcie_sgl) {
-				ioc_info(ioc, "PCIe SGL pool: dma_pool_alloc failed\n");
-				goto out;
-			}
-			for (j = 0; j < ioc->chains_per_prp_buffer; j++) {
-				ct = &ioc->chain_lookup[i].chains_per_smid[j];
-				ct->chain_buffer =
-				    ioc->pcie_sg_lookup[i].pcie_sgl +
-				    (j * ioc->chain_segment_sz);
-				ct->chain_buffer_dma =
-				    ioc->pcie_sg_lookup[i].pcie_sgl_dma +
-				    (j * ioc->chain_segment_sz);
-			}
-		}
-
-		dinitprintk(ioc,
-			    ioc_info(ioc, "PCIe sgl pool depth(%d), element_size(%d), pool_size(%d kB)\n",
-				     ioc->scsiio_depth, sz,
-				     (sz * ioc->scsiio_depth) / 1024));
-		dinitprintk(ioc,
-			    ioc_info(ioc, "Number of chains can fit in a PRP page(%d)\n",
-				     ioc->chains_per_prp_buffer));
+		rc = _base_allocate_pcie_sgl_pool(ioc, sz);
+		if (rc == -ENOMEM)
+			return -ENOMEM;
+		else if (rc == -EAGAIN)
+			goto try_32bit_dma;
 		total_sz += sz * ioc->scsiio_depth;
 	}
 
@@ -6021,6 +6072,19 @@ _base_allocate_memory_pools(struct MPT3SAS_ADAPTER *ioc)
 		 ioc->shost->sg_tablesize);
 	return 0;
 
+try_32bit_dma:
+	_base_release_memory_pools(ioc);
+	if (ioc->use_32bit_dma && (ioc->dma_mask > 32)) {
+		/* Change dma coherent mask to 32 bit and reallocate */
+		if (_base_config_dma_addressing(ioc, ioc->pdev) != 0) {
+			pr_err("Setting 32 bit coherent DMA mask Failed %s\n",
+			    pci_name(ioc->pdev));
+			return -ENODEV;
+		}
+	} else if (_base_reduce_hba_queue_depth(ioc) != 0)
+		return -ENOMEM;
+	goto retry_allocation;
+
  out:
 	return -ENOMEM;
 }
@@ -7681,6 +7745,7 @@ mpt3sas_base_attach(struct MPT3SAS_ADAPTER *ioc)
 
 	ioc->rdpq_array_enable_assigned = 0;
 	ioc->use_32bit_dma = false;
+	ioc->dma_mask = 64;
 	if (ioc->is_aero_ioc)
 		ioc->base_readl = &_base_readl_aero;
 	else
drivers/scsi/mpt3sas/mpt3sas_base.h  +1 −0
@@ -1371,6 +1371,7 @@ struct MPT3SAS_ADAPTER {
 	u16		thresh_hold;
 	u8		high_iops_queues;
 	u32		drv_support_bitmap;
+	u32             dma_mask;
 	bool		enable_sdev_max_qd;
 	bool		use_32bit_dma;
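
[Editor's note] Taken together, the allocation path now retries in a loop. A condensed, illustrative sketch of the control flow in _base_allocate_memory_pools() after this patch, not verbatim driver code (names match the diff; structure is simplified, and the retry_allocation label is added by a hunk of this patch that is not shown above):

	retry_allocation:
		...
		if (ioc->facts.ProtocolFlags & MPI2_IOCFACTS_PROTOCOL_NVME_DEVICES) {
			rc = _base_allocate_pcie_sgl_pool(ioc, sz);
			if (rc == -ENOMEM)		/* hard failure */
				return -ENOMEM;
			if (rc == -EAGAIN)		/* a buffer crossed 4 GB */
				goto try_32bit_dma;
		}
		...
		return 0;

	try_32bit_dma:
		_base_release_memory_pools(ioc);	/* drop everything allocated so far */
		if (ioc->use_32bit_dma && (ioc->dma_mask > 32)) {
			/* switch the coherent DMA mask to 32 bit and reallocate */
			if (_base_config_dma_addressing(ioc, ioc->pdev) != 0)
				return -ENODEV;
		} else if (_base_reduce_hba_queue_depth(ioc) != 0)
			return -ENOMEM;			/* queue depth already minimal */
		goto retry_allocation;

The 32-bit fallback works because a 32-bit coherent mask confines every allocation to the first 4 GB of the DMA address space, where no buffer can straddle the boundary; only when the mask is already 32 bit does the driver instead shrink the queue depth (in steps of 64) and retry.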