Commit 82d8dfd2 authored by Quinn Tran, committed by Martin K. Petersen
Browse files

scsi: qla2xxx: edif: Fix performance dip due to lock contention



Users experienced a performance dip when measuring IOPS with EDIF
enabled. During I/O, the driver calls dma_pool_zalloc() to allocate a
chunk of memory. This call takes a lock behind the scenes, which
contributes to lock contention. Save the allocated memory for reuse to
avoid the lock.

Signed-off-by: Quinn Tran <qutran@marvell.com>
Signed-off-by: Nilesh Javali <njavali@marvell.com>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
parent 430eef03
Loading
Loading
Loading
Loading
+21 −1
Original line number Diff line number Diff line
@@ -384,6 +384,13 @@ struct els_reject {
struct req_que;
struct qla_tgt_sess;

/*
 * Descriptor for one buffer obtained with qla_get_buf() and handed back
 * with qla_put_buf(). In the EDIF start-scsi path, buf and buf_dma are
 * copied into ctx->fcp_cmnd and ctx->fcp_cmnd_dma respectively.
 */
struct qla_buf_dsc {
	/* NOTE(review): presumably identifies the buffer within its pool - confirm */
	u16 tag;
/* NOTE(review): looks like the tag value of a descriptor that holds no buffer - confirm */
#define TAG_FREED 0xffff
	/* CPU-side pointer to the buffer */
	void *buf;
	/* DMA address paired with buf */
	dma_addr_t buf_dma;
};

/*
 * SCSI Request Block
 */
@@ -392,14 +399,16 @@ struct srb_cmd {
	uint32_t request_sense_length;
	uint32_t fw_sense_length;
	uint8_t *request_sense_ptr;
	struct ct6_dsd *ct6_ctx;
	struct crc_context *crc_ctx;
	struct ct6_dsd ct6_ctx;
	struct qla_buf_dsc buf_dsc;
};

/*
 * SRB flag definitions
 */
#define SRB_DMA_VALID			BIT_0	/* Command sent to ISP */
#define SRB_GOT_BUF			BIT_1
#define SRB_FCP_CMND_DMA_VALID		BIT_12	/* DIF: DSD List valid */
#define SRB_CRC_CTX_DMA_VALID		BIT_2	/* DIF: context DMA valid */
#define SRB_CRC_PROT_DMA_VALID		BIT_4	/* DIF: prot DMA valid */
@@ -3722,6 +3731,16 @@ struct qla_fw_resources {

#define QLA_IOCB_PCT_LIMIT 95

/*
 * Buffer pool bookkeeping; one instance is embedded in struct qla_qpair
 * as buf_pool.
 *
 * NOTE(review): the per-field notes below are inferred from names and
 * types only; the code that populates and consumes these fields is not
 * visible here - confirm against the pool implementation.
 */
struct  qla_buf_pool {
	u16 num_bufs;		/* presumably the number of slots in the pool */
	u16 num_active;		/* presumably the count of buffers currently handed out */
	u16 max_used;		/* presumably a high-water mark of num_active */
	u16 reserved;
	unsigned long *buf_map;	/* presumably a bitmap of in-use slots */
	void **buf_array;	/* presumably per-slot CPU pointers */
	dma_addr_t *dma_array;	/* presumably per-slot DMA addresses */
};

/*Queue pair data structure */
struct qla_qpair {
	spinlock_t qp_lock;
@@ -3775,6 +3794,7 @@ struct qla_qpair {
	struct qla_tgt_counters tgt_counters;
	uint16_t cpuid;
	struct qla_fw_resources fwres ____cacheline_aligned;
	struct  qla_buf_pool buf_pool;
	u32	cmd_cnt;
	u32	cmd_completion_cnt;
	u32	prev_completion_cnt;
+7 −22
Original line number Diff line number Diff line
@@ -3007,26 +3007,16 @@ qla28xx_start_scsi_edif(srb_t *sp)
			goto queuing_error;
	}

	ctx = sp->u.scmd.ct6_ctx =
	    mempool_alloc(ha->ctx_mempool, GFP_ATOMIC);
	if (!ctx) {
		ql_log(ql_log_fatal, vha, 0x3010,
		    "Failed to allocate ctx for cmd=%p.\n", cmd);
		goto queuing_error;
	}

	memset(ctx, 0, sizeof(struct ct6_dsd));
	ctx->fcp_cmnd = dma_pool_zalloc(ha->fcp_cmnd_dma_pool,
	    GFP_ATOMIC, &ctx->fcp_cmnd_dma);
	if (!ctx->fcp_cmnd) {
	if (qla_get_buf(vha, sp->qpair, &sp->u.scmd.buf_dsc)) {
		ql_log(ql_log_fatal, vha, 0x3011,
		    "Failed to allocate fcp_cmnd for cmd=%p.\n", cmd);
		    "Failed to allocate buf for fcp_cmnd for cmd=%p.\n", cmd);
		goto queuing_error;
	}

	/* Initialize the DSD list and dma handle */
	INIT_LIST_HEAD(&ctx->dsd_list);
	ctx->dsd_use_cnt = 0;
	sp->flags |= SRB_GOT_BUF;
	ctx = &sp->u.scmd.ct6_ctx;
	ctx->fcp_cmnd = sp->u.scmd.buf_dsc.buf;
	ctx->fcp_cmnd_dma = sp->u.scmd.buf_dsc.buf_dma;

	if (cmd->cmd_len > 16) {
		additional_cdb_len = cmd->cmd_len - 16;
@@ -3145,7 +3135,6 @@ qla28xx_start_scsi_edif(srb_t *sp)
	cmd_pkt->fcp_cmnd_dseg_len = cpu_to_le16(ctx->fcp_cmnd_len);
	put_unaligned_le64(ctx->fcp_cmnd_dma, &cmd_pkt->fcp_cmnd_dseg_address);

	sp->flags |= SRB_FCP_CMND_DMA_VALID;
	cmd_pkt->byte_count = cpu_to_le32((uint32_t)scsi_bufflen(cmd));
	/* Set total data segment count. */
	cmd_pkt->entry_count = (uint8_t)req_cnt;
@@ -3177,15 +3166,11 @@ qla28xx_start_scsi_edif(srb_t *sp)
	return QLA_SUCCESS;

queuing_error_fcp_cmnd:
	dma_pool_free(ha->fcp_cmnd_dma_pool, ctx->fcp_cmnd, ctx->fcp_cmnd_dma);
queuing_error:
	if (tot_dsds)
		scsi_dma_unmap(cmd);

	if (sp->u.scmd.ct6_ctx) {
		mempool_free(sp->u.scmd.ct6_ctx, ha->ctx_mempool);
		sp->u.scmd.ct6_ctx = NULL;
	}
	qla_put_buf(sp->qpair, &sp->u.scmd.buf_dsc);
	qla_put_fw_resources(sp->qpair, &sp->iores);
	spin_unlock_irqrestore(lock, flags);

+4 −1
Original line number Diff line number Diff line
@@ -1015,5 +1015,8 @@ int qla2xxx_enable_port(struct Scsi_Host *shost);

uint64_t qla2x00_get_num_tgts(scsi_qla_host_t *vha);
uint64_t qla2x00_count_set_bits(u32 num);

/*
 * Buffer pool helpers. Callers treat a non-zero return from
 * qla_create_buf_pool() and qla_get_buf() as failure; qla_put_buf()
 * hands back a descriptor previously passed to qla_get_buf().
 */
int qla_create_buf_pool(struct scsi_qla_host *, struct qla_qpair *);
void qla_free_buf_pool(struct qla_qpair *);
int qla_get_buf(struct scsi_qla_host *, struct qla_qpair *, struct qla_buf_dsc *);
void qla_put_buf(struct qla_qpair *, struct qla_buf_dsc *);
#endif /* _QLA_GBL_H */
+12 −0
Original line number Diff line number Diff line
@@ -9442,6 +9442,13 @@ struct qla_qpair *qla2xxx_create_qpair(struct scsi_qla_host *vha, int qos,
			goto fail_mempool;
		}

		if (qla_create_buf_pool(vha, qpair)) {
			ql_log(ql_log_warn, vha, 0xd036,
			    "Failed to initialize buf pool for qpair %d\n",
			    qpair->id);
			goto fail_bufpool;
		}

		/* Mark as online */
		qpair->online = 1;

@@ -9457,7 +9464,10 @@ struct qla_qpair *qla2xxx_create_qpair(struct scsi_qla_host *vha, int qos,
	}
	return qpair;

fail_bufpool:
	mempool_destroy(qpair->srb_mempool);
fail_mempool:
	qla25xx_delete_req_que(vha, qpair->req);
fail_req:
	qla25xx_delete_rsp_que(vha, qpair->rsp);
fail_rsp:
@@ -9483,6 +9493,8 @@ int qla2xxx_delete_qpair(struct scsi_qla_host *vha, struct qla_qpair *qpair)

	qpair->delete_in_progress = 1;

	qla_free_buf_pool(qpair);

	ret = qla25xx_delete_req_que(vha, qpair->req);
	if (ret != QLA_SUCCESS)
		goto fail;
+2 −8
Original line number Diff line number Diff line
@@ -623,7 +623,7 @@ qla24xx_build_scsi_type_6_iocbs(srb_t *sp, struct cmd_type_6 *cmd_pkt,
	}

	cur_seg = scsi_sglist(cmd);
	ctx = sp->u.scmd.ct6_ctx;
	ctx = &sp->u.scmd.ct6_ctx;

	while (tot_dsds) {
		avail_dsds = (tot_dsds > QLA_DSDS_PER_IOCB) ?
@@ -3459,13 +3459,7 @@ qla82xx_start_scsi(srb_t *sp)
				goto queuing_error;
		}

		ctx = sp->u.scmd.ct6_ctx =
		    mempool_alloc(ha->ctx_mempool, GFP_ATOMIC);
		if (!ctx) {
			ql_log(ql_log_fatal, vha, 0x3010,
			    "Failed to allocate ctx for cmd=%p.\n", cmd);
			goto queuing_error;
		}
		ctx = &sp->u.scmd.ct6_ctx;

		memset(ctx, 0, sizeof(struct ct6_dsd));
		ctx->fcp_cmnd = dma_pool_zalloc(ha->fcp_cmnd_dma_pool,
Loading