Commit a9a7e30f authored by Jens Axboe's avatar Jens Axboe
Browse files

nvme: don't memset() the normal read/write command



This memset in the fast path costs a lot of cycles on my setup. Here's a
top-of-profile of doing ~6.7M IOPS:

+    5.90%  io_uring  [nvme]            [k] nvme_queue_rq
+    5.32%  io_uring  [nvme_core]       [k] nvme_setup_cmd
+    5.17%  io_uring  [kernel.vmlinux]  [k] io_submit_sqes
+    4.97%  io_uring  [kernel.vmlinux]  [k] blkdev_direct_IO

and a perf diff with this patch:

     0.92%     +4.40%  [nvme_core]       [k] nvme_setup_cmd

reducing it from 5.3% to only 0.9%. This takes it from the 2nd most
cycle consumer to something that's mostly irrelevant.

Reviewed-by: default avatarChaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: default avatarKeith Busch <kbusch@kernel.org>
Signed-off-by: default avatarJens Axboe <axboe@kernel.dk>
parent 9c3d2929
Loading
Loading
Loading
Loading
+6 −2
Original line number Diff line number Diff line
@@ -929,8 +929,6 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
	u16 control = 0;
	u32 dsmgmt = 0;

	memset(cmnd, 0, sizeof(*cmnd));

	if (req->cmd_flags & REQ_FUA)
		control |= NVME_RW_FUA;
	if (req->cmd_flags & (REQ_FAILFAST_DEV | REQ_RAHEAD))
@@ -940,9 +938,15 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
		dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH;

	cmnd->rw.opcode = op;
	cmnd->rw.flags = 0;
	cmnd->rw.nsid = cpu_to_le32(ns->head->ns_id);
	cmnd->rw.rsvd2 = 0;
	cmnd->rw.metadata = 0;
	cmnd->rw.slba = cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req)));
	cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
	cmnd->rw.reftag = 0;
	cmnd->rw.apptag = 0;
	cmnd->rw.appmask = 0;

	if (req_op(req) == REQ_OP_WRITE && ctrl->nr_streams)
		nvme_assign_write_stream(ctrl, req, &control, &dsmgmt);