Commit 122e5b9f authored by Sagi Grimberg, committed by Christoph Hellwig
Browse files

nvme-tcp: optimize network stack with setting msg flags according to batch size



If we have a long list of requests to send, signal the network stack
that more is coming (MSG_MORE). If we have nothing else, signal MSG_EOR.

Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Tested-by: Mark Wunderlich <mark.wunderlich@intel.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
parent 86f0348a
Loading
Loading
Loading
Loading
+17 −3
Original line number Diff line number Diff line
@@ -79,6 +79,7 @@ struct nvme_tcp_queue {
	struct mutex		send_mutex;
	struct llist_head	req_list;
	struct list_head	send_list;
	bool			more_requests;

	/* recv state */
	void			*pdu;
@@ -277,7 +278,9 @@ static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
	 */
	if (queue->io_cpu == smp_processor_id() &&
	    sync && empty && mutex_trylock(&queue->send_mutex)) {
		queue->more_requests = !last;
		nvme_tcp_try_send(queue);
		queue->more_requests = false;
		mutex_unlock(&queue->send_mutex);
	} else if (last) {
		queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
@@ -877,6 +880,12 @@ static void nvme_tcp_state_change(struct sock *sk)
	read_unlock(&sk->sk_callback_lock);
}

static inline bool nvme_tcp_queue_more(struct nvme_tcp_queue *queue)
{
	return !list_empty(&queue->send_list) ||
		!llist_empty(&queue->req_list) || queue->more_requests;
}

static inline void nvme_tcp_done_send_req(struct nvme_tcp_queue *queue)
{
	queue->request = NULL;
@@ -898,7 +907,7 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
		bool last = nvme_tcp_pdu_last_send(req, len);
		int ret, flags = MSG_DONTWAIT;

		if (last && !queue->data_digest)
		if (last && !queue->data_digest && !nvme_tcp_queue_more(queue))
			flags |= MSG_EOR;
		else
			flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
@@ -945,7 +954,7 @@ static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req)
	int flags = MSG_DONTWAIT;
	int ret;

	if (inline_data)
	if (inline_data || nvme_tcp_queue_more(queue))
		flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
	else
		flags |= MSG_EOR;
@@ -1010,12 +1019,17 @@ static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req)
{
	struct nvme_tcp_queue *queue = req->queue;
	int ret;
	struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_EOR };
	struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
	struct kvec iov = {
		.iov_base = &req->ddgst + req->offset,
		.iov_len = NVME_TCP_DIGEST_LENGTH - req->offset
	};

	if (nvme_tcp_queue_more(queue))
		msg.msg_flags |= MSG_MORE;
	else
		msg.msg_flags |= MSG_EOR;

	ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len);
	if (unlikely(ret <= 0))
		return ret;