Commit 5aa3bd9b authored by David S. Miller
Browse files

Merge branch 'virtio-vsock-seqpacket'



Arseny Krasnov says:

====================
virtio/vsock: introduce SOCK_SEQPACKET support

This patchset implements support of SOCK_SEQPACKET for virtio
transport.
	As SOCK_SEQPACKET guarantees to preserve record boundaries, a new
bit was added to the 'flags' field to do so: SEQ_EOR. This bit is
set to 1 in the last RW packet of a message.
	Since packets of one socket are not reordered on either the vsock
or the vhost transport layer, this bit allows the original message to
be restored on the receiver's side. If the user's buffer is smaller
than the message length, then all data beyond the buffer size is dropped.
	The maximum length of a datagram is limited by the 'peer_buf_alloc' value.
	The implementation also supports the 'MSG_TRUNC' flag.
	Tests are also implemented.

	Thanks to stsp2@yandex.ru for encouragements and initial design
recommendations.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 57806b28 184039ee
Loading
Loading
Loading
Loading
+52 −4
Original line number Diff line number Diff line
@@ -31,7 +31,8 @@

enum {
	VHOST_VSOCK_FEATURES = VHOST_FEATURES |
			       (1ULL << VIRTIO_F_ACCESS_PLATFORM)
			       (1ULL << VIRTIO_F_ACCESS_PLATFORM) |
			       (1ULL << VIRTIO_VSOCK_F_SEQPACKET)
};

enum {
@@ -56,6 +57,7 @@ struct vhost_vsock {
	atomic_t queued_replies;

	u32 guest_cid;
	bool seqpacket_allow;
};

static u32 vhost_transport_get_local_cid(void)
@@ -112,6 +114,7 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
		size_t nbytes;
		size_t iov_len, payload_len;
		int head;
		bool restore_flag = false;

		spin_lock_bh(&vsock->send_pkt_list_lock);
		if (list_empty(&vsock->send_pkt_list)) {
@@ -168,9 +171,26 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
		/* If the packet is greater than the space available in the
		 * buffer, we split it using multiple buffers.
		 */
		if (payload_len > iov_len - sizeof(pkt->hdr))
		if (payload_len > iov_len - sizeof(pkt->hdr)) {
			payload_len = iov_len - sizeof(pkt->hdr);

			/* As we are copying pieces of a large packet's buffer to
			 * small rx buffers, headers of packets in the rx queue are
			 * created dynamically and are initialized with the header
			 * of the current packet (except length). But in case of
			 * SOCK_SEQPACKET, we must also clear the record delimiter
			 * bit (VIRTIO_VSOCK_SEQ_EOR). Otherwise, instead of one
			 * packet with the delimiter (which marks end of record),
			 * there will be a sequence of packets with the delimiter
			 * bit set. After the initialized header has been copied to
			 * the rx buffer, this bit will be restored.
			 */
			if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOR) {
				pkt->hdr.flags &= ~cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);
				restore_flag = true;
			}
		}

		/* Set the correct length in the header */
		pkt->hdr.len = cpu_to_le32(payload_len);

@@ -204,6 +224,9 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
		 * to send it with the next available buffer.
		 */
		if (pkt->off < pkt->len) {
			if (restore_flag)
				pkt->hdr.flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);

			/* We are queueing the same virtio_vsock_pkt to handle
			 * the remaining bytes, and we want to deliver it
			 * to monitoring devices in the next iteration.
@@ -354,7 +377,6 @@ vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq,
		return NULL;
	}

	if (le16_to_cpu(pkt->hdr.type) == VIRTIO_VSOCK_TYPE_STREAM)
	pkt->len = le32_to_cpu(pkt->hdr.len);

	/* No payload */
@@ -398,6 +420,8 @@ static bool vhost_vsock_more_replies(struct vhost_vsock *vsock)
	return val < vq->num;
}

static bool vhost_transport_seqpacket_allow(u32 remote_cid);

static struct virtio_transport vhost_transport = {
	.transport = {
		.module                   = THIS_MODULE,
@@ -424,6 +448,11 @@ static struct virtio_transport vhost_transport = {
		.stream_is_active         = virtio_transport_stream_is_active,
		.stream_allow             = virtio_transport_stream_allow,

		.seqpacket_dequeue        = virtio_transport_seqpacket_dequeue,
		.seqpacket_enqueue        = virtio_transport_seqpacket_enqueue,
		.seqpacket_allow          = vhost_transport_seqpacket_allow,
		.seqpacket_has_data       = virtio_transport_seqpacket_has_data,

		.notify_poll_in           = virtio_transport_notify_poll_in,
		.notify_poll_out          = virtio_transport_notify_poll_out,
		.notify_recv_init         = virtio_transport_notify_recv_init,
@@ -441,6 +470,22 @@ static struct virtio_transport vhost_transport = {
	.send_pkt = vhost_transport_send_pkt,
};

/*
 * Transport callback (.seqpacket_allow): report whether SOCK_SEQPACKET
 * may be used with the peer identified by @remote_cid.
 *
 * The vhost_vsock instance is looked up with vhost_vsock_get() inside an
 * RCU read-side critical section, and its seqpacket_allow flag is copied
 * out before that section ends. If no instance is found for the CID,
 * the result is false.
 */
static bool vhost_transport_seqpacket_allow(u32 remote_cid)
{
	struct vhost_vsock *peer;
	bool allowed;

	rcu_read_lock();
	peer = vhost_vsock_get(remote_cid);
	allowed = peer ? peer->seqpacket_allow : false;
	rcu_read_unlock();

	return allowed;
}

static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
{
	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
@@ -785,6 +830,9 @@ static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features)
			goto err;
	}

	if (features & (1ULL << VIRTIO_VSOCK_F_SEQPACKET))
		vsock->seqpacket_allow = true;

	for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
		vq = &vsock->vqs[i];
		mutex_lock(&vq->mutex);
+10 −0
Original line number Diff line number Diff line
@@ -36,6 +36,7 @@ struct virtio_vsock_sock {
	u32 rx_bytes;
	u32 buf_alloc;
	struct list_head rx_queue;
	u32 msg_count;
};

struct virtio_vsock_pkt {
@@ -80,8 +81,17 @@ virtio_transport_dgram_dequeue(struct vsock_sock *vsk,
			       struct msghdr *msg,
			       size_t len, int flags);

int
virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk,
				   struct msghdr *msg,
				   size_t len);
ssize_t
virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk,
				   struct msghdr *msg,
				   int flags);
s64 virtio_transport_stream_has_data(struct vsock_sock *vsk);
s64 virtio_transport_stream_has_space(struct vsock_sock *vsk);
u32 virtio_transport_seqpacket_has_data(struct vsock_sock *vsk);

int virtio_transport_do_socket_init(struct vsock_sock *vsk,
				 struct vsock_sock *psk);
+8 −0
Original line number Diff line number Diff line
@@ -135,6 +135,14 @@ struct vsock_transport {
	bool (*stream_is_active)(struct vsock_sock *);
	bool (*stream_allow)(u32 cid, u32 port);

	/* SEQ_PACKET. */
	ssize_t (*seqpacket_dequeue)(struct vsock_sock *vsk, struct msghdr *msg,
				     int flags);
	int (*seqpacket_enqueue)(struct vsock_sock *vsk, struct msghdr *msg,
				 size_t len);
	bool (*seqpacket_allow)(u32 remote_cid);
	u32 (*seqpacket_has_data)(struct vsock_sock *vsk);

	/* Notification. */
	int (*notify_poll_in)(struct vsock_sock *, size_t, bool *);
	int (*notify_poll_out)(struct vsock_sock *, size_t, bool *);
+4 −1
Original line number Diff line number Diff line
@@ -9,9 +9,12 @@
#include <linux/tracepoint.h>

TRACE_DEFINE_ENUM(VIRTIO_VSOCK_TYPE_STREAM);
TRACE_DEFINE_ENUM(VIRTIO_VSOCK_TYPE_SEQPACKET);

#define show_type(val) \
	__print_symbolic(val, { VIRTIO_VSOCK_TYPE_STREAM, "STREAM" })
	__print_symbolic(val, \
			 { VIRTIO_VSOCK_TYPE_STREAM, "STREAM" }, \
			 { VIRTIO_VSOCK_TYPE_SEQPACKET, "SEQPACKET" })

TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_INVALID);
TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_REQUEST);
+9 −0
Original line number Diff line number Diff line
@@ -38,6 +38,9 @@
#include <linux/virtio_ids.h>
#include <linux/virtio_config.h>

/* The feature bitmap for virtio vsock */
#define VIRTIO_VSOCK_F_SEQPACKET	1	/* SOCK_SEQPACKET supported */

struct virtio_vsock_config {
	__le64 guest_cid;
} __attribute__((packed));
@@ -65,6 +68,7 @@ struct virtio_vsock_hdr {

/* Socket type values, as compared against the 'type' field of a packet header */
enum virtio_vsock_type {
	VIRTIO_VSOCK_TYPE_STREAM = 1,
	VIRTIO_VSOCK_TYPE_SEQPACKET = 2,	/* SOCK_SEQPACKET: record boundaries preserved */
};

enum virtio_vsock_op {
@@ -91,4 +95,9 @@ enum virtio_vsock_shutdown {
	VIRTIO_VSOCK_SHUTDOWN_SEND = 2,
};

/* VIRTIO_VSOCK_OP_RW flags values */
enum virtio_vsock_rw {
	/* Record delimiter: set in the last RW packet of a SOCK_SEQPACKET message */
	VIRTIO_VSOCK_SEQ_EOR = 1,
};

#endif /* _UAPI_LINUX_VIRTIO_VSOCK_H */
Loading