Commit 63ed1aab authored by Matt Johnston's avatar Matt Johnston Committed by David S. Miller
Browse files

mctp: Add SIOCMCTP{ALLOC,DROP}TAG ioctls for tag control



This change adds a couple of new ioctls for mctp sockets:
SIOCMCTPALLOCTAG and SIOCMCTPDROPTAG.  These ioctls provide facilities
for explicit allocation / release of tags, overriding the automatic
allocate-on-send/release-on-reply and timeout behaviours. This allows
userspace more control over messages that may not fit a simple
request/response model.

In order to indicate a pre-allocated tag to the sendmsg() syscall, we
introduce a new flag to the struct sockaddr_mctp.smctp_tag value:
MCTP_TAG_PREALLOC.

Additional changes from Jeremy Kerr <jk@codeconstruct.com.au>.

Contains a fix that was:
Reported-by: kernel test robot <lkp@intel.com>

Signed-off-by: Matt Johnston <matt@codeconstruct.com.au>
Signed-off-by: Jeremy Kerr <jk@codeconstruct.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 0de55a7d
Loading
Loading
Loading
Loading
+48 −0
Original line number Diff line number Diff line
@@ -212,6 +212,54 @@ remote address is already known, or the message does not require a reply.
Like the send calls, sockets will only receive responses to requests they have
sent (TO=1) and may only respond (TO=0) to requests they have received.

``ioctl(SIOCMCTPALLOCTAG)`` and ``ioctl(SIOCMCTPDROPTAG)``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

These ioctls give applications more control over MCTP message tags, by allocating
(and dropping) tag values explicitly, rather than the kernel automatically
allocating a per-message tag at ``sendmsg()`` time.

In general, you will only need to use these ioctls if your MCTP protocol does
not fit the usual request/response model. For example, if you need to persist
tags across multiple requests, or a request may generate more than one response.
In these cases, the ioctls allow you to decouple the tag allocation (and
release) from individual message send and receive operations.

Both ioctls are passed a pointer to a ``struct mctp_ioc_tag_ctl``:

.. code-block:: C

    struct mctp_ioc_tag_ctl {
        mctp_eid_t      peer_addr;
        __u8            tag;
        __u16           flags;
    };

``SIOCMCTPALLOCTAG`` allocates a tag for a specific peer, which an application
can use in future ``sendmsg()`` calls. The application populates the
``peer_addr`` member with the remote EID. Other fields must be zero.

On return, the ``tag`` member will be populated with the allocated tag value.
The allocated tag will have the following tag bits set:

 - ``MCTP_TAG_OWNER``: it only makes sense to allocate tags if you're the tag
   owner

 - ``MCTP_TAG_PREALLOC``: to indicate to ``sendmsg()`` that this is a
   preallocated tag.

 - ... and the actual tag value, within the least-significant three bits
   (``MCTP_TAG_MASK``). Note that zero is a valid tag value.

The tag value should be used as-is for the ``smctp_tag`` member of ``struct
sockaddr_mctp``.

``SIOCMCTPDROPTAG`` releases a tag that has been previously allocated by a
``SIOCMCTPALLOCTAG`` ioctl. The ``peer_addr`` must be the same as used for the
allocation, and the ``tag`` value must match exactly the tag returned from the
allocation (including the ``MCTP_TAG_OWNER`` and ``MCTP_TAG_PREALLOC`` bits).
The ``flags`` field must be zero.

Kernel internals
================

+10 −1
Original line number Diff line number Diff line
@@ -126,7 +126,7 @@ struct mctp_sock {
 */
struct mctp_sk_key {
	mctp_eid_t	peer_addr;
	mctp_eid_t	local_addr;
	mctp_eid_t	local_addr; /* MCTP_ADDR_ANY for local owned tags */
	__u8		tag; /* incoming tag match; invert TO for local */

	/* we hold a ref to sk when set */
@@ -163,6 +163,12 @@ struct mctp_sk_key {
	 */
	unsigned long	dev_flow_state;
	struct mctp_dev	*dev;

	/* a tag allocated with SIOCMCTPALLOCTAG ioctl will not expire
	 * automatically on timeout or response, instead SIOCMCTPDROPTAG
	 * is used.
	 */
	bool		manual_alloc;
};

struct mctp_skb_cb {
@@ -239,6 +245,9 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
		      struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag);

void mctp_key_unref(struct mctp_sk_key *key);
struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk,
					 mctp_eid_t daddr, mctp_eid_t saddr,
					 bool manual, u8 *tagp);

/* routing <--> device interface */
unsigned int mctp_default_net(struct net *net);
+4 −1
Original line number Diff line number Diff line
@@ -15,6 +15,7 @@ enum {
	MCTP_TRACE_KEY_REPLIED,
	MCTP_TRACE_KEY_INVALIDATED,
	MCTP_TRACE_KEY_CLOSED,
	MCTP_TRACE_KEY_DROPPED,
};
#endif /* __TRACE_MCTP_ENUMS */

@@ -22,6 +23,7 @@ TRACE_DEFINE_ENUM(MCTP_TRACE_KEY_TIMEOUT);
TRACE_DEFINE_ENUM(MCTP_TRACE_KEY_REPLIED);
TRACE_DEFINE_ENUM(MCTP_TRACE_KEY_INVALIDATED);
TRACE_DEFINE_ENUM(MCTP_TRACE_KEY_CLOSED);
TRACE_DEFINE_ENUM(MCTP_TRACE_KEY_DROPPED);

TRACE_EVENT(mctp_key_acquire,
	TP_PROTO(const struct mctp_sk_key *key),
@@ -66,7 +68,8 @@ TRACE_EVENT(mctp_key_release,
				 { MCTP_TRACE_KEY_TIMEOUT, "timeout" },
				 { MCTP_TRACE_KEY_REPLIED, "replied" },
				 { MCTP_TRACE_KEY_INVALIDATED, "invalidated" },
				 { MCTP_TRACE_KEY_CLOSED, "closed" })
				 { MCTP_TRACE_KEY_CLOSED, "closed" },
				 { MCTP_TRACE_KEY_DROPPED, "dropped" })
	)
);

+18 −0
Original line number Diff line number Diff line
@@ -44,7 +44,25 @@ struct sockaddr_mctp_ext {

#define MCTP_TAG_MASK		0x07
#define MCTP_TAG_OWNER		0x08
#define MCTP_TAG_PREALLOC	0x10

#define MCTP_OPT_ADDR_EXT	1

#define SIOCMCTPALLOCTAG	(SIOCPROTOPRIVATE + 0)
#define SIOCMCTPDROPTAG		(SIOCPROTOPRIVATE + 1)

struct mctp_ioc_tag_ctl {
	/* Remote EID the tag is allocated for / dropped from */
	mctp_eid_t	peer_addr;

	/* For SIOCMCTPALLOCTAG: must be passed as zero, kernel will
	 * populate with the allocated tag value. Returned tag value will
	 * always have TO and PREALLOC set.
	 *
	 * For SIOCMCTPDROPTAG: userspace provides tag value to drop, from
	 * a prior SIOCMCTPALLOCTAG call (and so must have TO and PREALLOC set).
	 */
	__u8		tag;
	/* No flags defined yet; must be zero for both ioctls */
	__u16		flags;
};

#endif /* __UAPI_MCTP_H */
+163 −26
Original line number Diff line number Diff line
@@ -6,6 +6,7 @@
 * Copyright (c) 2021 Google
 */

#include <linux/compat.h>
#include <linux/if_arp.h>
#include <linux/net.h>
#include <linux/mctp.h>
@@ -21,6 +22,8 @@

/* socket implementation */

static void mctp_sk_expire_keys(struct timer_list *timer);

static int mctp_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
@@ -99,13 +102,20 @@ static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
	struct sk_buff *skb;

	if (addr) {
		const u8 tagbits = MCTP_TAG_MASK | MCTP_TAG_OWNER |
			MCTP_TAG_PREALLOC;

		if (addrlen < sizeof(struct sockaddr_mctp))
			return -EINVAL;
		if (addr->smctp_family != AF_MCTP)
			return -EINVAL;
		if (!mctp_sockaddr_is_ok(addr))
			return -EINVAL;
		if (addr->smctp_tag & ~(MCTP_TAG_MASK | MCTP_TAG_OWNER))
		if (addr->smctp_tag & ~tagbits)
			return -EINVAL;
		/* can't preallocate a non-owned tag */
		if (addr->smctp_tag & MCTP_TAG_PREALLOC &&
		    !(addr->smctp_tag & MCTP_TAG_OWNER))
			return -EINVAL;

	} else {
@@ -248,6 +258,32 @@ static int mctp_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
	return rc;
}

/* We're done with the key; invalidate, stop reassembly, and remove from lists.
 *
 * Called with the key locked (@flags is the irqsave state from that lock)
 * and net->mctp.keys_lock held, per the annotations below. Drops the key
 * lock before delisting; frees any partial reassembly outside the lock,
 * and drops the reference the lookup lists held on the key.
 */
static void __mctp_key_remove(struct mctp_sk_key *key, struct net *net,
			      unsigned long flags, unsigned long reason)
__releases(&key->lock)
__must_hold(&net->mctp.keys_lock)
{
	struct sk_buff *skb;

	trace_mctp_key_release(key, reason);
	/* detach any in-progress reassembly so it can be freed after we
	 * release the key lock; mark the key dead for concurrent users
	 */
	skb = key->reasm_head;
	key->reasm_head = NULL;
	key->reasm_dead = true;
	key->valid = false;
	mctp_dev_release_key(key->dev, key);
	spin_unlock_irqrestore(&key->lock, flags);

	/* both lists are protected by keys_lock, held by our caller */
	hlist_del(&key->hlist);
	hlist_del(&key->sklist);

	/* unref for the lists */
	mctp_key_unref(key);

	kfree_skb(skb);
}

static int mctp_setsockopt(struct socket *sock, int level, int optname,
			   sockptr_t optval, unsigned int optlen)
{
@@ -293,6 +329,115 @@ static int mctp_getsockopt(struct socket *sock, int level, int optname,
	return -EINVAL;
}

/* SIOCMCTPALLOCTAG handler: allocate a local tag for the peer EID given in
 * userspace's struct mctp_ioc_tag_ctl, returning the tag (with
 * MCTP_TAG_OWNER and MCTP_TAG_PREALLOC set) through the same struct.
 *
 * Returns 0 on success, -EFAULT on a bad user pointer, -EINVAL if the
 * incoming tag/flags fields are non-zero, or the error from
 * mctp_alloc_local_tag().
 */
static int mctp_ioctl_alloctag(struct mctp_sock *msk, unsigned long arg)
{
	struct net *net = sock_net(&msk->sk);
	struct mctp_sk_key *key = NULL;
	struct mctp_ioc_tag_ctl ctl;
	unsigned long flags;
	u8 tag;

	if (copy_from_user(&ctl, (void __user *)arg, sizeof(ctl)))
		return -EFAULT;

	/* tag is output-only for this ioctl */
	if (ctl.tag)
		return -EINVAL;

	/* no flags defined yet; reject anything non-zero */
	if (ctl.flags)
		return -EINVAL;

	/* manual == true: the key won't expire on timeout or reply, only on
	 * an explicit SIOCMCTPDROPTAG (or socket close)
	 */
	key = mctp_alloc_local_tag(msk, ctl.peer_addr, MCTP_ADDR_ANY,
				   true, &tag);
	if (IS_ERR(key))
		return PTR_ERR(key);

	ctl.tag = tag | MCTP_TAG_OWNER | MCTP_TAG_PREALLOC;
	if (copy_to_user((void __user *)arg, &ctl, sizeof(ctl))) {
		/* we can't report the tag back to userspace; undo the
		 * allocation rather than leaking it until socket close
		 */
		spin_lock_irqsave(&key->lock, flags);
		__mctp_key_remove(key, net, flags, MCTP_TRACE_KEY_DROPPED);
		mctp_key_unref(key);
		return -EFAULT;
	}

	/* drop our local reference; the lookup lists keep theirs */
	mctp_key_unref(key);
	return 0;
}

/* SIOCMCTPDROPTAG handler: release a tag previously allocated with
 * SIOCMCTPALLOCTAG. Userspace must pass back the exact tag value it was
 * given (so MCTP_TAG_OWNER and MCTP_TAG_PREALLOC must both be set), plus
 * the peer address used for the allocation.
 *
 * Returns 0 if a matching manually-allocated key was removed, -EFAULT on
 * a bad user pointer, -EINVAL otherwise (bad tag/flags, or no match).
 */
static int mctp_ioctl_droptag(struct mctp_sock *msk, unsigned long arg)
{
	struct net *net = sock_net(&msk->sk);
	struct mctp_ioc_tag_ctl ctl;
	unsigned long flags, fl2;
	struct mctp_sk_key *key;
	struct hlist_node *tmp;
	int rc;
	u8 tag;

	if (copy_from_user(&ctl, (void __user *)arg, sizeof(ctl)))
		return -EFAULT;

	/* no flags defined yet; reject anything non-zero */
	if (ctl.flags)
		return -EINVAL;

	/* Must be a local tag, TO set, preallocated */
	if ((ctl.tag & ~MCTP_TAG_MASK) != (MCTP_TAG_OWNER | MCTP_TAG_PREALLOC))
		return -EINVAL;

	tag = ctl.tag & MCTP_TAG_MASK;
	rc = -EINVAL; /* stays -EINVAL if the walk finds no match */

	spin_lock_irqsave(&net->mctp.keys_lock, flags);
	hlist_for_each_entry_safe(key, tmp, &msk->keys, sklist) {
		/* we do an irqsave here, even though we know the irq state,
		 * so we have the flags to pass to __mctp_key_remove
		 */
		spin_lock_irqsave(&key->lock, fl2);
		/* only drop keys created via SIOCMCTPALLOCTAG, and only on
		 * an exact peer/tag match
		 */
		if (key->manual_alloc &&
		    ctl.peer_addr == key->peer_addr &&
		    tag == key->tag) {
			__mctp_key_remove(key, net, fl2,
					  MCTP_TRACE_KEY_DROPPED);
			rc = 0;
		} else {
			spin_unlock_irqrestore(&key->lock, fl2);
		}
	}
	spin_unlock_irqrestore(&net->mctp.keys_lock, flags);

	return rc;
}

/* Socket ioctl entry point: dispatch the MCTP tag-control ioctls to their
 * handlers; anything else is -EINVAL.
 */
static int mctp_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	struct mctp_sock *msk = container_of(sock->sk, struct mctp_sock, sk);

	if (cmd == SIOCMCTPALLOCTAG)
		return mctp_ioctl_alloctag(msk, arg);

	if (cmd == SIOCMCTPDROPTAG)
		return mctp_ioctl_droptag(msk, arg);

	return -EINVAL;
}

#ifdef CONFIG_COMPAT
/* 32-bit compat ioctl entry point. struct mctp_ioc_tag_ctl has the same
 * layout in compat mode, so we only need to translate the user pointer
 * before handing off to the native handler.
 */
static int mctp_compat_ioctl(struct socket *sock, unsigned int cmd,
			     unsigned long arg)
{
	switch (cmd) {
	case SIOCMCTPALLOCTAG:
	case SIOCMCTPDROPTAG:
		return mctp_ioctl(sock, cmd,
				  (unsigned long)compat_ptr(arg));
	default:
		return -ENOIOCTLCMD;
	}
}
#endif

static const struct proto_ops mctp_dgram_ops = {
	.family		= PF_MCTP,
	.release	= mctp_release,
@@ -302,7 +447,7 @@ static const struct proto_ops mctp_dgram_ops = {
	.accept		= sock_no_accept,
	.getname	= sock_no_getname,
	.poll		= datagram_poll,
	.ioctl		= sock_no_ioctl,
	.ioctl		= mctp_ioctl,
	.gettstamp	= sock_gettstamp,
	.listen		= sock_no_listen,
	.shutdown	= sock_no_shutdown,
@@ -312,6 +457,9 @@ static const struct proto_ops mctp_dgram_ops = {
	.recvmsg	= mctp_recvmsg,
	.mmap		= sock_no_mmap,
	.sendpage	= sock_no_sendpage,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= mctp_compat_ioctl,
#endif
};

static void mctp_sk_expire_keys(struct timer_list *timer)
@@ -319,7 +467,7 @@ static void mctp_sk_expire_keys(struct timer_list *timer)
	struct mctp_sock *msk = container_of(timer, struct mctp_sock,
					     key_expiry);
	struct net *net = sock_net(&msk->sk);
	unsigned long next_expiry, flags;
	unsigned long next_expiry, flags, fl2;
	struct mctp_sk_key *key;
	struct hlist_node *tmp;
	bool next_expiry_valid = false;
@@ -327,15 +475,16 @@ static void mctp_sk_expire_keys(struct timer_list *timer)
	spin_lock_irqsave(&net->mctp.keys_lock, flags);

	hlist_for_each_entry_safe(key, tmp, &msk->keys, sklist) {
		spin_lock(&key->lock);
		/* don't expire. manual_alloc is immutable, no locking
		 * required.
		 */
		if (key->manual_alloc)
			continue;

		spin_lock_irqsave(&key->lock, fl2);
		if (!time_after_eq(key->expiry, jiffies)) {
			trace_mctp_key_release(key, MCTP_TRACE_KEY_TIMEOUT);
			key->valid = false;
			hlist_del_rcu(&key->hlist);
			hlist_del_rcu(&key->sklist);
			spin_unlock(&key->lock);
			mctp_key_unref(key);
			__mctp_key_remove(key, net, fl2,
					  MCTP_TRACE_KEY_TIMEOUT);
			continue;
		}

@@ -346,7 +495,7 @@ static void mctp_sk_expire_keys(struct timer_list *timer)
			next_expiry = key->expiry;
			next_expiry_valid = true;
		}
		spin_unlock(&key->lock);
		spin_unlock_irqrestore(&key->lock, fl2);
	}

	spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
@@ -387,9 +536,9 @@ static void mctp_sk_unhash(struct sock *sk)
{
	struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
	struct net *net = sock_net(sk);
	unsigned long flags, fl2;
	struct mctp_sk_key *key;
	struct hlist_node *tmp;
	unsigned long flags;

	/* remove from any type-based binds */
	mutex_lock(&net->mctp.bind_lock);
@@ -399,20 +548,8 @@ static void mctp_sk_unhash(struct sock *sk)
	/* remove tag allocations */
	spin_lock_irqsave(&net->mctp.keys_lock, flags);
	hlist_for_each_entry_safe(key, tmp, &msk->keys, sklist) {
		hlist_del(&key->sklist);
		hlist_del(&key->hlist);

		trace_mctp_key_release(key, MCTP_TRACE_KEY_CLOSED);

		spin_lock(&key->lock);
		kfree_skb(key->reasm_head);
		key->reasm_head = NULL;
		key->reasm_dead = true;
		key->valid = false;
		spin_unlock(&key->lock);

		/* key is no longer on the lookup lists, unref */
		mctp_key_unref(key);
		spin_lock_irqsave(&key->lock, fl2);
		__mctp_key_remove(key, net, fl2, MCTP_TRACE_KEY_CLOSED);
	}
	spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
}
Loading