Commit 833ef3b9 authored by Jeremy Kerr's avatar Jeremy Kerr Committed by David S. Miller
Browse files

mctp: Populate socket implementation



Start filling-out the socket syscalls: bind, sendmsg & recvmsg.

This requires an input route implementation, so we add to
mctp_route_input, allowing lookups on binds & message tags. This just
handles single-packet messages at present, we will add fragmentation in
a future change.

Signed-off-by: default avatarJeremy Kerr <jk@codeconstruct.com.au>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 831119f8
Loading
Loading
Loading
Loading
+59 −0
Original line number Diff line number Diff line
@@ -12,6 +12,7 @@
#include <linux/bits.h>
#include <linux/mctp.h>
#include <net/net_namespace.h>
#include <net/sock.h>

/* MCTP packet definitions */
struct mctp_hdr {
@@ -46,6 +47,64 @@ static inline struct mctp_hdr *mctp_hdr(struct sk_buff *skb)
	return (struct mctp_hdr *)skb_network_header(skb);
}

/* socket implementation */
struct mctp_sock {
	struct sock	sk;

	/* bind() params */
	int		bind_net;
	mctp_eid_t	bind_addr;
	__u8		bind_type;

	/* list of mctp_sk_key, for incoming tag lookup. updates protected
	 * by sk->net->keys_lock
	 */
	struct hlist_head keys;
};

/* Key for matching incoming packets to sockets or reassembly contexts.
 * Packets are matched on (src,dest,tag).
 *
 * Lifetime requirements:
 *
 *  - keys are free()ed via RCU
 *
 *  - a mctp_sk_key contains a reference to a struct sock; this is valid
 *    for the life of the key. On sock destruction (through unhash), the key is
 *    removed from lists (see below), and will not be observable after a RCU
 *    grace period.
 *
 *    any RX occurring within that grace period may still queue to the socket,
 *    but will hit the SOCK_DEAD case before the socket is freed.
 *
 * - these mctp_sk_keys appear on two lists:
 *     1) the struct mctp_sock->keys list
 *     2) the struct netns_mctp->keys list
 *
 *        updates to either list are performed under the netns_mctp->keys
 *        lock.
 *
 * - there is a single destruction path for a mctp_sk_key - through socket
 *   unhash (see mctp_sk_unhash). This performs the list removal under
 *   keys_lock.
 */
struct mctp_sk_key {
	mctp_eid_t	peer_addr;
	mctp_eid_t	local_addr;
	__u8		tag; /* incoming tag match; invert TO for local */

	/* we hold a ref to sk when set */
	struct sock	*sk;

	/* routing lookup list */
	struct hlist_node hlist;

	/* per-socket list */
	struct hlist_node sklist;

	struct rcu_head	rcu;
};

struct mctp_skb_cb {
	unsigned int	magic;
	unsigned int	net;
+13 −0
Original line number Diff line number Diff line
@@ -12,6 +12,19 @@ struct netns_mctp {
	/* Only updated under RTNL, entries freed via RCU */
	struct list_head routes;

	/* Bound sockets: list of sockets bound by type.
	 * This list is updated from non-atomic contexts (under bind_lock),
	 * and read (under rcu) in packet rx
	 */
	struct mutex bind_lock;
	struct hlist_head binds;

	/* tag allocations. This list is read and updated from atomic contexts,
	 * but elements are free()ed after a RCU grace-period
	 */
	spinlock_t keys_lock;
	struct hlist_head keys;

	/* neighbour table */
	struct mutex neigh_lock;
	struct list_head neighbours;
+196 −7
Original line number Diff line number Diff line
@@ -18,10 +18,6 @@

/* socket implementation */

struct mctp_sock {
	struct sock	sk;
};

static int mctp_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
@@ -36,18 +32,160 @@ static int mctp_release(struct socket *sock)

static int mctp_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
{
	return 0;
	struct sock *sk = sock->sk;
	struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
	struct sockaddr_mctp *smctp;
	int rc;

	if (addrlen < sizeof(*smctp))
		return -EINVAL;

	if (addr->sa_family != AF_MCTP)
		return -EAFNOSUPPORT;

	if (!capable(CAP_NET_BIND_SERVICE))
		return -EACCES;

	/* it's a valid sockaddr for MCTP, cast and do protocol checks */
	smctp = (struct sockaddr_mctp *)addr;

	lock_sock(sk);

	/* TODO: allow rebind */
	if (sk_hashed(sk)) {
		rc = -EADDRINUSE;
		goto out_release;
	}
	msk->bind_net = smctp->smctp_network;
	msk->bind_addr = smctp->smctp_addr.s_addr;
	msk->bind_type = smctp->smctp_type & 0x7f; /* ignore the IC bit */

	rc = sk->sk_prot->hash(sk);

out_release:
	release_sock(sk);

	return rc;
}

static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
{
	return 0;
	DECLARE_SOCKADDR(struct sockaddr_mctp *, addr, msg->msg_name);
	const int hlen = MCTP_HEADER_MAXLEN + sizeof(struct mctp_hdr);
	int rc, addrlen = msg->msg_namelen;
	struct sock *sk = sock->sk;
	struct mctp_skb_cb *cb;
	struct mctp_route *rt;
	struct sk_buff *skb;

	if (addr) {
		if (addrlen < sizeof(struct sockaddr_mctp))
			return -EINVAL;
		if (addr->smctp_family != AF_MCTP)
			return -EINVAL;
		if (addr->smctp_tag & ~(MCTP_TAG_MASK | MCTP_TAG_OWNER))
			return -EINVAL;

	} else {
		/* TODO: connect()ed sockets */
		return -EDESTADDRREQ;
	}

	if (!capable(CAP_NET_RAW))
		return -EACCES;

	rt = mctp_route_lookup(sock_net(sk), addr->smctp_network,
			       addr->smctp_addr.s_addr);
	if (!rt)
		return -EHOSTUNREACH;

	skb = sock_alloc_send_skb(sk, hlen + 1 + len,
				  msg->msg_flags & MSG_DONTWAIT, &rc);
	if (!skb)
		return rc;

	skb_reserve(skb, hlen);

	/* set type as fist byte in payload */
	*(u8 *)skb_put(skb, 1) = addr->smctp_type;

	rc = memcpy_from_msg((void *)skb_put(skb, len), msg, len);
	if (rc < 0) {
		kfree_skb(skb);
		return rc;
	}

	/* set up cb */
	cb = __mctp_cb(skb);
	cb->net = addr->smctp_network;

	rc = mctp_local_output(sk, rt, skb, addr->smctp_addr.s_addr,
			       addr->smctp_tag);

	return rc ? : len;
}

static int mctp_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
			int flags)
{
	return 0;
	DECLARE_SOCKADDR(struct sockaddr_mctp *, addr, msg->msg_name);
	struct sock *sk = sock->sk;
	struct sk_buff *skb;
	size_t msglen;
	u8 type;
	int rc;

	if (flags & ~(MSG_DONTWAIT | MSG_TRUNC | MSG_PEEK))
		return -EOPNOTSUPP;

	skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &rc);
	if (!skb)
		return rc;

	if (!skb->len) {
		rc = 0;
		goto out_free;
	}

	/* extract message type, remove from data */
	type = *((u8 *)skb->data);
	msglen = skb->len - 1;

	if (len < msglen)
		msg->msg_flags |= MSG_TRUNC;
	else
		len = msglen;

	rc = skb_copy_datagram_msg(skb, 1, msg, len);
	if (rc < 0)
		goto out_free;

	sock_recv_ts_and_drops(msg, sk, skb);

	if (addr) {
		struct mctp_skb_cb *cb = mctp_cb(skb);
		/* TODO: expand mctp_skb_cb for header fields? */
		struct mctp_hdr *hdr = mctp_hdr(skb);

		hdr = mctp_hdr(skb);
		addr = msg->msg_name;
		addr->smctp_family = AF_MCTP;
		addr->smctp_network = cb->net;
		addr->smctp_addr.s_addr = hdr->src;
		addr->smctp_type = type;
		addr->smctp_tag = hdr->flags_seq_tag &
					(MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);
		msg->msg_namelen = sizeof(*addr);
	}

	rc = len;

	if (flags & MSG_TRUNC)
		rc = msglen;

out_free:
	skb_free_datagram(sk, skb);
	return rc;
}

static int mctp_setsockopt(struct socket *sock, int level, int optname,
@@ -83,16 +221,63 @@ static const struct proto_ops mctp_dgram_ops = {
	.sendpage	= sock_no_sendpage,
};

static int mctp_sk_init(struct sock *sk)
{
	struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);

	INIT_HLIST_HEAD(&msk->keys);
	return 0;
}

static void mctp_sk_close(struct sock *sk, long timeout)
{
	sk_common_release(sk);
}

static int mctp_sk_hash(struct sock *sk)
{
	struct net *net = sock_net(sk);

	mutex_lock(&net->mctp.bind_lock);
	sk_add_node_rcu(sk, &net->mctp.binds);
	mutex_unlock(&net->mctp.bind_lock);

	return 0;
}

static void mctp_sk_unhash(struct sock *sk)
{
	struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
	struct net *net = sock_net(sk);
	struct mctp_sk_key *key;
	struct hlist_node *tmp;
	unsigned long flags;

	/* remove from any type-based binds */
	mutex_lock(&net->mctp.bind_lock);
	sk_del_node_init_rcu(sk);
	mutex_unlock(&net->mctp.bind_lock);

	/* remove tag allocations */
	spin_lock_irqsave(&net->mctp.keys_lock, flags);
	hlist_for_each_entry_safe(key, tmp, &msk->keys, sklist) {
		hlist_del_rcu(&key->sklist);
		hlist_del_rcu(&key->hlist);
		kfree_rcu(key, rcu);
	}
	spin_unlock_irqrestore(&net->mctp.keys_lock, flags);

	synchronize_rcu();
}

static struct proto mctp_proto = {
	.name		= "MCTP",
	.owner		= THIS_MODULE,
	.obj_size	= sizeof(struct mctp_sock),
	.init		= mctp_sk_init,
	.close		= mctp_sk_close,
	.hash		= mctp_sk_hash,
	.unhash		= mctp_sk_unhash,
};

static int mctp_pf_create(struct net *net, struct socket *sock,
@@ -147,6 +332,10 @@ static __init int mctp_init(void)
{
	int rc;

	/* ensure our uapi tag definitions match the header format */
	BUILD_BUG_ON(MCTP_TAG_OWNER != MCTP_HDR_FLAG_TO);
	BUILD_BUG_ON(MCTP_TAG_MASK != MCTP_HDR_TAG_MASK);

	pr_info("mctp: management component transport protocol core\n");

	rc = sock_register(&mctp_pf);
+223 −3
Original line number Diff line number Diff line
@@ -30,10 +30,139 @@ static int mctp_route_discard(struct mctp_route *route, struct sk_buff *skb)
	return 0;
}

static struct mctp_sock *mctp_lookup_bind(struct net *net, struct sk_buff *skb)
{
	struct mctp_skb_cb *cb = mctp_cb(skb);
	struct mctp_hdr *mh;
	struct sock *sk;
	u8 type;

	WARN_ON(!rcu_read_lock_held());

	/* TODO: look up in skb->cb? */
	mh = mctp_hdr(skb);

	if (!skb_headlen(skb))
		return NULL;

	type = (*(u8 *)skb->data) & 0x7f;

	sk_for_each_rcu(sk, &net->mctp.binds) {
		struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);

		if (msk->bind_net != MCTP_NET_ANY && msk->bind_net != cb->net)
			continue;

		if (msk->bind_type != type)
			continue;

		if (msk->bind_addr != MCTP_ADDR_ANY &&
		    msk->bind_addr != mh->dest)
			continue;

		return msk;
	}

	return NULL;
}

static bool mctp_key_match(struct mctp_sk_key *key, mctp_eid_t local,
			   mctp_eid_t peer, u8 tag)
{
	if (key->local_addr != local)
		return false;

	if (key->peer_addr != peer)
		return false;

	if (key->tag != tag)
		return false;

	return true;
}

static struct mctp_sk_key *mctp_lookup_key(struct net *net, struct sk_buff *skb,
					   mctp_eid_t peer)
{
	struct mctp_sk_key *key, *ret;
	struct mctp_hdr *mh;
	u8 tag;

	WARN_ON(!rcu_read_lock_held());

	mh = mctp_hdr(skb);
	tag = mh->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);

	ret = NULL;

	hlist_for_each_entry_rcu(key, &net->mctp.keys, hlist) {
		if (mctp_key_match(key, mh->dest, peer, tag)) {
			ret = key;
			break;
		}
	}

	return ret;
}

static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
{
	/* -> to local stack */
	/* TODO: socket lookup, reassemble */
	struct net *net = dev_net(skb->dev);
	struct mctp_sk_key *key;
	struct mctp_sock *msk;
	struct mctp_hdr *mh;

	msk = NULL;

	/* we may be receiving a locally-routed packet; drop source sk
	 * accounting
	 */
	skb_orphan(skb);

	/* ensure we have enough data for a header and a type */
	if (skb->len < sizeof(struct mctp_hdr) + 1)
		goto drop;

	/* grab header, advance data ptr */
	mh = mctp_hdr(skb);
	skb_pull(skb, sizeof(struct mctp_hdr));

	if (mh->ver != 1)
		goto drop;

	/* TODO: reassembly */
	if ((mh->flags_seq_tag & (MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM))
				!= (MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM))
		goto drop;

	rcu_read_lock();
	/* 1. lookup socket matching (src,dest,tag) */
	key = mctp_lookup_key(net, skb, mh->src);

	/* 2. lookup socket macthing (BCAST,dest,tag) */
	if (!key)
		key = mctp_lookup_key(net, skb, MCTP_ADDR_ANY);

	/* 3. SOM? -> lookup bound socket, conditionally (!EOM) create
	 * mapping for future (1)/(2).
	 */
	if (key)
		msk = container_of(key->sk, struct mctp_sock, sk);
	else if (!msk && (mh->flags_seq_tag & MCTP_HDR_FLAG_SOM))
		msk = mctp_lookup_bind(net, skb);

	if (!msk)
		goto unlock_drop;

	sock_queue_rcv_skb(&msk->sk, skb);

	rcu_read_unlock();

	return 0;

unlock_drop:
	rcu_read_unlock();
drop:
	kfree_skb(skb);
	return 0;
}
@@ -91,6 +220,80 @@ static struct mctp_route *mctp_route_alloc(void)
	return rt;
}

/* tag management */
static void mctp_reserve_tag(struct net *net, struct mctp_sk_key *key,
			     struct mctp_sock *msk)
{
	struct netns_mctp *mns = &net->mctp;

	lockdep_assert_held(&mns->keys_lock);

	key->sk = &msk->sk;

	/* we hold the net->key_lock here, allowing updates to both
	 * then net and sk
	 */
	hlist_add_head_rcu(&key->hlist, &mns->keys);
	hlist_add_head_rcu(&key->sklist, &msk->keys);
}

/* Allocate a locally-owned tag value for (saddr, daddr), and reserve
 * it for the socket msk
 */
static int mctp_alloc_local_tag(struct mctp_sock *msk,
				mctp_eid_t saddr, mctp_eid_t daddr, u8 *tagp)
{
	struct net *net = sock_net(&msk->sk);
	struct netns_mctp *mns = &net->mctp;
	struct mctp_sk_key *key, *tmp;
	unsigned long flags;
	int rc = -EAGAIN;
	u8 tagbits;

	/* be optimistic, alloc now */
	key = kzalloc(sizeof(*key), GFP_KERNEL);
	if (!key)
		return -ENOMEM;
	key->local_addr = saddr;
	key->peer_addr = daddr;

	/* 8 possible tag values */
	tagbits = 0xff;

	spin_lock_irqsave(&mns->keys_lock, flags);

	/* Walk through the existing keys, looking for potential conflicting
	 * tags. If we find a conflict, clear that bit from tagbits
	 */
	hlist_for_each_entry(tmp, &mns->keys, hlist) {
		/* if we don't own the tag, it can't conflict */
		if (tmp->tag & MCTP_HDR_FLAG_TO)
			continue;

		if ((tmp->peer_addr == daddr ||
		     tmp->peer_addr == MCTP_ADDR_ANY) &&
		    tmp->local_addr == saddr)
			tagbits &= ~(1 << tmp->tag);

		if (!tagbits)
			break;
	}

	if (tagbits) {
		key->tag = __ffs(tagbits);
		mctp_reserve_tag(net, key, msk);
		*tagp = key->tag;
		rc = 0;
	}

	spin_unlock_irqrestore(&mns->keys_lock, flags);

	if (!tagbits)
		kfree(key);

	return rc;
}

/* routing lookups */
static bool mctp_rt_match_eid(struct mctp_route *rt,
			      unsigned int net, mctp_eid_t eid)
@@ -140,11 +343,13 @@ int mctp_do_route(struct mctp_route *rt, struct sk_buff *skb)
int mctp_local_output(struct sock *sk, struct mctp_route *rt,
		      struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag)
{
	struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
	struct mctp_skb_cb *cb = mctp_cb(skb);
	struct mctp_hdr *hdr;
	unsigned long flags;
	mctp_eid_t saddr;
	int rc;
	u8 tag;

	if (WARN_ON(!rt->dev))
		return -EINVAL;
@@ -162,6 +367,15 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
	if (rc)
		return rc;

	if (req_tag & MCTP_HDR_FLAG_TO) {
		rc = mctp_alloc_local_tag(msk, saddr, daddr, &tag);
		if (rc)
			return rc;
		tag |= MCTP_HDR_FLAG_TO;
	} else {
		tag = req_tag;
	}

	/* TODO: we have the route MTU here; packetise */

	skb_reset_transport_header(skb);
@@ -171,8 +385,10 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
	hdr->ver = 1;
	hdr->dest = daddr;
	hdr->src = saddr;
	hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM; /* TODO */
	hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM | /* TODO */
		tag;

	skb->dev = rt->dev->dev;
	skb->protocol = htons(ETH_P_MCTP);
	skb->priority = 0;

@@ -529,6 +745,10 @@ static int __net_init mctp_routes_net_init(struct net *net)
	struct netns_mctp *ns = &net->mctp;

	INIT_LIST_HEAD(&ns->routes);
	INIT_HLIST_HEAD(&ns->binds);
	mutex_init(&ns->bind_lock);
	INIT_HLIST_HEAD(&ns->keys);
	spin_lock_init(&ns->keys_lock);
	return 0;
}