Commit e4952747 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull rdma updates from Jason Gunthorpe:
 "This cycle we got a new RDMA driver "ERDMA" for the Alibaba cloud
  environment. Otherwise the changes are dominated by rxe fixes.

  There is another RDMA driver on the list that might get merged next
  cycle, 'MANA' for the Azure cloud environment.

  Summary:

   - Bug fixes and small features for irdma, hns, siw, qedr, hfi1, mlx5

   - General spelling/grammar fixes

   - rdma cm can follow changes in neighbours for control packets

   - Significant amounts of rxe fixes and spec compliance changes

   - Use the modern NAPI API

   - Use the bitmap API instead of open coding

   - Performance improvements for rtrs

   - Add the ERDMA driver for Alibaba cloud

   - Fix a use after free bug in SRP"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (99 commits)
  RDMA/ib_srpt: Unify checking rdma_cm_id condition in srpt_cm_req_recv()
  RDMA/rxe: Fix error unwind in rxe_create_qp()
  RDMA/mlx5: Add missing check for return value in get namespace flow
  RDMA/rxe: Split qp state for requester and completer
  RDMA/rxe: Generate error completion for error requester QP state
  RDMA/rxe: Update wqe_index for each wqe error completion
  RDMA/srpt: Fix a use-after-free
  RDMA/srpt: Introduce a reference count in struct srpt_device
  RDMA/srpt: Duplicate port name members
  IB/qib: Fix repeated "in" within comments
  RDMA/erdma: Add driver to kernel build environment
  RDMA/erdma: Add the ABI definitions
  RDMA/erdma: Add the erdma module
  RDMA/erdma: Add connection management (CM) support
  RDMA/erdma: Add verbs implementation
  RDMA/erdma: Add verbs header file
  RDMA/erdma: Add event queue implementation
  RDMA/erdma: Add cmdq implementation
  RDMA/erdma: Add main include file
  RDMA/erdma: Add the hardware related definitions
  ...
parents 746fc76b 6b822d40
Loading
Loading
Loading
Loading
+8 −0
Original line number Diff line number Diff line
@@ -736,6 +736,14 @@ S: Maintained
F:	Documentation/i2c/busses/i2c-ali1563.rst
F:	drivers/i2c/busses/i2c-ali1563.c
ALIBABA ELASTIC RDMA DRIVER
M:	Cheng Xu <chengyou@linux.alibaba.com>
M:	Kai Shen <kaishen@linux.alibaba.com>
L:	linux-rdma@vger.kernel.org
S:	Supported
F:	drivers/infiniband/hw/erdma
F:	include/uapi/rdma/erdma-abi.h
ALIENWARE WMI DRIVER
L:	Dell.Client.Kernel@dell.com
S:	Maintained
+8 −7
Original line number Diff line number Diff line
@@ -78,20 +78,21 @@ config INFINIBAND_VIRT_DMA
	def_bool !HIGHMEM

if INFINIBAND_USER_ACCESS || !INFINIBAND_USER_ACCESS
source "drivers/infiniband/hw/mthca/Kconfig"
source "drivers/infiniband/hw/qib/Kconfig"
source "drivers/infiniband/hw/bnxt_re/Kconfig"
source "drivers/infiniband/hw/cxgb4/Kconfig"
source "drivers/infiniband/hw/efa/Kconfig"
source "drivers/infiniband/hw/erdma/Kconfig"
source "drivers/infiniband/hw/hfi1/Kconfig"
source "drivers/infiniband/hw/hns/Kconfig"
source "drivers/infiniband/hw/irdma/Kconfig"
source "drivers/infiniband/hw/mlx4/Kconfig"
source "drivers/infiniband/hw/mlx5/Kconfig"
source "drivers/infiniband/hw/mthca/Kconfig"
source "drivers/infiniband/hw/ocrdma/Kconfig"
source "drivers/infiniband/hw/vmw_pvrdma/Kconfig"
source "drivers/infiniband/hw/usnic/Kconfig"
source "drivers/infiniband/hw/hns/Kconfig"
source "drivers/infiniband/hw/bnxt_re/Kconfig"
source "drivers/infiniband/hw/hfi1/Kconfig"
source "drivers/infiniband/hw/qedr/Kconfig"
source "drivers/infiniband/hw/qib/Kconfig"
source "drivers/infiniband/hw/usnic/Kconfig"
source "drivers/infiniband/hw/vmw_pvrdma/Kconfig"
source "drivers/infiniband/sw/rdmavt/Kconfig"
source "drivers/infiniband/sw/rxe/Kconfig"
source "drivers/infiniband/sw/siw/Kconfig"
+218 −12
Original line number Diff line number Diff line
@@ -11,6 +11,7 @@
#include <linux/in6.h>
#include <linux/mutex.h>
#include <linux/random.h>
#include <linux/rbtree.h>
#include <linux/igmp.h>
#include <linux/xarray.h>
#include <linux/inetdevice.h>
@@ -20,6 +21,7 @@

#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/netevent.h>
#include <net/tcp.h>
#include <net/ipv6.h>
#include <net/ip_fib.h>
@@ -168,6 +170,9 @@ static struct ib_sa_client sa_client;
static LIST_HEAD(dev_list);
static LIST_HEAD(listen_any_list);
static DEFINE_MUTEX(lock);
static struct rb_root id_table = RB_ROOT;
/* Serialize operations of id_table tree */
static DEFINE_SPINLOCK(id_table_lock);
static struct workqueue_struct *cma_wq;
static unsigned int cma_pernet_id;

@@ -202,6 +207,11 @@ struct xarray *cma_pernet_xa(struct net *net, enum rdma_ucm_port_space ps)
	}
}

/*
 * One node of the global id_table rbtree.  A node is keyed by the pair
 * (bound net_device ifindex, destination address); every rdma_id_private
 * that shares the same key is chained on id_list via its id_list_entry
 * member, so one tree node can cover many cm ids.
 */
struct id_table_entry {
	struct list_head id_list;	/* rdma_id_privates sharing this key */
	struct rb_node rb_node;		/* linkage into id_table */
};

struct cma_device {
	struct list_head	list;
	struct ib_device	*device;
@@ -420,11 +430,21 @@ static inline u8 cma_get_ip_ver(const struct cma_hdr *hdr)
	return hdr->ip_version >> 4;
}

static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
static void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
{
	hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
}

/* Source address of the cm id, viewed as a generic struct sockaddr. */
static struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv)
{
	return (struct sockaddr *)&id_priv->id.route.addr.src_addr;
}

/* Destination address of the cm id, viewed as a generic struct sockaddr. */
static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv)
{
	return (struct sockaddr *)&id_priv->id.route.addr.dst_addr;
}

static int cma_igmp_send(struct net_device *ndev, union ib_gid *mgid, bool join)
{
	struct in_device *in_dev = NULL;
@@ -445,6 +465,117 @@ static int cma_igmp_send(struct net_device *ndev, union ib_gid *mgid, bool join)
	return (in_dev) ? 0 : -ENODEV;
}

/*
 * Three-way comparison used to order id_table entries: first by bound
 * net_device ifindex, then by address family, finally by destination IP.
 * Returns <0, 0 or >0, memcmp()-style.  The key of @entry_b is taken
 * from the first id chained on its id_list (all chained ids share it).
 */
static int compare_netdev_and_ip(int ifindex_a, struct sockaddr *sa,
				 struct id_table_entry *entry_b)
{
	struct rdma_id_private *rep = list_first_entry(
		&entry_b->id_list, struct rdma_id_private, id_list_entry);
	struct sockaddr *sb = cma_dst_addr(rep);
	int ifindex_b = rep->id.route.addr.dev_addr.bound_dev_if;

	if (ifindex_a != ifindex_b)
		return ifindex_a > ifindex_b ? 1 : -1;

	if (sa->sa_family != sb->sa_family)
		return sa->sa_family - sb->sa_family;

	if (sa->sa_family != AF_INET)
		return ipv6_addr_cmp(&((struct sockaddr_in6 *)sa)->sin6_addr,
				     &((struct sockaddr_in6 *)sb)->sin6_addr);

	return memcmp(&((struct sockaddr_in *)sa)->sin_addr,
		      &((struct sockaddr_in *)sb)->sin_addr,
		      sizeof(((struct sockaddr_in *)sa)->sin_addr));
}

/*
 * Insert @node_id_priv into the global id_table, keyed by its bound
 * ifindex and destination address.  Ids with an identical key share a
 * single id_table_entry and are chained on its id_list.  Returns 0 on
 * success or -ENOMEM if the tree node cannot be allocated.
 */
static int cma_add_id_to_tree(struct rdma_id_private *node_id_priv)
{
	struct rb_node **new, *parent = NULL;
	struct id_table_entry *this, *node;
	unsigned long flags;
	int result;

	/* Allocate before taking the spinlock: GFP_KERNEL may sleep. */
	node = kzalloc(sizeof(*node), GFP_KERNEL);
	if (!node)
		return -ENOMEM;

	spin_lock_irqsave(&id_table_lock, flags);
	new = &id_table.rb_node;
	while (*new) {
		this = container_of(*new, struct id_table_entry, rb_node);
		result = compare_netdev_and_ip(
			node_id_priv->id.route.addr.dev_addr.bound_dev_if,
			cma_dst_addr(node_id_priv), this);

		parent = *new;
		if (result < 0)
			new = &((*new)->rb_left);
		else if (result > 0)
			new = &((*new)->rb_right);
		else {
			/* Key already in the tree: share the existing
			 * entry and drop our pre-allocated node.
			 */
			list_add_tail(&node_id_priv->id_list_entry,
				      &this->id_list);
			kfree(node);
			goto unlock;
		}
	}

	/* New key: link the fresh node at the insertion point found. */
	INIT_LIST_HEAD(&node->id_list);
	list_add_tail(&node_id_priv->id_list_entry, &node->id_list);

	rb_link_node(&node->rb_node, parent, new);
	rb_insert_color(&node->rb_node, &id_table);

unlock:
	spin_unlock_irqrestore(&id_table_lock, flags);
	return 0;
}

/*
 * Find the id_table_entry whose key matches (@ifindex, @sa) in @root,
 * or NULL if no such entry exists.  Both callers hold id_table_lock
 * around this lookup.
 */
static struct id_table_entry *
node_from_ndev_ip(struct rb_root *root, int ifindex, struct sockaddr *sa)
{
	struct rb_node *cur = root->rb_node;

	while (cur) {
		struct id_table_entry *entry =
			container_of(cur, struct id_table_entry, rb_node);
		int cmp = compare_netdev_and_ip(ifindex, sa, entry);

		if (!cmp)
			return entry;
		cur = cmp < 0 ? cur->rb_left : cur->rb_right;
	}

	return NULL;
}

/*
 * Undo cma_add_id_to_tree(): unlink @id_priv from its id_table_entry
 * and erase/free the entry once its id_list becomes empty.  Safe to
 * call for ids that were never inserted — their id_list_entry remains
 * empty-initialized (see __rdma_create_id()) so we bail out early.
 */
static void cma_remove_id_from_tree(struct rdma_id_private *id_priv)
{
	struct id_table_entry *data;
	unsigned long flags;

	spin_lock_irqsave(&id_table_lock, flags);
	/* Never inserted (or already removed): nothing to do. */
	if (list_empty(&id_priv->id_list_entry))
		goto out;

	data = node_from_ndev_ip(&id_table,
				 id_priv->id.route.addr.dev_addr.bound_dev_if,
				 cma_dst_addr(id_priv));
	if (!data)
		goto out;

	list_del_init(&id_priv->id_list_entry);
	/* Last id with this (ifindex, addr) key: drop the tree node. */
	if (list_empty(&data->id_list)) {
		rb_erase(&data->rb_node, &id_table);
		kfree(data);
	}
out:
	spin_unlock_irqrestore(&id_table_lock, flags);
}

static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
			       struct cma_device *cma_dev)
{
@@ -481,16 +612,6 @@ static void cma_release_dev(struct rdma_id_private *id_priv)
	mutex_unlock(&lock);
}

static inline struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv)
{
	return (struct sockaddr *) &id_priv->id.route.addr.src_addr;
}

static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv)
{
	return (struct sockaddr *) &id_priv->id.route.addr.dst_addr;
}

static inline unsigned short cma_family(struct rdma_id_private *id_priv)
{
	return id_priv->id.route.addr.src_addr.ss_family;
@@ -861,6 +982,7 @@ __rdma_create_id(struct net *net, rdma_cm_event_handler event_handler,
	refcount_set(&id_priv->refcount, 1);
	mutex_init(&id_priv->handler_mutex);
	INIT_LIST_HEAD(&id_priv->device_item);
	INIT_LIST_HEAD(&id_priv->id_list_entry);
	INIT_LIST_HEAD(&id_priv->listen_list);
	INIT_LIST_HEAD(&id_priv->mc_list);
	get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
@@ -1883,6 +2005,7 @@ static void _destroy_id(struct rdma_id_private *id_priv,
	cma_cancel_operation(id_priv, state);

	rdma_restrack_del(&id_priv->res);
	cma_remove_id_from_tree(id_priv);
	if (id_priv->cma_dev) {
		if (rdma_cap_ib_cm(id_priv->id.device, 1)) {
			if (id_priv->cm_id.ib)
@@ -3172,8 +3295,11 @@ int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms)
	cma_id_get(id_priv);
	if (rdma_cap_ib_sa(id->device, id->port_num))
		ret = cma_resolve_ib_route(id_priv, timeout_ms);
	else if (rdma_protocol_roce(id->device, id->port_num))
	else if (rdma_protocol_roce(id->device, id->port_num)) {
		ret = cma_resolve_iboe_route(id_priv);
		if (!ret)
			cma_add_id_to_tree(id_priv);
	}
	else if (rdma_protocol_iwarp(id->device, id->port_num))
		ret = cma_resolve_iw_route(id_priv);
	else
@@ -4922,10 +5048,87 @@ static int cma_netdev_callback(struct notifier_block *self, unsigned long event,
	return ret;
}

/*
 * Deferred work queued by cma_netevent_callback(): deliver an
 * RDMA_CM_EVENT_UNREACHABLE (-ETIMEDOUT) event to the consumer of an id
 * whose neighbour entry changed.  Always drops the reference taken when
 * the work was queued.
 */
static void cma_netevent_work_handler(struct work_struct *_work)
{
	struct rdma_id_private *id_priv =
		container_of(_work, struct rdma_id_private, id.net_work);
	struct rdma_cm_event event = {};

	mutex_lock(&id_priv->handler_mutex);

	/* Don't report events on an id that is already being torn down. */
	if (READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING ||
	    READ_ONCE(id_priv->state) == RDMA_CM_DEVICE_REMOVAL)
		goto out_unlock;

	event.event = RDMA_CM_EVENT_UNREACHABLE;
	event.status = -ETIMEDOUT;

	if (cma_cm_event_handler(id_priv, &event)) {
		/*
		 * Non-zero return means the consumer wants the id
		 * destroyed.  The sparse __acquire() only rebalances the
		 * lock annotation; destroy_id_handler_unlock() releases
		 * handler_mutex for real.
		 */
		__acquire(&id_priv->handler_mutex);
		id_priv->cm_id.ib = NULL;
		cma_id_put(id_priv);
		destroy_id_handler_unlock(id_priv);
		return;
	}

out_unlock:
	mutex_unlock(&id_priv->handler_mutex);
	cma_id_put(id_priv);
}

/*
 * netevent notifier: on NETEVENT_NEIGH_UPDATE, look up every cm id
 * bound to the affected (net_device, destination IP) pair whose cached
 * destination MAC no longer matches the neighbour's hardware address,
 * and queue work to report each of them unreachable.  Runs with
 * id_table_lock held, so the work must be deferred to cma_wq.
 */
static int cma_netevent_callback(struct notifier_block *self,
				 unsigned long event, void *ctx)
{
	struct id_table_entry *ips_node = NULL;
	struct rdma_id_private *current_id;
	struct neighbour *neigh = ctx;
	unsigned long flags;

	if (event != NETEVENT_NEIGH_UPDATE)
		return NOTIFY_DONE;

	spin_lock_irqsave(&id_table_lock, flags);
	if (neigh->tbl->family == AF_INET6) {
		struct sockaddr_in6 neigh_sock_6;

		neigh_sock_6.sin6_family = AF_INET6;
		neigh_sock_6.sin6_addr = *(struct in6_addr *)neigh->primary_key;
		ips_node = node_from_ndev_ip(&id_table, neigh->dev->ifindex,
					     (struct sockaddr *)&neigh_sock_6);
	} else if (neigh->tbl->family == AF_INET) {
		struct sockaddr_in neigh_sock_4;

		neigh_sock_4.sin_family = AF_INET;
		neigh_sock_4.sin_addr.s_addr = *(__be32 *)(neigh->primary_key);
		ips_node = node_from_ndev_ip(&id_table, neigh->dev->ifindex,
					     (struct sockaddr *)&neigh_sock_4);
	} else
		goto out;

	if (!ips_node)
		goto out;

	list_for_each_entry(current_id, &ips_node->id_list, id_list_entry) {
		/* MAC unchanged for this id: nothing to report. */
		if (!memcmp(current_id->id.route.addr.dev_addr.dst_dev_addr,
			   neigh->ha, ETH_ALEN))
			continue;
		/*
		 * NOTE(review): INIT_WORK re-initializes net_work on
		 * every matching event; if a previously queued item for
		 * this id is still pending, that rewrites a live work
		 * struct — confirm this cannot race with cma_wq.
		 */
		INIT_WORK(&current_id->id.net_work, cma_netevent_work_handler);
		cma_id_get(current_id);
		queue_work(cma_wq, &current_id->id.net_work);
	}
out:
	spin_unlock_irqrestore(&id_table_lock, flags);
	return NOTIFY_DONE;
}

/* Forwards net_device notifier events to cma_netdev_callback(). */
static struct notifier_block cma_nb = {
	.notifier_call = cma_netdev_callback
};

/* Forwards netevent (neighbour update) events to cma_netevent_callback(). */
static struct notifier_block cma_netevent_cb = {
	.notifier_call = cma_netevent_callback
};

static void cma_send_device_removal_put(struct rdma_id_private *id_priv)
{
	struct rdma_cm_event event = { .event = RDMA_CM_EVENT_DEVICE_REMOVAL };
@@ -5148,6 +5351,7 @@ static int __init cma_init(void)

	ib_sa_register_client(&sa_client);
	register_netdevice_notifier(&cma_nb);
	register_netevent_notifier(&cma_netevent_cb);

	ret = ib_register_client(&cma_client);
	if (ret)
@@ -5162,6 +5366,7 @@ static int __init cma_init(void)
err_ib:
	ib_unregister_client(&cma_client);
err:
	unregister_netevent_notifier(&cma_netevent_cb);
	unregister_netdevice_notifier(&cma_nb);
	ib_sa_unregister_client(&sa_client);
	unregister_pernet_subsys(&cma_pernet_operations);
@@ -5174,6 +5379,7 @@ static void __exit cma_cleanup(void)
{
	cma_configfs_exit();
	ib_unregister_client(&cma_client);
	unregister_netevent_notifier(&cma_netevent_cb);
	unregister_netdevice_notifier(&cma_nb);
	ib_sa_unregister_client(&sa_client);
	unregister_pernet_subsys(&cma_pernet_operations);
+1 −0
Original line number Diff line number Diff line
@@ -64,6 +64,7 @@ struct rdma_id_private {
		struct list_head listen_item;
		struct list_head listen_list;
	};
	struct list_head        id_list_entry;
	struct cma_device	*cma_dev;
	struct list_head	mc_list;

+1 −1
Original line number Diff line number Diff line
@@ -68,7 +68,7 @@ static int uverbs_try_lock_object(struct ib_uobject *uobj,
	 * In exclusive access mode, we check that the counter is zero (nobody
	 * claimed this object) and we set it to -1. Releasing a shared access
	 * lock is done simply by decreasing the counter. As for exclusive
	 * access locks, since only a single one of them is is allowed
	 * access locks, since only a single one of them is allowed
	 * concurrently, setting the counter to zero is enough for releasing
	 * this lock.
	 */
Loading