Commit 5a83dd14 authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'ibmvnic-fixes'



Sukadev Bhattiprolu says:

====================
ibmvnic: Fix a race in ibmvnic_probe()

If we get a transport (reset) event right after a successful CRQ_INIT
during ibmvnic_probe() but before we set the adapter state to VNIC_PROBED,
we will throw away the reset assuming that the adapter is still in the
probing state. But since the adapter has completed the CRQ_INIT any
subsequent CRQs the we send will be ignored by the vnicserver until
we release/init the CRQ again. This can leave the adapter unconfigured.

While here fix a couple of other bugs that were observed (Patches 1,2,4).
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 31372fe9 fd98693c
Loading
Loading
Loading
Loading
+155 −28
Original line number Diff line number Diff line
@@ -2212,6 +2212,19 @@ static const char *reset_reason_to_string(enum ibmvnic_reset_reason reason)
	return "UNKNOWN";
}

/*
 * Initialize the init_done completion and return code values. We
 * can get a transport event just after registering the CRQ and the
 * tasklet will use this to communicate the transport event. To ensure
 * we don't miss the notification/error, initialize these _before_
 * regisering the CRQ.
 */
static inline void reinit_init_done(struct ibmvnic_adapter *adapter)
{
	reinit_completion(&adapter->init_done);
	adapter->init_done_rc = 0;
}

/*
 * do_reset returns zero if we are able to keep processing reset events, or
 * non-zero if we hit a fatal error and must halt.
@@ -2318,6 +2331,8 @@ static int do_reset(struct ibmvnic_adapter *adapter,
		 */
		adapter->state = VNIC_PROBED;

		reinit_init_done(adapter);

		if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM) {
			rc = init_crq_queue(adapter);
		} else if (adapter->reset_reason == VNIC_RESET_MOBILITY) {
@@ -2461,7 +2476,8 @@ static int do_hard_reset(struct ibmvnic_adapter *adapter,
	 */
	adapter->state = VNIC_PROBED;

	reinit_completion(&adapter->init_done);
	reinit_init_done(adapter);

	rc = init_crq_queue(adapter);
	if (rc) {
		netdev_err(adapter->netdev,
@@ -2602,23 +2618,82 @@ static int do_passive_init(struct ibmvnic_adapter *adapter)
static void __ibmvnic_reset(struct work_struct *work)
{
	struct ibmvnic_adapter *adapter;
	bool saved_state = false;
	unsigned int timeout = 5000;
	struct ibmvnic_rwi *tmprwi;
	bool saved_state = false;
	struct ibmvnic_rwi *rwi;
	unsigned long flags;
	u32 reset_state;
	struct device *dev;
	bool need_reset;
	int num_fails = 0;
	u32 reset_state;
	int rc = 0;

	adapter = container_of(work, struct ibmvnic_adapter, ibmvnic_reset);
		dev = &adapter->vdev->dev;

	if (test_and_set_bit_lock(0, &adapter->resetting)) {
	/* Wait for ibmvnic_probe() to complete. If probe is taking too long
	 * or if another reset is in progress, defer work for now. If probe
	 * eventually fails it will flush and terminate our work.
	 *
	 * Three possibilities here:
	 * 1. Adpater being removed  - just return
	 * 2. Timed out on probe or another reset in progress - delay the work
	 * 3. Completed probe - perform any resets in queue
	 */
	if (adapter->state == VNIC_PROBING &&
	    !wait_for_completion_timeout(&adapter->probe_done, timeout)) {
		dev_err(dev, "Reset thread timed out on probe");
		queue_delayed_work(system_long_wq,
				   &adapter->ibmvnic_delayed_reset,
				   IBMVNIC_RESET_DELAY);
		return;
	}

	/* adapter is done with probe (i.e state is never VNIC_PROBING now) */
	if (adapter->state == VNIC_REMOVING)
		return;

	/* ->rwi_list is stable now (no one else is removing entries) */

	/* ibmvnic_probe() may have purged the reset queue after we were
	 * scheduled to process a reset so there maybe no resets to process.
	 * Before setting the ->resetting bit though, we have to make sure
	 * that there is infact a reset to process. Otherwise we may race
	 * with ibmvnic_open() and end up leaving the vnic down:
	 *
	 *	__ibmvnic_reset()	    ibmvnic_open()
	 *	-----------------	    --------------
	 *
	 *  set ->resetting bit
	 *  				find ->resetting bit is set
	 *  				set ->state to IBMVNIC_OPEN (i.e
	 *  				assume reset will open device)
	 *  				return
	 *  find reset queue empty
	 *  return
	 *
	 *  	Neither performed vnic login/open and vnic stays down
	 *
	 * If we hold the lock and conditionally set the bit, either we
	 * or ibmvnic_open() will complete the open.
	 */
	need_reset = false;
	spin_lock(&adapter->rwi_lock);
	if (!list_empty(&adapter->rwi_list)) {
		if (test_and_set_bit_lock(0, &adapter->resetting)) {
			queue_delayed_work(system_long_wq,
					   &adapter->ibmvnic_delayed_reset,
					   IBMVNIC_RESET_DELAY);
		} else {
			need_reset = true;
		}
	}
	spin_unlock(&adapter->rwi_lock);

	if (!need_reset)
		return;

	rwi = get_next_rwi(adapter);
	while (rwi) {
		spin_lock_irqsave(&adapter->state_lock, flags);
@@ -2735,12 +2810,23 @@ static void __ibmvnic_delayed_reset(struct work_struct *work)
	__ibmvnic_reset(&adapter->ibmvnic_reset);
}

static void flush_reset_queue(struct ibmvnic_adapter *adapter)
{
	struct list_head *entry, *tmp_entry;

	if (!list_empty(&adapter->rwi_list)) {
		list_for_each_safe(entry, tmp_entry, &adapter->rwi_list) {
			list_del(entry);
			kfree(list_entry(entry, struct ibmvnic_rwi, list));
		}
	}
}

static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
			 enum ibmvnic_reset_reason reason)
{
	struct list_head *entry, *tmp_entry;
	struct ibmvnic_rwi *rwi, *tmp;
	struct net_device *netdev = adapter->netdev;
	struct ibmvnic_rwi *rwi, *tmp;
	unsigned long flags;
	int ret;

@@ -2759,13 +2845,6 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
		goto err;
	}

	if (adapter->state == VNIC_PROBING) {
		netdev_warn(netdev, "Adapter reset during probe\n");
		adapter->init_done_rc = -EAGAIN;
		ret = EAGAIN;
		goto err;
	}

	list_for_each_entry(tmp, &adapter->rwi_list, list) {
		if (tmp->reset_reason == reason) {
			netdev_dbg(netdev, "Skipping matching reset, reason=%s\n",
@@ -2783,10 +2862,9 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
	/* if we just received a transport event,
	 * flush reset queue and process this reset
	 */
	if (adapter->force_reset_recovery && !list_empty(&adapter->rwi_list)) {
		list_for_each_safe(entry, tmp_entry, &adapter->rwi_list)
			list_del(entry);
	}
	if (adapter->force_reset_recovery)
		flush_reset_queue(adapter);

	rwi->reset_reason = reason;
	list_add_tail(&rwi->list, &adapter->rwi_list);
	netdev_dbg(adapter->netdev, "Scheduling reset (reason %s)\n",
@@ -5321,9 +5399,9 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq,
			}

			if (!completion_done(&adapter->init_done)) {
				complete(&adapter->init_done);
				if (!adapter->init_done_rc)
					adapter->init_done_rc = -EAGAIN;
				complete(&adapter->init_done);
			}

			break;
@@ -5346,6 +5424,13 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq,
			adapter->fw_done_rc = -EIO;
			complete(&adapter->fw_done);
		}

		/* if we got here during crq-init, retry crq-init */
		if (!completion_done(&adapter->init_done)) {
			adapter->init_done_rc = -EAGAIN;
			complete(&adapter->init_done);
		}

		if (!completion_done(&adapter->stats_done))
			complete(&adapter->stats_done);
		if (test_bit(0, &adapter->resetting))
@@ -5662,10 +5747,6 @@ static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter, bool reset)

	adapter->from_passive_init = false;

	if (reset)
		reinit_completion(&adapter->init_done);

	adapter->init_done_rc = 0;
	rc = ibmvnic_send_crq_init(adapter);
	if (rc) {
		dev_err(dev, "Send crq init failed with error %d\n", rc);
@@ -5679,12 +5760,14 @@ static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter, bool reset)

	if (adapter->init_done_rc) {
		release_crq_queue(adapter);
		dev_err(dev, "CRQ-init failed, %d\n", adapter->init_done_rc);
		return adapter->init_done_rc;
	}

	if (adapter->from_passive_init) {
		adapter->state = VNIC_OPEN;
		adapter->from_passive_init = false;
		dev_err(dev, "CRQ-init failed, passive-init\n");
		return -EINVAL;
	}

@@ -5724,6 +5807,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
	struct ibmvnic_adapter *adapter;
	struct net_device *netdev;
	unsigned char *mac_addr_p;
	unsigned long flags;
	bool init_success;
	int rc;

@@ -5768,6 +5852,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
	spin_lock_init(&adapter->rwi_lock);
	spin_lock_init(&adapter->state_lock);
	mutex_init(&adapter->fw_lock);
	init_completion(&adapter->probe_done);
	init_completion(&adapter->init_done);
	init_completion(&adapter->fw_done);
	init_completion(&adapter->reset_done);
@@ -5778,6 +5863,33 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)

	init_success = false;
	do {
		reinit_init_done(adapter);

		/* clear any failovers we got in the previous pass
		 * since we are reinitializing the CRQ
		 */
		adapter->failover_pending = false;

		/* If we had already initialized CRQ, we may have one or
		 * more resets queued already. Discard those and release
		 * the CRQ before initializing the CRQ again.
		 */
		release_crq_queue(adapter);

		/* Since we are still in PROBING state, __ibmvnic_reset()
		 * will not access the ->rwi_list and since we released CRQ,
		 * we won't get _new_ transport events. But there maybe an
		 * ongoing ibmvnic_reset() call. So serialize access to
		 * rwi_list. If we win the race, ibvmnic_reset() could add
		 * a reset after we purged but thats ok - we just may end
		 * up with an extra reset (i.e similar to having two or more
		 * resets in the queue at once).
		 * CHECK.
		 */
		spin_lock_irqsave(&adapter->rwi_lock, flags);
		flush_reset_queue(adapter);
		spin_unlock_irqrestore(&adapter->rwi_lock, flags);

		rc = init_crq_queue(adapter);
		if (rc) {
			dev_err(&dev->dev, "Couldn't initialize crq. rc=%d\n",
@@ -5809,12 +5921,6 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
		goto ibmvnic_dev_file_err;

	netif_carrier_off(netdev);
	rc = register_netdev(netdev);
	if (rc) {
		dev_err(&dev->dev, "failed to register netdev rc=%d\n", rc);
		goto ibmvnic_register_fail;
	}
	dev_info(&dev->dev, "ibmvnic registered\n");

	if (init_success) {
		adapter->state = VNIC_PROBED;
@@ -5827,6 +5933,16 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)

	adapter->wait_for_reset = false;
	adapter->last_reset_time = jiffies;

	rc = register_netdev(netdev);
	if (rc) {
		dev_err(&dev->dev, "failed to register netdev rc=%d\n", rc);
		goto ibmvnic_register_fail;
	}
	dev_info(&dev->dev, "ibmvnic registered\n");

	complete(&adapter->probe_done);

	return 0;

ibmvnic_register_fail:
@@ -5841,6 +5957,17 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
ibmvnic_init_fail:
	release_sub_crqs(adapter, 1);
	release_crq_queue(adapter);

	/* cleanup worker thread after releasing CRQ so we don't get
	 * transport events (i.e new work items for the worker thread).
	 */
	adapter->state = VNIC_REMOVING;
	complete(&adapter->probe_done);
	flush_work(&adapter->ibmvnic_reset);
	flush_delayed_work(&adapter->ibmvnic_delayed_reset);

	flush_reset_queue(adapter);

	mutex_destroy(&adapter->fw_lock);
	free_netdev(netdev);

+1 −0
Original line number Diff line number Diff line
@@ -930,6 +930,7 @@ struct ibmvnic_adapter {

	struct ibmvnic_tx_pool *tx_pool;
	struct ibmvnic_tx_pool *tso_pool;
	struct completion probe_done;
	struct completion init_done;
	int init_done_rc;