Commit 0d18c12b authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'block-5.14-2021-07-16' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:

 - NVMe fixes via Christoph:
      - fix various races in nvme-pci when shutting down just after
        probing (Casey Chen)
      - fix a net_device leak in nvme-tcp (Prabhakar Kushwaha)

 - Fix regression in xen-blkfront by cleaning up the removal state
   machine (Christoph)

 - Fix tag_set and queue cleanup ordering regression in nbd (Wang)

 - Fix tag_set and queue cleanup ordering regression in pd (Guoqing)

* tag 'block-5.14-2021-07-16' of git://git.kernel.dk/linux-block:
  xen-blkfront: sanitize the removal state machine
  nbd: fix order of cleaning up the queue and freeing the tagset
  pd: fix order of cleaning up the queue and freeing the tagset
  nvme-pci: do not call nvme_dev_remove_admin from nvme_remove
  nvme-pci: fix multiple races in nvme_setup_io_queues
  nvme-tcp: use __dev_get_by_name instead dev_get_by_name for OPT_HOST_IFACE
parents 13fdaf04 05d69d95
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -239,8 +239,8 @@ static void nbd_dev_remove(struct nbd_device *nbd)

	if (disk) {
		del_gendisk(disk);
		blk_mq_free_tag_set(&nbd->tag_set);
		blk_cleanup_disk(disk);
		blk_mq_free_tag_set(&nbd->tag_set);
	}

	/*
+1 −1
Original line number Diff line number Diff line
@@ -1014,8 +1014,8 @@ static void __exit pd_exit(void)
		if (p) {
			disk->gd = NULL;
			del_gendisk(p);
			blk_mq_free_tag_set(&disk->tag_set);
			blk_cleanup_disk(p);
			blk_mq_free_tag_set(&disk->tag_set);
			pi_release(disk->pi);
		}
	}
+26 −198
Original line number Diff line number Diff line
@@ -502,34 +502,21 @@ static int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg)
static int blkif_ioctl(struct block_device *bdev, fmode_t mode,
		       unsigned command, unsigned long argument)
{
	struct blkfront_info *info = bdev->bd_disk->private_data;
	int i;

	dev_dbg(&info->xbdev->dev, "command: 0x%x, argument: 0x%lx\n",
		command, (long)argument);

	switch (command) {
	case CDROMMULTISESSION:
		dev_dbg(&info->xbdev->dev, "FIXME: support multisession CDs later\n");
		for (i = 0; i < sizeof(struct cdrom_multisession); i++)
			if (put_user(0, (char __user *)(argument + i)))
				return -EFAULT;
		return 0;

	case CDROM_GET_CAPABILITY: {
		struct gendisk *gd = info->gd;
		if (gd->flags & GENHD_FL_CD)
	case CDROM_GET_CAPABILITY:
		if (bdev->bd_disk->flags & GENHD_FL_CD)
			return 0;
		return -EINVAL;
	}

	default:
		/*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n",
		  command);*/
		return -EINVAL; /* same return as native Linux */
		return -EINVAL;
	}

	return 0;
}

static unsigned long blkif_ring_get_request(struct blkfront_ring_info *rinfo,
@@ -1177,36 +1164,6 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
	return err;
}

static void xlvbd_release_gendisk(struct blkfront_info *info)
{
	unsigned int minor, nr_minors, i;
	struct blkfront_ring_info *rinfo;

	if (info->rq == NULL)
		return;

	/* No more blkif_request(). */
	blk_mq_stop_hw_queues(info->rq);

	for_each_rinfo(info, rinfo, i) {
		/* No more gnttab callback work. */
		gnttab_cancel_free_callback(&rinfo->callback);

		/* Flush gnttab callback work. Must be done with no locks held. */
		flush_work(&rinfo->work);
	}

	del_gendisk(info->gd);

	minor = info->gd->first_minor;
	nr_minors = info->gd->minors;
	xlbd_release_minors(minor, nr_minors);

	blk_cleanup_disk(info->gd);
	info->gd = NULL;
	blk_mq_free_tag_set(&info->tag_set);
}

/* Already hold rinfo->ring_lock. */
static inline void kick_pending_request_queues_locked(struct blkfront_ring_info *rinfo)
{
@@ -1756,12 +1713,6 @@ static int write_per_ring_nodes(struct xenbus_transaction xbt,
	return err;
}

static void free_info(struct blkfront_info *info)
{
	list_del(&info->info_list);
	kfree(info);
}

/* Common code used when first setting up, and when resuming. */
static int talk_to_blkback(struct xenbus_device *dev,
			   struct blkfront_info *info)
@@ -1880,13 +1831,6 @@ static int talk_to_blkback(struct xenbus_device *dev,
		xenbus_dev_fatal(dev, err, "%s", message);
 destroy_blkring:
	blkif_free(info, 0);

	mutex_lock(&blkfront_mutex);
	free_info(info);
	mutex_unlock(&blkfront_mutex);

	dev_set_drvdata(&dev->dev, NULL);

	return err;
}

@@ -2126,40 +2070,28 @@ static int blkfront_resume(struct xenbus_device *dev)
static void blkfront_closing(struct blkfront_info *info)
{
	struct xenbus_device *xbdev = info->xbdev;
	struct block_device *bdev = NULL;

	mutex_lock(&info->mutex);
	struct blkfront_ring_info *rinfo;
	unsigned int i;

	if (xbdev->state == XenbusStateClosing) {
		mutex_unlock(&info->mutex);
	if (xbdev->state == XenbusStateClosing)
		return;
	}

	if (info->gd)
		bdev = bdgrab(info->gd->part0);
	/* No more blkif_request(). */
	blk_mq_stop_hw_queues(info->rq);
	blk_set_queue_dying(info->rq);
	set_capacity(info->gd, 0);

	mutex_unlock(&info->mutex);
	for_each_rinfo(info, rinfo, i) {
		/* No more gnttab callback work. */
		gnttab_cancel_free_callback(&rinfo->callback);

	if (!bdev) {
		xenbus_frontend_closed(xbdev);
		return;
		/* Flush gnttab callback work. Must be done with no locks held. */
		flush_work(&rinfo->work);
	}

	mutex_lock(&bdev->bd_disk->open_mutex);

	if (bdev->bd_openers) {
		xenbus_dev_error(xbdev, -EBUSY,
				 "Device in use; refusing to close");
		xenbus_switch_state(xbdev, XenbusStateClosing);
	} else {
		xlvbd_release_gendisk(info);
	xenbus_frontend_closed(xbdev);
}

	mutex_unlock(&bdev->bd_disk->open_mutex);
	bdput(bdev);
}

static void blkfront_setup_discard(struct blkfront_info *info)
{
	info->feature_discard = 1;
@@ -2472,7 +2404,6 @@ static void blkback_changed(struct xenbus_device *dev,
			break;
		fallthrough;
	case XenbusStateClosing:
		if (info)
		blkfront_closing(info);
		break;
	}
@@ -2481,56 +2412,21 @@ static void blkback_changed(struct xenbus_device *dev,
static int blkfront_remove(struct xenbus_device *xbdev)
{
	struct blkfront_info *info = dev_get_drvdata(&xbdev->dev);
	struct block_device *bdev = NULL;
	struct gendisk *disk;

	dev_dbg(&xbdev->dev, "%s removed", xbdev->nodename);

	if (!info)
		return 0;

	blkif_free(info, 0);

	mutex_lock(&info->mutex);

	disk = info->gd;
	if (disk)
		bdev = bdgrab(disk->part0);

	info->xbdev = NULL;
	mutex_unlock(&info->mutex);

	if (!bdev) {
		mutex_lock(&blkfront_mutex);
		free_info(info);
		mutex_unlock(&blkfront_mutex);
		return 0;
	}

	/*
	 * The xbdev was removed before we reached the Closed
	 * state. See if it's safe to remove the disk. If the bdev
	 * isn't closed yet, we let release take care of it.
	 */

	mutex_lock(&disk->open_mutex);
	info = disk->private_data;

	dev_warn(disk_to_dev(disk),
		 "%s was hot-unplugged, %d stale handles\n",
		 xbdev->nodename, bdev->bd_openers);
	del_gendisk(info->gd);

	if (info && !bdev->bd_openers) {
		xlvbd_release_gendisk(info);
		disk->private_data = NULL;
	mutex_lock(&blkfront_mutex);
		free_info(info);
	list_del(&info->info_list);
	mutex_unlock(&blkfront_mutex);
	}

	mutex_unlock(&disk->open_mutex);
	bdput(bdev);
	blkif_free(info, 0);
	xlbd_release_minors(info->gd->first_minor, info->gd->minors);
	blk_cleanup_disk(info->gd);
	blk_mq_free_tag_set(&info->tag_set);

	kfree(info);
	return 0;
}

@@ -2541,77 +2437,9 @@ static int blkfront_is_ready(struct xenbus_device *dev)
	return info->is_ready && info->xbdev;
}

static int blkif_open(struct block_device *bdev, fmode_t mode)
{
	struct gendisk *disk = bdev->bd_disk;
	struct blkfront_info *info;
	int err = 0;

	mutex_lock(&blkfront_mutex);

	info = disk->private_data;
	if (!info) {
		/* xbdev gone */
		err = -ERESTARTSYS;
		goto out;
	}

	mutex_lock(&info->mutex);

	if (!info->gd)
		/* xbdev is closed */
		err = -ERESTARTSYS;

	mutex_unlock(&info->mutex);

out:
	mutex_unlock(&blkfront_mutex);
	return err;
}

static void blkif_release(struct gendisk *disk, fmode_t mode)
{
	struct blkfront_info *info = disk->private_data;
	struct xenbus_device *xbdev;

	mutex_lock(&blkfront_mutex);
	if (disk->part0->bd_openers)
		goto out_mutex;

	/*
	 * Check if we have been instructed to close. We will have
	 * deferred this request, because the bdev was still open.
	 */

	mutex_lock(&info->mutex);
	xbdev = info->xbdev;

	if (xbdev && xbdev->state == XenbusStateClosing) {
		/* pending switch to state closed */
		dev_info(disk_to_dev(disk), "releasing disk\n");
		xlvbd_release_gendisk(info);
		xenbus_frontend_closed(info->xbdev);
 	}

	mutex_unlock(&info->mutex);

	if (!xbdev) {
		/* sudden device removal */
		dev_info(disk_to_dev(disk), "releasing disk\n");
		xlvbd_release_gendisk(info);
		disk->private_data = NULL;
		free_info(info);
	}

out_mutex:
	mutex_unlock(&blkfront_mutex);
}

static const struct block_device_operations xlvbd_block_fops =
{
	.owner = THIS_MODULE,
	.open = blkif_open,
	.release = blkif_release,
	.getgeo = blkif_getgeo,
	.ioctl = blkif_ioctl,
	.compat_ioctl = blkdev_compat_ptr_ioctl,
+58 −9
Original line number Diff line number Diff line
@@ -1554,6 +1554,28 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
	wmb(); /* ensure the first interrupt sees the initialization */
}

/*
 * Try getting shutdown_lock while setting up IO queues.
 */
static int nvme_setup_io_queues_trylock(struct nvme_dev *dev)
{
	/*
	 * Give up if the lock is being held by nvme_dev_disable.
	 */
	if (!mutex_trylock(&dev->shutdown_lock))
		return -ENODEV;

	/*
	 * Controller is in wrong state, fail early.
	 */
	if (dev->ctrl.state != NVME_CTRL_CONNECTING) {
		mutex_unlock(&dev->shutdown_lock);
		return -ENODEV;
	}

	return 0;
}

static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled)
{
	struct nvme_dev *dev = nvmeq->dev;
@@ -1582,8 +1604,11 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled)
		goto release_cq;

	nvmeq->cq_vector = vector;
	nvme_init_queue(nvmeq, qid);

	result = nvme_setup_io_queues_trylock(dev);
	if (result)
		return result;
	nvme_init_queue(nvmeq, qid);
	if (!polled) {
		result = queue_request_irq(nvmeq);
		if (result < 0)
@@ -1591,10 +1616,12 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled)
	}

	set_bit(NVMEQ_ENABLED, &nvmeq->flags);
	mutex_unlock(&dev->shutdown_lock);
	return result;

release_sq:
	dev->online_queues--;
	mutex_unlock(&dev->shutdown_lock);
	adapter_delete_sq(dev, qid);
release_cq:
	adapter_delete_cq(dev, qid);
@@ -2167,7 +2194,18 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
	if (nr_io_queues == 0)
		return 0;

	clear_bit(NVMEQ_ENABLED, &adminq->flags);
	/*
	 * Free IRQ resources as soon as NVMEQ_ENABLED bit transitions
	 * from set to unset. If there is a window to it is truely freed,
	 * pci_free_irq_vectors() jumping into this window will crash.
	 * And take lock to avoid racing with pci_free_irq_vectors() in
	 * nvme_dev_disable() path.
	 */
	result = nvme_setup_io_queues_trylock(dev);
	if (result)
		return result;
	if (test_and_clear_bit(NVMEQ_ENABLED, &adminq->flags))
		pci_free_irq(pdev, 0, adminq);

	if (dev->cmb_use_sqes) {
		result = nvme_cmb_qdepth(dev, nr_io_queues,
@@ -2183,13 +2221,16 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
		result = nvme_remap_bar(dev, size);
		if (!result)
			break;
		if (!--nr_io_queues)
			return -ENOMEM;
		if (!--nr_io_queues) {
			result = -ENOMEM;
			goto out_unlock;
		}
	} while (1);
	adminq->q_db = dev->dbs;

 retry:
	/* Deregister the admin queue's interrupt */
	if (test_and_clear_bit(NVMEQ_ENABLED, &adminq->flags))
		pci_free_irq(pdev, 0, adminq);

	/*
@@ -2199,8 +2240,10 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
	pci_free_irq_vectors(pdev);

	result = nvme_setup_irqs(dev, nr_io_queues);
	if (result <= 0)
		return -EIO;
	if (result <= 0) {
		result = -EIO;
		goto out_unlock;
	}

	dev->num_vecs = result;
	result = max(result - 1, 1);
@@ -2214,8 +2257,9 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
	 */
	result = queue_request_irq(adminq);
	if (result)
		return result;
		goto out_unlock;
	set_bit(NVMEQ_ENABLED, &adminq->flags);
	mutex_unlock(&dev->shutdown_lock);

	result = nvme_create_io_queues(dev);
	if (result || dev->online_queues < 2)
@@ -2224,6 +2268,9 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
	if (dev->online_queues - 1 < dev->max_qid) {
		nr_io_queues = dev->online_queues - 1;
		nvme_disable_io_queues(dev);
		result = nvme_setup_io_queues_trylock(dev);
		if (result)
			return result;
		nvme_suspend_io_queues(dev);
		goto retry;
	}
@@ -2232,6 +2279,9 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
					dev->io_queues[HCTX_TYPE_READ],
					dev->io_queues[HCTX_TYPE_POLL]);
	return 0;
out_unlock:
	mutex_unlock(&dev->shutdown_lock);
	return result;
}

static void nvme_del_queue_end(struct request *req, blk_status_t error)
@@ -2962,7 +3012,6 @@ static void nvme_remove(struct pci_dev *pdev)
	if (!pci_device_is_present(pdev)) {
		nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DEAD);
		nvme_dev_disable(dev, true);
		nvme_dev_remove_admin(dev);
	}

	flush_work(&dev->ctrl.reset_work);
+1 −3
Original line number Diff line number Diff line
@@ -123,7 +123,6 @@ struct nvme_tcp_ctrl {
	struct blk_mq_tag_set	admin_tag_set;
	struct sockaddr_storage addr;
	struct sockaddr_storage src_addr;
	struct net_device	*ndev;
	struct nvme_ctrl	ctrl;

	struct work_struct	err_work;
@@ -2533,8 +2532,7 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
	}

	if (opts->mask & NVMF_OPT_HOST_IFACE) {
		ctrl->ndev = dev_get_by_name(&init_net, opts->host_iface);
		if (!ctrl->ndev) {
		if (!__dev_get_by_name(&init_net, opts->host_iface)) {
			pr_err("invalid interface passed: %s\n",
			       opts->host_iface);
			ret = -ENODEV;