Commit a379fbbc authored by Linus Torvalds

Merge tag 'block-5.15-2021-10-29' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:

 - NVMe pull request:
      - fix nvmet-tcp header digest verification (Amit Engel)
      - fix a memory leak in nvmet-tcp when releasing a queue (Maurizio
        Lombardi)
      - fix nvme-tcp H2CData PDU send accounting again (Sagi Grimberg)
      - fix digest pointer calculation in nvme-tcp and nvmet-tcp (Varun
        Prakash)
      - fix possible nvme-tcp req->offset corruption (Varun Prakash)

 - Queue drain ordering fix (Ming)

 - Partition check regression for zoned devices (Shin'ichiro)

 - Zone queue restart fix (Naohiro)

* tag 'block-5.15-2021-10-29' of git://git.kernel.dk/linux-block:
  block: Fix partition check for host-aware zoned block devices
  nvmet-tcp: fix header digest verification
  nvmet-tcp: fix data digest pointer calculation
  nvme-tcp: fix data digest pointer calculation
  nvme-tcp: fix possible req->offset corruption
  block: schedule queue restart after BLK_STS_ZONE_RESOURCE
  block: drain queue after disk is removed from sysfs
  nvme-tcp: fix H2CData PDU send accounting (again)
  nvmet-tcp: fix a memory leak when releasing a queue
parents 17d50f89 f4aaf1fa
block/blk-mq.c +9 −4
@@ -1325,6 +1325,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list,
 	int errors, queued;
 	blk_status_t ret = BLK_STS_OK;
 	LIST_HEAD(zone_list);
+	bool needs_resource = false;
 
 	if (list_empty(list))
 		return false;
@@ -1370,6 +1371,8 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list,
 			queued++;
 			break;
 		case BLK_STS_RESOURCE:
+			needs_resource = true;
+			fallthrough;
 		case BLK_STS_DEV_RESOURCE:
 			blk_mq_handle_dev_resource(rq, list);
 			goto out;
@@ -1380,6 +1383,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list,
 			 * accept.
 			 */
 			blk_mq_handle_zone_resource(rq, &zone_list);
+			needs_resource = true;
 			break;
 		default:
 			errors++;
@@ -1406,7 +1410,6 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list,
 		/* For non-shared tags, the RESTART check will suffice */
 		bool no_tag = prep == PREP_DISPATCH_NO_TAG &&
 			(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED);
-		bool no_budget_avail = prep == PREP_DISPATCH_NO_BUDGET;
 
 		if (nr_budgets)
 			blk_mq_release_budgets(q, list);
@@ -1447,14 +1450,16 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list,
 		 * If driver returns BLK_STS_RESOURCE and SCHED_RESTART
 		 * bit is set, run queue after a delay to avoid IO stalls
 		 * that could otherwise occur if the queue is idle.  We'll do
-		 * similar if we couldn't get budget and SCHED_RESTART is set.
+		 * similar if we couldn't get budget or couldn't lock a zone
+		 * and SCHED_RESTART is set.
 		 */
 		needs_restart = blk_mq_sched_needs_restart(hctx);
+		if (prep == PREP_DISPATCH_NO_BUDGET)
+			needs_resource = true;
 		if (!needs_restart ||
 		    (no_tag && list_empty_careful(&hctx->dispatch_wait.entry)))
 			blk_mq_run_hw_queue(hctx, true);
-		else if (needs_restart && (ret == BLK_STS_RESOURCE ||
-					   no_budget_avail))
+		else if (needs_restart && needs_resource)
 			blk_mq_delay_run_hw_queue(hctx, BLK_MQ_RESOURCE_DELAY);
 
 		blk_mq_update_dispatch_busy(hctx, true);
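
Why this fix needs a sticky flag rather than reusing ret: ret only records the status of the most recently dispatched request, so a BLK_STS_ZONE_RESOURCE returned earlier in the list is already overwritten by the time the restart decision runs, and the queue never gets re-run. A minimal userspace sketch of that logic (toy statuses and loop, not kernel code):

/*
 * Toy model: a zone-locked request followed by a successful one.
 * The old condition checked the final "ret"; the new code latches
 * a sticky needs_resource flag inside the dispatch loop.
 */
#include <stdbool.h>
#include <stdio.h>

enum blk_status { BLK_STS_OK, BLK_STS_RESOURCE, BLK_STS_ZONE_RESOURCE };

int main(void)
{
	enum blk_status list[] = { BLK_STS_ZONE_RESOURCE, BLK_STS_OK };
	enum blk_status ret = BLK_STS_OK;
	bool needs_resource = false;

	for (int i = 0; i < 2; i++) {
		ret = list[i];
		if (ret == BLK_STS_RESOURCE || ret == BLK_STS_ZONE_RESOURCE)
			needs_resource = true;	/* sticky: survives later OKs */
	}

	/* Old check: 0 here, so no delayed re-run -> possible IO stall. */
	printf("ret == BLK_STS_RESOURCE? %d\n", ret == BLK_STS_RESOURCE);
	/* New check: 1, so blk_mq_delay_run_hw_queue() gets scheduled. */
	printf("needs_resource? %d\n", needs_resource);
	return 0;
}

The sticky flag also lets the no-budget case (PREP_DISPATCH_NO_BUDGET) fold into the same condition, which is what the last hunk does in place of no_budget_avail.
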
block/blk-settings.c +19 −1
@@ -842,6 +842,24 @@ bool blk_queue_can_use_dma_map_merging(struct request_queue *q,
 }
 EXPORT_SYMBOL_GPL(blk_queue_can_use_dma_map_merging);
 
+static bool disk_has_partitions(struct gendisk *disk)
+{
+	unsigned long idx;
+	struct block_device *part;
+	bool ret = false;
+
+	rcu_read_lock();
+	xa_for_each(&disk->part_tbl, idx, part) {
+		if (bdev_is_partition(part)) {
+			ret = true;
+			break;
+		}
+	}
+	rcu_read_unlock();
+
+	return ret;
+}
+
 /**
  * blk_queue_set_zoned - configure a disk queue zoned model.
  * @disk:	the gendisk of the queue to configure
@@ -876,7 +894,7 @@ void blk_queue_set_zoned(struct gendisk *disk, enum blk_zoned_model model)
 		 * we do nothing special as far as the block layer is concerned.
 		 */
 		if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED) ||
-		    !xa_empty(&disk->part_tbl))
+		    disk_has_partitions(disk))
			model = BLK_ZONED_NONE;
 		break;
 	case BLK_ZONED_NONE:
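
Context for the regression fixed here: disk->part_tbl always contains at least one entry, the whole-disk block device (part0) at index 0, so !xa_empty(&disk->part_tbl) is true even for an unpartitioned disk and forced every host-aware zoned drive to BLK_ZONED_NONE. A userspace sketch of the distinction (toy bdev struct and table standing in for the kernel's xarray; bdev_is_partition_sketch() is a hypothetical helper mirroring bdev_is_partition()):

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct bdev { int partno; };	/* partno == 0 means the whole disk */

static bool bdev_is_partition_sketch(const struct bdev *b)
{
	return b->partno != 0;
}

int main(void)
{
	struct bdev part0 = { .partno = 0 };
	/* slot 0 is always occupied by the whole-disk device */
	const struct bdev *part_tbl[] = { &part0, NULL, NULL };
	size_t n = sizeof(part_tbl) / sizeof(part_tbl[0]);
	bool has_partitions = false;

	for (size_t i = 0; i < n; i++)
		if (part_tbl[i] && bdev_is_partition_sketch(part_tbl[i]))
			has_partitions = true;

	/* "table non-empty" is true even with zero real partitions */
	printf("non-empty: %d, has_partitions: %d\n",
	       part_tbl[0] != NULL, has_partitions);
	return 0;
}
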
block/genhd.c +12 −10
@@ -588,16 +588,6 @@ void del_gendisk(struct gendisk *disk)
 	 * Prevent new I/O from crossing bio_queue_enter().
 	 */
 	blk_queue_start_drain(q);
-	blk_mq_freeze_queue_wait(q);
-
-	rq_qos_exit(q);
-	blk_sync_queue(q);
-	blk_flush_integrity();
-	/*
-	 * Allow using passthrough request again after the queue is torn down.
-	 */
-	blk_queue_flag_clear(QUEUE_FLAG_INIT_DONE, q);
-	__blk_mq_unfreeze_queue(q, true);
 
 	if (!(disk->flags & GENHD_FL_HIDDEN)) {
 		sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
@@ -620,6 +610,18 @@ void del_gendisk(struct gendisk *disk)
 		sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk)));
 	pm_runtime_set_memalloc_noio(disk_to_dev(disk), false);
 	device_del(disk_to_dev(disk));
+
+	blk_mq_freeze_queue_wait(q);
+
+	rq_qos_exit(q);
+	blk_sync_queue(q);
+	blk_flush_integrity();
+	/*
+	 * Allow using passthrough request again after the queue is torn down.
+	 */
+	blk_queue_flag_clear(QUEUE_FLAG_INIT_DONE, q);
+	__blk_mq_unfreeze_queue(q, true);
+
 }
 EXPORT_SYMBOL(del_gendisk);
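
The teardown calls moved here now run after device_del(), i.e. after the queue's sysfs interface is gone. Roughly, the window being closed is that state torn down early (rq_qos_exit() and friends) could be re-created through still-visible sysfs attributes such as queue/wbt_lat_usec and then leak. A deterministic single-threaded toy model of that ordering (stand-ins for sysfs and rq_qos, not kernel code):

#include <stdbool.h>
#include <stdio.h>

struct queue { bool sysfs_visible; bool qos_active; };

/* models userspace writing a queue attribute, e.g. wbt_lat_usec */
static void sysfs_store(struct queue *q)
{
	if (q->sysfs_visible)
		q->qos_active = true;	/* re-initializes qos state */
}

static void rq_qos_exit_toy(struct queue *q) { q->qos_active = false; }

int main(void)
{
	struct queue q = { .sysfs_visible = true, .qos_active = true };

	/* Old order: tear down qos while sysfs is still exposed. */
	rq_qos_exit_toy(&q);
	sysfs_store(&q);		/* slips in through the visible attribute */
	q.sysfs_visible = false;	/* device_del() */
	printf("old order: qos_active at free time = %d (leak)\n", q.qos_active);

	/* New order: remove sysfs first, then tear down. */
	q = (struct queue){ .sysfs_visible = true, .qos_active = true };
	q.sysfs_visible = false;	/* device_del() */
	sysfs_store(&q);		/* rejected: attribute is gone */
	rq_qos_exit_toy(&q);
	printf("new order: qos_active at free time = %d\n", q.qos_active);
	return 0;
}
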

drivers/nvme/host/tcp.c +6 −3
@@ -926,12 +926,14 @@ static void nvme_tcp_fail_request(struct nvme_tcp_request *req)
 static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
 {
 	struct nvme_tcp_queue *queue = req->queue;
+	int req_data_len = req->data_len;
 
 	while (true) {
 		struct page *page = nvme_tcp_req_cur_page(req);
 		size_t offset = nvme_tcp_req_cur_offset(req);
 		size_t len = nvme_tcp_req_cur_length(req);
 		bool last = nvme_tcp_pdu_last_send(req, len);
+		int req_data_sent = req->data_sent;
 		int ret, flags = MSG_DONTWAIT;
 
 		if (last && !queue->data_digest && !nvme_tcp_queue_more(queue))
@@ -958,7 +960,7 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
 		 * in the request where we don't want to modify it as we may
 		 * compete with the RX path completing the request.
 		 */
-		if (req->data_sent + ret < req->data_len)
+		if (req_data_sent + ret < req_data_len)
 			nvme_tcp_advance_req(req, ret);
 
 		/* fully successful last send in current PDU */
@@ -1048,10 +1050,11 @@ static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req)
 static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req)
 {
 	struct nvme_tcp_queue *queue = req->queue;
+	size_t offset = req->offset;
 	int ret;
 	struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
 	struct kvec iov = {
-		.iov_base = &req->ddgst + req->offset,
+		.iov_base = (u8 *)&req->ddgst + req->offset,
 		.iov_len = NVME_TCP_DIGEST_LENGTH - req->offset
 	};
 
@@ -1064,7 +1067,7 @@ static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req)
 	if (unlikely(ret <= 0))
 		return ret;
 
-	if (req->offset + ret == NVME_TCP_DIGEST_LENGTH) {
+	if (offset + ret == NVME_TCP_DIGEST_LENGTH) {
 		nvme_tcp_done_send_req(queue);
 		return 1;
 	}
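
The ddgst hunks here (and the matching one in the target driver below) fix classic C pointer scaling: req->ddgst is a 4-byte digest, so &req->ddgst + req->offset advances offset * 4 bytes rather than offset bytes, and a partially sent digest would resume from the wrong address. The offset/req_data_sent snapshots address a separate race: the RX path can complete the request and modify those fields after kernel_sendmsg() returns. A standalone demo of the pointer-scaling bug (plain uint32_t array standing in for the __le32 digest field):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t buf[3] = { 0 };	/* buf[0] plays the ddgst field */
	unsigned offset = 2;		/* 2 of the 4 digest bytes already sent */

	uint8_t *wrong = (uint8_t *)(&buf[0] + offset);	/* advances 2 * 4 = 8 bytes */
	uint8_t *right = (uint8_t *)&buf[0] + offset;	/* advances 2 bytes */

	printf("wrong resume offset: %td bytes\n", wrong - (uint8_t *)buf);
	printf("right resume offset: %td bytes\n", right - (uint8_t *)buf);
	return 0;
}
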
drivers/nvme/target/tcp.c +5 −2
@@ -702,7 +702,7 @@ static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd, bool last_in_batch)
 	struct nvmet_tcp_queue *queue = cmd->queue;
 	struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
 	struct kvec iov = {
-		.iov_base = &cmd->exp_ddgst + cmd->offset,
+		.iov_base = (u8 *)&cmd->exp_ddgst + cmd->offset,
 		.iov_len = NVME_TCP_DIGEST_LENGTH - cmd->offset
 	};
 	int ret;
@@ -1096,7 +1096,7 @@ static int nvmet_tcp_try_recv_pdu(struct nvmet_tcp_queue *queue)
 	}
 
 	if (queue->hdr_digest &&
-	    nvmet_tcp_verify_hdgst(queue, &queue->pdu, queue->offset)) {
+	    nvmet_tcp_verify_hdgst(queue, &queue->pdu, hdr->hlen)) {
 		nvmet_tcp_fatal_error(queue); /* fatal */
 		return -EPROTO;
 	}
@@ -1428,6 +1428,7 @@ static void nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue *queue)
 
 static void nvmet_tcp_release_queue_work(struct work_struct *w)
 {
+	struct page *page;
 	struct nvmet_tcp_queue *queue =
 		container_of(w, struct nvmet_tcp_queue, release_work);
 
@@ -1447,6 +1448,8 @@ static void nvmet_tcp_release_queue_work(struct work_struct *w)
 	if (queue->hdr_digest || queue->data_digest)
 		nvmet_tcp_free_crypto(queue);
 	ida_simple_remove(&nvmet_tcp_queue_ida, queue->idx);
 
+	page = virt_to_head_page(queue->pf_cache.va);
+	__page_frag_cache_drain(page, queue->pf_cache.pagecnt_bias);
 	kfree(queue);
 }
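
On the hdgst change above: the header digest covers exactly hdr->hlen bytes of the PDU header, while queue->offset tracks how many bytes of the PDU have been received so far, which is not in general the same quantity (it can include the digest field itself), so verifying over queue->offset bytes computed the CRC over the wrong span. A userspace sketch of the difference; crc32c_sketch() is a toy stand-in for the kernel's crc32c(), not the real implementation:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint32_t crc32c_sketch(const uint8_t *p, size_t len)
{
	uint32_t crc = ~0u;
	while (len--) {
		crc ^= *p++;
		for (int k = 0; k < 8; k++)
			crc = (crc >> 1) ^ (0x82F63B78u & (0u - (crc & 1)));
	}
	return ~crc;
}

int main(void)
{
	uint8_t pdu[12] = "headerbytes";	/* toy PDU: hlen == 8, then 4-byte digest */
	uint8_t hlen = 8;
	uint32_t dgst = crc32c_sketch(pdu, hlen);

	memcpy(pdu + hlen, &dgst, sizeof(dgst));	/* sender appends the digest */

	size_t queue_offset = hlen + 4;	/* receiver consumed header + digest */
	printf("verify over hlen:   %s\n",
	       crc32c_sketch(pdu, hlen) == dgst ? "ok" : "bad");
	printf("verify over offset: %s\n",
	       crc32c_sketch(pdu, queue_offset) == dgst ? "ok" : "bad");
	return 0;
}

The last two hunks fix the queue release path: the per-queue page_frag cache still held a reference at teardown, so draining it with __page_frag_cache_drain() before kfree(queue) plugs the memory leak.
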