Commit 785538bf authored by Bart Van Assche, committed by Martin K. Petersen
Browse files

scsi: sd: Revert "Rework asynchronous resume support"

Although commit 88f16690 ("scsi: sd: Rework asynchronous resume support")
eliminates a delay for some ATA disks after resume, it causes resume of ATA
disks to fail on other setups. See also:

 * "Resume process hangs for 5-6 seconds starting sometime in 5.16"
   (https://bugzilla.kernel.org/show_bug.cgi?id=215880).

 * Geert's regression report
   (https://lore.kernel.org/linux-scsi/alpine.DEB.2.22.394.2207191125130.1006766@ramsan.of.borg/).

This is what I understand about this issue:

 * During resume, ata_port_pm_resume() starts the SCSI error handler.  This
   changes the SCSI host state into SHOST_RECOVERY and causes
   scsi_queue_rq() to return BLK_STS_RESOURCE.

 * sd_resume() calls sd_start_stop_device() for ATA devices. That function
   in turn calls sd_submit_start() which tries to submit a START STOP UNIT
   command. That command can only be submitted after the SCSI error handler
   has changed the SCSI host state back to SHOST_RUNNING.

 * The SCSI error handler runs on its own thread and calls
   schedule_work(&(ap->scsi_rescan_task)). That causes
   ata_scsi_dev_rescan() to be called from the context of a kernel
   workqueue. That call hangs in blk_mq_get_tag(). I'm not sure why - maybe
   because all available tags have been allocated by sd_submit_start()
   calls (this is a guess).

Link: https://lore.kernel.org/r/20220816172638.538734-1-bvanassche@acm.org


Fixes: 88f16690 ("scsi: sd: Rework asynchronous resume support")
Cc: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Cc: Hannes Reinecke <hare@suse.de>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: gzhqyz@gmail.com
Reported-by: Geert Uytterhoeven <geert@linux-m68k.org>
Reported-by: <gzhqyz@gmail.com>
Reported-and-tested-by: Vlastimil Babka <vbabka@suse.cz>
Tested-by: John Garry <john.garry@huawei.com>
Tested-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
parent fac8e558
Loading
Loading
Loading
Loading
+18 −66
Original line number Diff line number Diff line
@@ -103,7 +103,6 @@ static void sd_config_discard(struct scsi_disk *, unsigned int);
static void sd_config_write_same(struct scsi_disk *);
static int  sd_revalidate_disk(struct gendisk *);
static void sd_unlock_native_capacity(struct gendisk *disk);
static void sd_start_done_work(struct work_struct *work);
static int  sd_probe(struct device *);
static int  sd_remove(struct device *);
static void sd_shutdown(struct device *);
@@ -3471,7 +3470,6 @@ static int sd_probe(struct device *dev)
	sdkp->max_retries = SD_MAX_RETRIES;
	atomic_set(&sdkp->openers, 0);
	atomic_set(&sdkp->device->ioerr_cnt, 0);
	INIT_WORK(&sdkp->start_done_work, sd_start_done_work);

	if (!sdp->request_queue->rq_timeout) {
		if (sdp->type != TYPE_MOD)
@@ -3594,69 +3592,12 @@ static void scsi_disk_release(struct device *dev)
	kfree(sdkp);
}

/*
 * Process sense data after a START command finished.
 *
 * Work handler for the start_done_work member of struct scsi_disk. It reads
 * the result saved in sdkp->start_result and, when that result is non-zero,
 * logs "Start/Stop Unit failed". For positive results it additionally
 * prints the sense header, provided the saved sense buffer can be
 * normalized.
 */
static void sd_start_done_work(struct work_struct *work)
{
	/* Recover the owning scsi_disk from its embedded work item. */
	struct scsi_disk *sdkp = container_of(work, typeof(*sdkp),
					      start_done_work);
	struct scsi_sense_hdr sshdr;
	int res = sdkp->start_result;

	/* A zero result needs no reporting. */
	if (res == 0)
		return;

	sd_print_result(sdkp, "Start/Stop Unit failed", res);

	/* Sense data is only examined for positive results. */
	if (res < 0)
		return;

	if (scsi_normalize_sense(sdkp->start_sense_buffer,
				 sdkp->start_sense_len, &sshdr))
		sd_print_sense_hdr(sdkp, &sshdr);
}

/*
 * A START command finished. May be called from interrupt context.
 *
 * Completion callback installed as req->end_io by sd_submit_start(). It
 * saves the command result, sense length and sense bytes in the scsi_disk
 * and then schedules start_done_work, whose handler
 * (sd_start_done_work()) does the logging.
 *
 * The @status argument is not used here; the outcome is taken from
 * scmd->result instead.
 */
static void sd_start_done(struct request *req, blk_status_t status)
{
	const struct scsi_cmnd *scmd = blk_mq_rq_to_pdu(req);
	struct scsi_disk *sdkp = scsi_disk(req->q->disk);

	sdkp->start_result = scmd->result;
	/* Warn (once) if the reported sense length exceeds the buffer size. */
	WARN_ON_ONCE(scmd->sense_len > SCSI_SENSE_BUFFERSIZE);
	sdkp->start_sense_len = scmd->sense_len;
	/*
	 * The whole destination buffer is copied, independent of
	 * scmd->sense_len.
	 */
	memcpy(sdkp->start_sense_buffer, scmd->sense_buffer,
	       ARRAY_SIZE(sdkp->start_sense_buffer));
	/* Warn (once) if schedule_work() returns false. */
	WARN_ON_ONCE(!schedule_work(&sdkp->start_done_work));
}

/*
 * Submit a START command asynchronously.
 *
 * @sdkp:    disk whose device request queue receives the command
 * @cmd:     command bytes to copy into the request's scsi_cmnd
 * @cmd_len: number of bytes in @cmd
 *
 * Allocates a request with BLK_MQ_REQ_PM, fills in the command, and queues
 * it with blk_execute_rq_nowait(). Completion is reported through
 * sd_start_done().
 *
 * Returns 0 once the request has been queued, or the PTR_ERR() value from
 * scsi_alloc_request() if allocation failed. The command's own result is
 * not reflected in the return value.
 *
 * NOTE(review): cmd_len is not checked against the size of scmd->cmnd
 * here -- confirm every caller passes a bounded length.
 */
static int sd_submit_start(struct scsi_disk *sdkp, u8 cmd[], u8 cmd_len)
{
	struct scsi_device *sdev = sdkp->device;
	struct request_queue *q = sdev->request_queue;
	struct request *req;
	struct scsi_cmnd *scmd;

	req = scsi_alloc_request(q, REQ_OP_DRV_IN, BLK_MQ_REQ_PM);
	if (IS_ERR(req))
		return PTR_ERR(req);

	scmd = blk_mq_rq_to_pdu(req);
	scmd->cmd_len = cmd_len;
	memcpy(scmd->cmnd, cmd, cmd_len);
	scmd->allowed = sdkp->max_retries;
	req->timeout = SD_TIMEOUT;
	/* Mark the request as RQF_PM and RQF_QUIET. */
	req->rq_flags |= RQF_PM | RQF_QUIET;
	req->end_io = sd_start_done;
	/* Queue at the head; does not wait for completion here. */
	blk_execute_rq_nowait(req, /*at_head=*/true);

	return 0;
}

static int sd_start_stop_device(struct scsi_disk *sdkp, int start)
{
	unsigned char cmd[6] = { START_STOP };	/* START_VALID */
	struct scsi_sense_hdr sshdr;
	struct scsi_device *sdp = sdkp->device;
	int res;

	if (start)
		cmd[4] |= 1;	/* START */
@@ -3667,10 +3608,23 @@ static int sd_start_stop_device(struct scsi_disk *sdkp, int start)
	if (!scsi_device_online(sdp))
		return -ENODEV;

	/* Wait until processing of sense data has finished. */
	flush_work(&sdkp->start_done_work);
	res = scsi_execute(sdp, cmd, DMA_NONE, NULL, 0, NULL, &sshdr,
			SD_TIMEOUT, sdkp->max_retries, 0, RQF_PM, NULL);
	if (res) {
		sd_print_result(sdkp, "Start/Stop Unit failed", res);
		if (res > 0 && scsi_sense_valid(&sshdr)) {
			sd_print_sense_hdr(sdkp, &sshdr);
			/* 0x3a is medium not present */
			if (sshdr.asc == 0x3a)
				res = 0;
		}
	}

	return sd_submit_start(sdkp, cmd, sizeof(cmd));
	/* SCSI error codes must not go to the generic layer */
	if (res)
		return -EIO;

	return 0;
}

/*
@@ -3697,8 +3651,6 @@ static void sd_shutdown(struct device *dev)
		sd_printk(KERN_NOTICE, sdkp, "Stopping disk\n");
		sd_start_stop_device(sdkp, 0);
	}

	flush_work(&sdkp->start_done_work);
}

static int sd_suspend_common(struct device *dev, bool ignore_stop_errors)
+0 −5
Original line number Diff line number Diff line
@@ -150,11 +150,6 @@ struct scsi_disk {
	unsigned	urswrz : 1;
	unsigned	security : 1;
	unsigned	ignore_medium_access_errors : 1;

	int		start_result;
	u32		start_sense_len;
	u8		start_sense_buffer[SCSI_SENSE_BUFFERSIZE];
	struct work_struct start_done_work;
};
#define to_scsi_disk(obj) container_of(obj, struct scsi_disk, disk_dev)