From a66ac07165855d8144f69a68d5c0ab0eb36e97e2 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Wed, 29 Mar 2017 12:29:44 +0300 Subject: [PATCH] [scsi] Retry TEST UNIT READY command The TEST UNIT READY command is issued automatically when the device is opened, and is not the result of a command being issued by the caller. This is required in order that a permanent TEST UNIT READY failure can be used to identify unusable paths in a multipath SAN device. Since the TEST UNIT READY command is not part of the caller's command issuing process, it is not covered by any external retry loops (such as the main retry loop in sandev_command()). We must therefore be prepared to retry the TEST UNIT READY command within the SCSI layer itself. We retry only the TEST UNIT READY command so as not to multiply the number of potential retries for normal commands (which are already retried by sandev_command()). Signed-off-by: Michael Brown --- src/drivers/block/scsi.c | 45 +++++++++++++++++++++++++++++++--------- 1 file changed, 35 insertions(+), 10 deletions(-) diff --git a/src/drivers/block/scsi.c b/src/drivers/block/scsi.c index d51b5cfa..f765c976 100644 --- a/src/drivers/block/scsi.c +++ b/src/drivers/block/scsi.c @@ -40,6 +40,9 @@ FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL ); * */ +/** Maximum number of TEST UNIT READY retries */ +#define SCSI_READY_MAX_RETRIES 10 + /* Error numbers generated by SCSI sense data */ #define EIO_NO_SENSE __einfo_error ( EINFO_EIO_NO_SENSE ) #define EINFO_EIO_NO_SENSE \ @@ -240,6 +243,8 @@ struct scsi_device { struct interface ready; /** TEST UNIT READY process */ struct process process; + /** TEST UNIT READY retry count */ + unsigned int retries; /** List of commands */ struct list_head cmds; @@ -875,20 +880,40 @@ static struct interface_descriptor scsidev_block_desc = static void scsidev_ready ( struct scsi_device *scsidev, int rc ) { /* Shut down interface */ - intf_shutdown ( &scsidev->ready, rc ); + intf_restart ( &scsidev->ready, rc ); - /* Close device on failure */ - if ( rc != 0 ) { - DBGC ( scsidev, "SCSI %p not ready: %s\n", - scsidev, strerror ( rc ) ); - scsidev_close ( scsidev, rc ); + /* Mark device as ready, if applicable */ + if ( rc == 0 ) { + DBGC ( scsidev, "SCSI %p unit is ready\n", scsidev ); + scsidev->flags |= SCSIDEV_UNIT_READY; + xfer_window_changed ( &scsidev->block ); + return; + } + DBGC ( scsidev, "SCSI %p not ready: %s\n", scsidev, strerror ( rc ) ); + + /* SCSI targets have an annoying habit of returning occasional + * pointless "error" messages such as "power-on occurred", so + * we have to be prepared to retry commands. + * + * For most commands, we rely on the caller (e.g. the generic + * SAN device layer) to retry commands as needed. However, a + * TEST UNIT READY failure is used as an indication that the + * whole SCSI device is unavailable and should be closed. We + * must therefore perform this retry loop within the SCSI + * layer. + */ + if ( scsidev->retries++ < SCSI_READY_MAX_RETRIES ) { + DBGC ( scsidev, "SCSI %p retrying (retry %d)\n", + scsidev, scsidev->retries ); + scsidev->flags &= ~SCSIDEV_UNIT_TESTED; + process_add ( &scsidev->process ); return; } - /* Mark device as ready */ - scsidev->flags |= SCSIDEV_UNIT_READY; - xfer_window_changed ( &scsidev->block ); - DBGC ( scsidev, "SCSI %p unit is ready\n", scsidev ); + /* Close device */ + DBGC ( scsidev, "SCSI %p never became ready: %s\n", + scsidev, strerror ( rc ) ); + scsidev_close ( scsidev, rc ); } /** SCSI device TEST UNIT READY interface operations */