ChangeSet 1.1722.97.72, 2004/06/14 10:35:15-07:00, mdharm-usb@one-eyed-alien.net [PATCH] USB Storage: Fix race when removing the SCSI host This patch fixes a race is disconnecting a usb-storage device that occurs with the SCSI layer. It's primarily reproducable via adding delays into various disconnect and reset processing paths, but has also been encountered in the field. This patch started life as as281b, and was modified by me only to patch properly against current kernels. The main features of the patch are: Store the host pointer at the start of the control thread rather than trying to get it from srb->device; after the host is removed the SCSI device structure may no longer exist. Keep dev_semaphore locked during the entire time the control thread or reset handlers are using the us_data structure. Reorder the items in dissociate_dev() and release_resources() so that things are released in the opposite order from the way they were acquired originally. Don't bother to increment and decrement the usb_device's reference count; it's unnecessary. In disconnect(), first set the DISCONNECTING flag so that no more I/O will take place and no more requests will be accepted. Next, cut short the current command and wait for it to finish. Then call scsi_remove_host(). The SCSI core guarantees that when scsi_remove_host() returns, the host will not be in error recovery and all outstanding commands will have been cancelled. Remove some old useless left-over code that was #if'ed out. Use a wait_queue for the 6-second delay during device resets so that we can be woken up in the middle if a disconnect occurs. The key point here is that after scsi_remove_host(), everything is idle as far as the SCSI midlayer is concerned. But if there was a command in progress at the time, the midlayer will abort it without telling us or waiting for it to complete. Hence we have to wait for the control thread to be idle before we can try to kill it. This should happen quickly, since all I/O attempted by the thread will fail immediately. Signed-off-by: Matthew Dharm Signed-off-by: Greg Kroah-Hartman drivers/usb/storage/transport.c | 9 - drivers/usb/storage/usb.c | 211 ++++++++++++++++------------------------ drivers/usb/storage/usb.h | 5 3 files changed, 93 insertions(+), 132 deletions(-) diff -Nru a/drivers/usb/storage/transport.c b/drivers/usb/storage/transport.c --- a/drivers/usb/storage/transport.c Fri Jun 18 10:54:07 2004 +++ b/drivers/usb/storage/transport.c Fri Jun 18 10:54:07 2004 @@ -1113,10 +1113,11 @@ goto Done; } - /* long wait for reset, so unlock to allow disconnects */ - up(&us->dev_semaphore); - msleep(6000); - down(&us->dev_semaphore); + /* Give the device some time to recover from the reset, + * but don't delay disconnect processing. */ + wait_event_interruptible_timeout(us->dev_reset_wait, + test_bit(US_FLIDX_DISCONNECTING, &us->flags), + HZ*6); if (test_bit(US_FLIDX_DISCONNECTING, &us->flags)) { US_DEBUGP("Reset interrupted by disconnect\n"); goto Done; diff -Nru a/drivers/usb/storage/usb.c b/drivers/usb/storage/usb.c --- a/drivers/usb/storage/usb.c Fri Jun 18 10:54:07 2004 +++ b/drivers/usb/storage/usb.c Fri Jun 18 10:54:07 2004 @@ -266,6 +266,7 @@ static int usb_stor_control_thread(void * __us) { struct us_data *us = (struct us_data *)__us; + struct Scsi_Host *host = us->host; lock_kernel(); @@ -283,19 +284,21 @@ complete(&(us->notify)); for(;;) { - struct Scsi_Host *host; US_DEBUGP("*** thread sleeping.\n"); if(down_interruptible(&us->sema)) break; US_DEBUGP("*** thread awakened.\n"); + /* lock the device pointers */ + down(&(us->dev_semaphore)); + /* if us->srb is NULL, we are being asked to exit */ if (us->srb == NULL) { US_DEBUGP("-- exit command received\n"); + up(&(us->dev_semaphore)); break; } - host = us->srb->device->host; /* lock access to the state */ scsi_lock(host); @@ -306,23 +309,20 @@ goto SkipForAbort; } - /* set the state and release the lock */ - us->sm_state = US_STATE_RUNNING; - scsi_unlock(host); - - /* lock the device pointers */ - down(&(us->dev_semaphore)); - /* don't do anything if we are disconnecting */ if (test_bit(US_FLIDX_DISCONNECTING, &us->flags)) { US_DEBUGP("No command during disconnect\n"); - us->srb->result = DID_BAD_TARGET << 16; + goto SkipForDisconnect; } + /* set the state and release the lock */ + us->sm_state = US_STATE_RUNNING; + scsi_unlock(host); + /* reject the command if the direction indicator * is UNKNOWN */ - else if (us->srb->sc_data_direction == SCSI_DATA_UNKNOWN) { + if (us->srb->sc_data_direction == SCSI_DATA_UNKNOWN) { US_DEBUGP("UNKNOWN data direction\n"); us->srb->result = DID_ERROR << 16; } @@ -362,9 +362,6 @@ us->proto_handler(us->srb, us); } - /* unlock the device pointers */ - up(&(us->dev_semaphore)); - /* lock access to the state */ scsi_lock(host); @@ -374,7 +371,7 @@ us->srb->result); us->srb->scsi_done(us->srb); } else { - SkipForAbort: +SkipForAbort: US_DEBUGP("scsi command aborted\n"); } @@ -387,9 +384,13 @@ complete(&(us->notify)); /* empty the queue, reset the state, and release the lock */ +SkipForDisconnect: us->srb = NULL; us->sm_state = US_STATE_IDLE; scsi_unlock(host); + + /* unlock the device pointers */ + up(&(us->dev_semaphore)); } /* for (;;) */ /* notify the exit routine that we're actually exiting now @@ -423,10 +424,8 @@ us->pusb_intf = intf; us->ifnum = intf->cur_altsetting->desc.bInterfaceNumber; - /* Store our private data in the interface and increment the - * device's reference count */ + /* Store our private data in the interface */ usb_set_intfdata(intf, us); - usb_get_dev(us->pusb_dev); /* Allocate the device-related DMA-mapped buffers */ us->cr = usb_buffer_alloc(us->pusb_dev, sizeof(*us->cr), @@ -770,19 +769,6 @@ up(&us->dev_semaphore); - /* Start up our control thread */ - us->sm_state = US_STATE_IDLE; - p = kernel_thread(usb_stor_control_thread, us, CLONE_VM); - if (p < 0) { - printk(KERN_WARNING USB_STORAGE - "Unable to start control thread\n"); - return p; - } - us->pid = p; - - /* Wait for the thread to start */ - wait_for_completion(&(us->notify)); - /* * Since this is a new device, we need to register a SCSI * host definition with the higher SCSI layers. @@ -790,69 +776,61 @@ us->host = scsi_host_alloc(&usb_stor_host_template, sizeof(us)); if (!us->host) { printk(KERN_WARNING USB_STORAGE - "Unable to register the scsi host\n"); + "Unable to allocate the scsi host\n"); return -EBUSY; } /* Set the hostdata to prepare for scanning */ us->host->hostdata[0] = (unsigned long) us; - return 0; -} - -/* Dissociate from the USB device */ -static void dissociate_dev(struct us_data *us) -{ - US_DEBUGP("-- %s\n", __FUNCTION__); - down(&us->dev_semaphore); - - /* Free the device-related DMA-mapped buffers */ - if (us->cr) { - usb_buffer_free(us->pusb_dev, sizeof(*us->cr), us->cr, - us->cr_dma); - us->cr = NULL; - } - if (us->iobuf) { - usb_buffer_free(us->pusb_dev, US_IOBUF_SIZE, us->iobuf, - us->iobuf_dma); - us->iobuf = NULL; + /* Start up our control thread */ + us->sm_state = US_STATE_IDLE; + p = kernel_thread(usb_stor_control_thread, us, CLONE_VM); + if (p < 0) { + printk(KERN_WARNING USB_STORAGE + "Unable to start control thread\n"); + return p; } + us->pid = p; - /* Remove our private data from the interface and decrement the - * device's reference count */ - usb_set_intfdata(us->pusb_intf, NULL); - usb_put_dev(us->pusb_dev); + /* Wait for the thread to start */ + wait_for_completion(&(us->notify)); - us->pusb_dev = NULL; - us->pusb_intf = NULL; - up(&us->dev_semaphore); + return 0; } -/* Release all our static and dynamic resources */ +/* Release all our dynamic resources */ void usb_stor_release_resources(struct us_data *us) { - /* - * The host must already have been removed - * and dissociate_dev() must have been called. - */ - - /* Finish the SCSI host removal sequence */ - if (us->host) { - us->host->hostdata[0] = 0; - scsi_host_put(us->host); - } + US_DEBUGP("-- %s\n", __FUNCTION__); - /* Kill the control thread - * - * Enqueue the command, wake up the thread, and wait for - * notification that it has exited. + /* Kill the control thread. The SCSI host must already have been + * removed so it won't try to queue any more commands. */ if (us->pid) { + + /* Wait for the thread to be idle */ + down(&us->dev_semaphore); US_DEBUGP("-- sending exit command to thread\n"); BUG_ON(us->sm_state != US_STATE_IDLE); + + /* If the SCSI midlayer queued a final command just before + * scsi_remove_host() was called, us->srb might not be + * NULL. We can overwrite it safely, because the midlayer + * will not wait for the command to finish. Also the + * control thread will already have been awakened. + * That's okay, an extra up() on us->sema won't hurt. + * + * Enqueue the command, wake up the thread, and wait for + * notification that it has exited. + */ + scsi_lock(us->host); us->srb = NULL; - up(&(us->sema)); - wait_for_completion(&(us->notify)); + scsi_unlock(us->host); + up(&us->dev_semaphore); + + up(&us->sema); + wait_for_completion(&us->notify); } /* Call the destructor routine, if it exists */ @@ -861,15 +839,36 @@ us->extra_destructor(us->extra); } + /* Finish the host removal sequence */ + if (us->host) + scsi_host_put(us->host); + /* Free the extra data and the URB */ if (us->extra) kfree(us->extra); if (us->current_urb) usb_free_urb(us->current_urb); +} + +/* Dissociate from the USB device */ +static void dissociate_dev(struct us_data *us) +{ + US_DEBUGP("-- %s\n", __FUNCTION__); + + /* Free the device-related DMA-mapped buffers */ + if (us->cr) + usb_buffer_free(us->pusb_dev, sizeof(*us->cr), us->cr, + us->cr_dma); + if (us->iobuf) + usb_buffer_free(us->pusb_dev, US_IOBUF_SIZE, us->iobuf, + us->iobuf_dma); + + /* Remove our private data from the interface */ + usb_set_intfdata(us->pusb_intf, NULL); + /* Free the structure itself */ kfree(us); - US_DEBUGP("-- %s finished\n", __FUNCTION__); } /* Probe to see if we can drive a newly-connected USB device */ @@ -895,6 +894,7 @@ init_MUTEX(&(us->dev_semaphore)); init_MUTEX_LOCKED(&(us->sema)); init_completion(&(us->notify)); + init_waitqueue_head(&us->dev_reset_wait); /* Associate the us_data structure with the USB device */ result = associate_dev(us, intf); @@ -965,8 +965,8 @@ /* We come here if there are any problems */ BadDevice: US_DEBUGP("storage_probe() failed\n"); - dissociate_dev(us); usb_stor_release_resources(us); + dissociate_dev(us); return result; } @@ -977,20 +977,20 @@ US_DEBUGP("storage_disconnect() called\n"); - /* Prevent new USB transfers and stop the current command */ + /* Prevent new USB transfers, stop the current command, and + * interrupt a device-reset delay */ set_bit(US_FLIDX_DISCONNECTING, &us->flags); usb_stor_stop_transport(us); + wake_up(&us->dev_reset_wait); - /* Dissociate from the USB device */ - dissociate_dev(us); - + /* Wait for the current command to finish, then remove the host */ + down(&us->dev_semaphore); + up(&us->dev_semaphore); scsi_remove_host(us->host); - /* TODO: somehow, wait for the device to - * be 'idle' (tasklet completion) */ - - /* Release all our other resources */ + /* Wait for everything to become idle and release all our resources */ usb_stor_release_resources(us); + dissociate_dev(us); } /*********************************************************************** @@ -1023,47 +1023,6 @@ */ US_DEBUGP("-- calling usb_deregister()\n"); usb_deregister(&usb_storage_driver) ; - -#if 0 - /* While there are still virtual hosts, unregister them - * Note that it's important to do this completely before removing - * the structures because of possible races with the /proc - * interface - */ - for (next = us_list; next; next = next->next) { - US_DEBUGP("-- calling scsi_unregister_host()\n"); - scsi_unregister_host(&usb_stor_host_template); - } - - /* While there are still structures, free them. Note that we are - * now race-free, since these structures can no longer be accessed - * from either the SCSI command layer or the /proc interface - */ - while (us_list) { - /* keep track of where the next one is */ - next = us_list->next; - - /* If there's extra data in the us_data structure then - * free that first */ - if (us_list->extra) { - /* call the destructor routine, if it exists */ - if (us_list->extra_destructor) { - US_DEBUGP("-- calling extra_destructor()\n"); - us_list->extra_destructor(us_list->extra); - } - - /* destroy the extra data */ - US_DEBUGP("-- freeing the data structure\n"); - kfree(us_list->extra); - } - - /* free the structure itself */ - kfree (us_list); - - /* advance the list pointer */ - us_list = next; - } -#endif } module_init(usb_stor_init); diff -Nru a/drivers/usb/storage/usb.h b/drivers/usb/storage/usb.h --- a/drivers/usb/storage/usb.h Fri Jun 18 10:54:07 2004 +++ b/drivers/usb/storage/usb.h Fri Jun 18 10:54:07 2004 @@ -158,9 +158,10 @@ dma_addr_t cr_dma; /* buffer DMA addresses */ dma_addr_t iobuf_dma; - /* mutual exclusion structures */ + /* mutual exclusion and synchronization structures */ struct semaphore sema; /* to sleep thread on */ - struct completion notify; /* thread begin/end */ + struct completion notify; /* thread begin/end */ + wait_queue_head_t dev_reset_wait; /* wait during reset */ /* subdriver information */ void *extra; /* Any extra data */