diff -uNr linux-2.4.21/drivers/scsi/scsi.c linux-2.4.21.neweh/drivers/scsi/scsi.c --- linux-2.4.21/drivers/scsi/scsi.c 2003-08-14 22:39:00.000000000 +0200 +++ linux-2.4.21.neweh/drivers/scsi/scsi.c 2003-08-14 22:53:43.000000000 +0200 @@ -542,22 +542,10 @@ SCpnt->target, atomic_read(&SCpnt->host->host_active), SCpnt->host->host_failed)); - if (SCpnt->host->host_failed != 0) { - SCSI_LOG_ERROR_RECOVERY(5, printk("Error handler thread %d %d\n", - SCpnt->host->in_recovery, - SCpnt->host->eh_active)); - } - /* - * If the host is having troubles, then look to see if this was the last - * command that might have failed. If so, wake up the error handler. - */ - if (SCpnt->host->in_recovery - && !SCpnt->host->eh_active - && atomic_read(&SCpnt->host->host_busy) == SCpnt->host->host_failed) { - SCSI_LOG_ERROR_RECOVERY(5, printk("Waking error handler thread (%d)\n", - atomic_read(&SCpnt->host->eh_wait->count))); - up(SCpnt->host->eh_wait); - } + + /* Note: The eh_thread is now started in scsi_bottom_half_handler for + * all cases except command timeout + */ spin_unlock_irqrestore(&device_request_lock, flags); @@ -1281,24 +1269,34 @@ SCpnt->owner = SCSI_OWNER_ERROR_HANDLER; SCpnt->state = SCSI_STATE_FAILED; SCpnt->host->in_recovery = 1; - /* - * If the host is having troubles, then look to see if this was the last - * command that might have failed. If so, wake up the error handler. - */ - if (atomic_read(&SCpnt->host->host_busy) == SCpnt->host->host_failed) { - SCSI_LOG_ERROR_RECOVERY(5, printk("Waking error handler thread (%d)\n", - atomic_read(&SCpnt->host->eh_wait->count))); - up(SCpnt->host->eh_wait); - } } else { - /* - * We only get here if the error recovery thread has died. - */ + /* eh not present....trying to continue anyway */ scsi_finish_command(SCpnt); } - } + break; + } // switch + if (SCpnt->host->eh_wait != NULL) { + /* + * If the host is having troubles, then look to see if this was the last + * command that might have failed. If so, wake up the error handler. 
+ */ + if (SCpnt->host->in_recovery && + !SCpnt->host->eh_active && + (atomic_read(&SCpnt->host->host_busy) == SCpnt->host->host_failed)) { + SCSI_LOG_ERROR_RECOVERY(5, printk("Waking error handler thread (%d)\n", + atomic_read(&SCpnt->host->eh_wait->count))); + printk("(in_recovery=%d, host_busy=%d, host_failed=%d) " + "Waking error handler thread bh(%d)\n", + SCpnt->host->in_recovery, + atomic_read(&SCpnt->host->host_busy), + SCpnt->host->host_failed, + atomic_read(&SCpnt->host->eh_wait->count)); + up(SCpnt->host->eh_wait); + } + } else { + SCSI_LOG_ERROR_RECOVERY(5, printk("Warning: eh_thread not present\n")); + } } /* for(; SCpnt...) */ - } /* while(1==1) */ } diff -uNr linux-2.4.21/drivers/scsi/scsi_error.c linux-2.4.21.neweh/drivers/scsi/scsi_error.c --- linux-2.4.21/drivers/scsi/scsi_error.c 2003-08-14 22:37:07.000000000 +0200 +++ linux-2.4.21.neweh/drivers/scsi/scsi_error.c 2003-08-14 22:57:48.000000000 +0200 @@ -1333,9 +1331,18 @@ */ for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) { for (SCpnt = SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next) { + /* + * Treat SCSI_STATE_FINISHED as a completed IO. It can + * be active and since IO's are blocked from getting + * started the upper layer can not retry a finished + * IO so we will forever for a FINISHED command to go + * away! + */ if (SCpnt->state == SCSI_STATE_FAILED || SCpnt->state == SCSI_STATE_TIMEOUT || SCpnt->state == SCSI_STATE_INITIALIZING + || SCpnt->state == SCSI_STATE_FINISHED + || SCpnt->state == SCSI_STATE_MLQUEUE || SCpnt->state == SCSI_STATE_UNUSED) { continue; } @@ -1347,6 +1354,15 @@ * the command will be queued and will be finished along the way. */ SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler prematurely woken - commands still active (%p %x %d)\n", SCpnt, SCpnt->state, SCpnt->target)); + /* + * we now call the error handler more often so this + * is not an error case so I am glad we no longer + * panic below. 
But if there are IO's still bouncing + around active I don't think we want to go through + the error recovery. So for now let's just exit out + of here. + */ + return TRUE; /* * panic("SCSI Error handler woken too early\n");