SCSI mid-layer: replace the SCSI_BH bottom half with a per-CPU done queue
that is drained by a new SCSI_SOFTIRQ.

scsi_done() now appends the finished command to done_q[cpu] (through the new
eh_entry list head in struct scsi_cmnd) with local interrupts disabled and
raises SCSI_SOFTIRQ.  scsi_softirq() splices that queue onto a local list and
runs the disposition handling that scsi_bottom_half_handler() used to run.
The scsi_bhqueue_lock spinlock and the bh_next chaining are no longer used
by this code.

Review notes:
 - host_busy is read with atomic_read() in every log statement, matching
   the other uses of that field in this patch.
 - The kernel-doc for scsi_done() names the real parameter (SCpnt).
 - Tabs and blank context lines were reconstructed from a copy of this
   patch whose line breaks had been lost.  The hunk line counts are
   honoured, but verify the context against the base tree before applying.

diff -uNr linux-2.4.21.bh/drivers/scsi/scsi.c linux-2.4.21.ehn/drivers/scsi/scsi.c
--- linux-2.4.21.bh/drivers/scsi/scsi.c	2003-09-01 17:39:00.000000000 +0200
+++ linux-2.4.21.ehn/drivers/scsi/scsi.c	2003-09-01 17:46:54.000000000 +0200
@@ -117,8 +117,7 @@
 	16, 12, 10, 10
 };
 static unsigned long serial_number;
-static Scsi_Cmnd *scsi_bh_queue_head;
-static Scsi_Cmnd *scsi_bh_queue_tail;
+static struct list_head done_q[NR_CPUS] __cacheline_aligned;
 
 /*
  * Note - the initial logging level can be set here to log events at boot time.
@@ -255,12 +254,6 @@
 static spinlock_t device_request_lock = SPIN_LOCK_UNLOCKED;
 
 /*
- * Used to protect insertion into and removal from the queue of
- * commands to be processed by the bottom half handler.
- */
-static spinlock_t scsi_bhqueue_lock = SPIN_LOCK_UNLOCKED;
-
-/*
  * Function:    scsi_allocate_request
  *
  * Purpose:     Allocate a request descriptor.
@@ -1061,53 +1054,37 @@
 	SCSI_LOG_MLQUEUE(3, printk("Leaving scsi_do_cmd()\n"));
 }
 
-/*
- * This function is the mid-level interrupt routine, which decides how
- *  to handle error conditions.  Each invocation of this function must
- *  do one and *only* one of the following:
- *
- *      1) Insert command in BH queue.
- *      2) Activate error handler for host.
- *
- * FIXME(eric) - I am concerned about stack overflow (still).  An
- * interrupt could come while we are processing the bottom queue,
- * which would cause another command to be stuffed onto the bottom
- * queue, and it would in turn be processed as that interrupt handler
- * is returning.  Given a sufficiently steady rate of returning
- * commands, this could cause the stack to overflow.  I am not sure
- * what is the most appropriate solution here - we should probably
- * keep a depth count, and not process any commands while we still
- * have a bottom handler active higher in the stack.
- *
- * There is currently code in the bottom half handler to monitor
- * recursion in the bottom handler and report if it ever happens.  If
- * this becomes a problem, it won't be hard to engineer something to
- * deal with it so that only the outer layer ever does any real
- * processing.
+/**
+ * scsi_done - Enqueue the finished SCSI command into the done queue.
+ * @SCpnt: The SCSI Command for which a low-level device driver (LLDD) gives
+ * ownership back to SCSI Core -- i.e. the LLDD has finished with it.
+ *
+ * This function is the mid-level's (SCSI Core) interrupt routine, which
+ * regains ownership of the SCSI command (de facto) from a LLDD, and enqueues
+ * the command to the done queue for further processing.
+ *
+ * This is the producer of the done queue who enqueues at the tail.
+ *
+ * This function is interrupt context safe.
  */
 void scsi_done(Scsi_Cmnd * SCpnt)
 {
+	int cpu;
 	unsigned long flags;
-	int tstatus;
+	struct list_head *pdone_q;
 
 	/*
 	 * We don't have to worry about this one timing out any more.
-	 */
-	tstatus = scsi_delete_timer(SCpnt);
-
-	/*
 	 * If we are unable to remove the timer, it means that the command
 	 * has already timed out.  In this case, we have no choice but to
 	 * let the timeout function run, as we have no idea where in fact
 	 * that function could really be.  It might be on another processor,
 	 * etc, etc.
 	 */
-	if (!tstatus) {
+	if (!scsi_delete_timer(SCpnt)) {
 		SCpnt->done_late = 1;
 		return;
 	}
-	/* Set the serial numbers back to zero */
-	SCpnt->serial_number = 0;
 
 	/*
 	 * First, see whether this command already timed out.  If so, we ignore
@@ -1124,181 +1101,144 @@
 		SCSI_LOG_MLCOMPLETE(1, printk("Ignoring completion of %p due to timeout status", SCpnt));
 		return;
 	}
-	spin_lock_irqsave(&scsi_bhqueue_lock, flags);
 
+	/* Set the serial numbers back to zero */
+	SCpnt->serial_number = 0;
 	SCpnt->serial_number_at_timeout = 0;
 	SCpnt->state = SCSI_STATE_BHQUEUE;
 	SCpnt->owner = SCSI_OWNER_BH_HANDLER;
-	SCpnt->bh_next = NULL;
-
-	/*
-	 * Next, put this command in the BH queue.
-	 *
-	 * We need a spinlock here, or compare and exchange if we can reorder incoming
-	 * Scsi_Cmnds, as it happens pretty often scsi_done is called multiple times
-	 * before bh is serviced. -jj
-	 *
-	 * We already have the io_request_lock here, since we are called from the
-	 * interrupt handler or the error handler. (DB)
-	 *
-	 * This may be true at the moment, but I would like to wean all of the low
-	 * level drivers away from using io_request_lock.   Technically they should
-	 * all use their own locking.  I am adding a small spinlock to protect
-	 * this datastructure to make it safe for that day.  (ERY)
-	 *
-	 * We do *NOT* hold the io_request_lock for certain at this point.
-	 * Don't make any assumptions, and we also don't need any other lock
-	 * besides the bh queue lock.  (DL)
-	 */
-	if (!scsi_bh_queue_head) {
-		scsi_bh_queue_head = SCpnt;
-		scsi_bh_queue_tail = SCpnt;
-	} else {
-		scsi_bh_queue_tail->bh_next = SCpnt;
-		scsi_bh_queue_tail = SCpnt;
-	}
 
-	spin_unlock_irqrestore(&scsi_bhqueue_lock, flags);
 	/*
-	 * Mark the bottom half handler to be run.
+	 * Next, enqueue the command into the done queue.
+	 * It is a per-CPU queue, so we just disable local interrupts
+	 * and need no spinlock.
 	 */
-	mark_bh(SCSI_BH);
+	local_irq_save(flags);
+
+	cpu = smp_processor_id();
+	pdone_q = &done_q[cpu];
+	list_add_tail(&SCpnt->eh_entry, pdone_q);
+	cpu_raise_softirq(cpu, SCSI_SOFTIRQ);
+
+	local_irq_restore(flags);
 }
 
 
-/*
- * Procedure:   scsi_bottom_half_handler
- *
- * Purpose:     Called after we have finished processing interrupts, it
- *              performs post-interrupt handling for commands that may
- *              have completed.
- *
- * Notes:       This is called with all interrupts enabled.  This should reduce
- *              interrupt latency, stack depth, and reentrancy of the low-level
- *              drivers.
+/**
+ * scsi_softirq - Perform post-interrupt processing of finished SCSI commands.
  *
- * The io_request_lock is required in all the routine. There was a subtle
- * race condition when scsi_done is called after a command has already
- * timed out but before the time out is processed by the error handler.
- * (DB)
+ * This is the consumer of the done queue.
  *
- * I believe I have corrected this.  We simply monitor the return status of
- * del_timer() - if this comes back as 0, it means that the timer has fired
- * and that a timeout is in progress.   I have modified scsi_done() such
- * that in this instance the command is never inserted in the bottom
- * half queue.  Thus the only time we hold the lock here is when
- * we wish to atomically remove the contents of the queue.
+ * This is called with all interrupts enabled.  This should reduce
+ * interrupt latency, stack depth, and reentrancy of the low-level
+ * drivers.
  */
-void scsi_bottom_half_handler(void)
+static void scsi_softirq(struct softirq_action *h)
 {
-	struct Scsi_Host *host;
-	Scsi_Cmnd *SCpnt;
-	Scsi_Cmnd *SCnext;
-	unsigned long flags;
+	LIST_HEAD(local_q);
 
+	local_irq_disable();
+	list_splice_init(&done_q[smp_processor_id()], &local_q);
+	local_irq_enable();
 
-	while (1 == 1) {
-		spin_lock_irqsave(&scsi_bhqueue_lock, flags);
-		SCpnt = scsi_bh_queue_head;
-		scsi_bh_queue_head = NULL;
-		spin_unlock_irqrestore(&scsi_bhqueue_lock, flags);
-
-		if (SCpnt == NULL) {
-			return;
-		}
-		SCnext = SCpnt->bh_next;
-
-		for (; SCpnt; SCpnt = SCnext) {
-			SCnext = SCpnt->bh_next;
-
-			host = SCpnt->host;
-			switch (scsi_decide_disposition(SCpnt)) {
-			case SUCCESS:
-				/*
-				 * Add to BH queue.
-				 */
-				SCSI_LOG_MLCOMPLETE(3, printk("Command finished %d %d 0x%x\n", atomic_read(&SCpnt->host->host_busy),
-						SCpnt->host->host_failed,
-							 SCpnt->result));
+	while (!list_empty(&local_q)) {
+		struct scsi_cmnd *SCpnt = list_entry(local_q.next, struct scsi_cmnd, eh_entry);
 
-				scsi_finish_command(SCpnt);
-				break;
-			case NEEDS_RETRY:
-				/*
-				 * We only come in here if we want to retry a command.  The
-				 * test to see whether the command should be retried should be
-				 * keeping track of the number of tries, so we don't end up looping,
-				 * of course.
-				 */
-				SCSI_LOG_MLCOMPLETE(3, printk("Command needs retry %d %d 0x%x\n", atomic_read(&SCpnt->host->host_busy),
-				SCpnt->host->host_failed, SCpnt->result));
+		list_del_init(&SCpnt->eh_entry);
 
-				scsi_retry_command(SCpnt);
-				break;
-			case ADD_TO_MLQUEUE:
-				/*
-				 * This typically happens for a QUEUE_FULL message -
-				 * typically only when the queue depth is only
-				 * approximate for a given device.  Adding a command
-				 * to the queue for the device will prevent further commands
-				 * from being sent to the device, so we shouldn't end up
-				 * with tons of things being sent down that shouldn't be.
-				 */
-				SCSI_LOG_MLCOMPLETE(3, printk("Command rejected as device queue full, put on ml queue %p\n",
-							      SCpnt));
-				scsi_mlqueue_insert(SCpnt, SCSI_MLQUEUE_DEVICE_BUSY);
-				break;
-			default:
-				/*
-				 * Here we have a fatal error of some sort.  Turn it over to
-				 * the error handler.
-				 */
-				SCSI_LOG_MLCOMPLETE(3, printk("Command failed %p %x active=%d busy=%d failed=%d\n",
-						    SCpnt, SCpnt->result,
-				  atomic_read(&SCpnt->host->host_active),
-				  atomic_read(&SCpnt->host->host_busy),
-					      SCpnt->host->host_failed));
+		switch (scsi_decide_disposition(SCpnt)) {
+		case SUCCESS:
+			/*
+			 * The command completed; hand it to scsi_finish_command().
+			 */
+			SCSI_LOG_MLCOMPLETE(3, printk("Command finished %d"
+						      " %d 0x%x\n",
+						      atomic_read(&SCpnt->host->host_busy),
+						      SCpnt->host->host_failed,
+						      SCpnt->result));
+			scsi_finish_command(SCpnt);
+			break;
+		case NEEDS_RETRY:
+			/*
+			 * We only come in here if we want to retry a
+			 * command.  The test to see whether the command
+			 * should be retried should be keeping track of
+			 * the number of tries, so we don't end up
+			 * looping, of course.
+			 */
+			SCSI_LOG_MLCOMPLETE(3, printk("Command needs retry"
+						      " %d %d 0x%x\n",
+						      atomic_read(&SCpnt->host->host_busy),
+						      SCpnt->host->host_failed, SCpnt->result));
+
+			scsi_retry_command(SCpnt);
+			break;
+		case ADD_TO_MLQUEUE:
+			/*
+			 * This typically happens for a QUEUE_FULL message
+			 * - typically only when the queue depth is only
+			 * approximate for a given device.  Adding a
+			 * command to the queue for the device will
+			 * prevent further commands from being sent to the
+			 * device, so we shouldn't end up with tons of
+			 * things being sent down that shouldn't be.
+			 */
+			SCSI_LOG_MLCOMPLETE(3, printk("Command rejected as"
+						      " device queue full,"
+						      " put on ml queue"
+						      " %p\n", SCpnt));
+			scsi_mlqueue_insert(SCpnt, SCSI_MLQUEUE_DEVICE_BUSY);
+			break;
+		default:
+			/*
+			 * Here we have a fatal error of some sort.  Turn
+			 * it over to the error handler.
+			 */
+			SCSI_LOG_MLCOMPLETE(3, printk("Command failed %p"
+						      " %x active=%d busy=%d"
+						      " failed=%d\n", SCpnt,
+						      SCpnt->result,
+						      atomic_read(&SCpnt->host->host_active),
+						      atomic_read(&SCpnt->host->host_busy),
+						      SCpnt->host->host_failed));
+
+			/*
+			 * Dump the sense information too.
+			 */
+			if ((status_byte(SCpnt->result) & CHECK_CONDITION) != 0)
+				SCSI_LOG_MLCOMPLETE(3, print_sense("bh", SCpnt));
 
-				/*
-				 * Dump the sense information too.
-				 */
-				if ((status_byte(SCpnt->result) & CHECK_CONDITION) != 0) {
-					SCSI_LOG_MLCOMPLETE(3, print_sense("bh", SCpnt));
-				}
-				if (SCpnt->host->eh_wait != NULL) {
-					SCpnt->host->host_failed++;
-					SCpnt->owner = SCSI_OWNER_ERROR_HANDLER;
-					SCpnt->state = SCSI_STATE_FAILED;
-					SCpnt->host->in_recovery = 1;
-				} else {
-					/* eh not present....trying to continue anyway */
-					scsi_finish_command(SCpnt);
-				}
-				break;
-			} // switch
 			if (SCpnt->host->eh_wait != NULL) {
+				SCpnt->host->host_failed++;
+				SCpnt->owner = SCSI_OWNER_ERROR_HANDLER;
+				SCpnt->state = SCSI_STATE_FAILED;
+				SCpnt->host->in_recovery = 1;
+			} else {
 				/*
-				 * If the host is having troubles, then look to see if this was the last
-				 * command that might have failed.  If so, wake up the error handler.
+				 * We only get here if the error recovery
+				 * thread has died.
 				 */
-				if (SCpnt->host->in_recovery &&
-				    !SCpnt->host->eh_active &&
-				    (atomic_read(&SCpnt->host->host_busy) == SCpnt->host->host_failed)) {
-					SCSI_LOG_ERROR_RECOVERY(5, printk("Waking error handler thread (%d)\n",
-					     atomic_read(&SCpnt->host->eh_wait->count)));
-					printk("(in_recovery=%d, host_busy=%d, host_failed=%d) "
-					       "Waking error handler thread bh(%d)\n",
-					       SCpnt->host->in_recovery,
-					       atomic_read(&SCpnt->host->host_busy),
-					       SCpnt->host->host_failed,
-					       atomic_read(&SCpnt->host->eh_wait->count));
-					up(SCpnt->host->eh_wait);
-				}
-			} else {
-				SCSI_LOG_ERROR_RECOVERY(5, printk("Warning: eh_thread not present\n"));
-			}
-		} /* for(; SCpnt...) */
-	} /* while(1==1) */
+				scsi_finish_command(SCpnt);
+			}
+		}
+		if (SCpnt->host->eh_wait != NULL) {
+			/*
+			 * If the host is having troubles, then look to see
+			 * if this was the last command that might have failed.
+			 * If so, wake up the error handler.
+			 */
+			if (SCpnt->host->in_recovery &&
+			    !SCpnt->host->eh_active &&
+			    (atomic_read(&SCpnt->host->host_busy)
+			     == SCpnt->host->host_failed)) {
+				SCSI_LOG_ERROR_RECOVERY(5, printk("Waking error handler thread (%d)\n", atomic_read(&SCpnt->host->eh_wait->count)));
+				up(SCpnt->host->eh_wait);
+			}
+		} else {
+			if (SCpnt->host->in_recovery)
+				SCSI_LOG_ERROR_RECOVERY(5, printk("Warning: eh_thread not present\n"));
+		}
+	}
 }
 
 /*
@@ -2622,6 +2562,7 @@
 static int __init init_scsi(void)
 {
 	struct proc_dir_entry *generic;
+	int i;
 
 	printk(KERN_INFO "SCSI subsystem driver " REVISION "\n");
 
@@ -2652,11 +2593,11 @@
 	if (scsihosts)
 		printk(KERN_INFO "scsi: host order: %s\n", scsihosts);
 	scsi_host_no_init (scsihosts);
-	/*
-	 * This is where the processing takes place for most everything
-	 * when commands are completed.
-	 */
-	init_bh(SCSI_BH, scsi_bottom_half_handler);
+
+	for (i = 0; i < NR_CPUS; i++)
+		INIT_LIST_HEAD(&done_q[i]);
+
+	open_softirq(SCSI_SOFTIRQ, scsi_softirq, NULL);
 
 	return 0;
 }
diff -uNr linux-2.4.21.bh/drivers/scsi/scsi.h linux-2.4.21.ehn/drivers/scsi/scsi.h
--- linux-2.4.21.bh/drivers/scsi/scsi.h	2003-09-01 17:39:00.000000000 +0200
+++ linux-2.4.21.ehn/drivers/scsi/scsi.h	2003-09-01 17:27:36.000000000 +0200
@@ -692,6 +692,7 @@
 	struct scsi_cmnd *next;
 	struct scsi_cmnd *reset_chain;
 
+	struct list_head eh_entry;
 	int eh_state;		/* Used for state tracking in error handlr */
 	void (*done) (struct scsi_cmnd *);	/* Mid-level done function */
 	/*
diff -uNr linux-2.4.21.bh/drivers/scsi/scsi_error.c linux-2.4.21.ehn/drivers/scsi/scsi_error.c
--- linux-2.4.21.bh/drivers/scsi/scsi_error.c	2003-09-01 17:27:36.000000000 +0200
+++ linux-2.4.21.ehn/drivers/scsi/scsi_error.c	2003-09-01 17:48:20.000000000 +0200
@@ -234,6 +234,7 @@
 		panic("Error handler thread not present at %p %p %s %d",
 		      SCpnt, SCpnt->host, __FILE__, __LINE__);
 	}
+	/* FIXME: Don't we need to check for !SCpnt->host->eh_active ? */
 	if (atomic_read(&SCpnt->host->host_busy) == SCpnt->host->host_failed) {
 		up(SCpnt->host->eh_wait);
 	}
diff -uNr linux-2.4.21.bh/include/linux/interrupt.h linux-2.4.21.ehn/include/linux/interrupt.h
--- linux-2.4.21.bh/include/linux/interrupt.h	2003-09-01 17:39:00.000000000 +0200
+++ linux-2.4.21.ehn/include/linux/interrupt.h	2003-09-01 17:27:36.000000000 +0200
@@ -59,6 +59,7 @@
 	HI_SOFTIRQ=0,
 	NET_TX_SOFTIRQ,
 	NET_RX_SOFTIRQ,
+	SCSI_SOFTIRQ,
 	TASKLET_SOFTIRQ
 };
 