Can't use keventd to run the request_fn, because keventd sometimes blocks on disk IO (call_usermodehelper). It deadlocks. So make the expiry timer handler run the request_fn directly. That works fine. I _was_ making as_antic_stop run the request_fn directly too. That caused all sorts of horrid things to happen because as_antic_stop cals request_fn calls as_next_request which changes the state of lots of things and everything dies. So I wimped out and created a tasklet for when the scheduler directly calls as_antic_stop. This tasklet will run "soon" on the current CPU in semi-interrupt context. This may improve performance. It's not as nice as running the request_fn directly. But it can potentially save a ton of stack space, so... So far so good. But it still goes BUG in Program received signal SIGTRAP, Trace/breakpoint trap. 0xc0241bfe in as_can_anticipate (ad=0xc1b6bbcc, arq=0x0) at drivers/block/as-iosched.c:1118 1118 ad->antic_status == ANTIC_WAIT_NEXT); (gdb) bt #0 0xc0241bfe in as_can_anticipate (ad=0xc1b6bbcc, arq=0x0) at drivers/block/as-iosched.c:1118 #1 0xc0241e1b in as_dispatch_request (ad=0xc1b6bbcc) at drivers/block/as-iosched.c:1259 #2 0xc0241f12 in as_next_request (q=0xc1ba7000) at drivers/block/as-iosched.c:1345 #3 0xc023a62a in elv_next_request (q=0xc1ba7000) at drivers/block/elevator.c:308 #4 0xc02712fb in scsi_request_fn (q=0xc1ba7000) at drivers/scsi/scsi_lib.c:1082 #5 0xc023bf82 in __blk_run_queue (q=0xc1ba7000) at drivers/block/ll_rw_blk.c:1082 #6 0xc0270653 in scsi_queue_next_request (q=0xc1ba7000, cmd=0x0) at drivers/scsi/scsi_lib.c:390 #7 0xc02708b1 in scsi_end_request (cmd=0xd9d47e7c, uptodate=1, sectors=1024, requeue=1) at drivers/scsi/scsi_lib.c:504 #8 0xc0270c49 in scsi_io_completion (cmd=0xd9d47e7c, good_sectors=1024, block_sectors=1) at drivers/scsi/scsi_lib.c:722 #9 0xc028edbb in sd_rw_intr (SCpnt=0xd9d47e7c) at drivers/scsi/sd.c:749 #10 0xc026bb9e in scsi_finish_command (SCpnt=0xd9d47e7c) at drivers/scsi/scsi.c:896 #11 0xc026babd in scsi_softirq (h=0xc03f0540) at drivers/scsi/scsi.c:797 Because antic_status == ANTIC_WAIT_NEXT. What's happening here is that the tasklet is scheduled, but it has not run yet. So I changed it so we set state ANTIC_FINISHED as soon as we queue the tasklet in as_antic_stop(). That didn't work very well - the machine got stuck during boot due to unsubmited IO somewhere. So I just nobbled the assertion in as_can_anticipate. It seems to run OK now. You need to work out exactly what state you want the thing to be in when as_antic_stop has been called, and we are waiting for the callback to run. Is it FINISHED? Or WAIT_REQ? It's not clear. (But please don't add another doggone state). Could you do a few things for me? - We have 54 functions in that file now, and they all have the same name. See the nice comments which I added which help to make this less than totally confusing. - Convert the #defines to eunms. - See if we can reorder the functions so there are no forward decls and so that logically-related stuff is near to itself. drivers/block/as-iosched.c | 104 +++++++++++++++++++++++++++------------------ 1 files changed, 63 insertions(+), 41 deletions(-) diff -puN drivers/block/as-iosched.c~as-request_fn-in-timer drivers/block/as-iosched.c --- 25/drivers/block/as-iosched.c~as-request_fn-in-timer 2003-03-15 22:58:02.000000000 -0800 +++ 25-akpm/drivers/block/as-iosched.c 2003-03-15 22:58:02.000000000 -0800 @@ -20,6 +20,7 @@ #include #include #include +#include struct ant_stats { int reads; /* total read requests */ @@ -28,6 +29,7 @@ struct ant_stats { int expired_read_batches; int expired_write_batches; int timeouts; + int bottom_halves; int anticipate_hits; int expired_fifo_reads; int expired_fifo_writes; @@ -141,7 +143,7 @@ struct as_data { int antic_status; unsigned long antic_start; /* jiffies: when it started */ struct timer_list antic_timer; /* anticipatory scheduling timer */ - struct work_struct antic_work; /* anticipatory scheduling work */ + struct tasklet_struct tasklet; /* Deferred unplugging */ struct as_io_context *as_io_context;/* Identify the expected process */ int aic_finished; /* IO associated with as_io_context finished */ @@ -154,8 +156,8 @@ struct as_data { unsigned long antic_expire; }; -#define AS_RQ_NEW 0 -#define AS_RQ_QUEUED 1 +#define AS_RQ_NEW 0 +#define AS_RQ_QUEUED 1 #define AS_RQ_DISPATCHED 2 /* @@ -564,15 +566,13 @@ static void as_add_request(struct as_dat as_add_arq_rb(ad, arq); - as_update_arq(ad, arq); /* keep state machine up to date */ - /* * set expire time (only used for reads) and add to fifo list */ arq->expires = jiffies + ad->fifo_expire[data_dir]; list_add_tail(&arq->fifo, &ad->fifo_list[data_dir]); - arq->state = AS_RQ_QUEUED; + as_update_arq(ad, arq); /* keep state machine up to date */ } /* @@ -872,21 +872,6 @@ static inline int as_batch_expired(struc static int as_queue_notready(request_queue_t *q); -/* - * as_antic_work is scheduled by as_antic_timeout. It - * stops anticipation, ie. resumes dispatching requests to a device. - */ -static void as_antic_work(void *data) -{ - struct request_queue *q = data; - unsigned long flags; - - spin_lock_irqsave(q->queue_lock, flags); - if (!as_queue_notready(q)) - q->request_fn(q); - spin_unlock_irqrestore(q->queue_lock, flags); -} - static void as_antic_waitreq(struct as_data *ad) { BUG_ON(ad->antic_status == ANTIC_FINISHED); @@ -917,38 +902,71 @@ static void as_antic_waitnext(struct as_ ad->antic_status = ANTIC_WAIT_NEXT; } -static void as_antic_stop_notimer(struct as_data *ad) -{ - if (ad->antic_status == ANTIC_WAIT_REQ || ad->antic_status == ANTIC_WAIT_NEXT) - schedule_work(&ad->antic_work); - ad->antic_status = ANTIC_FINISHED; -} - -static void as_antic_stop(struct as_data *ad) +/* + * This is executed in a "deferred" context. Either in a timer handler or + * in tasklet. It tries to move a request into the dispatch queue (which is + * not a queue at all! FIXME!) and then calls the driver's request_fn so the + * driver can submit that request. + */ +static void __as_antic_stop(struct as_data *ad) { - if (ad->antic_status == ANTIC_WAIT_NEXT) - del_timer(&ad->antic_timer); + int status = ad->antic_status; - as_antic_stop_notimer(ad); + ad->antic_status = ANTIC_FINISHED; + if (status == ANTIC_WAIT_REQ || status == ANTIC_WAIT_NEXT) { + struct request_queue *q = ad->q; + + if (!as_queue_notready(q)) + q->request_fn(q); + } } /* - * as_antic_timeout is the timer function set by - * as_antic_waitnext. + * Conveniently, this function is identical whether called via tasklet or via + * timer. */ -static void as_antic_timeout(unsigned long data) +static void __as_bottom_half(unsigned long data) { struct request_queue *q = (struct request_queue *)data; struct as_data *ad = q->elevator.elevator_data; unsigned long flags; spin_lock_irqsave(q->queue_lock, flags); - as_antic_stop_notimer(ad); - ant_stats.timeouts++; + __as_antic_stop(ad); spin_unlock_irqrestore(q->queue_lock, flags); } /* + * as_antic_timeout is the timer function set by as_antic_waitnext. + */ +static void as_antic_timeout(unsigned long data) +{ + ant_stats.timeouts++; + __as_bottom_half(data); +} + +/* + * This is the tasklet handler. It is called from "bottom half" context + * shortly after the functions in this file called as_antic_stop(). + */ +static void as_tasklet_handler(unsigned long data) +{ + ant_stats.bottom_halves++; + __as_bottom_half(data); +} + +/* + * This is called directly by the functions in this file to stop anticipation. + * We kill the timer and schedule a call to the request_fn asap. + */ +static void as_antic_stop(struct as_data *ad) +{ + if (ad->antic_status == ANTIC_WAIT_NEXT) + del_timer(&ad->antic_timer); + tasklet_schedule(&ad->tasklet); +} + +/* * as_close_req decides if one request is considered "close" to the * previous one issued. */ @@ -1096,8 +1114,8 @@ static void as_update_arq(struct as_data */ static int as_can_anticipate(struct as_data *ad, struct as_rq *arq) { - BUG_ON(ad->antic_status == ANTIC_WAIT_REQ || - ad->antic_status == ANTIC_WAIT_NEXT); +// BUG_ON(ad->antic_status == ANTIC_WAIT_REQ || +// ad->antic_status == ANTIC_WAIT_NEXT); if (!ad->as_io_context) /* @@ -1384,7 +1402,8 @@ static int as_queue_notready(request_que if (!list_empty(ad->dispatch)) goto out; - if (ad->antic_status == ANTIC_WAIT_REQ || ad->antic_status == ANTIC_WAIT_NEXT) { + if (ad->antic_status == ANTIC_WAIT_REQ || + ad->antic_status == ANTIC_WAIT_NEXT) { ret = 1; goto out; } @@ -1431,6 +1450,9 @@ static void as_exit(request_queue_t *q, struct request *rq; int i; + del_timer_sync(&ad->antic_timer); + tasklet_kill(&ad->tasklet); + BUG_ON(!list_empty(&ad->fifo_list[READ])); BUG_ON(!list_empty(&ad->fifo_list[WRITE])); @@ -1485,7 +1507,7 @@ static int as_init(request_queue_t *q, e ad->antic_timer.function = as_antic_timeout; ad->antic_timer.data = (unsigned long)q; init_timer(&ad->antic_timer); - INIT_WORK(&ad->antic_work, as_antic_work, q); + tasklet_init(&ad->tasklet, as_tasklet_handler, (unsigned long)q); for (i = 0; i < AS_HASH_ENTRIES; i++) INIT_LIST_HEAD(&ad->hash[i]); _