This patch also includes the fix to the last_merge logic which solves a similar problem (I think... I would hope Jens gets time to look at this). I tested with 10 runs of contest read_load, io_load, list_load with no problems. Without these fixes it would never get through a single run of io_load so hopefully I'm on the right track. If you can't get it to oops, I'll send you a small patch on top of plain mm8 because it should fix a general problem in the code. If you can get it to oops, please tell me what load you're using, and IDE or SCSI. Thanks. block/deadline-iosched.c | 187 +++++++++++++++++++++++------------------------ 1 files changed, 93 insertions(+), 94 deletions(-) diff -puN drivers/block/deadline-iosched.c~ant-sched-9feb drivers/block/deadline-iosched.c --- 25/drivers/block/deadline-iosched.c~ant-sched-9feb 2003-02-09 19:36:25.000000000 -0800 +++ 25-akpm/drivers/block/deadline-iosched.c 2003-02-09 19:36:25.000000000 -0800 @@ -44,7 +44,7 @@ struct ant_stats { /* * max time before a read is submitted. */ -static int read_expire = HZ / 20; +static int read_expire = HZ / 10; /* * ditto for writes, these limits are not hard, even @@ -56,7 +56,7 @@ static int write_expire = 5 * HZ; * read_batch_expire describes how long we will allow a stream of reads to * persist before looking to see whether it is time to switch over to writes. 
*/ -static int read_batch_expire = HZ / 10; +static int read_batch_expire = HZ / 4; /* * write_batch_expire describes how long we will allow a stream of writes to @@ -67,7 +67,7 @@ static int write_batch_expire = HZ / 10; /* * max time we may wait to anticipate a read */ -static int antic_expire = HZ / 100; +static int antic_expire = HZ / 50; static const int deadline_hash_shift = 10; #define DL_HASH_BLOCK(sec) ((sec) >> 3) @@ -83,6 +83,11 @@ static const int deadline_hash_shift = 1 (dd)->hash_valid_count = 1; \ } while (0) +#define ANTIC_OFF 0 +#define ANTIC_WAIT 1 +#define ANTIC_TIMEOUT 2 +#define ANTIC_FOUND 3 + struct deadline_data { /* * run time data @@ -103,12 +108,11 @@ struct deadline_data { unsigned long current_check_fifo[2]; int batch_data_dir; /* current/last batch READ or WRITE */ - int anticipating; /* bool: anticipating a request */ - int antic_found; - unsigned long anticipate_start; /* jiffies: when it started */ + int antic_status; + unsigned long antic_start; /* jiffies: when it started */ struct timer_list antic_timer; /* anticipatory scheduling timer */ struct work_struct antic_work; /* anticipatory scheduling work */ - unsigned long anticipate_id; /* Identify the expected process */ + unsigned long current_id; /* Identify the expected process */ /* * settings that change how the i/o scheduler behaves @@ -524,6 +528,8 @@ deadline_move_request(struct deadline_da struct rb_node *rbprev = rb_prev(&drq->rb_node); struct deadline_rq *drq_next, *drq_prev; + BUG_ON(!ON_RB(&drq->rb_node)); + if (rbprev) drq_prev = rb_entry_drq(rbprev); else @@ -539,7 +545,7 @@ deadline_move_request(struct deadline_da if (data_dir == READ) /* In case we have to anticipate after this */ - dd->anticipate_id = drq->request_id; + dd->current_id = drq->request_id; /* * take it off the sort and fifo list, move @@ -601,12 +607,9 @@ static int deadline_queue_empty(request_ static void deadline_anticipate_work(void *data) { struct request_queue *q = data; - struct deadline_data 
*dd = q->elevator.elevator_data; unsigned long flags; spin_lock_irqsave(q->queue_lock, flags); - dd->anticipating = 0; - blk_remove_plug(q); if (!deadline_queue_empty(q)) q->request_fn(q); spin_unlock_irqrestore(q->queue_lock, flags); @@ -620,58 +623,47 @@ static void deadline_anticipate_timeout( { struct request_queue *q = (struct request_queue *)data; struct deadline_data *dd = q->elevator.elevator_data; + unsigned long flags; - dd->batch_data_dir = WRITE; + spin_lock_irqsave(q->queue_lock, flags); + + if (dd->antic_status != ANTIC_FOUND) + dd->antic_status = ANTIC_TIMEOUT; + + blk_remove_plug(q); schedule_work(&dd->antic_work); ant_stats.timeouts++; + + spin_unlock_irqrestore(q->queue_lock, flags); } -#define MAXBACK (512 * 1024) - /* - * deadline_close_req decides if one request is considered "close" to the next. - * The allowable distance between close requests is exponentially related to - * @factor. @factor is only useful between about 1 and 10, it is used to - * increase our tolerance of a useful request the longer we anticipate one. + * deadline_close_req decides if one request is considered "close" to the + * previous one issued. 
*/ static int -deadline_close_req(sector_t last, sector_t next, int factor) +deadline_close_req(struct deadline_data *dd, struct deadline_rq *drq) { - sector_t delta, backdelta; /* acceptable close offset (in sectors) */ - switch (factor) { - case 0: - case 1: - delta = 16; - break; - case 2: - delta = 64; - break; - case 3: - delta = 1024; - break; - case 4: - delta = 16 * 1024; - break; - case 5: - delta = 256 * 1024; - break; - case 6: - delta = 4 * 1024 * 1024; - break; - default: - return 1; - - } + unsigned long delay = jiffies - dd->antic_start; + sector_t last = dd->last_sector[dd->batch_data_dir]; + sector_t next = drq->request->sector; + + sector_t delta; /* acceptable close offset (in sectors) */ - backdelta = min_t(sector_t, MAXBACK, delta); + if (dd->antic_status == ANTIC_OFF || delay <= 3) + delta = 16; + else if (delay <= 10) + delta = 64 << ((delay - 3)*2); + else if (delay <= dd->antic_expire / 2) + delta = 1024 * 1024; + else + return 1; - /* - * Forward seeks are favoured to bias the elevator in the - * "upward" direction. This is questionable. 
- */ - return (last - backdelta <= next) && (next <= last + delta); + return (last <= next) && (next <= last + delta); } +#define MAXBACK (256 * 1024) + static struct deadline_rq * deadline_choose_req(struct deadline_data *dd, struct deadline_rq *drq1, struct deadline_rq *drq2) @@ -679,9 +671,6 @@ deadline_choose_req(struct deadline_data int data_dir; sector_t last, s1, s2, d1, d2; const sector_t maxback = MAXBACK; - sector_t highnum = 0; - highnum -= 1; - highnum /= 2; if (drq1 == NULL) return drq2; @@ -699,19 +688,19 @@ deadline_choose_req(struct deadline_data d1 = s1 - last; else { /* count large back seeks as a forward seek */ - if (s1+maxback >= last) - d1 = last - s1; + if (dd->current_id == drq1->request_id && s1+maxback >= last) + d1 = (last - s1)*2; else - d1 = highnum-last+s1; /* TODO should be (maxdisk-last)+s1; */ + d1 = (last - s1)*8; } if (s2 >= last) d2 = s2 - last; else { - if (s2+maxback >= last) - d2 = last - s2; + if (dd->current_id == drq2->request_id && s2+maxback >= last) + d2 = (last - s2)*2; else - d2 = highnum-last+s2; + d2 = (last - s2)*8; } if (d1 < d2) @@ -732,12 +721,8 @@ deadline_choose_req(struct deadline_data static int deadline_antic_req(struct deadline_data *dd, struct deadline_rq *drq) { - unsigned long delay = jiffies - dd->anticipate_start; - sector_t last = dd->last_sector[READ]; - sector_t next = drq->request->sector; - - if (deadline_close_req(last, next, delay) - || dd->anticipate_id == drq->request_id) + if (deadline_close_req(dd, drq) + || dd->current_id == drq->request_id) return 1; return 0; @@ -754,7 +739,7 @@ deadline_update_drq(struct deadline_data const int data_dir = rq_data_dir(drq->request); sector_t last = dd->last_sector[data_dir]; sector_t this = drq->request->sector; - unsigned long delay = jiffies - dd->anticipate_start; + unsigned long delay = jiffies - dd->antic_start; drq->request_id = request_id(); @@ -767,7 +752,7 @@ deadline_update_drq(struct deadline_data dd->next_drq[data_dir] = 
deadline_choose_req(dd, drq, dd->next_drq[data_dir]); /* have we been anticipating this request? */ - if (dd->anticipating && data_dir == READ && deadline_antic_req(dd, drq)) { + if (dd->antic_status != ANTIC_OFF && data_dir == READ && deadline_antic_req(dd, drq)) { long lba_offset; int neg; int log2; @@ -790,8 +775,9 @@ deadline_update_drq(struct deadline_data else ant_stats.lba_forward_offsets[log2]++; - dd->antic_found = 1; del_timer(&dd->antic_timer); + dd->antic_status = ANTIC_FOUND; + blk_remove_plug(drq->request->q); schedule_work(&dd->antic_work); } } @@ -808,6 +794,9 @@ static int deadline_dispatch_request(str const int reads = !list_empty(&dd->fifo_list[READ]); const int writes = !list_empty(&dd->fifo_list[WRITE]); + if (!(reads || writes)) + return 0; + if (deadline_batch_expired(dd)) { if (dd->batch_data_dir == READ) ant_stats.expired_read_batches++; @@ -821,33 +810,43 @@ static int deadline_dispatch_request(str */ drq = dd->next_drq[dd->batch_data_dir]; - if (dd->batch_data_dir == READ - && (!drq || !deadline_antic_req(dd, drq) || dd->antic_found)) { - unsigned long timeout; - + if (dd->batch_data_dir == READ && dd->antic_expire) { if (deadline_check_fifo(dd, READ)) { if (deadline_fifo_expired(dd, READ)) goto dispatch_request; dd->current_check_fifo[READ] = jiffies + - dd->fifo_expire[READ] / 2; + dd->fifo_expire[READ]; } - - timeout = min(jiffies + dd->antic_expire, - dd->current_batch_expires); - timeout = min(timeout, dd->current_check_fifo[READ]); - - ant_stats.anticipate_starts++; - blk_plug_device(q); - dd->anticipating = 1; - dd->antic_found = 0; - dd->anticipate_start = jiffies; - mod_timer(&dd->antic_timer, timeout); - return 0; + + if (dd->antic_status != ANTIC_FOUND + && (dd->antic_status == ANTIC_OFF || jiffies < dd->antic_start + dd->antic_expire) + && (!drq || !deadline_antic_req(dd, drq)) ) { + unsigned long timeout; + + if (dd->antic_status == ANTIC_OFF) { + ant_stats.anticipate_starts++; + dd->antic_start = jiffies; + } + timeout = 
min(dd->antic_start + dd->antic_expire, + dd->current_batch_expires); + timeout = min(timeout, dd->current_check_fifo[READ]); + mod_timer(&dd->antic_timer, timeout); + + dd->antic_status = ANTIC_WAIT; + blk_plug_device(q); + + return 0; + } + } - if (drq) + + if (drq) { /* we have a "next request" */ + if (reads && !writes) + dd->current_batch_expires = jiffies + dd->batch_expire[READ]; goto dispatch_request; + } } /* @@ -886,6 +885,7 @@ dispatch_writes: goto dispatch_request; } + BUG(); return 0; dispatch_request: @@ -893,9 +893,9 @@ dispatch_request: * check fifo if it is due */ if (deadline_check_fifo(dd, dd->batch_data_dir)) { - /* reset timer to check twice per expire interval */ + /* reset timer to check once per expire interval */ dd->current_check_fifo[dd->batch_data_dir] = jiffies + - dd->fifo_expire[dd->batch_data_dir] / 2; + dd->fifo_expire[dd->batch_data_dir]; if (deadline_fifo_expired(dd, dd->batch_data_dir)) /* An expired request exists - satisfy it */ @@ -906,6 +906,7 @@ dispatch_request: /* * drq is the selected appropriate request. */ + dd->antic_status = ANTIC_OFF; deadline_move_request(dd, drq); return 1; } @@ -948,8 +949,10 @@ deadline_insert_request(request_queue_t list_add(&rq->queuelist, insert_here); - if (dd->anticipating) { + if (rq_data_dir(rq) == READ && dd->antic_status != ANTIC_OFF) { del_timer(&dd->antic_timer); + dd->antic_status = ANTIC_FOUND; + blk_remove_plug(q); schedule_work(&dd->antic_work); } @@ -992,11 +995,7 @@ static int deadline_queue_notready(reque if (!list_empty(dd->dispatch)) return 0; - if (dd->anticipating) - return 1; - - if (list_empty(&dd->fifo_list[WRITE]) - && list_empty(&dd->fifo_list[READ]) ) + if (dd->antic_status == ANTIC_WAIT) return 1; if (!deadline_dispatch_request(q)) _