Patch from: Nick Piggin Fix up naming and comments, and properly separates the as_remove and __as_remove functions which is much more straightforward I think, and allows conditions on them to be tightened and more BUGs added. If requests are getting lost, it's quite likely that it's here. drivers/block/as-iosched.c | 199 +++++++++++++++++++++++++++++---------------- 1 files changed, 129 insertions(+), 70 deletions(-) diff -puN drivers/block/as-iosched.c~as-naming-comments-BUG drivers/block/as-iosched.c --- 25/drivers/block/as-iosched.c~as-naming-comments-BUG 2003-03-03 02:10:24.000000000 -0800 +++ 25-akpm/drivers/block/as-iosched.c 2003-03-03 02:10:24.000000000 -0800 @@ -79,29 +79,29 @@ static unsigned long write_batch_expire static unsigned long antic_expire = HZ / 100; #define ANTIC_OFF 0 /* Not anticipating (normal operation) */ -#define ANTIC_WAIT 1 /* Currently anticipating a request */ -#define ANTIC_FINISHED 2 /* Anticipating but have found a candidate +#define ANTIC_WAIT_REQ 1 /* The last read has not yet completed */ +#define ANTIC_WAIT_NEXT 2 /* Currently anticipating a request vs + last read (which has completed) */ +#define ANTIC_FINISHED 3 /* Anticipating but have found a candidate or timed out */ - - /* * This is the per-process anticipatory I/O scheduler state. It is refcounted * and kmalloc'ed. - * - * At present it is merely used to determine whether the task is still running. 
*/ struct as_io_context { atomic_t refcount; pid_t pid; unsigned long state; - unsigned long nr_requests; /* outstanding reads & sync writes */ + unsigned long nr_queued; /* queued reads & sync writes */ + unsigned long nr_dispatched; /* number of requests gone to the driver */ }; /* Bits in as_io_context.state */ enum as_io_states { AS_TASK_RUNNING=0, /* Process has not exitted */ + AS_REQ_FINISHED, /* Set in ad->as_io_context upon completion */ }; struct as_data { @@ -217,7 +217,8 @@ static struct as_io_context *get_as_io_c atomic_set(&ret->refcount, 1); ret->pid = tsk->pid; ret->state = 1 << AS_TASK_RUNNING; - ret->nr_requests = 0; + ret->nr_queued = 0; + ret->nr_dispatched = 0; tsk->as_io_context = ret; } } @@ -448,6 +449,23 @@ as_find_arq_rb(struct as_data *ad, secto return NULL; } +static void as_antic_waitnext(struct as_data *ad); + +static void as_complete_arq(struct as_data *ad, struct as_rq *arq) +{ + set_bit(AS_REQ_FINISHED, &arq->as_io_context->state); + + if (ad->as_io_context == arq->as_io_context) { + ad->antic_start = jiffies; + if (ad->antic_status == ANTIC_WAIT_REQ) + /* We were waiting on this request, now anticipate the + * next one */ + as_antic_waitnext(ad); + } + + put_as_io_context(&arq->as_io_context); +} + static void as_update_arq(struct as_data *ad, struct as_rq *arq); /* @@ -458,7 +476,7 @@ static void as_add_request(struct as_dat const int data_dir = rq_data_dir(arq->request); arq->as_io_context = get_as_io_context(); - arq->as_io_context->nr_requests++; + arq->as_io_context->nr_queued++; as_add_arq_rb(ad, arq); @@ -472,18 +490,24 @@ static void as_add_request(struct as_dat } /* - * __as_remove_request removes a request from the elevator without updating - * refcountss etc. It is expected the caller will replace the request at some - * part of the elevator, also without updating refcounts. + * as_remove_queued_request removes a request from the pre dispatch queue + * without updating refcounts. 
It is expected the caller will drop the + * reference unless it replaces the request at somepart of the elevator + * (ie. the dispatch queue) */ -static void __as_remove_request(request_queue_t *q, struct request *rq) +static void as_remove_queued_request(request_queue_t *q, struct request *rq) { struct as_rq *arq = RQ_DATA(rq); - if (arq) { + if (!arq) + BUG(); + else { const int data_dir = rq_data_dir(arq->request); struct as_data *ad = q->elevator.elevator_data; + BUG_ON(arq->as_io_context->nr_queued == 0); + arq->as_io_context->nr_queued--; + /* * Update the "next_arq" cache if we are about to remove its * entry @@ -494,26 +518,37 @@ static void __as_remove_request(request_ list_del_init(&arq->fifo); as_del_arq_hash(arq); as_del_arq_rb(ad, arq); - } - if (q->last_merge == &rq->queuelist) - q->last_merge = NULL; + if (q->last_merge == &rq->queuelist) + q->last_merge = NULL; - list_del_init(&rq->queuelist); + list_del_init(&rq->queuelist); + } } /* - * Removes a request at any stage of its way through the elevator. Fixes - * up everything. + * as_remove_dispatched_request is called when a driver has completed the + * request (or it has caused an error), and is finished with it. It assumes + * the request is on the dispatch queue. 
*/ -static void as_remove_request(request_queue_t *q, struct request *rq) +static void as_remove_dispatched_request(request_queue_t *q, struct request *rq) { struct as_rq *arq = RQ_DATA(rq); + struct as_data *ad = q->elevator.elevator_data; + + if (q->last_merge == &rq->queuelist) + q->last_merge = NULL; - __as_remove_request(q, rq); + list_del_init(&rq->queuelist); - put_as_io_context(&arq->as_io_context); + if (arq) { + BUG_ON(ON_RB(&arq->rb_node)); + BUG_ON(arq->as_io_context->nr_dispatched == 0); + arq->as_io_context->nr_dispatched--; + + as_complete_arq(ad, arq); + } } static int @@ -640,25 +675,28 @@ as_merged_requests(request_queue_t *q, s /* * kill knowledge of next, this one is a goner */ - as_remove_request(q, next); - if (anext->as_io_context) { - BUG_ON(anext->as_io_context->nr_requests == 0); - anext->as_io_context->nr_requests--; - } + as_remove_queued_request(q, next); + if (anext->as_io_context) + put_as_io_context(&anext->as_io_context); } +static void as_antic_stop(struct as_data *ad); + /* * move an entry to dispatch queue */ static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq) { const int data_dir = rq_data_dir(arq->request); - + BUG_ON(!ON_RB(&arq->rb_node)); + as_antic_stop(ad); + ad->antic_status = ANTIC_OFF; + /* * This has to be set in order to be correctly updated by - * __as_remove_request + * as_find_next_arq */ ad->last_sector[data_dir] = arq->request->sector + arq->request->nr_sectors; @@ -671,12 +709,11 @@ static void as_move_to_dispatch(struct a ad->next_arq[data_dir] = as_find_next_arq(ad, arq); /* - * take it off the sort and fifo list, move to dispatch queue + * take it off the sort and fifo list, add to dispatch queue */ - __as_remove_request(ad->q, arq->request); + as_remove_queued_request(ad->q, arq->request); list_add_tail(&arq->request->queuelist, ad->dispatch); - BUG_ON(arq->as_io_context->nr_requests == 0); - arq->as_io_context->nr_requests--; + arq->as_io_context->nr_dispatched++; } #define 
list_entry_fifo(ptr) list_entry((ptr), struct as_rq, fifo) @@ -739,10 +776,10 @@ static inline int as_batch_expired(struc static int as_queue_empty(request_queue_t *q); /* - * as_anticipate_work is scheduled by as_anticipate_timeout. It + * as_antic_work is scheduled by as_antic_timeout. It * stops anticipation, ie. resumes dispatching requests to a device. */ -static void as_anticipate_work(void *data) +static void as_antic_work(void *data) { struct request_queue *q = data; unsigned long flags; @@ -753,14 +790,26 @@ static void as_anticipate_work(void *dat spin_unlock_irqrestore(q->queue_lock, flags); } -static void as_start_anticipation(struct as_data *ad) +static void as_antic_waitreq(struct as_data *ad) { - unsigned long timeout; - + BUG_ON(ad->antic_status == ANTIC_FINISHED); if (ad->antic_status == ANTIC_OFF) { ant_stats.anticipate_starts++; - ad->antic_start = jiffies; + + if (test_bit(AS_REQ_FINISHED, &ad->as_io_context->state)) + as_antic_waitnext(ad); + else + ad->antic_status = ANTIC_WAIT_REQ; } +} + +static void as_antic_waitnext(struct as_data *ad) +{ + unsigned long timeout; + + BUG_ON(ad->antic_status != ANTIC_OFF + && ad->antic_status != ANTIC_WAIT_REQ); + timeout = ad->antic_start + ad->antic_expire; #if 0 /* FIX THIS!!! 
*/ @@ -768,36 +817,36 @@ static void as_start_anticipation(struct #endif mod_timer(&ad->antic_timer, timeout); - ad->antic_status = ANTIC_WAIT; + ad->antic_status = ANTIC_WAIT_NEXT; } -static void as_stop_anticipation_notimer(struct as_data *ad) +static void as_antic_stop_notimer(struct as_data *ad) { - if (ad->antic_status == ANTIC_WAIT) + if (ad->antic_status == ANTIC_WAIT_REQ || ad->antic_status == ANTIC_WAIT_NEXT) schedule_work(&ad->antic_work); ad->antic_status = ANTIC_FINISHED; } -static void as_stop_anticipation(struct as_data *ad) +static void as_antic_stop(struct as_data *ad) { - if (ad->antic_status == ANTIC_WAIT) + if (ad->antic_status == ANTIC_WAIT_NEXT) del_timer(&ad->antic_timer); - as_stop_anticipation_notimer(ad); + as_antic_stop_notimer(ad); } /* - * as_anticipate_timeout is the timer function set by - * as_start_anticipation. + * as_antic_timeout is the timer function set by + * as_antic_waitnext. */ -static void as_anticipate_timeout(unsigned long data) +static void as_antic_timeout(unsigned long data) { struct request_queue *q = (struct request_queue *)data; struct as_data *ad = q->elevator.elevator_data; unsigned long flags; spin_lock_irqsave(q->queue_lock, flags); - as_stop_anticipation_notimer(ad); + as_antic_stop_notimer(ad); ant_stats.timeouts++; spin_unlock_irqrestore(q->queue_lock, flags); } @@ -814,8 +863,12 @@ as_close_req(struct as_data *ad, struct sector_t next = arq->request->sector; sector_t delta; /* acceptable close offset (in sectors) */ - delay = ((jiffies - ad->antic_start) * 1000) / HZ; - if (ad->antic_status == ANTIC_OFF || delay <= 1) + if (ad->antic_status == ANTIC_OFF || ad->antic_status == ANTIC_WAIT_REQ) + delay = 0; + else + delay = ((jiffies - ad->antic_start) * 1000) / HZ; + + if (delay <= 1) delta = 32; else if (delay <= 20 && delay <= ad->antic_expire / 2) delta = 32 << (delay-1); @@ -860,7 +913,7 @@ static int as_can_break_anticipation(str return 1; } - if (aic && aic->nr_requests > 0) { + if (aic && aic->nr_queued 
> 0) { ant_stats.queued_request++; return 1; } @@ -894,11 +947,16 @@ static void as_update_arq(struct as_data && as_can_break_anticipation(ad, arq)) { sector_t last = ad->last_sector[data_dir]; sector_t this = arq->request->sector; - unsigned long delay = jiffies - ad->antic_start; + unsigned long delay; long lba_offset; int neg; int log2; + if (ad->antic_status == ANTIC_WAIT_REQ) + delay = 0; + else + delay = jiffies - ad->antic_start; + if (data_dir == READ) { if (delay >= ARRAY_SIZE(ant_stats.ant_delay_hist)) delay = ARRAY_SIZE(ant_stats.ant_delay_hist)-1; @@ -919,7 +977,7 @@ static void as_update_arq(struct as_data ant_stats.lba_forward_offsets[log2]++; } - as_stop_anticipation(ad); + as_antic_stop(ad); } } @@ -934,13 +992,13 @@ static int as_can_anticipate(struct as_d * Don't restart if we have just finished. Run the next request */ return 0; - - if (ad->antic_status == ANTIC_WAIT && as_antic_expired(ad)) { + + if (ad->antic_status == ANTIC_WAIT_NEXT && as_antic_expired(ad)) { /* - * In this situation status should really be FINISHED, however - * the timer hasn't had the chance to run yet. + * In this situation status should really be FINISHED, + * however the timer hasn't had the chance to run yet. */ - as_stop_anticipation(ad); + as_antic_stop(ad); return 0; } @@ -954,8 +1012,9 @@ static int as_can_anticipate(struct as_d /* * OK from here, we haven't finished, haven't timed out, and don't * have a decent request! - * status is either ANTIC_WAIT or ANTIC_OFF. So we'll either keep - * waiting or start waiting respectively. + * Status can be: ANTIC_OFF so start waiting, + * ANTIC_WAIT_REQ so continue to wait for request to complete, + * ANTIC_WAIT_NEXT so continue to wait for timeout or suitable request. 
*/ return 1; @@ -1074,7 +1133,7 @@ static int as_dispatch_request(struct as goto fifo_expired; if (as_can_anticipate(ad, arq)) { - as_start_anticipation(ad); + as_antic_waitreq(ad); return 0; } } @@ -1146,7 +1205,6 @@ fifo_expired: /* * arq is the selected appropriate request. */ - ad->antic_status = ANTIC_OFF; as_move_to_dispatch(ad, arq); return 1; @@ -1184,8 +1242,9 @@ as_insert_request(request_queue_t *q, st list_add(&rq->queuelist, insert_here); if (!list_empty(ad->dispatch) && rq_data_dir(rq) == READ - && ad->antic_status == ANTIC_WAIT) - as_stop_anticipation(ad); + && (ad->antic_status == ANTIC_WAIT_REQ + || ad->antic_status == ANTIC_WAIT_NEXT)) + as_antic_stop(ad); return; } @@ -1226,7 +1285,7 @@ static int as_queue_notready(request_que if (!list_empty(ad->dispatch)) return 0; - if (ad->antic_status == ANTIC_WAIT) + if (ad->antic_status == ANTIC_WAIT_REQ || ad->antic_status == ANTIC_WAIT_NEXT) return 1; if (!as_dispatch_request(ad)) @@ -1317,10 +1376,10 @@ static int as_init(request_queue_t *q, e } /* anticipatory scheduling helpers */ - ad->antic_timer.function = as_anticipate_timeout; + ad->antic_timer.function = as_antic_timeout; ad->antic_timer.data = (unsigned long)q; init_timer(&ad->antic_timer); - INIT_WORK(&ad->antic_work, as_anticipate_work, q); + INIT_WORK(&ad->antic_work, as_antic_work, q); for (i = 0; i < AS_HASH_ENTRIES; i++) INIT_LIST_HEAD(&ad->hash[i]); @@ -1521,7 +1580,7 @@ elevator_t iosched_as = { .elevator_merge_req_fn = as_merged_requests, .elevator_next_req_fn = as_next_request, .elevator_add_req_fn = as_insert_request, - .elevator_remove_req_fn = as_remove_request, + .elevator_remove_req_fn = as_remove_dispatched_request, .elevator_queue_empty_fn = as_queue_notready, .elevator_former_req_fn = as_former_request, .elevator_latter_req_fn = as_latter_request, _