Patch from Nick Piggin

* The queued read logic is changed to queued read or sync write logic.
  (not a bugfix)

* Fixed refcount leak due to copy_as_io_context being used in
  as_merged_requests - now uses swap_as_io_context instead (a stand-alone
  sketch of the refcounting appears after the patch)

* Fixed refcounting to keep hold of the io context the whole way through
  the cycle, until data is returned from the request.  Previously it would
  be dropped once the request left the rbtree for the dispatch list.  (not
  a bugfix but cleaner and required for thinktime)

* Fixed a bug in as_remove_request: if there was only one read on the
  queue, next_arq would not be cleared to NULL when the last request moved
  to dispatch.  It would instead be set to NULL when the request
  completed, regardless of new requests on the list, causing
  as_dispatch_request to forget the batch it should have been running.
  This bug seems to have been the cause of the bad 0-readahead performance
  I was seeing.  Fixing it might also help the read unfairness, and the
  bug could probably cause crashes with TCQ disks.

* Elevator algorithm (as_choose_req) correctness fix.  (not really a
  bugfix; a sketch of the new decision order appears after the patch)


 drivers/block/as-iosched.c |  184 ++++++++++++++++++++++++++++-----------------
 1 files changed, 116 insertions(+), 68 deletions(-)

diff -puN drivers/block/as-iosched.c~as-random-fixes drivers/block/as-iosched.c
--- 25/drivers/block/as-iosched.c~as-random-fixes	2003-03-01 01:51:56.000000000 -0800
+++ 25-akpm/drivers/block/as-iosched.c	2003-03-01 01:51:56.000000000 -0800
@@ -35,7 +35,7 @@ struct ant_stats {
         int matching_ids;
         int broken_by_write;
         int exitted_tasks;
-        int queued_read;
+        int queued_request;
 
         int ant_delay_hist[100];        /* milliseconds */
 
@@ -96,7 +96,7 @@ struct as_io_context {
         atomic_t refcount;
         pid_t pid;
         unsigned long state;
-        unsigned long nr_reads; /* outstanding reads */
+        unsigned long nr_requests; /* outstanding reads & sync writes */
 };
 
 /* Bits in as_io_context.state */
@@ -217,7 +217,7 @@ static struct as_io_context *get_as_io_c
                 atomic_set(&ret->refcount, 1);
                 ret->pid = tsk->pid;
                 ret->state = 1 << AS_TASK_RUNNING;
-                ret->nr_reads = 0;
+                ret->nr_requests = 0;
                 tsk->as_io_context = ret;
         }
 }
@@ -238,6 +238,15 @@ copy_as_io_context(struct as_io_context
         }
 }
 
+static void
+swap_as_io_context(struct as_io_context **aic1, struct as_io_context **aic2)
+{
+        struct as_io_context *temp;
+        temp = *aic1;
+        *aic1 = *aic2;
+        *aic2 = temp;
+}
+
 /*
  * the back merge hash support functions
  */
@@ -334,6 +343,40 @@ static struct as_rq *as_find_first_arq(s
         }
 }
 
+static struct as_rq *
+as_choose_req(struct as_data *ad, struct as_rq *arq1, struct as_rq *arq2);
+
+/*
+ * as_find_next_arq finds the next request after @last in elevator order.
+ */
+static struct as_rq *as_find_next_arq(struct as_data *ad, struct as_rq *last)
+{
+        const int data_dir = rq_data_dir(last->request);
+        struct as_rq *ret;
+        struct rb_node *rbnext = rb_next(&last->rb_node);
+        struct rb_node *rbprev = rb_prev(&last->rb_node);
+        struct as_rq *arq_next, *arq_prev;
+
+        BUG_ON(!ON_RB(&last->rb_node));
+
+        if (rbprev)
+                arq_prev = rb_entry_arq(rbprev);
+        else
+                arq_prev = NULL;
+
+        if (rbnext)
+                arq_next = rb_entry_arq(rbnext);
+        else {
+                arq_next = as_find_first_arq(ad, data_dir);
+                if (arq_next == last)
+                        arq_next = NULL;
+        }
+
+        ret = as_choose_req(ad, arq_next, arq_prev);
+
+        return ret;
+}
+
 static struct as_rq *__as_add_arq_rb(struct as_data *ad, struct as_rq *arq)
 {
         struct rb_node **p = &ARQ_RB_ROOT(ad, arq)->rb_node;
@@ -415,8 +458,7 @@ static void as_add_request(struct as_dat
         const int data_dir = rq_data_dir(arq->request);
 
         arq->as_io_context = get_as_io_context();
-        if (data_dir == READ)
-                arq->as_io_context->nr_reads++;
+        arq->as_io_context->nr_requests++;
 
         as_add_arq_rb(ad, arq);
 
@@ -429,56 +471,49 @@ static void as_add_request(struct as_dat
         list_add_tail(&arq->fifo, &ad->fifo_list[data_dir]);
 }
 
-static struct as_rq *
-as_choose_req(struct as_data *ad, struct as_rq *arq1, struct as_rq *arq2);
-
 /*
- * remove rq from rbtree, fifo, and hash
+ * __as_remove_request removes a request from the elevator without updating
+ * statistics etc. It is expected the caller will replace the request at some
+ * part of the elevator, also without updating statistics.
  */
-static void as_remove_request(request_queue_t *q, struct request *rq)
+static void __as_remove_request(request_queue_t *q, struct request *rq)
 {
         struct as_rq *arq = RQ_DATA(rq);
-        const int data_dir = rq_data_dir(arq->request);
 
         if (arq) {
+                const int data_dir = rq_data_dir(arq->request);
                 struct as_data *ad = q->elevator.elevator_data;
 
-                list_del_init(&arq->fifo);
-                as_del_arq_hash(arq);
-
                 /*
-                 * Update the "next_arq" cache as we are about to remove its
+                 * Update the "next_arq" cache if we are about to remove its
                  * entry
                  */
-                if (ad->next_arq[data_dir] == arq) {
-                        struct rb_node *rbnext = rb_next(&arq->rb_node);
-                        struct rb_node *rbprev = rb_prev(&arq->rb_node);
-                        struct as_rq *arq_next, *arq_prev;
-
-                        if (rbprev)
-                                arq_prev = rb_entry_arq(rbprev);
-                        else
-                                arq_prev = NULL;
-
-                        if (rbnext)
-                                arq_next = rb_entry_arq(rbnext);
-                        else
-                                arq_next = as_find_first_arq(ad, data_dir);
-
-                        ad->next_arq[data_dir] = as_choose_req(ad,
-                                                        arq_next, arq_prev);
-                }
-                if (ON_RB(&arq->rb_node)) {
-                        if (data_dir == READ)
-                                arq->as_io_context->nr_reads--;
-                        as_del_arq_rb(ad, arq);
-                }
+                if (ad->next_arq[data_dir] == arq)
+                        ad->next_arq[data_dir] = as_find_next_arq(ad, arq);
+
+                list_del_init(&arq->fifo);
+                as_del_arq_hash(arq);
+                as_del_arq_rb(ad, arq);
         }
 
         if (q->last_merge == &rq->queuelist)
                 q->last_merge = NULL;
 
         list_del_init(&rq->queuelist);
+
+}
+
+/*
+ * Removes a request at any stage of its way through the elevator. Fixes
+ * up everything.
+ */
+static void as_remove_request(request_queue_t *q, struct request *rq)
+{
+        struct as_rq *arq = RQ_DATA(rq);
+
+        __as_remove_request(q, rq);
+
+        put_as_io_context(&arq->as_io_context);
 }
 
 static int
@@ -593,7 +628,11 @@ as_merged_requests(request_queue_t *q, s
                 if (time_before(anext->expires, arq->expires)) {
                         list_move(&arq->fifo, &anext->fifo);
                         arq->expires = anext->expires;
-                        copy_as_io_context(&arq->as_io_context,
+                        /*
+                         * Don't copy here but swap, because when anext is
+                         * removed below, it must contain the unused context
+                         */
+                        swap_as_io_context(&arq->as_io_context,
                                 &anext->as_io_context);
                 }
         }
@@ -602,6 +641,10 @@ as_merged_requests(request_queue_t *q, s
          * kill knowledge of next, this one is a goner
          */
         as_remove_request(q, next);
+        if (anext->as_io_context) {
+                BUG_ON(anext->as_io_context->nr_requests == 0);
+                anext->as_io_context->nr_requests--;
+        }
 }
 
 /*
@@ -613,6 +656,10 @@ static void as_move_to_dispatch(struct a
 
         BUG_ON(!ON_RB(&arq->rb_node));
 
+        /*
+         * This has to be set in order to be correctly updated by
+         * __as_remove_request
+         */
         ad->last_sector[data_dir] = arq->request->sector
                                         + arq->request->nr_sectors;
 
@@ -620,14 +667,16 @@ static void as_move_to_dispatch(struct a
                 /* In case we have to anticipate after this */
                 copy_as_io_context(&ad->as_io_context, &arq->as_io_context);
         }
-
+
+        ad->next_arq[data_dir] = as_find_next_arq(ad, arq);
+
         /*
-         * take it off the sort and fifo list, move
-         * to dispatch queue
+         * take it off the sort and fifo list, move to dispatch queue
         */
-        as_remove_request(ad->q, arq->request);
-        put_as_io_context(&arq->as_io_context);
+        __as_remove_request(ad->q, arq->request);
         list_add_tail(&arq->request->queuelist, ad->dispatch);
+        BUG_ON(arq->as_io_context->nr_requests == 0);
+        arq->as_io_context->nr_requests--;
 }
 
 #define list_entry_fifo(ptr)        list_entry((ptr), struct as_rq, fifo)
@@ -811,9 +860,9 @@ static int as_can_break_anticipation(str
                 return 1;
         }
 
-        if (aic && aic->nr_reads > 0) {
-                ant_stats.queued_read++;
-//                return 1;
+        if (aic && aic->nr_requests > 0) {
+                ant_stats.queued_request++;
+                return 1;
         }
 
         return 0;
@@ -923,6 +972,7 @@ as_choose_req(struct as_data *ad, struct
 {
         int data_dir;
         sector_t last, s1, s2, d1, d2;
+        int r1_wrap=0, r2_wrap=0;        /* requests are behind the disk head */
         const sector_t maxback = MAXBACK;
 
         if (arq1 == NULL || arq1 == arq2)
@@ -931,6 +981,7 @@ as_choose_req(struct as_data *ad, struct
                 return arq1;
 
         data_dir = rq_data_dir(arq1->request);
+
         last = ad->last_sector[data_dir];
         s1 = arq1->request->sector;
         s2 = arq2->request->sector;
@@ -949,8 +1000,10 @@ as_choose_req(struct as_data *ad, struct
                 && ad->as_io_context == arq1->as_io_context
                 && s1+maxback >= last)
                         d1 = (last - s1)*2;
-        else
-                goto elevator_wrap;
+        else {
+                r1_wrap = 1;
+                d1 = 0; /* shut up, gcc */
+        }
 
         if (s2 >= last)
                 d2 = s2 - last;
@@ -958,35 +1011,31 @@ as_choose_req(struct as_data *ad, struct
                 && ad->as_io_context == arq2->as_io_context
                 && s2+maxback >= last)
                         d2 = (last - s2)*2;
-        else
-                goto elevator_wrap;
+        else {
+                r2_wrap = 1;
+                d2 = 0;
+        }
 
-        /* Found the deltas */
-        if (d1 < d2)
+        /* Found required data */
+        if (!r1_wrap && r2_wrap)
                 return arq1;
-        else if (d2 < d1)
+        else if (!r2_wrap && r1_wrap)
                 return arq2;
-        else {
-                if (s1 >= s2)
+        else if (r1_wrap && r2_wrap) {
+                /* both behind the head */
+                if (s1 <= s2)
                         return arq1;
                 else
                         return arq2;
         }
-elevator_wrap:
-        /*
-         * a request is behind the disk head, in most
-         * cases this is treated as the elevator wrapping
-         * to the start, so a lower valued request (further away)
-         * is favourable
-         */
-        if (s1 >= last && s2 < last)
+        /* Both requests in front of the head */
+        if (d1 < d2)
                 return arq1;
-        else if (s2 >= last && s1 < last)
+        else if (d2 < d1)
                 return arq2;
         else {
-                /* both behind the head */
-                if (s1 <= s2)
+                if (s1 >= s2)
                         return arq1;
                 else
                         return arq2;
         }
@@ -1026,7 +1075,6 @@ static int as_dispatch_request(struct as
 
                 if (as_can_anticipate(ad, arq)) {
                         as_start_anticipation(ad);
-
                         return 0;
                 }
         }
_
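
To make the refcounting behind the swap_as_io_context change concrete, here
is a stand-alone userspace sketch; it is an illustration, not kernel code.
A plain int stands in for atomic_t, alloc_io_context and main are
illustration-only helpers, and the request plumbing is omitted.  The point:
the old remove path never put the context of a request that was merged
away, so the extra reference taken by copy_as_io_context could never be
dropped.  Swapping takes no extra reference, and the single put in the new
as_remove_request frees exactly the context that lost its owner.

#include <stdio.h>
#include <stdlib.h>

struct as_io_context {
        int refcount;                /* plain int standing in for atomic_t */
};

static struct as_io_context *alloc_io_context(void)
{
        struct as_io_context *aic = malloc(sizeof(*aic));
        if (!aic)
                abort();
        aic->refcount = 1;
        return aic;
}

static void put_as_io_context(struct as_io_context **paic)
{
        struct as_io_context *aic = *paic;
        if (aic && --aic->refcount == 0) {
                printf("freeing context %p\n", (void *)aic);
                free(aic);
        }
        *paic = NULL;
}

/* Swap semantics, as in the patch: no reference counts change at all. */
static void swap_as_io_context(struct as_io_context **aic1,
                               struct as_io_context **aic2)
{
        struct as_io_context *temp = *aic1;
        *aic1 = *aic2;
        *aic2 = temp;
}

int main(void)
{
        struct as_io_context *arq_aic = alloc_io_context();   /* survives merge */
        struct as_io_context *anext_aic = alloc_io_context(); /* merged away */

        /*
         * as_merged_requests: the surviving request adopts the merged
         * request's context.  Copying here would bump anext's context to
         * refcount 2, and since the old code never put a merged-away
         * request's context, one reference would leak.  Swapping leaves
         * anext holding the survivor's old, now-unused context instead:
         */
        swap_as_io_context(&arq_aic, &anext_aic);

        /* the new as_remove_request puts the merged-away request's context */
        put_as_io_context(&anext_aic);

        /* later, dispatch/completion puts the surviving request's context */
        put_as_io_context(&arq_aic);
        return 0;
}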
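
Similarly, a minimal sketch of the new as_choose_req decision order,
assuming bare sector numbers.  choose_first is an illustration-only helper;
it leaves out the maxback backward-seek allowance and the io context check
that the real function applies before declaring a request "wrapped".

#include <stdio.h>

typedef unsigned long long sector_t;

/*
 * Return 1 if the request at sector s1 should be chosen over the one at
 * s2, given the head position `last`.  Mirrors the fixed ordering: a
 * request in front of the head always beats a wrapped one; among front
 * requests the shorter forward seek wins; when both have wrapped, the
 * elevator restarts at the lower sector.
 */
static int choose_first(sector_t last, sector_t s1, sector_t s2)
{
        int r1_wrap = s1 < last;
        int r2_wrap = s2 < last;

        if (!r1_wrap && r2_wrap)
                return 1;
        if (!r2_wrap && r1_wrap)
                return 0;
        if (r1_wrap && r2_wrap)                /* both behind the head */
                return s1 <= s2;
        return s1 - last <= s2 - last;        /* both in front: nearer wins */
}

int main(void)
{
        printf("%d\n", choose_first(100, 150, 400)); /* 1: nearer forward seek */
        printf("%d\n", choose_first(100, 400, 50));  /* 1: in front beats wrapped */
        printf("%d\n", choose_first(100, 20, 80));   /* 1: both wrapped, lower wins */
        return 0;
}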