From: Jens Axboe This update: - Removes some debugging code and the dprintk() stuff - Detects when tagging is used or not, defines a threshold of CFQ_MAX_TAG (default: 4) for when we change from non-tagged to tagged operations. This affects accounting. - Fix a bug where the rr_list was sorted badly. Also make sure we always keep it sorted, and add some logic to detect when to re-sort. The bad sorting could trigger a harmless WARN_ON at line 428. - Don't set ->service_start when queue is removed from rr_list; this could starve a queue if it was quickly re-added to the rr_list again. In that case, we would never hit the 1-second interval for aging the service_used down. - Cache jiffies in some functions instead of re-reading it 2-3 times. Signed-off-by: Jens Axboe Signed-off-by: Andrew Morton --- 25-akpm/drivers/block/cfq-iosched.c | 177 +++++++++++++++++++++--------------- 1 files changed, 104 insertions(+), 73 deletions(-) diff -puN drivers/block/cfq-iosched.c~cfq-v2-update drivers/block/cfq-iosched.c --- 25/drivers/block/cfq-iosched.c~cfq-v2-update Tue Sep 14 16:17:24 2004 +++ 25-akpm/drivers/block/cfq-iosched.c Tue Sep 14 16:17:24 2004 @@ -22,14 +22,6 @@ #include #include -#undef CFQ_DEBUG - -#ifdef CFQ_DEBUG -#define dprintk(fmt, args...) printk(KERN_ERR "cfq: " fmt, ##args) -#else -#define dprintk(fmt, args...) 
-#endif - static unsigned long max_elapsed_crq; static unsigned long max_elapsed_dispatch; @@ -84,6 +76,11 @@ static int cfq_back_penalty = 2; /* pena #define rq_rb_key(rq) (rq)->sector /* + * threshold for switching off non-tag accounting + */ +#define CFQ_MAX_TAG (4) + +/* * sort key types and names */ enum { @@ -125,18 +122,21 @@ struct cfq_data { sector_t last_sector; + int rq_in_driver; + /* * tunables, see top of file */ unsigned int cfq_quantum; unsigned int cfq_queued; - unsigned int cfq_tagged; unsigned int cfq_fifo_expire_r; unsigned int cfq_fifo_expire_w; unsigned int cfq_fifo_batch_expire; unsigned int cfq_back_penalty; unsigned int cfq_back_max; unsigned int find_best_crq; + + unsigned int cfq_tagged; }; struct cfq_queue { @@ -170,14 +170,12 @@ struct cfq_queue { unsigned long service_start; unsigned long service_used; + unsigned int max_rate; + /* number of requests that have been handed to the driver */ int in_flight; /* number of currently allocated requests */ int alloc_limit[2]; - -#ifdef CFQ_DEBUG - char name[16]; -#endif }; struct cfq_rq { @@ -404,11 +402,42 @@ static void cfq_update_next_crq(struct c cfqq->next_crq = cfq_find_next_crq(cfqq->cfqd, cfqq, crq); } -static inline void -cfq_sort_rr_list(struct cfq_queue *cfqq) +static int cfq_check_sort_rr_list(struct cfq_queue *cfqq) +{ + struct list_head *head = &cfqq->cfqd->rr_list; + struct list_head *next, *prev; + + /* + * list might still be ordered + */ + next = cfqq->cfq_list.next; + if (next != head) { + struct cfq_queue *cnext = list_entry_cfqq(next); + + if (cfqq->service_used > cnext->service_used) + return 1; + } + + prev = cfqq->cfq_list.prev; + if (prev != head) { + struct cfq_queue *cprev = list_entry_cfqq(prev); + + if (cfqq->service_used < cprev->service_used) + return 1; + } + + return 0; +} + +static void cfq_sort_rr_list(struct cfq_queue *cfqq, int new_queue) { struct list_head *entry = &cfqq->cfqd->rr_list; + if (!cfqq->on_rr) + return; + if (!new_queue && 
!cfq_check_sort_rr_list(cfqq)) + return; + list_del(&cfqq->cfq_list); /* @@ -446,21 +475,16 @@ cfq_sort_rr_list(struct cfq_queue *cfqq) static inline void cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) { - BUG_ON(cfqq->on_rr); - /* * it's currently on the empty list */ - cfq_sort_rr_list(cfqq); cfqq->on_rr = 1; cfqd->busy_queues++; - /* - * if the queue is on the empty_list, service_start was the time - * where it was deleted from the rr_list. - */ if (time_after(jiffies, cfqq->service_start + cfq_service)) cfqq->service_used >>= 3; + + cfq_sort_rr_list(cfqq, 1); } static inline void @@ -468,7 +492,6 @@ cfq_del_cfqq_rr(struct cfq_data *cfqd, s { list_move(&cfqq->cfq_list, &cfqd->empty_list); cfqq->on_rr = 0; - cfqq->service_start = jiffies; BUG_ON(!cfqd->busy_queues); cfqd->busy_queues--; @@ -492,10 +515,8 @@ static inline void cfq_del_crq_rb(struct rb_erase(&crq->rb_node, &cfqq->sort_list); RB_CLEAR_COLOR(&crq->rb_node); - if (RB_EMPTY(&cfqq->sort_list) && cfqq->on_rr) { - dprintk("moving 0x%p empty_list\n", cfqq); + if (RB_EMPTY(&cfqq->sort_list) && cfqq->on_rr) cfq_del_cfqq_rr(cfqd, cfqq); - } } } @@ -541,11 +562,8 @@ static void cfq_add_crq_rb(struct cfq_rq rb_insert_color(&crq->rb_node, &cfqq->sort_list); - if (!cfqq->on_rr) { + if (!cfqq->on_rr) cfq_add_cfqq_rr(cfqd, cfqq); - dprintk("moving to rr list %d\n", cfqd->busy_queues); - } else - dprintk("already on rr list %d\n", cfqd->busy_queues); /* * check if this request is a better next-serve candidate @@ -590,11 +608,30 @@ out: return NULL; } -static void cfq_remove_request(request_queue_t *q, struct request *rq) +/* + * make sure the service time gets corrected on reissue of this request + */ +static void cfq_requeue_request(request_queue_t *q, struct request *rq) { struct cfq_rq *crq = RQ_DATA(rq); - dprintk("removing 0x%p\n", rq); + if (crq) { + struct cfq_queue *cfqq = crq->cfq_queue; + + if (cfqq->cfqd->cfq_tagged) { + cfqq->service_used--; + cfq_sort_rr_list(cfqq, 0); + } + + 
crq->accounted = 0; + cfqq->cfqd->rq_in_driver--; + } + list_add(&rq->queuelist, &q->queue_head); +} + +static void cfq_remove_request(request_queue_t *q, struct request *rq) +{ + struct cfq_rq *crq = RQ_DATA(rq); if (crq) { cfq_remove_merge_hints(q, crq); @@ -730,20 +767,21 @@ static inline struct cfq_rq *cfq_check_f struct cfq_data *cfqd = cfqq->cfqd; const int reads = !list_empty(&cfqq->fifo[0]); const int writes = !list_empty(&cfqq->fifo[1]); + unsigned long now = jiffies; struct cfq_rq *crq; - if (jiffies - cfqq->last_fifo_expire < cfqd->cfq_fifo_batch_expire) + if (time_before(now, cfqq->last_fifo_expire + cfqd->cfq_fifo_batch_expire)) return NULL; crq = RQ_DATA(list_entry(cfqq->fifo[0].next, struct request, queuelist)); - if (reads && time_after(jiffies, crq->queue_start + cfqd->cfq_fifo_expire_r)) { - cfqq->last_fifo_expire = jiffies; + if (reads && time_after(now, crq->queue_start + cfqd->cfq_fifo_expire_r)) { + cfqq->last_fifo_expire = now; return crq; } crq = RQ_DATA(list_entry(cfqq->fifo[1].next, struct request, queuelist)); - if (writes && time_after(jiffies, crq->queue_start + cfqd->cfq_fifo_expire_w)) { - cfqq->last_fifo_expire = jiffies; + if (writes && time_after(now, crq->queue_start + cfqd->cfq_fifo_expire_w)) { + cfqq->last_fifo_expire = now; return crq; } @@ -822,7 +860,8 @@ restart: static inline void cfq_account_dispatch(struct cfq_rq *crq) { struct cfq_queue *cfqq = crq->cfq_queue; - unsigned long elapsed = jiffies - crq->queue_start; + struct cfq_data *cfqd = cfqq->cfqd; + unsigned long now, elapsed; /* * accounted bit is necessary since some drivers will call @@ -831,52 +870,62 @@ static inline void cfq_account_dispatch( if (crq->accounted) return; + now = jiffies; + if (cfqq->service_start == ~0UL) + cfqq->service_start = now; + /* * on drives with tagged command queueing, command turn-around time * doesn't necessarily reflect the time spent processing this very * command inside the drive. 
so do the accounting differently there, * by just sorting on the number of requests */ - if (cfqq->cfqd->cfq_tagged) { - if (time_after(jiffies, cfqq->service_start + cfq_service)) { - cfqq->service_start = jiffies; + if (cfqd->cfq_tagged) { + if (time_after(now, cfqq->service_start + cfq_service)) { + cfqq->service_start = now; cfqq->service_used /= 10; } cfqq->service_used++; + cfq_sort_rr_list(cfqq, 0); } + elapsed = now - crq->queue_start; if (elapsed > max_elapsed_dispatch) max_elapsed_dispatch = elapsed; crq->accounted = 1; - crq->service_start = jiffies; + crq->service_start = now; + + if (++cfqd->rq_in_driver >= CFQ_MAX_TAG && !cfqd->cfq_tagged) { + cfqq->cfqd->cfq_tagged = 1; + printk("cfq: depth %d reached, tagging now on\n", CFQ_MAX_TAG); + } } static inline void cfq_account_completion(struct cfq_queue *cfqq, struct cfq_rq *crq) { - unsigned long start_val = cfqq->service_used; + struct cfq_data *cfqd = cfqq->cfqd; - if (!cfqq->cfqd->cfq_tagged) { - unsigned long duration = jiffies - crq->service_start; + WARN_ON(!cfqd->rq_in_driver); + cfqd->rq_in_driver--; - if (time_after(jiffies, cfqq->service_start + cfq_service)) { - cfqq->service_start = jiffies; + if (!cfqd->cfq_tagged) { + unsigned long now = jiffies; + unsigned long duration = now - crq->service_start; + + if (time_after(now, cfqq->service_start + cfq_service)) { + cfqq->service_start = now; cfqq->service_used >>= 3; } cfqq->service_used += duration; + cfq_sort_rr_list(cfqq, 0); if (duration > max_elapsed_crq) max_elapsed_crq = duration; } - - /* - * make sure list stays properly sorted, but only do so if necessary - */ - if (cfqq->on_rr && cfqq->service_used != start_val) - cfq_sort_rr_list(cfqq); } static struct request *cfq_next_request(request_queue_t *q) @@ -913,13 +962,9 @@ static void cfq_put_queue(struct cfq_que { BUG_ON(!atomic_read(&cfqq->ref)); - dprintk("cfq_put_queue 0x%p, ref\n", atomic_read(&cfqq->ref)); - if (!atomic_dec_and_test(&cfqq->ref)) return; - dprintk("killing queue 
0x%p/%s\n", cfqq, cfqq->name); - BUG_ON(rb_first(&cfqq->sort_list)); BUG_ON(cfqq->on_rr); @@ -1163,11 +1208,8 @@ retry: hlist_add_head(&cfqq->cfq_hash, &cfqd->cfq_hash[hashval]); atomic_set(&cfqq->ref, 0); cfqq->cfqd = cfqd; -#ifdef CFQ_DEBUG - strncpy(cfqq->name, current->comm, sizeof(cfqq->name)-1); -#endif - dprintk("cfqq set up for 0x%p/%s\n", cfqq, cfqq->name); cfqq->key_type = cfqd->key_type; + cfqq->service_start = ~0UL; } if (new_cfqq) @@ -1212,17 +1254,14 @@ cfq_insert_request(request_queue_t *q, s switch (where) { case ELEVATOR_INSERT_BACK: - dprintk("adding back 0x%p\n", rq); while (cfq_dispatch_requests(q, cfqd->cfq_quantum)) ; list_add_tail(&rq->queuelist, &q->queue_head); break; case ELEVATOR_INSERT_FRONT: - dprintk("adding front 0x%p\n", rq); list_add(&rq->queuelist, &q->queue_head); break; case ELEVATOR_INSERT_SORT: - dprintk("adding sort 0x%p\n", rq); BUG_ON(!blk_fs_request(rq)); cfq_enqueue(cfqd, crq); break; @@ -1511,7 +1550,6 @@ static int cfq_init(request_queue_t *q, cfqd->cfq_back_max = cfq_back_max; cfqd->cfq_back_penalty = cfq_back_penalty; - dprintk("cfq on queue 0x%p\n", q); return 0; out_spare: mempool_destroy(cfqd->crq_pool); @@ -1654,7 +1692,6 @@ static ssize_t __FUNC(struct cfq_data *c } SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum); SHOW_FUNCTION(cfq_queued_show, cfqd->cfq_queued); -SHOW_FUNCTION(cfq_tagged_show, cfqd->cfq_tagged); SHOW_FUNCTION(cfq_fifo_expire_r_show, cfqd->cfq_fifo_expire_r); SHOW_FUNCTION(cfq_fifo_expire_w_show, cfqd->cfq_fifo_expire_w); SHOW_FUNCTION(cfq_fifo_batch_expire_show, cfqd->cfq_fifo_batch_expire); @@ -1675,7 +1712,6 @@ static ssize_t __FUNC(struct cfq_data *c } STORE_FUNCTION(cfq_quantum_store, &cfqd->cfq_quantum, 1, UINT_MAX); STORE_FUNCTION(cfq_queued_store, &cfqd->cfq_queued, 1, UINT_MAX); -STORE_FUNCTION(cfq_tagged_store, &cfqd->cfq_tagged, 0, 1); STORE_FUNCTION(cfq_fifo_expire_r_store, &cfqd->cfq_fifo_expire_r, 1, UINT_MAX); STORE_FUNCTION(cfq_fifo_expire_w_store, &cfqd->cfq_fifo_expire_w, 1, 
UINT_MAX); STORE_FUNCTION(cfq_fifo_batch_expire_store, &cfqd->cfq_fifo_batch_expire, 0, UINT_MAX); @@ -1694,11 +1730,6 @@ static struct cfq_fs_entry cfq_queued_en .show = cfq_queued_show, .store = cfq_queued_store, }; -static struct cfq_fs_entry cfq_tagged_entry = { - .attr = {.name = "tagged", .mode = S_IRUGO | S_IWUSR }, - .show = cfq_tagged_show, - .store = cfq_tagged_store, -}; static struct cfq_fs_entry cfq_fifo_expire_r_entry = { .attr = {.name = "fifo_expire_sync", .mode = S_IRUGO | S_IWUSR }, .show = cfq_fifo_expire_r_show, @@ -1746,7 +1777,6 @@ static struct cfq_fs_entry cfq_key_type_ static struct attribute *default_attrs[] = { &cfq_quantum_entry.attr, &cfq_queued_entry.attr, - &cfq_tagged_entry.attr, &cfq_fifo_expire_r_entry.attr, &cfq_fifo_expire_w_entry.attr, &cfq_fifo_batch_expire_entry.attr, @@ -1805,6 +1835,7 @@ elevator_t iosched_cfq = { .elevator_next_req_fn = cfq_next_request, .elevator_add_req_fn = cfq_insert_request, .elevator_remove_req_fn = cfq_remove_request, + .elevator_requeue_req_fn = cfq_requeue_request, .elevator_queue_empty_fn = cfq_queue_empty, .elevator_completed_req_fn = cfq_completed_request, .elevator_former_req_fn = cfq_former_request, _