author     Jens Axboe <axboe@fb.com>  2017-01-10 19:59:52 -0700
committer  Jens Axboe <axboe@fb.com>  2017-01-10 19:59:52 -0700
commit     ddb08a630e369fa2e327ece8e406bcdc3285b064 (patch)
tree       11ee2ff1087953325d14368ffdee112c94ff55aa
parent     0443f9bee59cc636412a215599741be4e951b006 (diff)
download   linux-block-tmp-sched.tar.gz
blk-mq-sched: fixes (tmp-sched)

Assorted fixes for the blk-mq scheduling framework: allocate the per-hctx
scheduler tag maps (hctx->sched_tags) at elevator init time via
blk_mq_sched_setup()/blk_mq_sched_teardown() instead of carrying them in
the tag_set, pass the tag map to allocate from explicitly to
__blk_mq_alloc_request(), give blk_mq_alloc_rqs() an explicit depth,
release the scheduler tag from blk_mq_sched_completed_request(), expose
the scheduler tags in sysfs ("sched_tags"), and restrict the flush-path
BUG_ON on rq->tag to blk-mq queues.
Signed-off-by: Jens Axboe <axboe@fb.com>
-rw-r--r--   block/blk-flush.c         2
-rw-r--r--   block/blk-mq-sched.c     53
-rw-r--r--   block/blk-mq-sched.h      8
-rw-r--r--   block/blk-mq-sysfs.c     13
-rw-r--r--   block/blk-mq.c          107
-rw-r--r--   block/blk-mq.h            3
-rw-r--r--   block/elevator.c         13
-rw-r--r--   include/linux/blk-mq.h    1
8 files changed, 124 insertions, 76 deletions
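
For orientation, a minimal sketch of the allocation split this series moves to,
condensed from the blk_mq_sched_get_request() hunk below. The function name
sched_get_request_sketch() and its parameter list are illustrative only; the
real code lives in block/blk-mq-sched.c, and icq setup and error handling are
elided here.

    /*
     * Illustrative sketch only -- assumes the blk-mq internal headers
     * (block/blk-mq.h, block/blk-mq-sched.h); not a literal excerpt.
     */
    static struct request *sched_get_request_sketch(struct request_queue *q,
                                                    struct blk_mq_hw_ctx *hctx,
                                                    struct blk_mq_alloc_data *data,
                                                    unsigned int op)
    {
            struct elevator_queue *e = q->elevator;
            struct request *rq;

            if (e) {
                    /*
                     * An elevator is attached: requests come out of the
                     * per-hctx scheduler tag map.  rq->tag (the driver tag)
                     * stays -1 and is assigned at dispatch time by
                     * blk_mq_assign_drv_tag().
                     */
                    rq = __blk_mq_alloc_request(data, hctx->sched_tags, op);
            } else {
                    /*
                     * No elevator: allocate straight from the driver tag
                     * map and promote the tag, so dispatch needs no second
                     * tag allocation.
                     */
                    rq = __blk_mq_alloc_request(data, hctx->tags, op);
                    if (rq) {
                            rq->tag = rq->sched_tag;
                            rq->sched_tag = -1;
                    }
            }
            return rq;
    }

On completion the tags are dropped in the opposite order: __blk_mq_finish_request()
returns rq->tag to hctx->tags, then blk_mq_sched_completed_request() returns
rq->sched_tag to hctx->sched_tags (see the blk-mq.c and blk-mq-sched.h hunks).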
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 46d12bbfde85c6..9f09031ac58379 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -170,7 +170,7 @@ static bool blk_flush_complete_seq(struct request *rq,
struct list_head *pending = &fq->flush_queue[fq->flush_pending_idx];
bool queued = false, kicked;
- BUG_ON(rq->tag < 0);
+ BUG_ON(q->mq_ops && rq->tag < 0);
BUG_ON(rq->flush.seq & seq);
rq->flush.seq |= seq;
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 234f45b812cebe..b1ab572cb89c82 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -117,10 +117,18 @@ struct request *blk_mq_sched_get_request(struct request_queue *q,
blk_mq_set_alloc_data(data, q, 0, ctx, hctx);
- if (e && e->type->ops.mq.get_request)
- rq = e->type->ops.mq.get_request(q, op, data);
- else
- rq = __blk_mq_alloc_request(data, op);
+ if (e) {
+ if (e->type->ops.mq.get_request)
+ rq = e->type->ops.mq.get_request(q, op, data);
+ else
+ rq = __blk_mq_alloc_request(data, hctx->sched_tags, op);
+ } else {
+ rq = __blk_mq_alloc_request(data, hctx->tags, op);
+ if (rq) {
+ rq->tag = rq->sched_tag;
+ rq->sched_tag = -1;
+ }
+ }
if (rq) {
rq->elv.icq = NULL;
@@ -265,3 +273,40 @@ int blk_mq_sched_init(struct request_queue *q)
return ret;
}
+
+int blk_mq_sched_setup(struct request_queue *q)
+{
+ struct blk_mq_tag_set *set = q->tag_set;
+ struct blk_mq_hw_ctx *hctx;
+ int i;
+
+ printk("blk_mq_sched_setup\n");
+
+ /*
+  * Tear down and reinit the queues, since we're now switching the
+  * rq map from ->tags to ->sched_tags.
+  */
+ queue_for_each_hw_ctx(q, hctx, i) {
+ blk_mq_free_rqs(set, hctx->tags, i);
+
+ hctx->sched_tags = blk_mq_alloc_rq_map(set, i, 256, 0);
+ blk_mq_alloc_rqs(set, hctx->sched_tags, i, 256);
+ }
+
+ return 0;
+}
+
+void blk_mq_sched_teardown(struct request_queue *q)
+{
+ struct blk_mq_tag_set *set = q->tag_set;
+ struct blk_mq_hw_ctx *hctx;
+ int i;
+
+ queue_for_each_hw_ctx(q, hctx, i) {
+ blk_mq_free_rqs(set, hctx->sched_tags, i);
+ blk_mq_free_rq_map(hctx->sched_tags);
+ hctx->sched_tags = NULL;
+
+ blk_mq_alloc_rqs(set, hctx->tags, i, set->queue_depth);
+ }
+}
diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h
index dd5c85b18400e5..8f26979c66aa58 100644
--- a/block/blk-mq-sched.h
+++ b/block/blk-mq-sched.h
@@ -2,6 +2,7 @@
#define BLK_MQ_SCHED_H
#include "blk-mq.h"
+#include "blk-mq-tag.h"
int blk_mq_sched_init_hctx_data(struct request_queue *q, size_t size,
int (*init)(struct blk_mq_hw_ctx *),
@@ -23,6 +24,9 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx);
int blk_mq_sched_init(struct request_queue *q);
+int blk_mq_sched_setup(struct request_queue *q);
+void blk_mq_sched_teardown(struct request_queue *q);
+
static inline bool
blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
{
@@ -113,6 +117,10 @@ blk_mq_sched_completed_request(struct blk_mq_hw_ctx *hctx, struct request *rq)
if (e && e->type->ops.mq.completed_request)
e->type->ops.mq.completed_request(hctx, rq);
+ BUG_ON(rq->sched_tag == -1);
+
+ blk_mq_put_tag(hctx, hctx->sched_tags, rq->mq_ctx, rq->sched_tag);
+
if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) {
clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
blk_mq_run_hw_queue(hctx, true);
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index eacd3af7209901..2caecaa98e40bb 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -231,6 +231,14 @@ static ssize_t blk_mq_hw_sysfs_rq_list_show(struct blk_mq_hw_ctx *hctx,
return ret;
}
+static ssize_t blk_mq_hw_sysfs_sched_tags_show(struct blk_mq_hw_ctx *hctx, char *page)
+{
+ if (hctx->sched_tags)
+ return blk_mq_tag_sysfs_show(hctx->sched_tags, page);
+
+ return 0;
+}
+
static ssize_t blk_mq_hw_sysfs_tags_show(struct blk_mq_hw_ctx *hctx, char *page)
{
return blk_mq_tag_sysfs_show(hctx->tags, page);
@@ -345,6 +353,10 @@ static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_pending = {
.attr = {.name = "pending", .mode = S_IRUGO },
.show = blk_mq_hw_sysfs_rq_list_show,
};
+static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_sched_tags = {
+ .attr = {.name = "sched_tags", .mode = S_IRUGO },
+ .show = blk_mq_hw_sysfs_sched_tags_show,
+};
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_tags = {
.attr = {.name = "tags", .mode = S_IRUGO },
.show = blk_mq_hw_sysfs_tags_show,
@@ -370,6 +382,7 @@ static struct attribute *default_hw_ctx_attrs[] = {
&blk_mq_hw_sysfs_dispatched.attr,
&blk_mq_hw_sysfs_pending.attr,
&blk_mq_hw_sysfs_tags.attr,
+ &blk_mq_hw_sysfs_sched_tags.attr,
&blk_mq_hw_sysfs_cpus.attr,
&blk_mq_hw_sysfs_active.attr,
&blk_mq_hw_sysfs_poll.attr,
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 4f111f94ba4872..a51de56cdd4502 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -218,23 +218,23 @@ void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
EXPORT_SYMBOL_GPL(blk_mq_rq_ctx_init);
struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data,
+ struct blk_mq_tags *tags,
unsigned int op)
{
- struct blk_mq_tags *tags = data->hctx->sched_tags;
struct request *rq;
- unsigned int sched_tag;
+ unsigned int tag;
- sched_tag = blk_mq_get_tag(data, tags);
- if (sched_tag != BLK_MQ_TAG_FAIL) {
- rq = tags->rqs[sched_tag];
- rq->tag = -1;
+ tag = blk_mq_get_tag(data, tags);
+ if (tag != BLK_MQ_TAG_FAIL) {
+ rq = tags->rqs[tag];
if (blk_mq_tag_busy(data->hctx)) {
rq->rq_flags = RQF_MQ_INFLIGHT;
atomic_inc(&data->hctx->nr_active);
}
- rq->sched_tag = sched_tag;
+ rq->tag = -1;
+ rq->sched_tag = tag;
blk_mq_rq_ctx_init(data->q, data->ctx, rq, op);
return rq;
}
@@ -306,12 +306,15 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw,
ctx = __blk_mq_get_ctx(q, cpumask_first(hctx->cpumask));
blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx);
- rq = __blk_mq_alloc_request(&alloc_data, rw);
+ rq = __blk_mq_alloc_request(&alloc_data, hctx->tags, rw);
if (!rq) {
ret = -EWOULDBLOCK;
goto out_queue_exit;
}
+ rq->tag = rq->sched_tag;
+ rq->sched_tag = -1;
+
return rq;
out_queue_exit:
@@ -323,12 +326,9 @@ EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx);
void __blk_mq_finish_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
struct request *rq)
{
- const int tag = rq->tag;
const int sched_tag = rq->sched_tag;
struct request_queue *q = rq->q;
- blk_mq_sched_completed_request(hctx, rq);
-
if (rq->rq_flags & RQF_MQ_INFLIGHT)
atomic_dec(&hctx->nr_active);
@@ -337,13 +337,12 @@ void __blk_mq_finish_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
clear_bit(REQ_ATOM_POLL_SLEPT, &rq->atomic_flags);
- if (tag >= 0) {
- WARN_ON_ONCE(hctx->tags->rqs[tag] != rq);
- hctx->tags->rqs[tag] = NULL;
- blk_mq_put_tag(hctx, hctx->tags, ctx, tag);
+ if (rq->tag != -1) {
+ WARN_ON_ONCE(hctx->tags->rqs[rq->tag] != rq);
+ blk_mq_put_tag(hctx, hctx->tags, ctx, rq->tag);
}
- if (sched_tag >= 0)
- blk_mq_put_tag(hctx, hctx->sched_tags, ctx, sched_tag);
+ if (sched_tag != -1)
+ blk_mq_sched_completed_request(hctx, rq);
blk_queue_exit(q);
}
@@ -858,12 +857,9 @@ int blk_mq_assign_drv_tag(struct request *rq)
};
rq->tag = blk_mq_get_tag(&data, hctx->tags);
- if (rq->tag < 0)
- goto out;
- WARN_ON_ONCE(hctx->tags->rqs[rq->tag]);
- hctx->tags->rqs[rq->tag] = rq;
+ if (rq->tag >= 0)
+ hctx->tags->rqs[rq->tag] = rq;
-out:
return rq->tag;
}
@@ -1557,7 +1553,7 @@ void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
{
struct page *page;
- if (tags->rqs && set && set->ops->exit_request) {
+ if (tags->rqs && set->ops->exit_request) {
int i;
for (i = 0; i < tags->nr_tags; i++) {
@@ -1584,6 +1580,7 @@ void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
void blk_mq_free_rq_map(struct blk_mq_tags *tags)
{
kfree(tags->rqs);
+ tags->rqs = NULL;
blk_mq_free_tags(tags);
}
@@ -1618,7 +1615,7 @@ static size_t order_to_size(unsigned int order)
}
int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
- unsigned int hctx_idx)
+ unsigned int hctx_idx, unsigned int depth)
{
unsigned int i, j, entries_per_page, max_order = 4;
size_t rq_size, left;
@@ -1631,9 +1628,9 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
*/
rq_size = round_up(sizeof(struct request) + set->cmd_size,
cache_line_size());
- left = rq_size * set->queue_depth;
+ left = rq_size * depth;
- for (i = 0; i < set->queue_depth; ) {
+ for (i = 0; i < depth; ) {
int this_order = max_order;
struct page *page;
int to_do;
@@ -1667,7 +1664,7 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
*/
kmemleak_alloc(p, order_to_size(this_order), 1, GFP_NOIO);
entries_per_page = order_to_size(this_order) / rq_size;
- to_do = min(entries_per_page, set->queue_depth - i);
+ to_do = min(entries_per_page, depth - i);
left -= to_do * rq_size;
for (j = 0; j < to_do; j++) {
tags->rqs[i] = p;
@@ -1684,6 +1681,7 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
i++;
}
}
+
return 0;
fail:
@@ -1799,7 +1797,6 @@ static int blk_mq_init_hctx(struct request_queue *q,
cpuhp_state_add_instance_nocalls(CPUHP_BLK_MQ_DEAD, &hctx->cpuhp_dead);
hctx->tags = set->tags[hctx_idx];
- hctx->sched_tags = set->sched_tags[hctx_idx];
/*
* Allocate space for all possible cpus to avoid allocation at
@@ -1881,35 +1878,29 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
}
}
-static void __blk_mq_free_rq_map_i(struct blk_mq_tag_set *set, int hctx_idx)
+static void __blk_mq_free_rq_map_all(struct blk_mq_tag_set *set, int hctx_idx)
{
- if (set->sched_tags[hctx_idx]) {
- blk_mq_free_rqs(set, set->sched_tags[hctx_idx], hctx_idx);
- blk_mq_free_rq_map(set->sched_tags[hctx_idx]);
- set->sched_tags[hctx_idx] = NULL;
- }
if (set->tags[hctx_idx]) {
+ blk_mq_free_rqs(set, set->tags[hctx_idx], hctx_idx);
blk_mq_free_rq_map(set->tags[hctx_idx]);
set->tags[hctx_idx] = NULL;
}
}
-static bool __blk_mq_alloc_rq_map_i(struct blk_mq_tag_set *set, int hctx_idx,
- unsigned int nr_requests)
+static bool __blk_mq_alloc_rq_map(struct blk_mq_tag_set *set, int hctx_idx)
{
int ret = 0;
set->tags[hctx_idx] = blk_mq_alloc_rq_map(set, hctx_idx,
set->queue_depth, set->reserved_tags);
- set->sched_tags[hctx_idx] = blk_mq_alloc_rq_map(set, hctx_idx,
- nr_requests, 0);
- if (set->sched_tags[hctx_idx])
- ret = blk_mq_alloc_rqs(set, set->sched_tags[hctx_idx],
- hctx_idx);
- if (!set->tags[hctx_idx] || !set->sched_tags[hctx_idx] || ret < 0) {
- __blk_mq_free_rq_map_i(set, hctx_idx);
+ if (set->tags[hctx_idx])
+ ret = blk_mq_alloc_rqs(set, set->tags[hctx_idx], hctx_idx, set->queue_depth);
+
+ if (!set->tags[hctx_idx] || ret < 0) {
+ __blk_mq_free_rq_map_all(set, hctx_idx);
return false;
}
+
return true;
}
@@ -1942,7 +1933,7 @@ static void blk_mq_map_swqueue(struct request_queue *q,
hctx_idx = q->mq_map[i];
/* unmapped hw queue can be remapped after CPU topo changed */
if (!set->tags[hctx_idx] &&
- !__blk_mq_alloc_rq_map_i(set, hctx_idx, q->nr_requests)) {
+ !__blk_mq_alloc_rq_map(set, hctx_idx)) {
/*
* If tags initialization fail for some hctx,
* that hctx won't be brought online. In this
@@ -2347,20 +2338,19 @@ static int blk_mq_queue_reinit_prepare(unsigned int cpu)
return 0;
}
-static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set,
- unsigned int nr_requests)
+static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
{
int i;
for (i = 0; i < set->nr_hw_queues; i++)
- if (!__blk_mq_alloc_rq_map_i(set, i, nr_requests))
+ if (!__blk_mq_alloc_rq_map(set, i))
goto out_unwind;
return 0;
out_unwind:
while (--i >= 0)
- __blk_mq_free_rq_map_i(set, i);
+ blk_mq_free_rq_map(set->tags[i]);
return -ENOMEM;
}
@@ -2370,15 +2360,14 @@ out_unwind:
* may reduce the depth asked for, if memory is tight. set->queue_depth
* will be updated to reflect the allocated depth.
*/
-static int blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set,
- unsigned int nr_requests)
+static int blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
{
unsigned int depth;
int err;
depth = set->queue_depth;
do {
- err = __blk_mq_alloc_rq_maps(set, nr_requests);
+ err = __blk_mq_alloc_rq_maps(set);
if (!err)
break;
@@ -2449,15 +2438,10 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
if (!set->tags)
return -ENOMEM;
- set->sched_tags = kzalloc_node(nr_cpu_ids * sizeof(struct blk_mq_tags *),
- GFP_KERNEL, set->numa_node);
- if (!set->sched_tags)
- goto free_drv_tags;
-
set->mq_map = kzalloc_node(sizeof(*set->mq_map) * nr_cpu_ids,
GFP_KERNEL, set->numa_node);
if (!set->mq_map)
- goto free_sched_tags;
+ goto free_drv_tags;
if (set->ops->map_queues)
ret = set->ops->map_queues(set);
@@ -2466,7 +2450,7 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
if (ret)
goto free_mq_map;
- ret = blk_mq_alloc_rq_maps(set, set->queue_depth/*q->nr_requests*/);
+ ret = blk_mq_alloc_rq_maps(set);
if (ret)
goto free_mq_map;
@@ -2478,9 +2462,6 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
free_mq_map:
kfree(set->mq_map);
set->mq_map = NULL;
-free_sched_tags:
- kfree(set->sched_tags);
- set->sched_tags = NULL;
free_drv_tags:
kfree(set->tags);
set->tags = NULL;
@@ -2496,16 +2477,12 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
if (set->tags[i]) {
blk_mq_free_rqs(set, set->tags[i], i);
blk_mq_free_rq_map(set->tags[i]);
- blk_mq_free_rq_map(set->sched_tags[i]);
}
}
kfree(set->mq_map);
set->mq_map = NULL;
- kfree(set->sched_tags);
- set->sched_tags = NULL;
-
kfree(set->tags);
set->tags = NULL;
}
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 0368c513c2ab7b..486884e0314a5d 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -46,7 +46,7 @@ struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
unsigned int nr_tags,
unsigned int reserved_tags);
int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
- unsigned int hctx_idx);
+ unsigned int hctx_idx, unsigned int depth);
/*
* Internal helpers for request insertion into sw queues
@@ -134,6 +134,7 @@ void __blk_mq_finish_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
struct request *rq);
void blk_mq_finish_request(struct request *rq);
struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data,
+ struct blk_mq_tags *tags,
unsigned int op);
static inline bool blk_mq_hctx_stopped(struct blk_mq_hw_ctx *hctx)
diff --git a/block/elevator.c b/block/elevator.c
index f4309db61e3121..40d98b074fe3be 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -235,12 +235,17 @@ int elevator_init(struct request_queue *q, char *name)
}
}
- if (e->uses_mq)
- err = e->ops.mq.init_sched(q, e);
- else
+ if (e->uses_mq) {
+ err = blk_mq_sched_setup(q);
+ if (!err)
+ err = e->ops.mq.init_sched(q, e);
+ } else
err = e->ops.sq.elevator_init_fn(q, e);
- if (err)
+ if (err) {
+ if (e->uses_mq)
+ blk_mq_sched_teardown(q);
elevator_put(e);
+ }
return err;
}
EXPORT_SYMBOL(elevator_init);
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 377594bcda8dd6..684512ab6211bb 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -73,7 +73,6 @@ struct blk_mq_tag_set {
void *driver_data;
struct blk_mq_tags **tags;
- struct blk_mq_tags **sched_tags;
struct mutex tag_list_lock;
struct list_head tag_list;