diff options
author | Ben Hutchings <ben@decadent.org.uk> | 2020-05-20 14:15:08 +0100 |
---|---|---|
committer | Ben Hutchings <ben@decadent.org.uk> | 2020-05-20 14:23:05 +0100 |
commit | 8a0edd061294a4e379d79be4dd73495b20842530 (patch) | |
tree | eb5e998387c425cb3cc6e15749adbbdbe171e5e5 | |
parent | b27a1734a37afe075a62e42a0b44dab8fdce3c09 (diff) | |
download | linux-stable-queue-8a0edd061294a4e379d79be4dd73495b20842530.tar.gz |
Add more padata fixes
11 files changed, 836 insertions, 14 deletions
diff --git a/queue-3.16/padata-always-acquire-cpu_hotplug_lock-before-pinst-lock.patch b/queue-3.16/padata-always-acquire-cpu_hotplug_lock-before-pinst-lock.patch index a596e547..4db3abc1 100644 --- a/queue-3.16/padata-always-acquire-cpu_hotplug_lock-before-pinst-lock.patch +++ b/queue-3.16/padata-always-acquire-cpu_hotplug_lock-before-pinst-lock.patch @@ -41,7 +41,7 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk> --- a/kernel/padata.c +++ b/kernel/padata.c -@@ -639,8 +639,8 @@ int padata_set_cpumask(struct padata_ins +@@ -614,8 +614,8 @@ int padata_set_cpumask(struct padata_ins struct cpumask *serial_mask, *parallel_mask; int err = -EINVAL; @@ -51,7 +51,7 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk> switch (cpumask_type) { case PADATA_CPU_PARALLEL: -@@ -658,8 +658,8 @@ int padata_set_cpumask(struct padata_ins +@@ -633,8 +633,8 @@ int padata_set_cpumask(struct padata_ins err = __padata_set_cpumasks(pinst, parallel_mask, serial_mask); out: diff --git a/queue-3.16/padata-avoid-race-in-reordering.patch b/queue-3.16/padata-avoid-race-in-reordering.patch new file mode 100644 index 00000000..f6e2efef --- /dev/null +++ b/queue-3.16/padata-avoid-race-in-reordering.patch @@ -0,0 +1,92 @@ +From: "Jason A. Donenfeld" <Jason@zx2c4.com> +Date: Thu, 23 Mar 2017 12:24:43 +0100 +Subject: padata: avoid race in reordering + +commit de5540d088fe97ad583cc7d396586437b32149a5 upstream. + +Under extremely heavy uses of padata, crashes occur, and with list +debugging turned on, this happens instead: + +[87487.298728] WARNING: CPU: 1 PID: 882 at lib/list_debug.c:33 +__list_add+0xae/0x130 +[87487.301868] list_add corruption. prev->next should be next +(ffffb17abfc043d0), but was ffff8dba70872c80. (prev=ffff8dba70872b00). +[87487.339011] [<ffffffff9a53d075>] dump_stack+0x68/0xa3 +[87487.342198] [<ffffffff99e119a1>] ? console_unlock+0x281/0x6d0 +[87487.345364] [<ffffffff99d6b91f>] __warn+0xff/0x140 +[87487.348513] [<ffffffff99d6b9aa>] warn_slowpath_fmt+0x4a/0x50 +[87487.351659] [<ffffffff9a58b5de>] __list_add+0xae/0x130 +[87487.354772] [<ffffffff9add5094>] ? _raw_spin_lock+0x64/0x70 +[87487.357915] [<ffffffff99eefd66>] padata_reorder+0x1e6/0x420 +[87487.361084] [<ffffffff99ef0055>] padata_do_serial+0xa5/0x120 + +padata_reorder calls list_add_tail with the list to which its adding +locked, which seems correct: + +spin_lock(&squeue->serial.lock); +list_add_tail(&padata->list, &squeue->serial.list); +spin_unlock(&squeue->serial.lock); + +This therefore leaves only place where such inconsistency could occur: +if padata->list is added at the same time on two different threads. +This pdata pointer comes from the function call to +padata_get_next(pd), which has in it the following block: + +next_queue = per_cpu_ptr(pd->pqueue, cpu); +padata = NULL; +reorder = &next_queue->reorder; +if (!list_empty(&reorder->list)) { + padata = list_entry(reorder->list.next, + struct padata_priv, list); + spin_lock(&reorder->lock); + list_del_init(&padata->list); + atomic_dec(&pd->reorder_objects); + spin_unlock(&reorder->lock); + + pd->processed++; + + goto out; +} +out: +return padata; + +I strongly suspect that the problem here is that two threads can race +on reorder list. Even though the deletion is locked, call to +list_entry is not locked, which means it's feasible that two threads +pick up the same padata object and subsequently call list_add_tail on +them at the same time. The fix is thus be hoist that lock outside of +that block. + +Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> +Acked-by: Steffen Klassert <steffen.klassert@secunet.com> +Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> +Signed-off-by: Ben Hutchings <ben@decadent.org.uk> +--- + kernel/padata.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/kernel/padata.c ++++ b/kernel/padata.c +@@ -185,19 +185,20 @@ static struct padata_priv *padata_get_ne + + reorder = &next_queue->reorder; + ++ spin_lock(&reorder->lock); + if (!list_empty(&reorder->list)) { + padata = list_entry(reorder->list.next, + struct padata_priv, list); + +- spin_lock(&reorder->lock); + list_del_init(&padata->list); + atomic_dec(&pd->reorder_objects); +- spin_unlock(&reorder->lock); + + pd->processed++; + ++ spin_unlock(&reorder->lock); + goto out; + } ++ spin_unlock(&reorder->lock); + + if (__this_cpu_read(pd->pqueue->cpu_index) == next_queue->cpu_index) { + padata = ERR_PTR(-ENODATA); diff --git a/queue-3.16/padata-ensure-padata_do_serial-runs-on-the-correct-cpu.patch b/queue-3.16/padata-ensure-padata_do_serial-runs-on-the-correct-cpu.patch new file mode 100644 index 00000000..eee7e4c8 --- /dev/null +++ b/queue-3.16/padata-ensure-padata_do_serial-runs-on-the-correct-cpu.patch @@ -0,0 +1,90 @@ +From: Mathias Krause <minipli@googlemail.com> +Date: Fri, 8 Sep 2017 20:57:11 +0200 +Subject: padata: ensure padata_do_serial() runs on the correct CPU + +commit 350ef88e7e922354f82a931897ad4a4ce6c686ff upstream. + +If the algorithm we're parallelizing is asynchronous we might change +CPUs between padata_do_parallel() and padata_do_serial(). However, we +don't expect this to happen as we need to enqueue the padata object into +the per-cpu reorder queue we took it from, i.e. the same-cpu's parallel +queue. + +Ensure we're not switching CPUs for a given padata object by tracking +the CPU within the padata object. If the serial callback gets called on +the wrong CPU, defer invoking padata_reorder() via a kernel worker on +the CPU we're expected to run on. + +Signed-off-by: Mathias Krause <minipli@googlemail.com> +Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> +Signed-off-by: Ben Hutchings <ben@decadent.org.uk> +--- + include/linux/padata.h | 2 ++ + kernel/padata.c | 20 +++++++++++++++++++- + 2 files changed, 21 insertions(+), 1 deletion(-) + +--- a/include/linux/padata.h ++++ b/include/linux/padata.h +@@ -37,6 +37,7 @@ + * @list: List entry, to attach to the padata lists. + * @pd: Pointer to the internal control structure. + * @cb_cpu: Callback cpu for serializatioon. ++ * @cpu: Cpu for parallelization. + * @seq_nr: Sequence number of the parallelized data object. + * @info: Used to pass information from the parallel to the serial function. + * @parallel: Parallel execution function. +@@ -46,6 +47,7 @@ struct padata_priv { + struct list_head list; + struct parallel_data *pd; + int cb_cpu; ++ int cpu; + int info; + void (*parallel)(struct padata_priv *padata); + void (*serial)(struct padata_priv *padata); +--- a/kernel/padata.c ++++ b/kernel/padata.c +@@ -130,6 +130,7 @@ int padata_do_parallel(struct padata_ins + padata->cb_cpu = cb_cpu; + + target_cpu = padata_cpu_hash(pd); ++ padata->cpu = target_cpu; + queue = per_cpu_ptr(pd->pqueue, target_cpu); + + spin_lock(&queue->parallel.lock); +@@ -367,10 +368,21 @@ void padata_do_serial(struct padata_priv + int cpu; + struct padata_parallel_queue *pqueue; + struct parallel_data *pd; ++ int reorder_via_wq = 0; + + pd = padata->pd; + + cpu = get_cpu(); ++ ++ /* We need to run on the same CPU padata_do_parallel(.., padata, ..) ++ * was called on -- or, at least, enqueue the padata object into the ++ * correct per-cpu queue. ++ */ ++ if (cpu != padata->cpu) { ++ reorder_via_wq = 1; ++ cpu = padata->cpu; ++ } ++ + pqueue = per_cpu_ptr(pd->pqueue, cpu); + + spin_lock(&pqueue->reorder.lock); +@@ -387,7 +399,13 @@ void padata_do_serial(struct padata_priv + + put_cpu(); + +- padata_reorder(pd); ++ /* If we're running on the wrong CPU, call padata_reorder() via a ++ * kernel worker. ++ */ ++ if (reorder_via_wq) ++ queue_work_on(cpu, pd->pinst->wq, &pqueue->reorder_work); ++ else ++ padata_reorder(pd); + } + EXPORT_SYMBOL(padata_do_serial); + diff --git a/queue-3.16/padata-ensure-the-reorder-timer-callback-runs-on-the-correct-cpu.patch b/queue-3.16/padata-ensure-the-reorder-timer-callback-runs-on-the-correct-cpu.patch new file mode 100644 index 00000000..3e4a2653 --- /dev/null +++ b/queue-3.16/padata-ensure-the-reorder-timer-callback-runs-on-the-correct-cpu.patch @@ -0,0 +1,105 @@ +From: Mathias Krause <minipli@googlemail.com> +Date: Fri, 8 Sep 2017 20:57:10 +0200 +Subject: padata: ensure the reorder timer callback runs on the correct CPU + +commit cf5868c8a22dc2854b96e9569064bb92365549ca upstream. + +The reorder timer function runs on the CPU where the timer interrupt was +handled which is not necessarily one of the CPUs of the 'pcpu' CPU mask +set. + +Ensure the padata_reorder() callback runs on the correct CPU, which is +one in the 'pcpu' CPU mask set and, preferrably, the next expected one. +Do so by comparing the current CPU with the expected target CPU. If they +match, call padata_reorder() right away. If they differ, schedule a work +item on the target CPU that does the padata_reorder() call for us. + +Signed-off-by: Mathias Krause <minipli@googlemail.com> +Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> +Signed-off-by: Ben Hutchings <ben@decadent.org.uk> +--- + include/linux/padata.h | 2 ++ + kernel/padata.c | 43 +++++++++++++++++++++++++++++++++++++++++- + 2 files changed, 44 insertions(+), 1 deletion(-) + +--- a/include/linux/padata.h ++++ b/include/linux/padata.h +@@ -85,6 +85,7 @@ struct padata_serial_queue { + * @swork: work struct for serialization. + * @pd: Backpointer to the internal control structure. + * @work: work struct for parallelization. ++ * @reorder_work: work struct for reordering. + * @num_obj: Number of objects that are processed by this cpu. + * @cpu_index: Index of the cpu. + */ +@@ -93,6 +94,7 @@ struct padata_parallel_queue { + struct padata_list reorder; + struct parallel_data *pd; + struct work_struct work; ++ struct work_struct reorder_work; + atomic_t num_obj; + int cpu_index; + }; +--- a/kernel/padata.c ++++ b/kernel/padata.c +@@ -279,11 +279,51 @@ static void padata_reorder(struct parall + return; + } + ++static void invoke_padata_reorder(struct work_struct *work) ++{ ++ struct padata_parallel_queue *pqueue; ++ struct parallel_data *pd; ++ ++ local_bh_disable(); ++ pqueue = container_of(work, struct padata_parallel_queue, reorder_work); ++ pd = pqueue->pd; ++ padata_reorder(pd); ++ local_bh_enable(); ++} ++ + static void padata_reorder_timer(unsigned long arg) + { + struct parallel_data *pd = (struct parallel_data *)arg; ++ unsigned int weight; ++ int target_cpu, cpu; + +- padata_reorder(pd); ++ cpu = get_cpu(); ++ ++ /* We don't lock pd here to not interfere with parallel processing ++ * padata_reorder() calls on other CPUs. We just need any CPU out of ++ * the cpumask.pcpu set. It would be nice if it's the right one but ++ * it doesn't matter if we're off to the next one by using an outdated ++ * pd->processed value. ++ */ ++ weight = cpumask_weight(pd->cpumask.pcpu); ++ target_cpu = padata_index_to_cpu(pd, pd->processed % weight); ++ ++ /* ensure to call the reorder callback on the correct CPU */ ++ if (cpu != target_cpu) { ++ struct padata_parallel_queue *pqueue; ++ struct padata_instance *pinst; ++ ++ /* The timer function is serialized wrt itself -- no locking ++ * needed. ++ */ ++ pinst = pd->pinst; ++ pqueue = per_cpu_ptr(pd->pqueue, target_cpu); ++ queue_work_on(target_cpu, pinst->wq, &pqueue->reorder_work); ++ } else { ++ padata_reorder(pd); ++ } ++ ++ put_cpu(); + } + + static void padata_serial_worker(struct work_struct *serial_work) +@@ -404,6 +444,7 @@ static void padata_init_pqueues(struct p + __padata_list_init(&pqueue->reorder); + __padata_list_init(&pqueue->parallel); + INIT_WORK(&pqueue->work, padata_parallel_worker); ++ INIT_WORK(&pqueue->reorder_work, invoke_padata_reorder); + atomic_set(&pqueue->num_obj, 0); + } + } diff --git a/queue-3.16/padata-get_next-is-never-null.patch b/queue-3.16/padata-get_next-is-never-null.patch new file mode 100644 index 00000000..a1dd0721 --- /dev/null +++ b/queue-3.16/padata-get_next-is-never-null.patch @@ -0,0 +1,56 @@ +From: "Jason A. Donenfeld" <Jason@zx2c4.com> +Date: Wed, 12 Apr 2017 10:40:19 +0200 +Subject: padata: get_next is never NULL + +commit 69b348449bda0f9588737539cfe135774c9939a7 upstream. + +Per Dan's static checker warning, the code that returns NULL was removed +in 2010, so this patch updates the comments and fixes the code +assumptions. + +Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> +Reported-by: Dan Carpenter <dan.carpenter@oracle.com> +Acked-by: Steffen Klassert <steffen.klassert@secunet.com> +Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> +Signed-off-by: Ben Hutchings <ben@decadent.org.uk> +--- + kernel/padata.c | 13 ++++--------- + 1 file changed, 4 insertions(+), 9 deletions(-) + +--- a/kernel/padata.c ++++ b/kernel/padata.c +@@ -153,8 +153,6 @@ EXPORT_SYMBOL(padata_do_parallel); + * A pointer to the control struct of the next object that needs + * serialization, if present in one of the percpu reorder queues. + * +- * NULL, if all percpu reorder queues are empty. +- * + * -EINPROGRESS, if the next object that needs serialization will + * be parallel processed by another cpu and is not yet present in + * the cpu's reorder queue. +@@ -181,8 +179,6 @@ static struct padata_priv *padata_get_ne + cpu = padata_index_to_cpu(pd, next_index); + next_queue = per_cpu_ptr(pd->pqueue, cpu); + +- padata = NULL; +- + reorder = &next_queue->reorder; + + spin_lock(&reorder->lock); +@@ -234,12 +230,11 @@ static void padata_reorder(struct parall + padata = padata_get_next(pd); + + /* +- * All reorder queues are empty, or the next object that needs +- * serialization is parallel processed by another cpu and is +- * still on it's way to the cpu's reorder queue, nothing to +- * do for now. ++ * If the next object that needs serialization is parallel ++ * processed by another cpu and is still on it's way to the ++ * cpu's reorder queue, nothing to do for now. + */ +- if (!padata || PTR_ERR(padata) == -EINPROGRESS) ++ if (PTR_ERR(padata) == -EINPROGRESS) + break; + + /* diff --git a/queue-3.16/padata-initialize-pd-cpu-with-effective-cpumask.patch b/queue-3.16/padata-initialize-pd-cpu-with-effective-cpumask.patch new file mode 100644 index 00000000..0dec176e --- /dev/null +++ b/queue-3.16/padata-initialize-pd-cpu-with-effective-cpumask.patch @@ -0,0 +1,67 @@ +From: Daniel Jordan <daniel.m.jordan@oracle.com> +Date: Thu, 8 Aug 2019 12:05:35 -0400 +Subject: padata: initialize pd->cpu with effective cpumask + +commit ec9c7d19336ee98ecba8de80128aa405c45feebb upstream. + +Exercising CPU hotplug on a 5.2 kernel with recent padata fixes from +cryptodev-2.6.git in an 8-CPU kvm guest... + + # modprobe tcrypt alg="pcrypt(rfc4106(gcm(aes)))" type=3 + # echo 0 > /sys/devices/system/cpu/cpu1/online + # echo c > /sys/kernel/pcrypt/pencrypt/parallel_cpumask + # modprobe tcrypt mode=215 + +...caused the following crash: + + BUG: kernel NULL pointer dereference, address: 0000000000000000 + #PF: supervisor read access in kernel mode + #PF: error_code(0x0000) - not-present page + PGD 0 P4D 0 + Oops: 0000 [#1] SMP PTI + CPU: 2 PID: 134 Comm: kworker/2:2 Not tainted 5.2.0-padata-base+ #7 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-<snip> + Workqueue: pencrypt padata_parallel_worker + RIP: 0010:padata_reorder+0xcb/0x180 + ... + Call Trace: + padata_do_serial+0x57/0x60 + pcrypt_aead_enc+0x3a/0x50 [pcrypt] + padata_parallel_worker+0x9b/0xe0 + process_one_work+0x1b5/0x3f0 + worker_thread+0x4a/0x3c0 + ... + +In padata_alloc_pd, pd->cpu is set using the user-supplied cpumask +instead of the effective cpumask, and in this case cpumask_first picked +an offline CPU. + +The offline CPU's reorder->list.next is NULL in padata_reorder because +the list wasn't initialized in padata_init_pqueues, which only operates +on CPUs in the effective mask. + +Fix by using the effective mask in padata_alloc_pd. + +Fixes: 6fc4dbcf0276 ("padata: Replace delayed timer with immediate workqueue in padata_reorder") +Signed-off-by: Daniel Jordan <daniel.m.jordan@oracle.com> +Cc: Herbert Xu <herbert@gondor.apana.org.au> +Cc: Steffen Klassert <steffen.klassert@secunet.com> +Cc: linux-crypto@vger.kernel.org +Cc: linux-kernel@vger.kernel.org +Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> +Signed-off-by: Ben Hutchings <ben@decadent.org.uk> +--- + kernel/padata.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/kernel/padata.c ++++ b/kernel/padata.c +@@ -435,7 +435,7 @@ static struct parallel_data *padata_allo + atomic_set(&pd->refcnt, 0); + pd->pinst = pinst; + spin_lock_init(&pd->lock); +- pd->cpu = cpumask_first(pcpumask); ++ pd->cpu = cpumask_first(pd->cpumask.pcpu); + INIT_WORK(&pd->reorder_work, invoke_padata_reorder); + + return pd; diff --git a/queue-3.16/padata-purge-get_cpu-and-reorder_via_wq-from-padata_do_serial.patch b/queue-3.16/padata-purge-get_cpu-and-reorder_via_wq-from-padata_do_serial.patch new file mode 100644 index 00000000..53da7ef7 --- /dev/null +++ b/queue-3.16/padata-purge-get_cpu-and-reorder_via_wq-from-padata_do_serial.patch @@ -0,0 +1,59 @@ +From: Daniel Jordan <daniel.m.jordan@oracle.com> +Date: Fri, 19 Jul 2019 15:04:44 -0400 +Subject: padata: purge get_cpu and reorder_via_wq from padata_do_serial + +commit 065cf577135a4977931c7a1e1edf442bfd9773dd upstream. + +With the removal of the padata timer, padata_do_serial no longer +needs special CPU handling, so remove it. + +Signed-off-by: Daniel Jordan <daniel.m.jordan@oracle.com> +Cc: Herbert Xu <herbert@gondor.apana.org.au> +Cc: Steffen Klassert <steffen.klassert@secunet.com> +Cc: linux-crypto@vger.kernel.org +Cc: linux-kernel@vger.kernel.org +Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> +Signed-off-by: Ben Hutchings <ben@decadent.org.uk> +--- + kernel/padata.c | 23 +++-------------------- + 1 file changed, 3 insertions(+), 20 deletions(-) + +--- a/kernel/padata.c ++++ b/kernel/padata.c +@@ -322,24 +322,9 @@ static void padata_serial_worker(struct + */ + void padata_do_serial(struct padata_priv *padata) + { +- int cpu; +- struct padata_parallel_queue *pqueue; +- struct parallel_data *pd; +- int reorder_via_wq = 0; +- +- pd = padata->pd; +- +- cpu = get_cpu(); +- +- /* We need to enqueue the padata object into the correct +- * per-cpu queue. +- */ +- if (cpu != padata->cpu) { +- reorder_via_wq = 1; +- cpu = padata->cpu; +- } +- +- pqueue = per_cpu_ptr(pd->pqueue, cpu); ++ struct parallel_data *pd = padata->pd; ++ struct padata_parallel_queue *pqueue = per_cpu_ptr(pd->pqueue, ++ padata->cpu); + + spin_lock(&pqueue->reorder.lock); + list_add_tail(&padata->list, &pqueue->reorder.list); +@@ -353,8 +338,6 @@ void padata_do_serial(struct padata_priv + */ + smp_mb__after_atomic(); + +- put_cpu(); +- + padata_reorder(pd); + } + EXPORT_SYMBOL(padata_do_serial); diff --git a/queue-3.16/padata-remove-broken-queue-flushing.patch b/queue-3.16/padata-remove-broken-queue-flushing.patch index 91628553..f2cd9764 100644 --- a/queue-3.16/padata-remove-broken-queue-flushing.patch +++ b/queue-3.16/padata-remove-broken-queue-flushing.patch @@ -22,7 +22,6 @@ Fixes: 2b73b07ab8a4 ("padata: Flush the padata queues actively") Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Reviewed-by: Daniel Jordan <daniel.m.jordan@oracle.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -[bwh: Backported to 3.16: padata_flush_queues() also called del_timer_sync()] Signed-off-by: Ben Hutchings <ben@decadent.org.uk> --- --- a/kernel/padata.c @@ -36,7 +35,7 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk> static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index) { int cpu, target_cpu; -@@ -299,6 +301,7 @@ static void padata_serial_worker(struct +@@ -281,6 +283,7 @@ static void padata_serial_worker(struct struct padata_serial_queue *squeue; struct parallel_data *pd; LIST_HEAD(local_list); @@ -44,7 +43,7 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk> local_bh_disable(); squeue = container_of(serial_work, struct padata_serial_queue, work); -@@ -308,6 +311,8 @@ static void padata_serial_worker(struct +@@ -290,6 +293,8 @@ static void padata_serial_worker(struct list_replace_init(&squeue->serial.list, &local_list); spin_unlock(&squeue->serial.lock); @@ -53,7 +52,7 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk> while (!list_empty(&local_list)) { struct padata_priv *padata; -@@ -317,9 +322,12 @@ static void padata_serial_worker(struct +@@ -299,9 +304,12 @@ static void padata_serial_worker(struct list_del_init(&padata->list); padata->serial(padata); @@ -67,16 +66,16 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk> } /** -@@ -442,7 +450,7 @@ static struct parallel_data *padata_allo - setup_timer(&pd->timer, padata_reorder_timer, (unsigned long)pd); +@@ -432,7 +440,7 @@ static struct parallel_data *padata_allo + padata_init_squeues(pd); atomic_set(&pd->seq_nr, -1); atomic_set(&pd->reorder_objects, 0); - atomic_set(&pd->refcnt, 0); + atomic_set(&pd->refcnt, 1); pd->pinst = pinst; spin_lock_init(&pd->lock); - -@@ -467,31 +475,6 @@ static void padata_free_pd(struct parall + pd->cpu = cpumask_first(pd->cpumask.pcpu); +@@ -459,29 +467,6 @@ static void padata_free_pd(struct parall kfree(pd); } @@ -92,8 +91,6 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk> - flush_work(&pqueue->work); - } - -- del_timer_sync(&pd->timer); -- - if (atomic_read(&pd->reorder_objects)) - padata_reorder(pd); - @@ -108,7 +105,7 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk> static void __padata_start(struct padata_instance *pinst) { pinst->flags |= PADATA_INIT; -@@ -505,10 +488,6 @@ static void __padata_stop(struct padata_ +@@ -495,10 +480,6 @@ static void __padata_stop(struct padata_ pinst->flags &= ~PADATA_INIT; synchronize_rcu(); @@ -119,7 +116,7 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk> } /* Replace the internal control structure with a new one. */ -@@ -529,8 +508,8 @@ static void padata_replace(struct padata +@@ -519,8 +500,8 @@ static void padata_replace(struct padata if (!cpumask_equal(pd_old->cpumask.cbcpu, pd_new->cpumask.cbcpu)) notification_mask |= PADATA_CPU_SERIAL; diff --git a/queue-3.16/padata-remove-unused-but-set-variables.patch b/queue-3.16/padata-remove-unused-but-set-variables.patch new file mode 100644 index 00000000..4fec625f --- /dev/null +++ b/queue-3.16/padata-remove-unused-but-set-variables.patch @@ -0,0 +1,43 @@ +From: Tobias Klauser <tklauser@distanz.ch> +Date: Mon, 17 Oct 2016 12:16:08 +0200 +Subject: padata: Remove unused but set variables +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit 119a0798dc42ed4c4f96d39b8b676efcea73aec6 upstream. + +Remove the unused but set variable pinst in padata_parallel_worker to +fix the following warning when building with 'W=1': + + kernel/padata.c: In function ‘padata_parallel_worker’: + kernel/padata.c:68:26: warning: variable ‘pinst’ set but not used [-Wunused-but-set-variable] + +Also remove the now unused variable pd which is only used to set pinst. + +Signed-off-by: Tobias Klauser <tklauser@distanz.ch> +Acked-by: Steffen Klassert <steffen.klassert@secunet.com> +Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> +Signed-off-by: Ben Hutchings <ben@decadent.org.uk> +--- + kernel/padata.c | 4 ---- + 1 file changed, 4 deletions(-) + +--- a/kernel/padata.c ++++ b/kernel/padata.c +@@ -63,15 +63,11 @@ static int padata_cpu_hash(struct parall + static void padata_parallel_worker(struct work_struct *parallel_work) + { + struct padata_parallel_queue *pqueue; +- struct parallel_data *pd; +- struct padata_instance *pinst; + LIST_HEAD(local_list); + + local_bh_disable(); + pqueue = container_of(parallel_work, + struct padata_parallel_queue, work); +- pd = pqueue->pd; +- pinst = pd->pinst; + + spin_lock(&pqueue->parallel.lock); + list_replace_init(&pqueue->parallel.list, &local_list); diff --git a/queue-3.16/padata-replace-delayed-timer-with-immediate-workqueue-in.patch b/queue-3.16/padata-replace-delayed-timer-with-immediate-workqueue-in.patch new file mode 100644 index 00000000..a3b7cec1 --- /dev/null +++ b/queue-3.16/padata-replace-delayed-timer-with-immediate-workqueue-in.patch @@ -0,0 +1,305 @@ +From: Herbert Xu <herbert@gondor.apana.org.au> +Date: Thu, 18 Jul 2019 23:01:46 +0800 +Subject: padata: Replace delayed timer with immediate workqueue in + padata_reorder + +commit 6fc4dbcf0276279d488c5fbbfabe94734134f4fa upstream. + +The function padata_reorder will use a timer when it cannot progress +while completed jobs are outstanding (pd->reorder_objects > 0). This +is suboptimal as if we do end up using the timer then it would have +introduced a gratuitous delay of one second. + +In fact we can easily distinguish between whether completed jobs +are outstanding and whether we can make progress. All we have to +do is look at the next pqueue list. + +This patch does that by replacing pd->processed with pd->cpu so +that the next pqueue is more accessible. + +A work queue is used instead of the original try_again to avoid +hogging the CPU. + +Note that we don't bother removing the work queue in +padata_flush_queues because the whole premise is broken. You +cannot flush async crypto requests so it makes no sense to even +try. A subsequent patch will fix it by replacing it with a ref +counting scheme. + +Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> +[bwh: Backported to 3.16: + - Deleted code used the old timer API here + - Adjust context] +Signed-off-by: Ben Hutchings <ben@decadent.org.uk> +--- + include/linux/padata.h | 13 ++---- + kernel/padata.c | 97 ++++++++---------------------------------- + 2 files changed, 22 insertions(+), 88 deletions(-) + +--- a/include/linux/padata.h ++++ b/include/linux/padata.h +@@ -24,7 +24,6 @@ + #include <linux/workqueue.h> + #include <linux/spinlock.h> + #include <linux/list.h> +-#include <linux/timer.h> + #include <linux/notifier.h> + #include <linux/kobject.h> + +@@ -85,18 +84,14 @@ struct padata_serial_queue { + * @serial: List to wait for serialization after reordering. + * @pwork: work struct for parallelization. + * @swork: work struct for serialization. +- * @pd: Backpointer to the internal control structure. + * @work: work struct for parallelization. +- * @reorder_work: work struct for reordering. + * @num_obj: Number of objects that are processed by this cpu. + * @cpu_index: Index of the cpu. + */ + struct padata_parallel_queue { + struct padata_list parallel; + struct padata_list reorder; +- struct parallel_data *pd; + struct work_struct work; +- struct work_struct reorder_work; + atomic_t num_obj; + int cpu_index; + }; +@@ -122,10 +117,10 @@ struct padata_cpumask { + * @reorder_objects: Number of objects waiting in the reorder queues. + * @refcnt: Number of objects holding a reference on this parallel_data. + * @max_seq_nr: Maximal used sequence number. ++ * @cpu: Next CPU to be processed. + * @cpumask: The cpumasks in use for parallel and serial workers. ++ * @reorder_work: work struct for reordering. + * @lock: Reorder lock. +- * @processed: Number of already processed objects. +- * @timer: Reorder timer. + */ + struct parallel_data { + struct padata_instance *pinst; +@@ -134,10 +129,10 @@ struct parallel_data { + atomic_t reorder_objects; + atomic_t refcnt; + atomic_t seq_nr; ++ int cpu; + struct padata_cpumask cpumask; ++ struct work_struct reorder_work; + spinlock_t lock ____cacheline_aligned; +- unsigned int processed; +- struct timer_list timer; + }; + + /** +--- a/kernel/padata.c ++++ b/kernel/padata.c +@@ -163,23 +163,12 @@ EXPORT_SYMBOL(padata_do_parallel); + */ + static struct padata_priv *padata_get_next(struct parallel_data *pd) + { +- int cpu, num_cpus; +- unsigned int next_nr, next_index; + struct padata_parallel_queue *next_queue; + struct padata_priv *padata; + struct padata_list *reorder; ++ int cpu = pd->cpu; + +- num_cpus = cpumask_weight(pd->cpumask.pcpu); +- +- /* +- * Calculate the percpu reorder queue and the sequence +- * number of the next object. +- */ +- next_nr = pd->processed; +- next_index = next_nr % num_cpus; +- cpu = padata_index_to_cpu(pd, next_index); + next_queue = per_cpu_ptr(pd->pqueue, cpu); +- + reorder = &next_queue->reorder; + + spin_lock(&reorder->lock); +@@ -190,7 +179,8 @@ static struct padata_priv *padata_get_ne + list_del_init(&padata->list); + atomic_dec(&pd->reorder_objects); + +- pd->processed++; ++ pd->cpu = cpumask_next_wrap(cpu, pd->cpumask.pcpu, -1, ++ false); + + spin_unlock(&reorder->lock); + goto out; +@@ -213,6 +203,7 @@ static void padata_reorder(struct parall + struct padata_priv *padata; + struct padata_serial_queue *squeue; + struct padata_instance *pinst = pd->pinst; ++ struct padata_parallel_queue *next_queue; + + /* + * We need to ensure that only one cpu can work on dequeueing of +@@ -244,7 +235,6 @@ static void padata_reorder(struct parall + * so exit immediately. + */ + if (PTR_ERR(padata) == -ENODATA) { +- del_timer(&pd->timer); + spin_unlock_bh(&pd->lock); + return; + } +@@ -263,70 +253,29 @@ static void padata_reorder(struct parall + + /* + * The next object that needs serialization might have arrived to +- * the reorder queues in the meantime, we will be called again +- * from the timer function if no one else cares for it. ++ * the reorder queues in the meantime. + * +- * Ensure reorder_objects is read after pd->lock is dropped so we see +- * an increment from another task in padata_do_serial. Pairs with ++ * Ensure reorder queue is read after pd->lock is dropped so we see ++ * new objects from another task in padata_do_serial. Pairs with + * smp_mb__after_atomic in padata_do_serial. + */ + smp_mb(); +- if (atomic_read(&pd->reorder_objects) +- && !(pinst->flags & PADATA_RESET)) +- mod_timer(&pd->timer, jiffies + HZ); +- else +- del_timer(&pd->timer); + +- return; ++ next_queue = per_cpu_ptr(pd->pqueue, pd->cpu); ++ if (!list_empty(&next_queue->reorder.list)) ++ queue_work(pinst->wq, &pd->reorder_work); + } + + static void invoke_padata_reorder(struct work_struct *work) + { +- struct padata_parallel_queue *pqueue; + struct parallel_data *pd; + + local_bh_disable(); +- pqueue = container_of(work, struct padata_parallel_queue, reorder_work); +- pd = pqueue->pd; ++ pd = container_of(work, struct parallel_data, reorder_work); + padata_reorder(pd); + local_bh_enable(); + } + +-static void padata_reorder_timer(unsigned long arg) +-{ +- struct parallel_data *pd = (struct parallel_data *)arg; +- unsigned int weight; +- int target_cpu, cpu; +- +- cpu = get_cpu(); +- +- /* We don't lock pd here to not interfere with parallel processing +- * padata_reorder() calls on other CPUs. We just need any CPU out of +- * the cpumask.pcpu set. It would be nice if it's the right one but +- * it doesn't matter if we're off to the next one by using an outdated +- * pd->processed value. +- */ +- weight = cpumask_weight(pd->cpumask.pcpu); +- target_cpu = padata_index_to_cpu(pd, pd->processed % weight); +- +- /* ensure to call the reorder callback on the correct CPU */ +- if (cpu != target_cpu) { +- struct padata_parallel_queue *pqueue; +- struct padata_instance *pinst; +- +- /* The timer function is serialized wrt itself -- no locking +- * needed. +- */ +- pinst = pd->pinst; +- pqueue = per_cpu_ptr(pd->pqueue, target_cpu); +- queue_work_on(target_cpu, pinst->wq, &pqueue->reorder_work); +- } else { +- padata_reorder(pd); +- } +- +- put_cpu(); +-} +- + static void padata_serial_worker(struct work_struct *serial_work) + { + struct padata_serial_queue *squeue; +@@ -374,9 +323,8 @@ void padata_do_serial(struct padata_priv + + cpu = get_cpu(); + +- /* We need to run on the same CPU padata_do_parallel(.., padata, ..) +- * was called on -- or, at least, enqueue the padata object into the +- * correct per-cpu queue. ++ /* We need to enqueue the padata object into the correct ++ * per-cpu queue. + */ + if (cpu != padata->cpu) { + reorder_via_wq = 1; +@@ -386,12 +334,12 @@ void padata_do_serial(struct padata_priv + pqueue = per_cpu_ptr(pd->pqueue, cpu); + + spin_lock(&pqueue->reorder.lock); +- atomic_inc(&pd->reorder_objects); + list_add_tail(&padata->list, &pqueue->reorder.list); ++ atomic_inc(&pd->reorder_objects); + spin_unlock(&pqueue->reorder.lock); + + /* +- * Ensure the atomic_inc of reorder_objects above is ordered correctly ++ * Ensure the addition to the reorder list is ordered correctly + * with the trylock of pd->lock in padata_reorder. Pairs with smp_mb + * in padata_reorder. + */ +@@ -399,13 +347,7 @@ void padata_do_serial(struct padata_priv + + put_cpu(); + +- /* If we're running on the wrong CPU, call padata_reorder() via a +- * kernel worker. +- */ +- if (reorder_via_wq) +- queue_work_on(cpu, pd->pinst->wq, &pqueue->reorder_work); +- else +- padata_reorder(pd); ++ padata_reorder(pd); + } + EXPORT_SYMBOL(padata_do_serial); + +@@ -455,14 +397,12 @@ static void padata_init_pqueues(struct p + cpu_index = 0; + for_each_cpu(cpu, pd->cpumask.pcpu) { + pqueue = per_cpu_ptr(pd->pqueue, cpu); +- pqueue->pd = pd; + pqueue->cpu_index = cpu_index; + cpu_index++; + + __padata_list_init(&pqueue->reorder); + __padata_list_init(&pqueue->parallel); + INIT_WORK(&pqueue->work, padata_parallel_worker); +- INIT_WORK(&pqueue->reorder_work, invoke_padata_reorder); + atomic_set(&pqueue->num_obj, 0); + } + } +@@ -490,12 +430,13 @@ static struct parallel_data *padata_allo + + padata_init_pqueues(pd); + padata_init_squeues(pd); +- setup_timer(&pd->timer, padata_reorder_timer, (unsigned long)pd); + atomic_set(&pd->seq_nr, -1); + atomic_set(&pd->reorder_objects, 0); + atomic_set(&pd->refcnt, 0); + pd->pinst = pinst; + spin_lock_init(&pd->lock); ++ pd->cpu = cpumask_first(pcpumask); ++ INIT_WORK(&pd->reorder_work, invoke_padata_reorder); + + return pd; + +@@ -530,8 +471,6 @@ static void padata_flush_queues(struct p + flush_work(&pqueue->work); + } + +- del_timer_sync(&pd->timer); +- + if (atomic_read(&pd->reorder_objects)) + padata_reorder(pd); + diff --git a/queue-3.16/series b/queue-3.16/series index 17b019e1..43cf85f5 100644 --- a/queue-3.16/series +++ b/queue-3.16/series @@ -1,7 +1,15 @@ fs-namespace.c-fix-mountpoint-reference-counter-race.patch propagate_one-mnt_set_mountpoint-needs-mount_lock.patch spi-spi-dw-add-lock-protect-dw_spi-rx-tx-to-prevent-concurrent-calls.patch +padata-remove-unused-but-set-variables.patch +padata-avoid-race-in-reordering.patch +padata-get_next-is-never-null.patch +padata-ensure-the-reorder-timer-callback-runs-on-the-correct-cpu.patch +padata-ensure-padata_do_serial-runs-on-the-correct-cpu.patch +padata-replace-delayed-timer-with-immediate-workqueue-in.patch +padata-initialize-pd-cpu-with-effective-cpumask.patch padata-remove-broken-queue-flushing.patch +padata-purge-get_cpu-and-reorder_via_wq-from-padata_do_serial.patch crypto-pcrypt-fix-user-after-free-on-module-unload.patch crypto-pcrypt-do-not-clear-may_sleep-flag-in-original-request.patch padata-always-acquire-cpu_hotplug_lock-before-pinst-lock.patch |