summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBen Hutchings <ben@decadent.org.uk>2020-05-20 14:15:08 +0100
committerBen Hutchings <ben@decadent.org.uk>2020-05-20 14:23:05 +0100
commit8a0edd061294a4e379d79be4dd73495b20842530 (patch)
treeeb5e998387c425cb3cc6e15749adbbdbe171e5e5
parentb27a1734a37afe075a62e42a0b44dab8fdce3c09 (diff)
downloadlinux-stable-queue-8a0edd061294a4e379d79be4dd73495b20842530.tar.gz
Add more padata fixes
-rw-r--r--queue-3.16/padata-always-acquire-cpu_hotplug_lock-before-pinst-lock.patch4
-rw-r--r--queue-3.16/padata-avoid-race-in-reordering.patch92
-rw-r--r--queue-3.16/padata-ensure-padata_do_serial-runs-on-the-correct-cpu.patch90
-rw-r--r--queue-3.16/padata-ensure-the-reorder-timer-callback-runs-on-the-correct-cpu.patch105
-rw-r--r--queue-3.16/padata-get_next-is-never-null.patch56
-rw-r--r--queue-3.16/padata-initialize-pd-cpu-with-effective-cpumask.patch67
-rw-r--r--queue-3.16/padata-purge-get_cpu-and-reorder_via_wq-from-padata_do_serial.patch59
-rw-r--r--queue-3.16/padata-remove-broken-queue-flushing.patch21
-rw-r--r--queue-3.16/padata-remove-unused-but-set-variables.patch43
-rw-r--r--queue-3.16/padata-replace-delayed-timer-with-immediate-workqueue-in.patch305
-rw-r--r--queue-3.16/series8
11 files changed, 836 insertions, 14 deletions
diff --git a/queue-3.16/padata-always-acquire-cpu_hotplug_lock-before-pinst-lock.patch b/queue-3.16/padata-always-acquire-cpu_hotplug_lock-before-pinst-lock.patch
index a596e547..4db3abc1 100644
--- a/queue-3.16/padata-always-acquire-cpu_hotplug_lock-before-pinst-lock.patch
+++ b/queue-3.16/padata-always-acquire-cpu_hotplug_lock-before-pinst-lock.patch
@@ -41,7 +41,7 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
--- a/kernel/padata.c
+++ b/kernel/padata.c
-@@ -639,8 +639,8 @@ int padata_set_cpumask(struct padata_ins
+@@ -614,8 +614,8 @@ int padata_set_cpumask(struct padata_ins
struct cpumask *serial_mask, *parallel_mask;
int err = -EINVAL;
@@ -51,7 +51,7 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
switch (cpumask_type) {
case PADATA_CPU_PARALLEL:
-@@ -658,8 +658,8 @@ int padata_set_cpumask(struct padata_ins
+@@ -633,8 +633,8 @@ int padata_set_cpumask(struct padata_ins
err = __padata_set_cpumasks(pinst, parallel_mask, serial_mask);
out:
diff --git a/queue-3.16/padata-avoid-race-in-reordering.patch b/queue-3.16/padata-avoid-race-in-reordering.patch
new file mode 100644
index 00000000..f6e2efef
--- /dev/null
+++ b/queue-3.16/padata-avoid-race-in-reordering.patch
@@ -0,0 +1,92 @@
+From: "Jason A. Donenfeld" <Jason@zx2c4.com>
+Date: Thu, 23 Mar 2017 12:24:43 +0100
+Subject: padata: avoid race in reordering
+
+commit de5540d088fe97ad583cc7d396586437b32149a5 upstream.
+
+Under extremely heavy uses of padata, crashes occur, and with list
+debugging turned on, this happens instead:
+
+[87487.298728] WARNING: CPU: 1 PID: 882 at lib/list_debug.c:33
+__list_add+0xae/0x130
+[87487.301868] list_add corruption. prev->next should be next
+(ffffb17abfc043d0), but was ffff8dba70872c80. (prev=ffff8dba70872b00).
+[87487.339011] [<ffffffff9a53d075>] dump_stack+0x68/0xa3
+[87487.342198] [<ffffffff99e119a1>] ? console_unlock+0x281/0x6d0
+[87487.345364] [<ffffffff99d6b91f>] __warn+0xff/0x140
+[87487.348513] [<ffffffff99d6b9aa>] warn_slowpath_fmt+0x4a/0x50
+[87487.351659] [<ffffffff9a58b5de>] __list_add+0xae/0x130
+[87487.354772] [<ffffffff9add5094>] ? _raw_spin_lock+0x64/0x70
+[87487.357915] [<ffffffff99eefd66>] padata_reorder+0x1e6/0x420
+[87487.361084] [<ffffffff99ef0055>] padata_do_serial+0xa5/0x120
+
+padata_reorder calls list_add_tail with the list to which it's adding
+locked, which seems correct:
+
+spin_lock(&squeue->serial.lock);
+list_add_tail(&padata->list, &squeue->serial.list);
+spin_unlock(&squeue->serial.lock);
+
+This therefore leaves only one place where such inconsistency could occur:
+if padata->list is added at the same time on two different threads.
+This padata pointer comes from the function call to
+padata_get_next(pd), which has in it the following block:
+
+next_queue = per_cpu_ptr(pd->pqueue, cpu);
+padata = NULL;
+reorder = &next_queue->reorder;
+if (!list_empty(&reorder->list)) {
+ padata = list_entry(reorder->list.next,
+ struct padata_priv, list);
+ spin_lock(&reorder->lock);
+ list_del_init(&padata->list);
+ atomic_dec(&pd->reorder_objects);
+ spin_unlock(&reorder->lock);
+
+ pd->processed++;
+
+ goto out;
+}
+out:
+return padata;
+
+I strongly suspect that the problem here is that two threads can race
+on the reorder list. Even though the deletion is locked, the call to
+list_entry is not locked, which means it's feasible that two threads
+pick up the same padata object and subsequently call list_add_tail on
+them at the same time. The fix is thus to hoist that lock outside of
+that block.
+
+Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
+Acked-by: Steffen Klassert <steffen.klassert@secunet.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ kernel/padata.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/kernel/padata.c
++++ b/kernel/padata.c
+@@ -185,19 +185,20 @@ static struct padata_priv *padata_get_ne
+
+ reorder = &next_queue->reorder;
+
++ spin_lock(&reorder->lock);
+ if (!list_empty(&reorder->list)) {
+ padata = list_entry(reorder->list.next,
+ struct padata_priv, list);
+
+- spin_lock(&reorder->lock);
+ list_del_init(&padata->list);
+ atomic_dec(&pd->reorder_objects);
+- spin_unlock(&reorder->lock);
+
+ pd->processed++;
+
++ spin_unlock(&reorder->lock);
+ goto out;
+ }
++ spin_unlock(&reorder->lock);
+
+ if (__this_cpu_read(pd->pqueue->cpu_index) == next_queue->cpu_index) {
+ padata = ERR_PTR(-ENODATA);
diff --git a/queue-3.16/padata-ensure-padata_do_serial-runs-on-the-correct-cpu.patch b/queue-3.16/padata-ensure-padata_do_serial-runs-on-the-correct-cpu.patch
new file mode 100644
index 00000000..eee7e4c8
--- /dev/null
+++ b/queue-3.16/padata-ensure-padata_do_serial-runs-on-the-correct-cpu.patch
@@ -0,0 +1,90 @@
+From: Mathias Krause <minipli@googlemail.com>
+Date: Fri, 8 Sep 2017 20:57:11 +0200
+Subject: padata: ensure padata_do_serial() runs on the correct CPU
+
+commit 350ef88e7e922354f82a931897ad4a4ce6c686ff upstream.
+
+If the algorithm we're parallelizing is asynchronous we might change
+CPUs between padata_do_parallel() and padata_do_serial(). However, we
+don't expect this to happen as we need to enqueue the padata object into
+the per-cpu reorder queue we took it from, i.e. the same-cpu's parallel
+queue.
+
+Ensure we're not switching CPUs for a given padata object by tracking
+the CPU within the padata object. If the serial callback gets called on
+the wrong CPU, defer invoking padata_reorder() via a kernel worker on
+the CPU we're expected to run on.
+
+Signed-off-by: Mathias Krause <minipli@googlemail.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ include/linux/padata.h | 2 ++
+ kernel/padata.c | 20 +++++++++++++++++++-
+ 2 files changed, 21 insertions(+), 1 deletion(-)
+
+--- a/include/linux/padata.h
++++ b/include/linux/padata.h
+@@ -37,6 +37,7 @@
+ * @list: List entry, to attach to the padata lists.
+ * @pd: Pointer to the internal control structure.
+ * @cb_cpu: Callback cpu for serializatioon.
++ * @cpu: Cpu for parallelization.
+ * @seq_nr: Sequence number of the parallelized data object.
+ * @info: Used to pass information from the parallel to the serial function.
+ * @parallel: Parallel execution function.
+@@ -46,6 +47,7 @@ struct padata_priv {
+ struct list_head list;
+ struct parallel_data *pd;
+ int cb_cpu;
++ int cpu;
+ int info;
+ void (*parallel)(struct padata_priv *padata);
+ void (*serial)(struct padata_priv *padata);
+--- a/kernel/padata.c
++++ b/kernel/padata.c
+@@ -130,6 +130,7 @@ int padata_do_parallel(struct padata_ins
+ padata->cb_cpu = cb_cpu;
+
+ target_cpu = padata_cpu_hash(pd);
++ padata->cpu = target_cpu;
+ queue = per_cpu_ptr(pd->pqueue, target_cpu);
+
+ spin_lock(&queue->parallel.lock);
+@@ -367,10 +368,21 @@ void padata_do_serial(struct padata_priv
+ int cpu;
+ struct padata_parallel_queue *pqueue;
+ struct parallel_data *pd;
++ int reorder_via_wq = 0;
+
+ pd = padata->pd;
+
+ cpu = get_cpu();
++
++ /* We need to run on the same CPU padata_do_parallel(.., padata, ..)
++ * was called on -- or, at least, enqueue the padata object into the
++ * correct per-cpu queue.
++ */
++ if (cpu != padata->cpu) {
++ reorder_via_wq = 1;
++ cpu = padata->cpu;
++ }
++
+ pqueue = per_cpu_ptr(pd->pqueue, cpu);
+
+ spin_lock(&pqueue->reorder.lock);
+@@ -387,7 +399,13 @@ void padata_do_serial(struct padata_priv
+
+ put_cpu();
+
+- padata_reorder(pd);
++ /* If we're running on the wrong CPU, call padata_reorder() via a
++ * kernel worker.
++ */
++ if (reorder_via_wq)
++ queue_work_on(cpu, pd->pinst->wq, &pqueue->reorder_work);
++ else
++ padata_reorder(pd);
+ }
+ EXPORT_SYMBOL(padata_do_serial);
+
diff --git a/queue-3.16/padata-ensure-the-reorder-timer-callback-runs-on-the-correct-cpu.patch b/queue-3.16/padata-ensure-the-reorder-timer-callback-runs-on-the-correct-cpu.patch
new file mode 100644
index 00000000..3e4a2653
--- /dev/null
+++ b/queue-3.16/padata-ensure-the-reorder-timer-callback-runs-on-the-correct-cpu.patch
@@ -0,0 +1,105 @@
+From: Mathias Krause <minipli@googlemail.com>
+Date: Fri, 8 Sep 2017 20:57:10 +0200
+Subject: padata: ensure the reorder timer callback runs on the correct CPU
+
+commit cf5868c8a22dc2854b96e9569064bb92365549ca upstream.
+
+The reorder timer function runs on the CPU where the timer interrupt was
+handled which is not necessarily one of the CPUs of the 'pcpu' CPU mask
+set.
+
+Ensure the padata_reorder() callback runs on the correct CPU, which is
+one in the 'pcpu' CPU mask set and, preferably, the next expected one.
+Do so by comparing the current CPU with the expected target CPU. If they
+match, call padata_reorder() right away. If they differ, schedule a work
+item on the target CPU that does the padata_reorder() call for us.
+
+Signed-off-by: Mathias Krause <minipli@googlemail.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ include/linux/padata.h | 2 ++
+ kernel/padata.c | 43 +++++++++++++++++++++++++++++++++++++++++-
+ 2 files changed, 44 insertions(+), 1 deletion(-)
+
+--- a/include/linux/padata.h
++++ b/include/linux/padata.h
+@@ -85,6 +85,7 @@ struct padata_serial_queue {
+ * @swork: work struct for serialization.
+ * @pd: Backpointer to the internal control structure.
+ * @work: work struct for parallelization.
++ * @reorder_work: work struct for reordering.
+ * @num_obj: Number of objects that are processed by this cpu.
+ * @cpu_index: Index of the cpu.
+ */
+@@ -93,6 +94,7 @@ struct padata_parallel_queue {
+ struct padata_list reorder;
+ struct parallel_data *pd;
+ struct work_struct work;
++ struct work_struct reorder_work;
+ atomic_t num_obj;
+ int cpu_index;
+ };
+--- a/kernel/padata.c
++++ b/kernel/padata.c
+@@ -279,11 +279,51 @@ static void padata_reorder(struct parall
+ return;
+ }
+
++static void invoke_padata_reorder(struct work_struct *work)
++{
++ struct padata_parallel_queue *pqueue;
++ struct parallel_data *pd;
++
++ local_bh_disable();
++ pqueue = container_of(work, struct padata_parallel_queue, reorder_work);
++ pd = pqueue->pd;
++ padata_reorder(pd);
++ local_bh_enable();
++}
++
+ static void padata_reorder_timer(unsigned long arg)
+ {
+ struct parallel_data *pd = (struct parallel_data *)arg;
++ unsigned int weight;
++ int target_cpu, cpu;
+
+- padata_reorder(pd);
++ cpu = get_cpu();
++
++ /* We don't lock pd here to not interfere with parallel processing
++ * padata_reorder() calls on other CPUs. We just need any CPU out of
++ * the cpumask.pcpu set. It would be nice if it's the right one but
++ * it doesn't matter if we're off to the next one by using an outdated
++ * pd->processed value.
++ */
++ weight = cpumask_weight(pd->cpumask.pcpu);
++ target_cpu = padata_index_to_cpu(pd, pd->processed % weight);
++
++ /* ensure to call the reorder callback on the correct CPU */
++ if (cpu != target_cpu) {
++ struct padata_parallel_queue *pqueue;
++ struct padata_instance *pinst;
++
++ /* The timer function is serialized wrt itself -- no locking
++ * needed.
++ */
++ pinst = pd->pinst;
++ pqueue = per_cpu_ptr(pd->pqueue, target_cpu);
++ queue_work_on(target_cpu, pinst->wq, &pqueue->reorder_work);
++ } else {
++ padata_reorder(pd);
++ }
++
++ put_cpu();
+ }
+
+ static void padata_serial_worker(struct work_struct *serial_work)
+@@ -404,6 +444,7 @@ static void padata_init_pqueues(struct p
+ __padata_list_init(&pqueue->reorder);
+ __padata_list_init(&pqueue->parallel);
+ INIT_WORK(&pqueue->work, padata_parallel_worker);
++ INIT_WORK(&pqueue->reorder_work, invoke_padata_reorder);
+ atomic_set(&pqueue->num_obj, 0);
+ }
+ }
diff --git a/queue-3.16/padata-get_next-is-never-null.patch b/queue-3.16/padata-get_next-is-never-null.patch
new file mode 100644
index 00000000..a1dd0721
--- /dev/null
+++ b/queue-3.16/padata-get_next-is-never-null.patch
@@ -0,0 +1,56 @@
+From: "Jason A. Donenfeld" <Jason@zx2c4.com>
+Date: Wed, 12 Apr 2017 10:40:19 +0200
+Subject: padata: get_next is never NULL
+
+commit 69b348449bda0f9588737539cfe135774c9939a7 upstream.
+
+Per Dan's static checker warning, the code that returns NULL was removed
+in 2010, so this patch updates the comments and fixes the code
+assumptions.
+
+Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
+Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
+Acked-by: Steffen Klassert <steffen.klassert@secunet.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ kernel/padata.c | 13 ++++---------
+ 1 file changed, 4 insertions(+), 9 deletions(-)
+
+--- a/kernel/padata.c
++++ b/kernel/padata.c
+@@ -153,8 +153,6 @@ EXPORT_SYMBOL(padata_do_parallel);
+ * A pointer to the control struct of the next object that needs
+ * serialization, if present in one of the percpu reorder queues.
+ *
+- * NULL, if all percpu reorder queues are empty.
+- *
+ * -EINPROGRESS, if the next object that needs serialization will
+ * be parallel processed by another cpu and is not yet present in
+ * the cpu's reorder queue.
+@@ -181,8 +179,6 @@ static struct padata_priv *padata_get_ne
+ cpu = padata_index_to_cpu(pd, next_index);
+ next_queue = per_cpu_ptr(pd->pqueue, cpu);
+
+- padata = NULL;
+-
+ reorder = &next_queue->reorder;
+
+ spin_lock(&reorder->lock);
+@@ -234,12 +230,11 @@ static void padata_reorder(struct parall
+ padata = padata_get_next(pd);
+
+ /*
+- * All reorder queues are empty, or the next object that needs
+- * serialization is parallel processed by another cpu and is
+- * still on it's way to the cpu's reorder queue, nothing to
+- * do for now.
++ * If the next object that needs serialization is parallel
++ * processed by another cpu and is still on it's way to the
++ * cpu's reorder queue, nothing to do for now.
+ */
+- if (!padata || PTR_ERR(padata) == -EINPROGRESS)
++ if (PTR_ERR(padata) == -EINPROGRESS)
+ break;
+
+ /*
diff --git a/queue-3.16/padata-initialize-pd-cpu-with-effective-cpumask.patch b/queue-3.16/padata-initialize-pd-cpu-with-effective-cpumask.patch
new file mode 100644
index 00000000..0dec176e
--- /dev/null
+++ b/queue-3.16/padata-initialize-pd-cpu-with-effective-cpumask.patch
@@ -0,0 +1,67 @@
+From: Daniel Jordan <daniel.m.jordan@oracle.com>
+Date: Thu, 8 Aug 2019 12:05:35 -0400
+Subject: padata: initialize pd->cpu with effective cpumask
+
+commit ec9c7d19336ee98ecba8de80128aa405c45feebb upstream.
+
+Exercising CPU hotplug on a 5.2 kernel with recent padata fixes from
+cryptodev-2.6.git in an 8-CPU kvm guest...
+
+ # modprobe tcrypt alg="pcrypt(rfc4106(gcm(aes)))" type=3
+ # echo 0 > /sys/devices/system/cpu/cpu1/online
+ # echo c > /sys/kernel/pcrypt/pencrypt/parallel_cpumask
+ # modprobe tcrypt mode=215
+
+...caused the following crash:
+
+ BUG: kernel NULL pointer dereference, address: 0000000000000000
+ #PF: supervisor read access in kernel mode
+ #PF: error_code(0x0000) - not-present page
+ PGD 0 P4D 0
+ Oops: 0000 [#1] SMP PTI
+ CPU: 2 PID: 134 Comm: kworker/2:2 Not tainted 5.2.0-padata-base+ #7
+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-<snip>
+ Workqueue: pencrypt padata_parallel_worker
+ RIP: 0010:padata_reorder+0xcb/0x180
+ ...
+ Call Trace:
+ padata_do_serial+0x57/0x60
+ pcrypt_aead_enc+0x3a/0x50 [pcrypt]
+ padata_parallel_worker+0x9b/0xe0
+ process_one_work+0x1b5/0x3f0
+ worker_thread+0x4a/0x3c0
+ ...
+
+In padata_alloc_pd, pd->cpu is set using the user-supplied cpumask
+instead of the effective cpumask, and in this case cpumask_first picked
+an offline CPU.
+
+The offline CPU's reorder->list.next is NULL in padata_reorder because
+the list wasn't initialized in padata_init_pqueues, which only operates
+on CPUs in the effective mask.
+
+Fix by using the effective mask in padata_alloc_pd.
+
+Fixes: 6fc4dbcf0276 ("padata: Replace delayed timer with immediate workqueue in padata_reorder")
+Signed-off-by: Daniel Jordan <daniel.m.jordan@oracle.com>
+Cc: Herbert Xu <herbert@gondor.apana.org.au>
+Cc: Steffen Klassert <steffen.klassert@secunet.com>
+Cc: linux-crypto@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ kernel/padata.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/padata.c
++++ b/kernel/padata.c
+@@ -435,7 +435,7 @@ static struct parallel_data *padata_allo
+ atomic_set(&pd->refcnt, 0);
+ pd->pinst = pinst;
+ spin_lock_init(&pd->lock);
+- pd->cpu = cpumask_first(pcpumask);
++ pd->cpu = cpumask_first(pd->cpumask.pcpu);
+ INIT_WORK(&pd->reorder_work, invoke_padata_reorder);
+
+ return pd;
diff --git a/queue-3.16/padata-purge-get_cpu-and-reorder_via_wq-from-padata_do_serial.patch b/queue-3.16/padata-purge-get_cpu-and-reorder_via_wq-from-padata_do_serial.patch
new file mode 100644
index 00000000..53da7ef7
--- /dev/null
+++ b/queue-3.16/padata-purge-get_cpu-and-reorder_via_wq-from-padata_do_serial.patch
@@ -0,0 +1,59 @@
+From: Daniel Jordan <daniel.m.jordan@oracle.com>
+Date: Fri, 19 Jul 2019 15:04:44 -0400
+Subject: padata: purge get_cpu and reorder_via_wq from padata_do_serial
+
+commit 065cf577135a4977931c7a1e1edf442bfd9773dd upstream.
+
+With the removal of the padata timer, padata_do_serial no longer
+needs special CPU handling, so remove it.
+
+Signed-off-by: Daniel Jordan <daniel.m.jordan@oracle.com>
+Cc: Herbert Xu <herbert@gondor.apana.org.au>
+Cc: Steffen Klassert <steffen.klassert@secunet.com>
+Cc: linux-crypto@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ kernel/padata.c | 23 +++--------------------
+ 1 file changed, 3 insertions(+), 20 deletions(-)
+
+--- a/kernel/padata.c
++++ b/kernel/padata.c
+@@ -322,24 +322,9 @@ static void padata_serial_worker(struct
+ */
+ void padata_do_serial(struct padata_priv *padata)
+ {
+- int cpu;
+- struct padata_parallel_queue *pqueue;
+- struct parallel_data *pd;
+- int reorder_via_wq = 0;
+-
+- pd = padata->pd;
+-
+- cpu = get_cpu();
+-
+- /* We need to enqueue the padata object into the correct
+- * per-cpu queue.
+- */
+- if (cpu != padata->cpu) {
+- reorder_via_wq = 1;
+- cpu = padata->cpu;
+- }
+-
+- pqueue = per_cpu_ptr(pd->pqueue, cpu);
++ struct parallel_data *pd = padata->pd;
++ struct padata_parallel_queue *pqueue = per_cpu_ptr(pd->pqueue,
++ padata->cpu);
+
+ spin_lock(&pqueue->reorder.lock);
+ list_add_tail(&padata->list, &pqueue->reorder.list);
+@@ -353,8 +338,6 @@ void padata_do_serial(struct padata_priv
+ */
+ smp_mb__after_atomic();
+
+- put_cpu();
+-
+ padata_reorder(pd);
+ }
+ EXPORT_SYMBOL(padata_do_serial);
diff --git a/queue-3.16/padata-remove-broken-queue-flushing.patch b/queue-3.16/padata-remove-broken-queue-flushing.patch
index 91628553..f2cd9764 100644
--- a/queue-3.16/padata-remove-broken-queue-flushing.patch
+++ b/queue-3.16/padata-remove-broken-queue-flushing.patch
@@ -22,7 +22,6 @@ Fixes: 2b73b07ab8a4 ("padata: Flush the padata queues actively")
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Reviewed-by: Daniel Jordan <daniel.m.jordan@oracle.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
-[bwh: Backported to 3.16: padata_flush_queues() also called del_timer_sync()]
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
---
--- a/kernel/padata.c
@@ -36,7 +35,7 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index)
{
int cpu, target_cpu;
-@@ -299,6 +301,7 @@ static void padata_serial_worker(struct
+@@ -281,6 +283,7 @@ static void padata_serial_worker(struct
struct padata_serial_queue *squeue;
struct parallel_data *pd;
LIST_HEAD(local_list);
@@ -44,7 +43,7 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
local_bh_disable();
squeue = container_of(serial_work, struct padata_serial_queue, work);
-@@ -308,6 +311,8 @@ static void padata_serial_worker(struct
+@@ -290,6 +293,8 @@ static void padata_serial_worker(struct
list_replace_init(&squeue->serial.list, &local_list);
spin_unlock(&squeue->serial.lock);
@@ -53,7 +52,7 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
while (!list_empty(&local_list)) {
struct padata_priv *padata;
-@@ -317,9 +322,12 @@ static void padata_serial_worker(struct
+@@ -299,9 +304,12 @@ static void padata_serial_worker(struct
list_del_init(&padata->list);
padata->serial(padata);
@@ -67,16 +66,16 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
}
/**
-@@ -442,7 +450,7 @@ static struct parallel_data *padata_allo
- setup_timer(&pd->timer, padata_reorder_timer, (unsigned long)pd);
+@@ -432,7 +440,7 @@ static struct parallel_data *padata_allo
+ padata_init_squeues(pd);
atomic_set(&pd->seq_nr, -1);
atomic_set(&pd->reorder_objects, 0);
- atomic_set(&pd->refcnt, 0);
+ atomic_set(&pd->refcnt, 1);
pd->pinst = pinst;
spin_lock_init(&pd->lock);
-
-@@ -467,31 +475,6 @@ static void padata_free_pd(struct parall
+ pd->cpu = cpumask_first(pd->cpumask.pcpu);
+@@ -459,29 +467,6 @@ static void padata_free_pd(struct parall
kfree(pd);
}
@@ -92,8 +91,6 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
- flush_work(&pqueue->work);
- }
-
-- del_timer_sync(&pd->timer);
--
- if (atomic_read(&pd->reorder_objects))
- padata_reorder(pd);
-
@@ -108,7 +105,7 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
static void __padata_start(struct padata_instance *pinst)
{
pinst->flags |= PADATA_INIT;
-@@ -505,10 +488,6 @@ static void __padata_stop(struct padata_
+@@ -495,10 +480,6 @@ static void __padata_stop(struct padata_
pinst->flags &= ~PADATA_INIT;
synchronize_rcu();
@@ -119,7 +116,7 @@ Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
}
/* Replace the internal control structure with a new one. */
-@@ -529,8 +508,8 @@ static void padata_replace(struct padata
+@@ -519,8 +500,8 @@ static void padata_replace(struct padata
if (!cpumask_equal(pd_old->cpumask.cbcpu, pd_new->cpumask.cbcpu))
notification_mask |= PADATA_CPU_SERIAL;
diff --git a/queue-3.16/padata-remove-unused-but-set-variables.patch b/queue-3.16/padata-remove-unused-but-set-variables.patch
new file mode 100644
index 00000000..4fec625f
--- /dev/null
+++ b/queue-3.16/padata-remove-unused-but-set-variables.patch
@@ -0,0 +1,43 @@
+From: Tobias Klauser <tklauser@distanz.ch>
+Date: Mon, 17 Oct 2016 12:16:08 +0200
+Subject: padata: Remove unused but set variables
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+commit 119a0798dc42ed4c4f96d39b8b676efcea73aec6 upstream.
+
+Remove the unused but set variable pinst in padata_parallel_worker to
+fix the following warning when building with 'W=1':
+
+ kernel/padata.c: In function ‘padata_parallel_worker’:
+ kernel/padata.c:68:26: warning: variable ‘pinst’ set but not used [-Wunused-but-set-variable]
+
+Also remove the now unused variable pd which is only used to set pinst.
+
+Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
+Acked-by: Steffen Klassert <steffen.klassert@secunet.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ kernel/padata.c | 4 ----
+ 1 file changed, 4 deletions(-)
+
+--- a/kernel/padata.c
++++ b/kernel/padata.c
+@@ -63,15 +63,11 @@ static int padata_cpu_hash(struct parall
+ static void padata_parallel_worker(struct work_struct *parallel_work)
+ {
+ struct padata_parallel_queue *pqueue;
+- struct parallel_data *pd;
+- struct padata_instance *pinst;
+ LIST_HEAD(local_list);
+
+ local_bh_disable();
+ pqueue = container_of(parallel_work,
+ struct padata_parallel_queue, work);
+- pd = pqueue->pd;
+- pinst = pd->pinst;
+
+ spin_lock(&pqueue->parallel.lock);
+ list_replace_init(&pqueue->parallel.list, &local_list);
diff --git a/queue-3.16/padata-replace-delayed-timer-with-immediate-workqueue-in.patch b/queue-3.16/padata-replace-delayed-timer-with-immediate-workqueue-in.patch
new file mode 100644
index 00000000..a3b7cec1
--- /dev/null
+++ b/queue-3.16/padata-replace-delayed-timer-with-immediate-workqueue-in.patch
@@ -0,0 +1,305 @@
+From: Herbert Xu <herbert@gondor.apana.org.au>
+Date: Thu, 18 Jul 2019 23:01:46 +0800
+Subject: padata: Replace delayed timer with immediate workqueue in
+ padata_reorder
+
+commit 6fc4dbcf0276279d488c5fbbfabe94734134f4fa upstream.
+
+The function padata_reorder will use a timer when it cannot progress
+while completed jobs are outstanding (pd->reorder_objects > 0). This
+is suboptimal as if we do end up using the timer then it would have
+introduced a gratuitous delay of one second.
+
+In fact we can easily distinguish between whether completed jobs
+are outstanding and whether we can make progress. All we have to
+do is look at the next pqueue list.
+
+This patch does that by replacing pd->processed with pd->cpu so
+that the next pqueue is more accessible.
+
+A work queue is used instead of the original try_again to avoid
+hogging the CPU.
+
+Note that we don't bother removing the work queue in
+padata_flush_queues because the whole premise is broken. You
+cannot flush async crypto requests so it makes no sense to even
+try. A subsequent patch will fix it by replacing it with a ref
+counting scheme.
+
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+[bwh: Backported to 3.16:
+ - Deleted code used the old timer API here
+ - Adjust context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+---
+ include/linux/padata.h | 13 ++----
+ kernel/padata.c | 97 ++++++++----------------------------------
+ 2 files changed, 22 insertions(+), 88 deletions(-)
+
+--- a/include/linux/padata.h
++++ b/include/linux/padata.h
+@@ -24,7 +24,6 @@
+ #include <linux/workqueue.h>
+ #include <linux/spinlock.h>
+ #include <linux/list.h>
+-#include <linux/timer.h>
+ #include <linux/notifier.h>
+ #include <linux/kobject.h>
+
+@@ -85,18 +84,14 @@ struct padata_serial_queue {
+ * @serial: List to wait for serialization after reordering.
+ * @pwork: work struct for parallelization.
+ * @swork: work struct for serialization.
+- * @pd: Backpointer to the internal control structure.
+ * @work: work struct for parallelization.
+- * @reorder_work: work struct for reordering.
+ * @num_obj: Number of objects that are processed by this cpu.
+ * @cpu_index: Index of the cpu.
+ */
+ struct padata_parallel_queue {
+ struct padata_list parallel;
+ struct padata_list reorder;
+- struct parallel_data *pd;
+ struct work_struct work;
+- struct work_struct reorder_work;
+ atomic_t num_obj;
+ int cpu_index;
+ };
+@@ -122,10 +117,10 @@ struct padata_cpumask {
+ * @reorder_objects: Number of objects waiting in the reorder queues.
+ * @refcnt: Number of objects holding a reference on this parallel_data.
+ * @max_seq_nr: Maximal used sequence number.
++ * @cpu: Next CPU to be processed.
+ * @cpumask: The cpumasks in use for parallel and serial workers.
++ * @reorder_work: work struct for reordering.
+ * @lock: Reorder lock.
+- * @processed: Number of already processed objects.
+- * @timer: Reorder timer.
+ */
+ struct parallel_data {
+ struct padata_instance *pinst;
+@@ -134,10 +129,10 @@ struct parallel_data {
+ atomic_t reorder_objects;
+ atomic_t refcnt;
+ atomic_t seq_nr;
++ int cpu;
+ struct padata_cpumask cpumask;
++ struct work_struct reorder_work;
+ spinlock_t lock ____cacheline_aligned;
+- unsigned int processed;
+- struct timer_list timer;
+ };
+
+ /**
+--- a/kernel/padata.c
++++ b/kernel/padata.c
+@@ -163,23 +163,12 @@ EXPORT_SYMBOL(padata_do_parallel);
+ */
+ static struct padata_priv *padata_get_next(struct parallel_data *pd)
+ {
+- int cpu, num_cpus;
+- unsigned int next_nr, next_index;
+ struct padata_parallel_queue *next_queue;
+ struct padata_priv *padata;
+ struct padata_list *reorder;
++ int cpu = pd->cpu;
+
+- num_cpus = cpumask_weight(pd->cpumask.pcpu);
+-
+- /*
+- * Calculate the percpu reorder queue and the sequence
+- * number of the next object.
+- */
+- next_nr = pd->processed;
+- next_index = next_nr % num_cpus;
+- cpu = padata_index_to_cpu(pd, next_index);
+ next_queue = per_cpu_ptr(pd->pqueue, cpu);
+-
+ reorder = &next_queue->reorder;
+
+ spin_lock(&reorder->lock);
+@@ -190,7 +179,8 @@ static struct padata_priv *padata_get_ne
+ list_del_init(&padata->list);
+ atomic_dec(&pd->reorder_objects);
+
+- pd->processed++;
++ pd->cpu = cpumask_next_wrap(cpu, pd->cpumask.pcpu, -1,
++ false);
+
+ spin_unlock(&reorder->lock);
+ goto out;
+@@ -213,6 +203,7 @@ static void padata_reorder(struct parall
+ struct padata_priv *padata;
+ struct padata_serial_queue *squeue;
+ struct padata_instance *pinst = pd->pinst;
++ struct padata_parallel_queue *next_queue;
+
+ /*
+ * We need to ensure that only one cpu can work on dequeueing of
+@@ -244,7 +235,6 @@ static void padata_reorder(struct parall
+ * so exit immediately.
+ */
+ if (PTR_ERR(padata) == -ENODATA) {
+- del_timer(&pd->timer);
+ spin_unlock_bh(&pd->lock);
+ return;
+ }
+@@ -263,70 +253,29 @@ static void padata_reorder(struct parall
+
+ /*
+ * The next object that needs serialization might have arrived to
+- * the reorder queues in the meantime, we will be called again
+- * from the timer function if no one else cares for it.
++ * the reorder queues in the meantime.
+ *
+- * Ensure reorder_objects is read after pd->lock is dropped so we see
+- * an increment from another task in padata_do_serial. Pairs with
++ * Ensure reorder queue is read after pd->lock is dropped so we see
++ * new objects from another task in padata_do_serial. Pairs with
+ * smp_mb__after_atomic in padata_do_serial.
+ */
+ smp_mb();
+- if (atomic_read(&pd->reorder_objects)
+- && !(pinst->flags & PADATA_RESET))
+- mod_timer(&pd->timer, jiffies + HZ);
+- else
+- del_timer(&pd->timer);
+
+- return;
++ next_queue = per_cpu_ptr(pd->pqueue, pd->cpu);
++ if (!list_empty(&next_queue->reorder.list))
++ queue_work(pinst->wq, &pd->reorder_work);
+ }
+
+ static void invoke_padata_reorder(struct work_struct *work)
+ {
+- struct padata_parallel_queue *pqueue;
+ struct parallel_data *pd;
+
+ local_bh_disable();
+- pqueue = container_of(work, struct padata_parallel_queue, reorder_work);
+- pd = pqueue->pd;
++ pd = container_of(work, struct parallel_data, reorder_work);
+ padata_reorder(pd);
+ local_bh_enable();
+ }
+
+-static void padata_reorder_timer(unsigned long arg)
+-{
+- struct parallel_data *pd = (struct parallel_data *)arg;
+- unsigned int weight;
+- int target_cpu, cpu;
+-
+- cpu = get_cpu();
+-
+- /* We don't lock pd here to not interfere with parallel processing
+- * padata_reorder() calls on other CPUs. We just need any CPU out of
+- * the cpumask.pcpu set. It would be nice if it's the right one but
+- * it doesn't matter if we're off to the next one by using an outdated
+- * pd->processed value.
+- */
+- weight = cpumask_weight(pd->cpumask.pcpu);
+- target_cpu = padata_index_to_cpu(pd, pd->processed % weight);
+-
+- /* ensure to call the reorder callback on the correct CPU */
+- if (cpu != target_cpu) {
+- struct padata_parallel_queue *pqueue;
+- struct padata_instance *pinst;
+-
+- /* The timer function is serialized wrt itself -- no locking
+- * needed.
+- */
+- pinst = pd->pinst;
+- pqueue = per_cpu_ptr(pd->pqueue, target_cpu);
+- queue_work_on(target_cpu, pinst->wq, &pqueue->reorder_work);
+- } else {
+- padata_reorder(pd);
+- }
+-
+- put_cpu();
+-}
+-
+ static void padata_serial_worker(struct work_struct *serial_work)
+ {
+ struct padata_serial_queue *squeue;
+@@ -374,9 +323,8 @@ void padata_do_serial(struct padata_priv
+
+ cpu = get_cpu();
+
+- /* We need to run on the same CPU padata_do_parallel(.., padata, ..)
+- * was called on -- or, at least, enqueue the padata object into the
+- * correct per-cpu queue.
++ /* We need to enqueue the padata object into the correct
++ * per-cpu queue.
+ */
+ if (cpu != padata->cpu) {
+ reorder_via_wq = 1;
+@@ -386,12 +334,12 @@ void padata_do_serial(struct padata_priv
+ pqueue = per_cpu_ptr(pd->pqueue, cpu);
+
+ spin_lock(&pqueue->reorder.lock);
+- atomic_inc(&pd->reorder_objects);
+ list_add_tail(&padata->list, &pqueue->reorder.list);
++ atomic_inc(&pd->reorder_objects);
+ spin_unlock(&pqueue->reorder.lock);
+
+ /*
+- * Ensure the atomic_inc of reorder_objects above is ordered correctly
++ * Ensure the addition to the reorder list is ordered correctly
+ * with the trylock of pd->lock in padata_reorder. Pairs with smp_mb
+ * in padata_reorder.
+ */
+@@ -399,13 +347,7 @@ void padata_do_serial(struct padata_priv
+
+ put_cpu();
+
+- /* If we're running on the wrong CPU, call padata_reorder() via a
+- * kernel worker.
+- */
+- if (reorder_via_wq)
+- queue_work_on(cpu, pd->pinst->wq, &pqueue->reorder_work);
+- else
+- padata_reorder(pd);
++ padata_reorder(pd);
+ }
+ EXPORT_SYMBOL(padata_do_serial);
+
+@@ -455,14 +397,12 @@ static void padata_init_pqueues(struct p
+ cpu_index = 0;
+ for_each_cpu(cpu, pd->cpumask.pcpu) {
+ pqueue = per_cpu_ptr(pd->pqueue, cpu);
+- pqueue->pd = pd;
+ pqueue->cpu_index = cpu_index;
+ cpu_index++;
+
+ __padata_list_init(&pqueue->reorder);
+ __padata_list_init(&pqueue->parallel);
+ INIT_WORK(&pqueue->work, padata_parallel_worker);
+- INIT_WORK(&pqueue->reorder_work, invoke_padata_reorder);
+ atomic_set(&pqueue->num_obj, 0);
+ }
+ }
+@@ -490,12 +430,13 @@ static struct parallel_data *padata_allo
+
+ padata_init_pqueues(pd);
+ padata_init_squeues(pd);
+- setup_timer(&pd->timer, padata_reorder_timer, (unsigned long)pd);
+ atomic_set(&pd->seq_nr, -1);
+ atomic_set(&pd->reorder_objects, 0);
+ atomic_set(&pd->refcnt, 0);
+ pd->pinst = pinst;
+ spin_lock_init(&pd->lock);
++ pd->cpu = cpumask_first(pcpumask);
++ INIT_WORK(&pd->reorder_work, invoke_padata_reorder);
+
+ return pd;
+
+@@ -530,8 +471,6 @@ static void padata_flush_queues(struct p
+ flush_work(&pqueue->work);
+ }
+
+- del_timer_sync(&pd->timer);
+-
+ if (atomic_read(&pd->reorder_objects))
+ padata_reorder(pd);
+
diff --git a/queue-3.16/series b/queue-3.16/series
index 17b019e1..43cf85f5 100644
--- a/queue-3.16/series
+++ b/queue-3.16/series
@@ -1,7 +1,15 @@
fs-namespace.c-fix-mountpoint-reference-counter-race.patch
propagate_one-mnt_set_mountpoint-needs-mount_lock.patch
spi-spi-dw-add-lock-protect-dw_spi-rx-tx-to-prevent-concurrent-calls.patch
+padata-remove-unused-but-set-variables.patch
+padata-avoid-race-in-reordering.patch
+padata-get_next-is-never-null.patch
+padata-ensure-the-reorder-timer-callback-runs-on-the-correct-cpu.patch
+padata-ensure-padata_do_serial-runs-on-the-correct-cpu.patch
+padata-replace-delayed-timer-with-immediate-workqueue-in.patch
+padata-initialize-pd-cpu-with-effective-cpumask.patch
padata-remove-broken-queue-flushing.patch
+padata-purge-get_cpu-and-reorder_via_wq-from-padata_do_serial.patch
crypto-pcrypt-fix-user-after-free-on-module-unload.patch
crypto-pcrypt-do-not-clear-may_sleep-flag-in-original-request.patch
padata-always-acquire-cpu_hotplug_lock-before-pinst-lock.patch