summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGreg Kroah-Hartman <gregkh@suse.de>2011-08-19 09:58:51 -0700
committerGreg Kroah-Hartman <gregkh@suse.de>2011-08-19 09:58:51 -0700
commit947bd36fdaa2b980949a83579f04076b7e90dc6a (patch)
tree4c93486d66e5c0ff134f2536e2af87075d41e491
parent18651efa7d309f1cd2c1ed1a305a654fc645b5a3 (diff)
downloadstable-queue-947bd36fdaa2b980949a83579f04076b7e90dc6a.tar.gz
3.0 patches
-rw-r--r--queue-3.0/nfsv4.1-fix-the-callback-highest_used_slotid-behaviour.patch179
-rw-r--r--queue-3.0/nfsv4.1-return-nfs4err_badsession-to-callbacks-during.patch44
-rw-r--r--queue-3.0/pata_via-disable-atapi-dma-on-averatec-3200.patch59
-rw-r--r--queue-3.0/pnfs-obj-bug-when-we-are-running-out-of-bio.patch69
-rw-r--r--queue-3.0/pnfs-obj-fix-the-comp_index-0-case.patch97
-rw-r--r--queue-3.0/series6
-rw-r--r--queue-3.0/x86-mtrr-lock-stop-machine-during-mtrr-rendezvous-sequence.patch109
7 files changed, 563 insertions, 0 deletions
diff --git a/queue-3.0/nfsv4.1-fix-the-callback-highest_used_slotid-behaviour.patch b/queue-3.0/nfsv4.1-fix-the-callback-highest_used_slotid-behaviour.patch
new file mode 100644
index 0000000000..3c53bf3500
--- /dev/null
+++ b/queue-3.0/nfsv4.1-fix-the-callback-highest_used_slotid-behaviour.patch
@@ -0,0 +1,179 @@
+From 55a673990ec04cf63005318bcf08c2b0046e5778 Mon Sep 17 00:00:00 2001
+From: Trond Myklebust <Trond.Myklebust@netapp.com>
+Date: Tue, 2 Aug 2011 14:46:29 -0400
+Subject: NFSv4.1: Fix the callback 'highest_used_slotid' behaviour
+
+From: Trond Myklebust <Trond.Myklebust@netapp.com>
+
+commit 55a673990ec04cf63005318bcf08c2b0046e5778 upstream.
+
+Currently, there is no guarantee that we will call nfs4_cb_take_slot() even
+though nfs4_callback_compound() will consistently call
+nfs4_cb_free_slot() provided the cb_process_state has set the 'clp' field.
+The result is that we can trigger the BUG_ON() upon the next call to
+nfs4_cb_take_slot().
+
+This patch fixes the above problem by using the slot id that was taken in
+the CB_SEQUENCE operation as a flag for whether or not we need to call
+nfs4_cb_free_slot().
+It also fixes an atomicity problem: we need to set tbl->highest_used_slotid
+atomically with the check for NFS4_SESSION_DRAINING, otherwise we end up
+racing with the various tests in nfs4_begin_drain_session().
+
+Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/nfs/callback.h | 2 +-
+ fs/nfs/callback_proc.c | 20 ++++++++++++++------
+ fs/nfs/callback_xdr.c | 24 +++++++-----------------
+ 3 files changed, 22 insertions(+), 24 deletions(-)
+
+--- a/fs/nfs/callback.h
++++ b/fs/nfs/callback.h
+@@ -38,6 +38,7 @@ enum nfs4_callback_opnum {
+ struct cb_process_state {
+ __be32 drc_status;
+ struct nfs_client *clp;
++ int slotid;
+ };
+
+ struct cb_compound_hdr_arg {
+@@ -166,7 +167,6 @@ extern unsigned nfs4_callback_layoutreca
+ void *dummy, struct cb_process_state *cps);
+
+ extern void nfs4_check_drain_bc_complete(struct nfs4_session *ses);
+-extern void nfs4_cb_take_slot(struct nfs_client *clp);
+
+ struct cb_devicenotifyitem {
+ uint32_t cbd_notify_type;
+--- a/fs/nfs/callback_proc.c
++++ b/fs/nfs/callback_proc.c
+@@ -333,7 +333,7 @@ validate_seqid(struct nfs4_slot_table *t
+ /* Normal */
+ if (likely(args->csa_sequenceid == slot->seq_nr + 1)) {
+ slot->seq_nr++;
+- return htonl(NFS4_OK);
++ goto out_ok;
+ }
+
+ /* Replay */
+@@ -352,11 +352,14 @@ validate_seqid(struct nfs4_slot_table *t
+ /* Wraparound */
+ if (args->csa_sequenceid == 1 && (slot->seq_nr + 1) == 0) {
+ slot->seq_nr = 1;
+- return htonl(NFS4_OK);
++ goto out_ok;
+ }
+
+ /* Misordered request */
+ return htonl(NFS4ERR_SEQ_MISORDERED);
++out_ok:
++ tbl->highest_used_slotid = args->csa_slotid;
++ return htonl(NFS4_OK);
+ }
+
+ /*
+@@ -418,26 +421,32 @@ __be32 nfs4_callback_sequence(struct cb_
+ struct cb_sequenceres *res,
+ struct cb_process_state *cps)
+ {
++ struct nfs4_slot_table *tbl;
+ struct nfs_client *clp;
+ int i;
+ __be32 status = htonl(NFS4ERR_BADSESSION);
+
+- cps->clp = NULL;
+-
+ clp = nfs4_find_client_sessionid(args->csa_addr, &args->csa_sessionid);
+ if (clp == NULL)
+ goto out;
+
++ tbl = &clp->cl_session->bc_slot_table;
++
++ spin_lock(&tbl->slot_tbl_lock);
+ /* state manager is resetting the session */
+ if (test_bit(NFS4_SESSION_DRAINING, &clp->cl_session->session_state)) {
+- status = NFS4ERR_DELAY;
++ spin_unlock(&tbl->slot_tbl_lock);
++ status = htonl(NFS4ERR_DELAY);
+ goto out;
+ }
+
+ status = validate_seqid(&clp->cl_session->bc_slot_table, args);
++ spin_unlock(&tbl->slot_tbl_lock);
+ if (status)
+ goto out;
+
++ cps->slotid = args->csa_slotid;
++
+ /*
+ * Check for pending referring calls. If a match is found, a
+ * related callback was received before the response to the original
+@@ -454,7 +463,6 @@ __be32 nfs4_callback_sequence(struct cb_
+ res->csr_slotid = args->csa_slotid;
+ res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
+ res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
+- nfs4_cb_take_slot(clp);
+
+ out:
+ cps->clp = clp; /* put in nfs4_callback_compound */
+--- a/fs/nfs/callback_xdr.c
++++ b/fs/nfs/callback_xdr.c
+@@ -754,26 +754,15 @@ static void nfs4_callback_free_slot(stru
+ * Let the state manager know callback processing done.
+ * A single slot, so highest used slotid is either 0 or -1
+ */
+- tbl->highest_used_slotid--;
++ tbl->highest_used_slotid = -1;
+ nfs4_check_drain_bc_complete(session);
+ spin_unlock(&tbl->slot_tbl_lock);
+ }
+
+-static void nfs4_cb_free_slot(struct nfs_client *clp)
++static void nfs4_cb_free_slot(struct cb_process_state *cps)
+ {
+- if (clp && clp->cl_session)
+- nfs4_callback_free_slot(clp->cl_session);
+-}
+-
+-/* A single slot, so highest used slotid is either 0 or -1 */
+-void nfs4_cb_take_slot(struct nfs_client *clp)
+-{
+- struct nfs4_slot_table *tbl = &clp->cl_session->bc_slot_table;
+-
+- spin_lock(&tbl->slot_tbl_lock);
+- tbl->highest_used_slotid++;
+- BUG_ON(tbl->highest_used_slotid != 0);
+- spin_unlock(&tbl->slot_tbl_lock);
++ if (cps->slotid != -1)
++ nfs4_callback_free_slot(cps->clp->cl_session);
+ }
+
+ #else /* CONFIG_NFS_V4_1 */
+@@ -784,7 +773,7 @@ preprocess_nfs41_op(int nop, unsigned in
+ return htonl(NFS4ERR_MINOR_VERS_MISMATCH);
+ }
+
+-static void nfs4_cb_free_slot(struct nfs_client *clp)
++static void nfs4_cb_free_slot(struct cb_process_state *cps)
+ {
+ }
+ #endif /* CONFIG_NFS_V4_1 */
+@@ -866,6 +855,7 @@ static __be32 nfs4_callback_compound(str
+ struct cb_process_state cps = {
+ .drc_status = 0,
+ .clp = NULL,
++ .slotid = -1,
+ };
+ unsigned int nops = 0;
+
+@@ -906,7 +896,7 @@ static __be32 nfs4_callback_compound(str
+
+ *hdr_res.status = status;
+ *hdr_res.nops = htonl(nops);
+- nfs4_cb_free_slot(cps.clp);
++ nfs4_cb_free_slot(&cps);
+ nfs_put_client(cps.clp);
+ dprintk("%s: done, status = %u\n", __func__, ntohl(status));
+ return rpc_success;
diff --git a/queue-3.0/nfsv4.1-return-nfs4err_badsession-to-callbacks-during.patch b/queue-3.0/nfsv4.1-return-nfs4err_badsession-to-callbacks-during.patch
new file mode 100644
index 0000000000..48806c3dfd
--- /dev/null
+++ b/queue-3.0/nfsv4.1-return-nfs4err_badsession-to-callbacks-during.patch
@@ -0,0 +1,44 @@
+From 910ac68a2b80c7de95bc8488734067b1bb15d583 Mon Sep 17 00:00:00 2001
+From: Trond Myklebust <Trond.Myklebust@netapp.com>
+Date: Tue, 2 Aug 2011 14:46:52 -0400
+Subject: NFSv4.1: Return NFS4ERR_BADSESSION to callbacks during
+ session resets
+
+From: Trond Myklebust <Trond.Myklebust@netapp.com>
+
+commit 910ac68a2b80c7de95bc8488734067b1bb15d583 upstream.
+
+If the client is in the process of resetting the session when it receives
+a callback, then returning NFS4ERR_DELAY may cause a deadlock with the
+DESTROY_SESSION call.
+
+Basically, if the client returns NFS4ERR_DELAY in response to the
+CB_SEQUENCE call, then the server is entitled to believe that the
+client is busy because it is already processing that call. In that
+case, the server is perfectly entitled to respond with a
+NFS4ERR_BACK_CHAN_BUSY to any DESTROY_SESSION call.
+
+Fix this by having the client reply with a NFS4ERR_BADSESSION in
+response to the callback if it is resetting the session.
+
+Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/nfs/callback_proc.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/fs/nfs/callback_proc.c
++++ b/fs/nfs/callback_proc.c
+@@ -437,6 +437,11 @@ __be32 nfs4_callback_sequence(struct cb_
+ if (test_bit(NFS4_SESSION_DRAINING, &clp->cl_session->session_state)) {
+ spin_unlock(&tbl->slot_tbl_lock);
+ status = htonl(NFS4ERR_DELAY);
++ /* Return NFS4ERR_BADSESSION if we're draining the session
++ * in order to reset it.
++ */
++ if (test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state))
++ status = htonl(NFS4ERR_BADSESSION);
+ goto out;
+ }
+
diff --git a/queue-3.0/pata_via-disable-atapi-dma-on-averatec-3200.patch b/queue-3.0/pata_via-disable-atapi-dma-on-averatec-3200.patch
new file mode 100644
index 0000000000..065f37e90f
--- /dev/null
+++ b/queue-3.0/pata_via-disable-atapi-dma-on-averatec-3200.patch
@@ -0,0 +1,59 @@
+From 6d0e194d2eefcaab6dbdca1f639748660144acb5 Mon Sep 17 00:00:00 2001
+From: Tejun Heo <tj@kernel.org>
+Date: Thu, 4 Aug 2011 11:15:07 +0200
+Subject: pata_via: disable ATAPI DMA on AVERATEC 3200
+
+From: Tejun Heo <tj@kernel.org>
+
+commit 6d0e194d2eefcaab6dbdca1f639748660144acb5 upstream.
+
+On AVERATEC 3200, pata_via causes memory corruption with ATAPI DMA,
+which often leads to random kernel oops. The cause of the problem is
+not well understood yet and only small subset of machines using the
+controller seem affected. Blacklist ATAPI DMA on the machine.
+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=11426
+Reported-and-tested-by: Jim Bray <jimsantelmo@gmail.com>
+Cc: Alan Cox <alan@linux.intel.com>
+Signed-off-by: Jeff Garzik <jgarzik@pobox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/ata/pata_via.c | 18 ++++++++++++++++++
+ 1 file changed, 18 insertions(+)
+
+--- a/drivers/ata/pata_via.c
++++ b/drivers/ata/pata_via.c
+@@ -124,6 +124,17 @@ static const struct via_isa_bridge {
+ { NULL }
+ };
+
++static const struct dmi_system_id no_atapi_dma_dmi_table[] = {
++ {
++ .ident = "AVERATEC 3200",
++ .matches = {
++ DMI_MATCH(DMI_BOARD_VENDOR, "AVERATEC"),
++ DMI_MATCH(DMI_BOARD_NAME, "3200"),
++ },
++ },
++ { }
++};
++
+ struct via_port {
+ u8 cached_device;
+ };
+@@ -355,6 +366,13 @@ static unsigned long via_mode_filter(str
+ mask &= ~ ATA_MASK_UDMA;
+ }
+ }
++
++ if (dev->class == ATA_DEV_ATAPI &&
++ dmi_check_system(no_atapi_dma_dmi_table)) {
++ ata_dev_warn(dev, "controller locks up on ATAPI DMA, forcing PIO\n");
++ mask &= ATA_MASK_PIO;
++ }
++
+ return mask;
+ }
+
diff --git a/queue-3.0/pnfs-obj-bug-when-we-are-running-out-of-bio.patch b/queue-3.0/pnfs-obj-bug-when-we-are-running-out-of-bio.patch
new file mode 100644
index 0000000000..a9db245bdf
--- /dev/null
+++ b/queue-3.0/pnfs-obj-bug-when-we-are-running-out-of-bio.patch
@@ -0,0 +1,69 @@
+From 20618b21da0796115e81906d24ff1601552701b7 Mon Sep 17 00:00:00 2001
+From: Boaz Harrosh <bharrosh@panasas.com>
+Date: Wed, 3 Aug 2011 21:54:33 -0700
+Subject: pnfs-obj: Bug when we are running out of bio
+
+From: Boaz Harrosh <bharrosh@panasas.com>
+
+commit 20618b21da0796115e81906d24ff1601552701b7 upstream.
+
+When we have a situation where the number of pages we want
+to encode is bigger than the size of the bio (which can
+currently happen only when all IO is going to a single device,
+e.g. group_width==1), the IO is submitted short and we
+report back only the number of bytes we actually wrote/read
+and all is fine. BUT ...
+
+There was a bug where the current length counter was advanced
+before the failure to add the extra page, leading to a situation
+where the CDB length was one page longer than the actual bio size,
+which is of course rejected by the osd-target.
+
+While here, also fix the bio size calculation in the case
+that we received more than one group of devices.
+
+Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
+Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/nfs/objlayout/objio_osd.c | 12 +++++-------
+ 1 file changed, 5 insertions(+), 7 deletions(-)
+
+--- a/fs/nfs/objlayout/objio_osd.c
++++ b/fs/nfs/objlayout/objio_osd.c
+@@ -587,22 +587,19 @@ static void _calc_stripe_info(struct obj
+ }
+
+ static int _add_stripe_unit(struct objio_state *ios, unsigned *cur_pg,
+- unsigned pgbase, struct _objio_per_comp *per_dev, int cur_len,
++ unsigned pgbase, struct _objio_per_comp *per_dev, int len,
+ gfp_t gfp_flags)
+ {
+ unsigned pg = *cur_pg;
++ int cur_len = len;
+ struct request_queue *q =
+ osd_request_queue(_io_od(ios, per_dev->dev));
+
+- per_dev->length += cur_len;
+-
+ if (per_dev->bio == NULL) {
+- unsigned stripes = ios->layout->num_comps /
+- ios->layout->mirrors_p1;
+- unsigned pages_in_stripe = stripes *
++ unsigned pages_in_stripe = ios->layout->group_width *
+ (ios->layout->stripe_unit / PAGE_SIZE);
+ unsigned bio_size = (ios->ol_state.nr_pages + pages_in_stripe) /
+- stripes;
++ ios->layout->group_width;
+
+ if (BIO_MAX_PAGES_KMALLOC < bio_size)
+ bio_size = BIO_MAX_PAGES_KMALLOC;
+@@ -630,6 +627,7 @@ static int _add_stripe_unit(struct objio
+ }
+ BUG_ON(cur_len);
+
++ per_dev->length += len;
+ *cur_pg = pg;
+ return 0;
+ }
diff --git a/queue-3.0/pnfs-obj-fix-the-comp_index-0-case.patch b/queue-3.0/pnfs-obj-fix-the-comp_index-0-case.patch
new file mode 100644
index 0000000000..6d10741b5a
--- /dev/null
+++ b/queue-3.0/pnfs-obj-fix-the-comp_index-0-case.patch
@@ -0,0 +1,97 @@
+From 9af7db3228acc286c50e3a0f054ec982efdbc6c6 Mon Sep 17 00:00:00 2001
+From: Boaz Harrosh <bharrosh@panasas.com>
+Date: Wed, 3 Aug 2011 21:52:51 -0700
+Subject: pnfs-obj: Fix the comp_index != 0 case
+
+From: Boaz Harrosh <bharrosh@panasas.com>
+
+commit 9af7db3228acc286c50e3a0f054ec982efdbc6c6 upstream.
+
+There were bugs in the case of partial layout where olo_comp_index
+is not zero. This used to work and was tested but one of the later
+cleanup SQUASHMEs broke it and was not tested since.
+
+Also add a dprintk that prints the received layout parameters.
+Everything else was already printed.
+
+Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
+Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/nfs/objlayout/objio_osd.c | 16 +++++++---------
+ fs/nfs/objlayout/pnfs_osd_xdr_cli.c | 3 +++
+ 2 files changed, 10 insertions(+), 9 deletions(-)
+
+--- a/fs/nfs/objlayout/objio_osd.c
++++ b/fs/nfs/objlayout/objio_osd.c
+@@ -479,7 +479,6 @@ static int _io_check(struct objio_state
+ for (i = 0; i < ios->numdevs; i++) {
+ struct osd_sense_info osi;
+ struct osd_request *or = ios->per_dev[i].or;
+- unsigned dev;
+ int ret;
+
+ if (!or)
+@@ -500,9 +499,8 @@ static int _io_check(struct objio_state
+
+ continue; /* we recovered */
+ }
+- dev = ios->per_dev[i].dev;
+- objlayout_io_set_result(&ios->ol_state, dev,
+- &ios->layout->comps[dev].oc_object_id,
++ objlayout_io_set_result(&ios->ol_state, i,
++ &ios->layout->comps[i].oc_object_id,
+ osd_pri_2_pnfs_err(osi.osd_err_pri),
+ ios->per_dev[i].offset,
+ ios->per_dev[i].length,
+@@ -650,7 +648,7 @@ static int _prepare_one_group(struct obj
+ int ret = 0;
+
+ while (length) {
+- struct _objio_per_comp *per_dev = &ios->per_dev[dev];
++ struct _objio_per_comp *per_dev = &ios->per_dev[dev - first_dev];
+ unsigned cur_len, page_off = 0;
+
+ if (!per_dev->length) {
+@@ -670,8 +668,8 @@ static int _prepare_one_group(struct obj
+ cur_len = stripe_unit;
+ }
+
+- if (max_comp < dev)
+- max_comp = dev;
++ if (max_comp < dev - first_dev)
++ max_comp = dev - first_dev;
+ } else {
+ cur_len = stripe_unit;
+ }
+@@ -806,7 +804,7 @@ static int _read_mirrors(struct objio_st
+ struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp];
+ unsigned dev = per_dev->dev;
+ struct pnfs_osd_object_cred *cred =
+- &ios->layout->comps[dev];
++ &ios->layout->comps[cur_comp];
+ struct osd_obj_id obj = {
+ .partition = cred->oc_object_id.oid_partition_id,
+ .id = cred->oc_object_id.oid_object_id,
+@@ -904,7 +902,7 @@ static int _write_mirrors(struct objio_s
+ for (; cur_comp < last_comp; ++cur_comp, ++dev) {
+ struct osd_request *or = NULL;
+ struct pnfs_osd_object_cred *cred =
+- &ios->layout->comps[dev];
++ &ios->layout->comps[cur_comp];
+ struct osd_obj_id obj = {
+ .partition = cred->oc_object_id.oid_partition_id,
+ .id = cred->oc_object_id.oid_object_id,
+--- a/fs/nfs/objlayout/pnfs_osd_xdr_cli.c
++++ b/fs/nfs/objlayout/pnfs_osd_xdr_cli.c
+@@ -170,6 +170,9 @@ int pnfs_osd_xdr_decode_layout_map(struc
+ p = _osd_xdr_decode_data_map(p, &layout->olo_map);
+ layout->olo_comps_index = be32_to_cpup(p++);
+ layout->olo_num_comps = be32_to_cpup(p++);
++ dprintk("%s: olo_comps_index=%d olo_num_comps=%d\n", __func__,
++ layout->olo_comps_index, layout->olo_num_comps);
++
+ iter->total_comps = layout->olo_num_comps;
+ return 0;
+ }
diff --git a/queue-3.0/series b/queue-3.0/series
index 7a03840847..07bb57d881 100644
--- a/queue-3.0/series
+++ b/queue-3.0/series
@@ -4,3 +4,9 @@ befs-validate-length-of-long-symbolic-links.patch
i7core_edac-fixed-typo-in-error-count-calculation.patch
possible-memory-corruption-on-mount.patch
x86-intel-power-correct-the-msr_ia32_energy_perf_bias.patch
+pata_via-disable-atapi-dma-on-averatec-3200.patch
+pnfs-obj-fix-the-comp_index-0-case.patch
+pnfs-obj-bug-when-we-are-running-out-of-bio.patch
+nfsv4.1-fix-the-callback-highest_used_slotid-behaviour.patch
+nfsv4.1-return-nfs4err_badsession-to-callbacks-during.patch
+x86-mtrr-lock-stop-machine-during-mtrr-rendezvous-sequence.patch
diff --git a/queue-3.0/x86-mtrr-lock-stop-machine-during-mtrr-rendezvous-sequence.patch b/queue-3.0/x86-mtrr-lock-stop-machine-during-mtrr-rendezvous-sequence.patch
new file mode 100644
index 0000000000..02094b1768
--- /dev/null
+++ b/queue-3.0/x86-mtrr-lock-stop-machine-during-mtrr-rendezvous-sequence.patch
@@ -0,0 +1,109 @@
+From 6d3321e8e2b3bf6a5892e2ef673c7bf536e3f904 Mon Sep 17 00:00:00 2001
+From: Suresh Siddha <suresh.b.siddha@intel.com>
+Date: Thu, 23 Jun 2011 11:19:26 -0700
+Subject: x86, mtrr: lock stop machine during MTRR rendezvous sequence
+
+From: Suresh Siddha <suresh.b.siddha@intel.com>
+
+commit 6d3321e8e2b3bf6a5892e2ef673c7bf536e3f904 upstream.
+
+MTRR rendezvous sequence using stop_one_cpu_nowait() can potentially
+happen in parallel with another system wide rendezvous using
+stop_machine(). This can lead to deadlock (The order in which
+works are queued can be different on different cpu's. Some cpu's
+will be running the first rendezvous handler and others will be running
+the second rendezvous handler. Each set waiting for the other set to join
+for the system wide rendezvous, leading to a deadlock).
+
+MTRR rendezvous sequence is not implemented using stop_machine() as this
+gets called both from the process context as well as the cpu online paths
+(where the cpu has not come online and the interrupts are disabled etc).
+stop_machine() works with only online cpus.
+
+For now, take the stop_machine mutex in the MTRR rendezvous sequence that
+gets called from an online cpu (here we are in the process context
+and can potentially sleep while taking the mutex). And the MTRR rendezvous
+that gets triggered during cpu online doesn't need to take this stop_machine
+lock (as the stop_machine() already ensures that there is no cpu hotplug
+going on in parallel by doing get_online_cpus())
+
+ TBD: Pursue a cleaner solution of extending the stop_machine()
+ infrastructure to handle the case where the calling cpu is
+ still not online and use this for MTRR rendezvous sequence.
+
+fixes: https://bugzilla.novell.com/show_bug.cgi?id=672008
+
+Reported-by: Vadim Kotelnikov <vadimuzzz@inbox.ru>
+Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
+Link: http://lkml.kernel.org/r/20110623182056.807230326@sbsiddha-MOBL3.sc.intel.com
+Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/x86/kernel/cpu/mtrr/main.c | 23 +++++++++++++++++++++++
+ include/linux/stop_machine.h | 2 ++
+ kernel/stop_machine.c | 2 +-
+ 3 files changed, 26 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/kernel/cpu/mtrr/main.c
++++ b/arch/x86/kernel/cpu/mtrr/main.c
+@@ -248,6 +248,25 @@ set_mtrr(unsigned int reg, unsigned long
+ unsigned long flags;
+ int cpu;
+
++#ifdef CONFIG_SMP
++ /*
++ * If this cpu is not yet active, we are in the cpu online path. There
++ * can be no stop_machine() in parallel, as stop machine ensures this
++ * by using get_online_cpus(). We can skip taking the stop_cpus_mutex,
++ * as we don't need it and also we can't afford to block while waiting
++ * for the mutex.
++ *
++ * If this cpu is active, we need to prevent stop_machine() happening
++ * in parallel by taking the stop cpus mutex.
++ *
++ * Also, this is called in the context of cpu online path or in the
++ * context where cpu hotplug is prevented. So checking the active status
++ * of the raw_smp_processor_id() is safe.
++ */
++ if (cpu_active(raw_smp_processor_id()))
++ mutex_lock(&stop_cpus_mutex);
++#endif
++
+ preempt_disable();
+
+ data.smp_reg = reg;
+@@ -330,6 +349,10 @@ set_mtrr(unsigned int reg, unsigned long
+
+ local_irq_restore(flags);
+ preempt_enable();
++#ifdef CONFIG_SMP
++ if (cpu_active(raw_smp_processor_id()))
++ mutex_unlock(&stop_cpus_mutex);
++#endif
+ }
+
+ /**
+--- a/include/linux/stop_machine.h
++++ b/include/linux/stop_machine.h
+@@ -27,6 +27,8 @@ struct cpu_stop_work {
+ struct cpu_stop_done *done;
+ };
+
++extern struct mutex stop_cpus_mutex;
++
+ int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg);
+ void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
+ struct cpu_stop_work *work_buf);
+--- a/kernel/stop_machine.c
++++ b/kernel/stop_machine.c
+@@ -132,8 +132,8 @@ void stop_one_cpu_nowait(unsigned int cp
+ cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu), work_buf);
+ }
+
++DEFINE_MUTEX(stop_cpus_mutex);
+ /* static data for stop_cpus */
+-static DEFINE_MUTEX(stop_cpus_mutex);
+ static DEFINE_PER_CPU(struct cpu_stop_work, stop_cpus_work);
+
+ int __stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)