aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRoland Dreier <rolandd@cisco.com>2006-09-04 16:19:25 +0000
committerRoland Dreier <rolandd@cisco.com>2006-11-09 19:57:07 -0800
commit39632d6c2a24616917cf21e1941e13ed1eab4a7f (patch)
tree0f36779ad28d7c71830e0ac73e9acac3c7a5de7a
parent1ce8eda4f646dd72d85c8f4c380eb0ebeb77e8be (diff)
downloadlibmthca-39632d6c2a24616917cf21e1941e13ed1eab4a7f.tar.gz
Fix destroy QP deadlock
Fix potential AB-BA deadlock in libmthca when destroying multiple QPs simultaneously.

Signed-off-by: Roland Dreier <rolandd@cisco.com>
-rw-r--r--ChangeLog9
-rw-r--r--src/verbs.c49
2 files changed, 46 insertions, 12 deletions
diff --git a/ChangeLog b/ChangeLog
index 0126186..a507f97 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,12 @@
+2006-09-04 Roland Dreier <rdreier@cisco.com>
+
+ * src/verbs.c (mthca_destroy_qp): Avoid potential AB-BA deadlock
+ when destroying QPs by always taking CQ locks in a consistent
+ order (lowest CQN first). The old code always took the send_cq
+ lock first, which is prone to deadlock if the send_cq of one QP is
+ the recv_cq of another QP destroyed at the same time. This bug
+ was pointed out by Dotan Barak and Jack Morgenstein.
+
2006-08-23 Roland Dreier <rdreier@cisco.com>
* src/verbs.c (mthca_resize_cq): Add a test for
diff --git a/src/verbs.c b/src/verbs.c
index 4924df0..227535b 100644
--- a/src/verbs.c
+++ b/src/verbs.c
@@ -622,6 +622,38 @@ int mthca_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
return ret;
}
+static void mthca_lock_cqs(struct ibv_qp *qp)
+{
+ struct mthca_cq *send_cq = to_mcq(qp->send_cq);
+ struct mthca_cq *recv_cq = to_mcq(qp->recv_cq);
+
+ if (send_cq == recv_cq)
+ pthread_spin_lock(&send_cq->lock);
+ else if (send_cq->cqn < recv_cq->cqn) {
+ pthread_spin_lock(&send_cq->lock);
+ pthread_spin_lock(&recv_cq->lock);
+ } else {
+ pthread_spin_lock(&recv_cq->lock);
+ pthread_spin_lock(&send_cq->lock);
+ }
+}
+
+static void mthca_unlock_cqs(struct ibv_qp *qp)
+{
+ struct mthca_cq *send_cq = to_mcq(qp->send_cq);
+ struct mthca_cq *recv_cq = to_mcq(qp->recv_cq);
+
+ if (send_cq == recv_cq)
+ pthread_spin_unlock(&send_cq->lock);
+ else if (send_cq->cqn < recv_cq->cqn) {
+ pthread_spin_unlock(&recv_cq->lock);
+ pthread_spin_unlock(&send_cq->lock);
+ } else {
+ pthread_spin_unlock(&send_cq->lock);
+ pthread_spin_unlock(&recv_cq->lock);
+ }
+}
+
int mthca_destroy_qp(struct ibv_qp *qp)
{
int ret;
@@ -631,23 +663,16 @@ int mthca_destroy_qp(struct ibv_qp *qp)
if (qp->send_cq != qp->recv_cq)
mthca_cq_clean(to_mcq(qp->send_cq), qp->qp_num, NULL);
- pthread_spin_lock(&to_mcq(qp->send_cq)->lock);
- if (qp->send_cq != qp->recv_cq)
- pthread_spin_lock(&to_mcq(qp->recv_cq)->lock);
+ mthca_lock_cqs(qp);
mthca_clear_qp(to_mctx(qp->context), qp->qp_num);
- if (qp->send_cq != qp->recv_cq)
- pthread_spin_unlock(&to_mcq(qp->recv_cq)->lock);
- pthread_spin_unlock(&to_mcq(qp->send_cq)->lock);
+ mthca_unlock_cqs(qp);
ret = ibv_cmd_destroy_qp(qp);
if (ret) {
- pthread_spin_lock(&to_mcq(qp->send_cq)->lock);
- if (qp->send_cq != qp->recv_cq)
- pthread_spin_lock(&to_mcq(qp->recv_cq)->lock);
+ mthca_lock_cqs(qp);
mthca_store_qp(to_mctx(qp->context), qp->qp_num, to_mqp(qp));
- if (qp->send_cq != qp->recv_cq)
- pthread_spin_unlock(&to_mcq(qp->recv_cq)->lock);
- pthread_spin_unlock(&to_mcq(qp->send_cq)->lock);
+ mthca_unlock_cqs(qp);
+
return ret;
}