aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Roland Dreier <rolandd@cisco.com> 2007-04-23 15:07:49 -0700
committer: Roland Dreier <rolandd@cisco.com> 2007-04-23 15:13:59 -0700
commit: bb586a91fb00e7b674e14ea6f39d9aac8042a91a (patch)
tree: ee806c0568fd3b59482953221ba1c73c2d65e42f
parent: f89e3921653950229b1da397881fe69a1d6af42b (diff)
download: libmlx4-bb586a91fb00e7b674e14ea6f39d9aac8042a91a.tar.gz
Use BlueFlame for inline sends
If BlueFlame is available, map the BlueFlame page when creating a context and use BlueFlame for inline sends.

Signed-off-by: Roland Dreier <rolandd@cisco.com>
-rw-r--r-- src/mlx4-abi.h 3
-rw-r--r-- src/mlx4.c 21
-rw-r--r-- src/mlx4.h 5
-rw-r--r-- src/qp.c 34
-rw-r--r-- src/wqe.h 2
5 files changed, 55 insertions, 10 deletions
diff --git a/src/mlx4-abi.h b/src/mlx4-abi.h
index 2a392cb..7810913 100644
--- a/src/mlx4-abi.h
+++ b/src/mlx4-abi.h
@@ -40,7 +40,8 @@
struct mlx4_alloc_ucontext_resp {
struct ibv_get_context_resp ibv_resp;
__u32 qp_tab_size;
- __u32 bf_reg_size;
+ __u16 bf_reg_size;
+ __u16 bf_regs_per_page;
};
struct mlx4_alloc_pd_resp {
diff --git a/src/mlx4.c b/src/mlx4.c
index 1d8de87..23577c1 100644
--- a/src/mlx4.c
+++ b/src/mlx4.c
@@ -149,6 +149,25 @@ static struct ibv_context *mlx4_alloc_context(struct ibv_device *ibdev, int cmd_
if (context->uar == MAP_FAILED)
goto err_free;
+ if (resp.bf_reg_size) {
+ context->bf_page = mmap(NULL, to_mdev(ibdev)->page_size,
+ PROT_WRITE, MAP_SHARED, cmd_fd,
+ to_mdev(ibdev)->page_size);
+ if (context->bf_page == MAP_FAILED) {
+ fprintf(stderr, PFX "Warning: BlueFlame available, "
+ "but failed to mmap() BlueFlame page.\n");
+ context->bf_page = NULL;
+ context->bf_buf_size = 0;
+ } else {
+ context->bf_buf_size = resp.bf_reg_size / 2;
+ context->bf_offset = 0;
+ pthread_spin_init(&context->bf_lock, PTHREAD_PROCESS_PRIVATE);
+ }
+ } else {
+ context->bf_page = NULL;
+ context->bf_buf_size = 0;
+ }
+
pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE);
context->ibv_ctx.ops = mlx4_ctx_ops;
@@ -165,6 +184,8 @@ static void mlx4_free_context(struct ibv_context *ibctx)
struct mlx4_context *context = to_mctx(ibctx);
munmap(context->uar, to_mdev(ibctx->device)->page_size);
+ if (context->bf_page)
+ munmap(context->bf_page, to_mdev(ibctx->device)->page_size);
free(context);
}
diff --git a/src/mlx4.h b/src/mlx4.h
index 8b4dc20..c4d389f 100644
--- a/src/mlx4.h
+++ b/src/mlx4.h
@@ -129,6 +129,11 @@ struct mlx4_context {
void *uar;
pthread_spinlock_t uar_lock;
+ void *bf_page;
+ int bf_buf_size;
+ int bf_offset;
+ pthread_spinlock_t bf_lock;
+
struct {
struct mlx4_qp **table;
int refcnt;
diff --git a/src/qp.c b/src/qp.c
index b9b7305..76abf75 100644
--- a/src/qp.c
+++ b/src/qp.c
@@ -91,11 +91,13 @@ static int wq_overflow(struct mlx4_wq *wq, int nreq, struct mlx4_cq *cq)
int mlx4_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
struct ibv_send_wr **bad_wr)
{
+ struct mlx4_context *ctx;
struct mlx4_qp *qp = to_mqp(ibqp);
void *wqe;
struct mlx4_wqe_ctrl_seg *ctrl;
int ind;
int nreq;
+ int inl = 0;
int ret = 0;
int size;
int i;
@@ -214,15 +216,14 @@ int mlx4_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
if (wr->send_flags & IBV_SEND_INLINE) {
if (wr->num_sge) {
struct mlx4_wqe_inline_seg *seg = wqe;
- int s = 0;
wqe += sizeof *seg;
for (i = 0; i < wr->num_sge; ++i) {
uint32_t len = wr->sg_list[i].length;
- s += len;
+ inl += len;
- if (s > qp->max_inline_data) {
+ if (inl > qp->max_inline_data) {
ret = -1;
*bad_wr = wr;
goto out;
@@ -234,8 +235,8 @@ int mlx4_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
wqe += len;
}
- seg->byte_count = htonl(MLX4_INLINE_SEG | s);
- size += (s + sizeof *seg + 15) / 16;
+ seg->byte_count = htonl(MLX4_INLINE_SEG | inl);
+ size += (inl + sizeof *seg + 15) / 16;
}
} else {
struct mlx4_wqe_data_seg *seg = wqe;
@@ -266,7 +267,25 @@ int mlx4_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
}
out:
- if (nreq) {
+ ctx = to_mctx(ibqp->context);
+
+ if (nreq == 1 && inl && size > 1 && size < ctx->bf_buf_size / 16) {
+ ctrl->owner_opcode |= htonl((qp->sq.head & 0xffff) << 8);
+ *(uint32_t *) ctrl->reserved |= qp->doorbell_qpn;
+ /*
+ * Make sure that descriptor is written to memory
+ * before writing to BlueFlame page.
+ */
+ wmb();
+
+ ++qp->sq.head;
+
+ pthread_spin_lock(&ctx->bf_lock);
+ memcpy(ctx->bf_page + ctx->bf_offset, ctrl, align(size * 16, 64));
+ /* FIXME flush wc buffers */
+ ctx->bf_offset ^= ctx->bf_buf_size;
+ pthread_spin_unlock(&ctx->bf_lock);
+ } else if (nreq) {
qp->sq.head += nreq;
/*
@@ -275,8 +294,7 @@ out:
*/
wmb();
- *(uint32_t *) (to_mctx(ibqp->context)->uar + MLX4_SEND_DOORBELL) =
- qp->doorbell_qpn;
+ *(uint32_t *) (ctx->uar + MLX4_SEND_DOORBELL) = qp->doorbell_qpn;
}
pthread_spin_unlock(&qp->sq.lock);
diff --git a/src/wqe.h b/src/wqe.h
index 5411fe2..877ebfd 100644
--- a/src/wqe.h
+++ b/src/wqe.h
@@ -53,7 +53,7 @@ enum {
struct mlx4_wqe_ctrl_seg {
uint32_t owner_opcode;
- uint8_t reserved2[3];
+ uint8_t reserved[3];
uint8_t fence_size;
/*
* High 24 bits are SRC remote buffer; low 8 bits are flags: