author     Roland Dreier <rolandd@cisco.com>  2007-06-14 13:23:33 -0700
committer  Roland Dreier <rolandd@cisco.com>  2007-06-14 14:10:41 -0700
commit     722356de9e2d624929d887447088b7a0efe0b6d6 (patch)
tree       dc5b8fcd37fbe211c7a2f6ec60e95621e28ab61e
parent     81b83b34f171b3f1de226bda82e1f55bbe31b75b (diff)
download   libmlx4-722356de9e2d624929d887447088b7a0efe0b6d6.tar.gz
Make sure inline segments in send WQEs don't cross 64 byte boundaries
Hardware requires that inline data segments do not cross a 64 byte boundary. Make sure that send work requests satisfy this by using multiple inline data segments when needed.

Based on a patch from Jack Morgenstein <jackm@dev.mellanox.co.il>.

Signed-off-by: Roland Dreier <rolandd@cisco.com>
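To make the new mlx4_post_send() logic below easier to follow, here is a minimal, self-contained sketch of the splitting idea: copy the caller's data into the work queue in chunks, closing the current inline segment and opening a new one whenever the next byte would cross a 64-byte boundary. The names and constants (INLINE_ALIGN, struct inline_seg, copy_inline) are illustrative stand-ins rather than the libmlx4 definitions, and the sketch omits the MLX4_INLINE_SEG flag and byte-order conversion that the real code applies to each segment header.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Illustrative stand-ins; the real definitions live in libmlx4's wqe.h. */
#define INLINE_ALIGN 64                      /* hardware chunk size        */
struct inline_seg { uint32_t byte_count; };  /* per-segment length header  */

/*
 * Copy `len` bytes of inline data into the WQE at `wqe`, never letting a
 * data segment cross an INLINE_ALIGN boundary.  `wqe` points at the spot
 * where the first segment header goes.  Returns the number of segments
 * written.  (The real driver also ORs in a flag bit and uses htonl().)
 */
static int copy_inline(uint8_t *wqe, const uint8_t *src, int len)
{
	struct inline_seg *seg = (struct inline_seg *) wqe;
	int off, seg_len = 0, num_seg = 0;

	wqe += sizeof *seg;
	off = (uintptr_t) wqe & (INLINE_ALIGN - 1);

	while (len >= INLINE_ALIGN - off) {
		int to_copy = INLINE_ALIGN - off;

		memcpy(wqe, src, to_copy);
		src     += to_copy;
		wqe     += to_copy;
		len     -= to_copy;
		seg_len += to_copy;

		/* Close this segment at the boundary and start a new one. */
		seg->byte_count = (uint32_t) seg_len;
		seg      = (struct inline_seg *) wqe;
		wqe     += sizeof *seg;
		off      = sizeof *seg;
		seg_len  = 0;
		++num_seg;
	}

	memcpy(wqe, src, len);
	seg_len += len;
	if (seg_len) {
		seg->byte_count = (uint32_t) seg_len;
		++num_seg;
	}
	return num_seg;
}

int main(void)
{
	/* 64-byte aligned stand-in for a send WQE buffer. */
	static uint8_t wqe[256] __attribute__((aligned(64)));
	uint8_t data[100];

	memset(data, 0xab, sizeof data);
	printf("segments used: %d\n", copy_inline(wqe + 32, data, sizeof data));
	return 0;
}

With the payload's first header placed 32 bytes into a chunk, the sketch writes three segments of 28, 60 and 12 bytes, which is the same pattern the patched loop in mlx4_post_send() produces.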
-rw-r--r--  src/qp.c   116
-rw-r--r--  src/wqe.h    5
2 files changed, 92 insertions, 29 deletions
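The sizing changes in the qp.c hunks below (the new num_inline_segs() helper and the reworked max_inline_data computation in mlx4_set_sq_sizes()) come down to a little arithmetic. The sketch that follows redoes that arithmetic with stand-in constants: the 16-byte control, 48-byte datagram and 16-byte remote address segment sizes are the ones quoted in the patch's own comment, while the 4-byte inline segment header and the helper names are assumptions made purely for illustration.

#include <stdio.h>

/* Stand-in sizes for this example; see the comment in num_inline_segs(). */
enum {
	CTRL_SEG     = 16,
	DATAGRAM_SEG = 48,
	RADDR_SEG    = 16,
	INLINE_SEG   = 4,	/* assumed size of the inline header */
	INLINE_ALIGN = 64,
};

/* Round `val` up to a multiple of `to` (a power of two). */
static int align_up(int val, int to)
{
	return (val + to - 1) & ~(to - 1);
}

/* Mirrors num_inline_segs(): worst-case number of inline headers needed. */
static int inline_segs(int data, int is_ud)
{
	/* Offset of the first data segment within its 64-byte chunk. */
	int off = is_ud ? (CTRL_SEG + DATAGRAM_SEG) % INLINE_ALIGN   /*  0 */
			: (CTRL_SEG + RADDR_SEG)    % INLINE_ALIGN;  /* 32 */

	data += off;
	return (data + INLINE_ALIGN - INLINE_SEG - 1) /
	       (INLINE_ALIGN - INLINE_SEG);
}

/* Mirrors the new max_inline_data formula in mlx4_set_sq_sizes(). */
static int max_inline(int wqe_size)
{
	return wqe_size -
	       INLINE_SEG * (align_up(wqe_size, INLINE_ALIGN) / INLINE_ALIGN);
}

int main(void)
{
	/* 100 bytes of inline data on an RC QP: the first chunk holds at
	 * most 64 - 32 - 4 = 28 bytes, so 3 headers in the worst case. */
	printf("RC, 100 bytes -> %d segs\n", inline_segs(100, 0));

	/* The same data on a UD QP starts chunk-aligned: 2 headers suffice. */
	printf("UD, 100 bytes -> %d segs\n", inline_segs(100, 1));

	/* A hypothetical 480-byte WQE (32 mod 64, as the comment in
	 * mlx4_set_sq_sizes() notes for non-UD QPs) spans 8 chunks, so
	 * 8 * 4 = 32 bytes go to headers, leaving 448 bytes for data. */
	printf("wqe_size 480 -> max_inline %d\n", max_inline(480));
	return 0;
}

Running the sketch prints 3 and 2 segments for 100 bytes of inline data on RC and UD QPs respectively, and 448 bytes of usable inline capacity for the 480-byte example WQE.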
diff --git a/src/qp.c b/src/qp.c
index 301f7cb..0d53623 100644
--- a/src/qp.c
+++ b/src/qp.c
@@ -240,33 +240,59 @@ int mlx4_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
break;
}
- if (wr->send_flags & IBV_SEND_INLINE) {
- if (wr->num_sge) {
- struct mlx4_wqe_inline_seg *seg = wqe;
-
- inl = 0;
- wqe += sizeof *seg;
- for (i = 0; i < wr->num_sge; ++i) {
- uint32_t len = wr->sg_list[i].length;
-
- inl += len;
-
- if (inl > qp->max_inline_data) {
- inl = 0;
- ret = -1;
- *bad_wr = wr;
- goto out;
- }
-
- memcpy(wqe,
- (void *) (intptr_t) wr->sg_list[i].addr,
- len);
- wqe += len;
+ if (wr->send_flags & IBV_SEND_INLINE && wr->num_sge) {
+ struct mlx4_wqe_inline_seg *seg;
+ void *addr;
+ int len, seg_len;
+ int num_seg;
+ int off, to_copy;
+
+ inl = 0;
+
+ seg = wqe;
+ wqe += sizeof *seg;
+ off = ((uintptr_t) wqe) & (MLX4_INLINE_ALIGN - 1);
+ num_seg = 0;
+ seg_len = 0;
+
+ for (i = 0; i < wr->num_sge; ++i) {
+ addr = (void *) (uintptr_t) wr->sg_list[i].addr;
+ len = wr->sg_list[i].length;
+ inl += len;
+
+ if (inl > qp->max_inline_data) {
+ inl = 0;
+ ret = -1;
+ *bad_wr = wr;
+ goto out;
}
- seg->byte_count = htonl(MLX4_INLINE_SEG | inl);
- size += (inl + sizeof *seg + 15) / 16;
+ while (len >= MLX4_INLINE_ALIGN - off) {
+ to_copy = MLX4_INLINE_ALIGN - off;
+ memcpy(wqe, addr, to_copy);
+ len -= to_copy;
+ wqe += to_copy;
+ addr += to_copy;
+ seg_len += to_copy;
+ seg->byte_count = htonl(MLX4_INLINE_SEG | seg_len);
+ seg_len = 0;
+ seg = wqe;
+ wqe += sizeof *seg;
+ off = sizeof *seg;
+ ++num_seg;
+ }
+
+ memcpy(wqe, addr, len);
+ wqe += len;
+ seg_len += len;
+ }
+
+ if (seg_len) {
+ ++num_seg;
+ seg->byte_count = htonl(MLX4_INLINE_SEG | seg_len);
}
+
+ size += (inl + num_seg * sizeof * seg + 15) / 16;
} else {
struct mlx4_wqe_data_seg *seg = wqe;
@@ -413,14 +439,41 @@ out:
return ret;
}
+static int num_inline_segs(int data, enum ibv_qp_type type)
+{
+ /*
+ * Inline data segments are not allowed to cross 64 byte
+ * boundaries. For UD QPs, the data segments always start
+ * aligned to 64 bytes (16 byte control segment + 48 byte
+ * datagram segment); for other QPs, there will be a 16 byte
+ * control segment and possibly a 16 byte remote address
+ * segment, so in the worst case there will be only 32 bytes
+ * available for the first data segment.
+ */
+ if (type == IBV_QPT_UD)
+ data += (sizeof (struct mlx4_wqe_ctrl_seg) +
+ sizeof (struct mlx4_wqe_datagram_seg)) %
+ MLX4_INLINE_ALIGN;
+ else
+ data += (sizeof (struct mlx4_wqe_ctrl_seg) +
+ sizeof (struct mlx4_wqe_raddr_seg)) %
+ MLX4_INLINE_ALIGN;
+
+ return (data + MLX4_INLINE_ALIGN - sizeof (struct mlx4_wqe_inline_seg) - 1) /
+ (MLX4_INLINE_ALIGN - sizeof (struct mlx4_wqe_inline_seg));
+}
+
void mlx4_calc_sq_wqe_size(struct ibv_qp_cap *cap, enum ibv_qp_type type,
struct mlx4_qp *qp)
{
int size;
int max_sq_sge;
- max_sq_sge = align(cap->max_inline_data + sizeof (struct mlx4_wqe_inline_seg),
- sizeof (struct mlx4_wqe_data_seg)) / sizeof (struct mlx4_wqe_data_seg);
+ max_sq_sge = align(cap->max_inline_data +
+ num_inline_segs(cap->max_inline_data, type) *
+ sizeof (struct mlx4_wqe_inline_seg),
+ sizeof (struct mlx4_wqe_data_seg)) /
+ sizeof (struct mlx4_wqe_data_seg);
if (max_sq_sge < cap->max_send_sge)
max_sq_sge = cap->max_send_sge;
@@ -530,10 +583,19 @@ void mlx4_set_sq_sizes(struct mlx4_qp *qp, struct ibv_qp_cap *cap,
qp->sq.max_gs = wqe_size / sizeof (struct mlx4_wqe_data_seg);
cap->max_send_sge = qp->sq.max_gs;
- qp->max_inline_data = wqe_size - sizeof (struct mlx4_wqe_inline_seg);
cap->max_inline_data = qp->max_inline_data;
qp->sq.max_post = qp->sq.wqe_cnt - qp->sq_spare_wqes;
cap->max_send_wr = qp->sq.max_post;
+
+ /*
+ * Inline data segments can't cross a 64 byte boundary. So
+ * subtract off one segment header for each 64-byte chunk,
+ * taking into account the fact that wqe_size will be 32 mod
+ * 64 for non-UD QPs.
+ */
+ qp->max_inline_data = wqe_size -
+ sizeof (struct mlx4_wqe_inline_seg) *
+ (align(wqe_size, MLX4_INLINE_ALIGN) / MLX4_INLINE_ALIGN);
}
struct mlx4_qp *mlx4_find_qp(struct mlx4_context *ctx, uint32_t qpn)
diff --git a/src/wqe.h b/src/wqe.h
index 877ebfd..6f7f309 100644
--- a/src/wqe.h
+++ b/src/wqe.h
@@ -44,11 +44,12 @@ enum {
};
enum {
- MLX4_INLINE_SEG = 1 << 31
+ MLX4_INLINE_SEG = 1 << 31,
+ MLX4_INLINE_ALIGN = 64,
};
enum {
- MLX4_INVALID_LKEY = 0x100,
+ MLX4_INVALID_LKEY = 0x100,
};
struct mlx4_wqe_ctrl_seg {