author     Roland Dreier <rolandd@cisco.com>  2007-06-14 13:23:33 -0700
committer  Roland Dreier <rolandd@cisco.com>  2007-06-14 14:10:41 -0700
commit     722356de9e2d624929d887447088b7a0efe0b6d6 (patch)
tree       dc5b8fcd37fbe211c7a2f6ec60e95621e28ab61e
parent     81b83b34f171b3f1de226bda82e1f55bbe31b75b (diff)
download   libmlx4-722356de9e2d624929d887447088b7a0efe0b6d6.tar.gz
Make sure inline segments in send WQEs don't cross 64 byte boundaries
Hardware requires that inline data segments do not cross a 64 byte boundary. Make sure that send work requests satisfy this by using multiple inline data segments when needed.

Based on a patch from Jack Morgenstein <jackm@dev.mellanox.co.il>.

Signed-off-by: Roland Dreier <rolandd@cisco.com>
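To make the new mlx4_post_send() logic below easier to follow, here is a minimal, self-contained sketch of the splitting idea: copy the caller's data into the work queue in chunks, closing the current inline segment and opening a new one whenever the next byte would cross a 64-byte boundary. The names and constants (INLINE_ALIGN, struct inline_seg, copy_inline) are illustrative stand-ins rather than the libmlx4 definitions, and the sketch omits the MLX4_INLINE_SEG flag and byte-order conversion that the real code applies to each segment header.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Illustrative stand-ins; the real definitions live in libmlx4's wqe.h. */
#define INLINE_ALIGN 64                      /* hardware chunk size        */
struct inline_seg { uint32_t byte_count; };  /* per-segment length header  */

/*
 * Copy `len` bytes of inline data into the WQE at `wqe`, never letting a
 * data segment cross an INLINE_ALIGN boundary.  `wqe` points at the spot
 * where the first segment header goes.  Returns the number of segments
 * written.  (The real driver also ORs in a flag bit and uses htonl().)
 */
static int copy_inline(uint8_t *wqe, const uint8_t *src, int len)
{
	struct inline_seg *seg = (struct inline_seg *) wqe;
	int off, seg_len = 0, num_seg = 0;

	wqe += sizeof *seg;
	off = (uintptr_t) wqe & (INLINE_ALIGN - 1);

	while (len >= INLINE_ALIGN - off) {
		int to_copy = INLINE_ALIGN - off;

		memcpy(wqe, src, to_copy);
		src     += to_copy;
		wqe     += to_copy;
		len     -= to_copy;
		seg_len += to_copy;

		/* Close this segment at the boundary and start a new one. */
		seg->byte_count = (uint32_t) seg_len;
		seg      = (struct inline_seg *) wqe;
		wqe     += sizeof *seg;
		off      = sizeof *seg;
		seg_len  = 0;
		++num_seg;
	}

	memcpy(wqe, src, len);
	seg_len += len;
	if (seg_len) {
		seg->byte_count = (uint32_t) seg_len;
		++num_seg;
	}
	return num_seg;
}

int main(void)
{
	/* 64-byte aligned stand-in for a send WQE buffer. */
	static uint8_t wqe[256] __attribute__((aligned(64)));
	uint8_t data[100];

	memset(data, 0xab, sizeof data);
	printf("segments used: %d\n", copy_inline(wqe + 32, data, sizeof data));
	return 0;
}

With the payload's first header placed 32 bytes into a chunk, the sketch writes three segments of 28, 60 and 12 bytes, which is the same pattern the patched loop in mlx4_post_send() produces.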
-rw-r--r--  src/qp.c   116
-rw-r--r--  src/wqe.h    5
2 files changed, 92 insertions, 29 deletions
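The sizing changes in the qp.c hunks below (the new num_inline_segs() helper and the reworked max_inline_data computation in mlx4_set_sq_sizes()) come down to a little arithmetic. The sketch that follows redoes that arithmetic with stand-in constants: the 16-byte control, 48-byte datagram and 16-byte remote address segment sizes are the ones quoted in the patch's own comment, while the 4-byte inline segment header and the helper names are assumptions made purely for illustration.

#include <stdio.h>

/* Stand-in sizes for this example; see the comment in num_inline_segs(). */
enum {
	CTRL_SEG     = 16,
	DATAGRAM_SEG = 48,
	RADDR_SEG    = 16,
	INLINE_SEG   = 4,	/* assumed size of the inline header */
	INLINE_ALIGN = 64,
};

/* Round `val` up to a multiple of `to` (a power of two). */
static int align_up(int val, int to)
{
	return (val + to - 1) & ~(to - 1);
}

/* Mirrors num_inline_segs(): worst-case number of inline headers needed. */
static int inline_segs(int data, int is_ud)
{
	/* Offset of the first data segment within its 64-byte chunk. */
	int off = is_ud ? (CTRL_SEG + DATAGRAM_SEG) % INLINE_ALIGN   /*  0 */
			: (CTRL_SEG + RADDR_SEG)    % INLINE_ALIGN;  /* 32 */

	data += off;
	return (data + INLINE_ALIGN - INLINE_SEG - 1) /
	       (INLINE_ALIGN - INLINE_SEG);
}

/* Mirrors the new max_inline_data formula in mlx4_set_sq_sizes(). */
static int max_inline(int wqe_size)
{
	return wqe_size -
	       INLINE_SEG * (align_up(wqe_size, INLINE_ALIGN) / INLINE_ALIGN);
}

int main(void)
{
	/* 100 bytes of inline data on an RC QP: the first chunk holds at
	 * most 64 - 32 - 4 = 28 bytes, so 3 headers in the worst case. */
	printf("RC, 100 bytes -> %d segs\n", inline_segs(100, 0));

	/* The same data on a UD QP starts chunk-aligned: 2 headers suffice. */
	printf("UD, 100 bytes -> %d segs\n", inline_segs(100, 1));

	/* A hypothetical 480-byte WQE (32 mod 64, as the comment in
	 * mlx4_set_sq_sizes() notes for non-UD QPs) spans 8 chunks, so
	 * 8 * 4 = 32 bytes go to headers, leaving 448 bytes for data. */
	printf("wqe_size 480 -> max_inline %d\n", max_inline(480));
	return 0;
}

Running the sketch prints 3 and 2 segments for 100 bytes of inline data on RC and UD QPs respectively, and 448 bytes of usable inline capacity for the 480-byte example WQE.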
diff --git a/src/qp.c b/src/qp.c
index 301f7cb..0d53623 100644
--- a/src/qp.c
+++ b/src/qp.c
@@ -240,33 +240,59 @@ int mlx4_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
break;
}
- if (wr->send_flags & IBV_SEND_INLINE) {
- if (wr->num_sge) {
- struct mlx4_wqe_inline_seg *seg = wqe;
-
- inl = 0;
- wqe += sizeof *seg;
- for (i = 0; i < wr->num_sge; ++i) {
- uint32_t len = wr->sg_list[i].length;
-
- inl += len;
-
- if (inl > qp->max_inline_data) {
- inl = 0;
- ret = -1;
- *bad_wr = wr;
- goto out;
- }
-
- memcpy(wqe,
- (void *) (intptr_t) wr->sg_list[i].addr,
- len);
- wqe += len;
+ if (wr->send_flags & IBV_SEND_INLINE && wr->num_sge) {
+ struct mlx4_wqe_inline_seg *seg;
+ void *addr;
+ int len, seg_len;
+ int num_seg;
+ int off, to_copy;
+
+ inl = 0;
+
+ seg = wqe;
+ wqe += sizeof *seg;
+ off = ((uintptr_t) wqe) & (MLX4_INLINE_ALIGN - 1);
+ num_seg = 0;
+ seg_len = 0;
+
+ for (i = 0; i < wr->num_sge; ++i) {
+ addr = (void *) (uintptr_t) wr->sg_list[i].addr;
+ len = wr->sg_list[i].length;
+ inl += len;
+
+ if (inl > qp->max_inline_data) {
+ inl = 0;
+ ret = -1;
+ *bad_wr = wr;
+ goto out;
}
- seg->byte_count = htonl(MLX4_INLINE_SEG | inl);
- size += (inl + sizeof *seg + 15) / 16;
+ while (len >= MLX4_INLINE_ALIGN - off) {
+ to_copy = MLX4_INLINE_ALIGN - off;
+ memcpy(wqe, addr, to_copy);
+ len -= to_copy;
+ wqe += to_copy;
+ addr += to_copy;
+ seg_len += to_copy;
+ seg->byte_count = htonl(MLX4_INLINE_SEG | seg_len);
+ seg_len = 0;
+ seg = wqe;
+ wqe += sizeof *seg;
+ off = sizeof *seg;
+ ++num_seg;
+ }
+
+ memcpy(wqe, addr, len);
+ wqe += len;
+ seg_len += len;
+ }
+
+ if (seg_len) {
+ ++num_seg;
+ seg->byte_count = htonl(MLX4_INLINE_SEG | seg_len);
}
+
+ size += (inl + num_seg * sizeof * seg + 15) / 16;
} else {
struct mlx4_wqe_data_seg *seg = wqe;
@@ -413,14 +439,41 @@ out:
return ret;
}
+static int num_inline_segs(int data, enum ibv_qp_type type)
+{
+ /*
+ * Inline data segments are not allowed to cross 64 byte
+ * boundaries. For UD QPs, the data segments always start
+ * aligned to 64 bytes (16 byte control segment + 48 byte
+ * datagram segment); for other QPs, there will be a 16 byte
+ * control segment and possibly a 16 byte remote address
+ * segment, so in the worst case there will be only 32 bytes
+ * available for the first data segment.
+ */
+ if (type == IBV_QPT_UD)
+ data += (sizeof (struct mlx4_wqe_ctrl_seg) +
+ sizeof (struct mlx4_wqe_datagram_seg)) %
+ MLX4_INLINE_ALIGN;
+ else
+ data += (sizeof (struct mlx4_wqe_ctrl_seg) +
+ sizeof (struct mlx4_wqe_raddr_seg)) %
+ MLX4_INLINE_ALIGN;
+
+ return (data + MLX4_INLINE_ALIGN - sizeof (struct mlx4_wqe_inline_seg) - 1) /
+ (MLX4_INLINE_ALIGN - sizeof (struct mlx4_wqe_inline_seg));
+}
+
void mlx4_calc_sq_wqe_size(struct ibv_qp_cap *cap, enum ibv_qp_type type,
struct mlx4_qp *qp)
{
int size;
int max_sq_sge;
- max_sq_sge = align(cap->max_inline_data + sizeof (struct mlx4_wqe_inline_seg),
- sizeof (struct mlx4_wqe_data_seg)) / sizeof (struct mlx4_wqe_data_seg);
+ max_sq_sge = align(cap->max_inline_data +
+ num_inline_segs(cap->max_inline_data, type) *
+ sizeof (struct mlx4_wqe_inline_seg),
+ sizeof (struct mlx4_wqe_data_seg)) /
+ sizeof (struct mlx4_wqe_data_seg);
if (max_sq_sge < cap->max_send_sge)
max_sq_sge = cap->max_send_sge;
@@ -530,10 +583,19 @@ void mlx4_set_sq_sizes(struct mlx4_qp *qp, struct ibv_qp_cap *cap,
qp->sq.max_gs = wqe_size / sizeof (struct mlx4_wqe_data_seg);
cap->max_send_sge = qp->sq.max_gs;
- qp->max_inline_data = wqe_size - sizeof (struct mlx4_wqe_inline_seg);
cap->max_inline_data = qp->max_inline_data;
qp->sq.max_post = qp->sq.wqe_cnt - qp->sq_spare_wqes;
cap->max_send_wr = qp->sq.max_post;
+
+ /*
+ * Inline data segments can't cross a 64 byte boundary. So
+ * subtract off one segment header for each 64-byte chunk,
+ * taking into account the fact that wqe_size will be 32 mod
+ * 64 for non-UD QPs.
+ */
+ qp->max_inline_data = wqe_size -
+ sizeof (struct mlx4_wqe_inline_seg) *
+ (align(wqe_size, MLX4_INLINE_ALIGN) / MLX4_INLINE_ALIGN);
}
struct mlx4_qp *mlx4_find_qp(struct mlx4_context *ctx, uint32_t qpn)
diff --git a/src/wqe.h b/src/wqe.h
index 877ebfd..6f7f309 100644
--- a/src/wqe.h
+++ b/src/wqe.h
@@ -44,11 +44,12 @@ enum {
};
enum {
- MLX4_INLINE_SEG = 1 << 31
+ MLX4_INLINE_SEG = 1 << 31,
+ MLX4_INLINE_ALIGN = 64,
};
enum {
- MLX4_INVALID_LKEY = 0x100,
+ MLX4_INVALID_LKEY = 0x100,
};
struct mlx4_wqe_ctrl_seg {