diff options
author | Roland Dreier <rolandd@cisco.com> | 2005-10-14 22:48:54 +0000 |
---|---|---|
committer | Roland Dreier <rolandd@cisco.com> | 2006-11-09 11:35:57 -0800 |
commit | eb0663777c2486b6c58b40babadbe99f52bad4d0 (patch) | |
tree | 69293412ced50689fbb196f616a49e678cd21580 | |
parent | fcdf175c5a6684dad0677e8d06e3071ba2aaf410 (diff) | |
download | libibverbs-eb0663777c2486b6c58b40babadbe99f52bad4d0.tar.gz |
Add support for new datapath kernel commands
Add handling for calling into kernel for datapath operations, so that
we can handle the PathScale userspace driver.
Signed-off-by: Roland Dreier <rolandd@cisco.com>
-rw-r--r-- | ChangeLog | 15 | ||||
-rw-r--r-- | include/infiniband/driver.h | 12 | ||||
-rw-r--r-- | include/infiniband/kern-abi.h | 173 | ||||
-rw-r--r-- | include/infiniband/verbs.h | 3 | ||||
-rw-r--r-- | src/cmd.c | 356 | ||||
-rw-r--r-- | src/libibverbs.map | 7 |
6 files changed, 564 insertions, 2 deletions
diff --git a/ChangeLog b/ChangeLog
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,18 @@
+2005-10-13  Roland Dreier  <roland@cisco.com>
+
+	* include/infiniband/driver.h, src/cmd.c, src/libibverbs.map: Add
+	command functions for calling new kernel commands.
+
+	* include/infiniband/verbs.h: Add qp_type to struct ibv_qp so that
+	we know when we're posting a send on a UD QP, and add kernel
+	handle member to struct ibv_ah so we can handle drivers that do
+	create AH and destroy AH operations in the kernel.
+
+	* include/infiniband/kern-abi.h: Add new command structures for
+	poll CQ, request notification for CQ, post send, post receive,
+	post SRQ receive, create AH and destroy AH commands.  These will
+	be used by the PathScale userspace driver.
+
 2005-10-12  Roland Dreier  <roland@cisco.com>

 	* examples/srq_pingpong.c (main): Zero out unused entries in
diff --git a/include/infiniband/driver.h b/include/infiniband/driver.h
index d7a0dce..2e07e7e 100644
--- a/include/infiniband/driver.h
+++ b/include/infiniband/driver.h
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
  * Copyright (c) 2005 Cisco Systems.  All rights reserved.
+ * Copyright (c) 2005 PathScale, Inc.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -92,6 +93,8 @@ extern int ibv_cmd_create_cq(struct ibv_context *context, int cqe,
 			     int comp_vector, struct ibv_cq *cq,
 			     struct ibv_create_cq *cmd, size_t cmd_size,
 			     struct ibv_create_cq_resp *resp, size_t resp_size);
+extern int ibv_cmd_poll_cq(struct ibv_cq *cq, int ne, struct ibv_wc *wc);
+extern int ibv_cmd_req_notify_cq(struct ibv_cq *cq, int solicited);
 extern int ibv_cmd_destroy_cq(struct ibv_cq *cq);

 extern int ibv_cmd_create_srq(struct ibv_pd *pd,
@@ -111,6 +114,15 @@ extern int ibv_cmd_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
 			     enum ibv_qp_attr_mask attr_mask,
 			     struct ibv_modify_qp *cmd, size_t cmd_size);
 extern int ibv_cmd_destroy_qp(struct ibv_qp *qp);
+extern int ibv_cmd_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
+			     struct ibv_send_wr **bad_wr);
+extern int ibv_cmd_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr,
+			     struct ibv_recv_wr **bad_wr);
+extern int ibv_cmd_post_srq_recv(struct ibv_srq *srq, struct ibv_recv_wr *wr,
+				 struct ibv_recv_wr **bad_wr);
+extern int ibv_cmd_create_ah(struct ibv_pd *pd, struct ibv_ah *ah,
+			     struct ibv_ah_attr *attr);
+extern int ibv_cmd_destroy_ah(struct ibv_ah *ah);
 extern int ibv_cmd_attach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid);
 extern int ibv_cmd_detach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid);

diff --git a/include/infiniband/kern-abi.h b/include/infiniband/kern-abi.h
index e02f94f..1bfbd79 100644
--- a/include/infiniband/kern-abi.h
+++ b/include/infiniband/kern-abi.h
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2005 Topspin Communications.  All rights reserved.
  * Copyright (c) 2005 Cisco Systems.  All rights reserved.
+ * Copyright (c) 2005 PathScale, Inc.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -93,8 +94,11 @@ enum {
  * Make sure that all structs defined in this file remain laid out so
  * that they pack the same way on 32-bit and 64-bit architectures (to
  * avoid incompatibility between 32-bit userspace and 64-bit kernels).
- * In particular do not use pointer types -- pass pointers in __u64
- * instead.
+ * Specifically:
+ *  - Do not use pointer types -- pass pointers in __u64 instead.
+ *  - Make sure that any structure larger than 4 bytes is padded to a
+ *    multiple of 8 bytes.  Otherwise the structure size will be
+ *    different between 32-bit and 64-bit architectures.
  */

 struct ibv_kern_async_event {
@@ -298,6 +302,47 @@ struct ibv_create_cq_resp {
 	__u32 cqe;
 };

+struct ibv_kern_wc {
+	__u64 wr_id;
+	__u32 status;
+	__u32 opcode;
+	__u32 vendor_err;
+	__u32 byte_len;
+	__u32 imm_data;
+	__u32 qp_num;
+	__u32 src_qp;
+	__u32 wc_flags;
+	__u16 pkey_index;
+	__u16 slid;
+	__u8 sl;
+	__u8 dlid_path_bits;
+	__u8 port_num;
+	__u8 reserved;
+};
+
+struct ibv_poll_cq {
+	__u32 command;
+	__u16 in_words;
+	__u16 out_words;
+	__u64 response;
+	__u32 cq_handle;
+	__u32 ne;
+};
+
+struct ibv_poll_cq_resp {
+	__u32 count;
+	__u32 reserved;
+	struct ibv_kern_wc wc[0];
+};
+
+struct ibv_req_notify_cq {
+	__u32 command;
+	__u16 in_words;
+	__u16 out_words;
+	__u32 cq_handle;
+	__u32 solicited;
+};
+
 struct ibv_destroy_cq {
 	__u32 command;
 	__u16 in_words;
@@ -400,6 +445,130 @@ struct ibv_destroy_qp_resp {
 	__u32 events_reported;
 };

+struct ibv_kern_send_wr {
+	__u64 wr_id;
+	__u32 num_sge;
+	__u32 opcode;
+	__u32 send_flags;
+	__u32 imm_data;
+	union {
+		struct {
+			__u64 remote_addr;
+			__u32 rkey;
+			__u32 reserved;
+		} rdma;
+		struct {
+			__u64 remote_addr;
+			__u64 compare_add;
+			__u64 swap;
+			__u32 rkey;
+			__u32 reserved;
+		} atomic;
+		struct {
+			__u32 ah;
+			__u32 remote_qpn;
+			__u32 remote_qkey;
+			__u32 reserved;
+		} ud;
+	} wr;
+};
+
+struct ibv_post_send {
+	__u32 command;
+	__u16 in_words;
+	__u16 out_words;
+	__u64 response;
+	__u32 qp_handle;
+	__u32 wr_count;
+	__u32 sge_count;
+	__u32 wqe_size;
+	struct ibv_kern_send_wr send_wr[0];
+};
+
+struct ibv_post_send_resp {
+	__u32 bad_wr;
+};
+
+struct ibv_kern_recv_wr {
+	__u64 wr_id;
+	__u32 num_sge;
+	__u32 reserved;
+};
+
+struct ibv_post_recv {
+	__u32 command;
+	__u16 in_words;
+	__u16 out_words;
+	__u64 response;
+	__u32 qp_handle;
+	__u32 wr_count;
+	__u32 sge_count;
+	__u32 wqe_size;
+	struct ibv_kern_recv_wr recv_wr[0];
+};
+
+struct ibv_post_recv_resp {
+	__u32 bad_wr;
+};
+
+struct ibv_post_srq_recv {
+	__u32 command;
+	__u16 in_words;
+	__u16 out_words;
+	__u64 response;
+	__u32 srq_handle;
+	__u32 wr_count;
+	__u32 sge_count;
+	__u32 wqe_size;
+	struct ibv_kern_recv_wr recv_wr[0];
+};
+
+struct ibv_post_srq_recv_resp {
+	__u32 bad_wr;
+};
+
+struct ibv_kern_global_route {
+	__u8 dgid[16];
+	__u32 flow_label;
+	__u8 sgid_index;
+	__u8 hop_limit;
+	__u8 traffic_class;
+	__u8 reserved;
+};
+
+struct ibv_kern_ah_attr {
+	struct ibv_kern_global_route grh;
+	__u16 dlid;
+	__u8 sl;
+	__u8 src_path_bits;
+	__u8 static_rate;
+	__u8 is_global;
+	__u8 port_num;
+	__u8 reserved;
+};
+
+struct ibv_create_ah {
+	__u32 command;
+	__u16 in_words;
+	__u16 out_words;
+	__u64 response;
+	__u64 user_handle;
+	__u32 pd_handle;
+	__u32 reserved;
+	struct ibv_kern_ah_attr attr;
+};
+
+struct ibv_create_ah_resp {
+	__u32 handle;
+};
+
+struct ibv_destroy_ah {
+	__u32 command;
+	__u16 in_words;
+	__u16 out_words;
+	__u32 ah_handle;
+};
+
 struct ibv_attach_mcast {
 	__u32 command;
 	__u16 in_words;
diff --git a/include/infiniband/verbs.h b/include/infiniband/verbs.h
index bcc2fca..b5e7d7b 100644
--- a/include/infiniband/verbs.h
+++ b/include/infiniband/verbs.h
@@ -2,6 +2,7 @@
  * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
  * Copyright (c) 2004 Intel Corporation.  All rights reserved.
  * Copyright (c) 2005 Cisco Systems.  All rights reserved.
+ * Copyright (c) 2005 PathScale, Inc.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -488,6 +489,7 @@ struct ibv_qp {
 	uint32_t handle;
 	uint32_t qp_num;
 	enum ibv_qp_state state;
+	enum ibv_qp_type qp_type;

 	pthread_mutex_t mutex;
 	pthread_cond_t cond;
@@ -513,6 +515,7 @@ struct ibv_cq {
 struct ibv_ah {
 	struct ibv_context *context;
 	struct ibv_pd *pd;
+	uint32_t handle;
 };

 struct ibv_device;
diff --git a/src/cmd.c b/src/cmd.c
--- a/src/cmd.c
+++ b/src/cmd.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2005 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 PathScale, Inc.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -304,6 +305,76 @@ int ibv_cmd_create_cq(struct ibv_context *context, int cqe,
 	return 0;
 }

+/*
+ * Poll a CQ through the kernel command channel: request up to ne
+ * completions on ibcq and copy each returned entry into wc[].  Returns
+ * the completion count reported by the kernel, or -1 if the response
+ * buffer cannot be allocated or the command write fails.
+ */
+int ibv_cmd_poll_cq(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc)
+{
+	struct ibv_poll_cq cmd;
+	struct ibv_poll_cq_resp *resp;
+	int i;
+	int rsize;
+	int ret;
+
+	rsize = sizeof *resp + ne * sizeof(struct ibv_kern_wc);
+	resp = malloc(rsize);
+	if (!resp)
+		return -1;
+
+	IBV_INIT_CMD_RESP(&cmd, sizeof cmd, POLL_CQ, resp, rsize);
+	cmd.cq_handle = ibcq->handle;
+	cmd.ne = ne;
+
+	if (write(ibcq->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd) {
+		ret = -1;
+		goto out;
+	}
+
+	for (i = 0; i < resp->count; i++) {
+		wc[i].wr_id = resp->wc[i].wr_id;
+		wc[i].status = resp->wc[i].status;
+		wc[i].opcode = resp->wc[i].opcode;
+		wc[i].vendor_err = resp->wc[i].vendor_err;
+		wc[i].byte_len = resp->wc[i].byte_len;
+		wc[i].imm_data = resp->wc[i].imm_data;
+		wc[i].qp_num = resp->wc[i].qp_num;
+		wc[i].src_qp = resp->wc[i].src_qp;
+		wc[i].wc_flags = resp->wc[i].wc_flags;
+		wc[i].pkey_index = resp->wc[i].pkey_index;
+		wc[i].slid = resp->wc[i].slid;
+		wc[i].sl = resp->wc[i].sl;
+		wc[i].dlid_path_bits = resp->wc[i].dlid_path_bits;
+	}
+
+	ret = resp->count;
+
+out:
+	free(resp);
+	return ret;
+}
+
+/*
+ * Request a completion notification on ibcq.  The solicited flag is
+ * normalized to 0/1 and passed through unchanged in sense.  Returns 0
+ * on success or errno if the command write fails.
+ */
+int ibv_cmd_req_notify_cq(struct ibv_cq *ibcq, int solicited)
+{
+	struct ibv_req_notify_cq cmd;
+
+	IBV_INIT_CMD(&cmd, sizeof cmd, REQ_NOTIFY_CQ);
+	cmd.cq_handle = ibcq->handle;
+	cmd.solicited = !!solicited;
+
+	if (write(ibcq->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
+		return errno;
+
+	return 0;
+}
+
 static int ibv_cmd_destroy_cq_v1(struct ibv_cq *cq)
 {
 	struct ibv_destroy_cq_v1 cmd;
@@ -441,6 +512,7 @@ int ibv_cmd_create_qp(struct ibv_pd *pd,

 	qp->handle = resp.qp_handle;
 	qp->qp_num = resp.qpn;
+	qp->qp_type = attr->qp_type;

 	return 0;
 }
@@ -518,6 +590,290 @@ static int ibv_cmd_destroy_qp_v1(struct ibv_qp *qp)
 	return 0;
 }

+/*
+ * Post a chain of send work requests through the kernel.  The command
+ * buffer holds the fixed header, one ibv_kern_send_wr per request, and
+ * then every scatter/gather entry of the chain in order.  Returns 0 on
+ * success or errno if the write fails.  On failure *bad_wr is set to
+ * the request named by the 1-based resp.bad_wr, or to wr if the kernel
+ * reported no index.
+ */
+int ibv_cmd_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
+		      struct ibv_send_wr **bad_wr)
+{
+	struct ibv_post_send *cmd;
+	struct ibv_post_send_resp resp;
+	struct ibv_send_wr *i;
+	struct ibv_kern_send_wr *n, *tmp;
+	struct ibv_sge *s;
+	unsigned wr_count = 0;
+	unsigned sge_count = 0;
+	int size;
+	int ret = 0;
+
+	for (i = wr; i; i = i->next) {
+		wr_count++;
+		sge_count += i->num_sge;
+	}
+
+	size = sizeof *cmd + wr_count * sizeof *n + sge_count * sizeof *s;
+	cmd = alloca(size);
+
+	IBV_INIT_CMD_RESP(cmd, size, POST_SEND, &resp, sizeof resp);
+	cmd->qp_handle = ibqp->handle;
+	cmd->wr_count = wr_count;
+	cmd->sge_count = sge_count;
+	cmd->wqe_size = sizeof *n;
+
+	n = (struct ibv_kern_send_wr *) ((void *) cmd + sizeof *cmd);
+	s = (struct ibv_sge *) (n + wr_count);
+
+	tmp = n;
+	for (i = wr; i; i = i->next) {
+		tmp->wr_id = i->wr_id;
+		tmp->num_sge = i->num_sge;
+		tmp->opcode = i->opcode;
+		tmp->send_flags = i->send_flags;
+		tmp->imm_data = i->imm_data;
+		if (ibqp->qp_type == IBV_QPT_UD) {
+			tmp->wr.ud.ah = i->wr.ud.ah->handle;
+			tmp->wr.ud.remote_qpn = i->wr.ud.remote_qpn;
+			tmp->wr.ud.remote_qkey = i->wr.ud.remote_qkey;
+		} else {
+			switch(i->opcode) {
+			case IBV_WR_RDMA_WRITE:
+			case IBV_WR_RDMA_WRITE_WITH_IMM:
+			case IBV_WR_RDMA_READ:
+				tmp->wr.rdma.remote_addr =
+					i->wr.rdma.remote_addr;
+				tmp->wr.rdma.rkey = i->wr.rdma.rkey;
+				break;
+			case IBV_WR_ATOMIC_CMP_AND_SWP:
+			case IBV_WR_ATOMIC_FETCH_AND_ADD:
+				tmp->wr.atomic.remote_addr =
+					i->wr.atomic.remote_addr;
+				tmp->wr.atomic.compare_add =
+					i->wr.atomic.compare_add;
+				tmp->wr.atomic.swap = i->wr.atomic.swap;
+				tmp->wr.atomic.rkey = i->wr.atomic.rkey;
+				break;
+			default:
+				break;
+			}
+		}
+
+		if (tmp->num_sge) {
+			memcpy(s, i->sg_list, tmp->num_sge * sizeof *s);
+			s += tmp->num_sge;
+		}
+
+		tmp++;
+	}
+
+	resp.bad_wr = 0;
+	/* size is the full command length; sizeof cmd is only a pointer */
+	if (write(ibqp->context->cmd_fd, cmd, size) != size)
+		ret = errno;
+
+	wr_count = resp.bad_wr;
+	if (wr_count) {
+		i = wr;
+		while (--wr_count)
+			i = i->next;
+		*bad_wr = i;
+	} else if (ret)
+		*bad_wr = wr;
+
+	return ret;
+}
+
+/*
+ * Post a chain of receive work requests to a QP through the kernel.
+ * The command buffer holds the fixed header, one ibv_kern_recv_wr per
+ * request, and then every scatter/gather entry of the chain in order.
+ * Returns 0 on success or errno if the write fails.  On failure
+ * *bad_wr is set to the request named by the 1-based resp.bad_wr, or
+ * to wr if the kernel reported no index.
+ */
+int ibv_cmd_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr,
+		      struct ibv_recv_wr **bad_wr)
+{
+	struct ibv_post_recv *cmd;
+	struct ibv_post_recv_resp resp;
+	struct ibv_recv_wr *i;
+	struct ibv_kern_recv_wr *n, *tmp;
+	struct ibv_sge *s;
+	unsigned wr_count = 0;
+	unsigned sge_count = 0;
+	int size;
+	int ret = 0;
+
+	for (i = wr; i; i = i->next) {
+		wr_count++;
+		sge_count += i->num_sge;
+	}
+
+	size = sizeof *cmd + wr_count * sizeof *n + sge_count * sizeof *s;
+	cmd = alloca(size);
+
+	IBV_INIT_CMD_RESP(cmd, size, POST_RECV, &resp, sizeof resp);
+	cmd->qp_handle = ibqp->handle;
+	cmd->wr_count = wr_count;
+	cmd->sge_count = sge_count;
+	cmd->wqe_size = sizeof *n;
+
+	n = (struct ibv_kern_recv_wr *) ((void *) cmd + sizeof *cmd);
+	s = (struct ibv_sge *) (n + wr_count);
+
+	tmp = n;
+	for (i = wr; i; i = i->next) {
+		tmp->wr_id = i->wr_id;
+		tmp->num_sge = i->num_sge;
+
+		if (tmp->num_sge) {
+			memcpy(s, i->sg_list, tmp->num_sge * sizeof *s);
+			s += tmp->num_sge;
+		}
+
+		tmp++;
+	}
+
+	resp.bad_wr = 0;
+	/* size is the full command length; sizeof cmd is only a pointer */
+	if (write(ibqp->context->cmd_fd, cmd, size) != size)
+		ret = errno;
+
+	wr_count = resp.bad_wr;
+	if (wr_count) {
+		i = wr;
+		while (--wr_count)
+			i = i->next;
+		*bad_wr = i;
+	} else if (ret)
+		*bad_wr = wr;
+
+	return ret;
+}
+
+/*
+ * Post a chain of receive work requests to an SRQ through the kernel.
+ * The command buffer holds the fixed header, one ibv_kern_recv_wr per
+ * request, and then every scatter/gather entry of the chain in order.
+ * Returns 0 on success or errno if the write fails.  On failure
+ * *bad_wr is set to the request named by the 1-based resp.bad_wr, or
+ * to wr if the kernel reported no index.
+ */
+int ibv_cmd_post_srq_recv(struct ibv_srq *srq, struct ibv_recv_wr *wr,
+			  struct ibv_recv_wr **bad_wr)
+{
+	struct ibv_post_srq_recv *cmd;
+	struct ibv_post_srq_recv_resp resp;
+	struct ibv_recv_wr *i;
+	struct ibv_kern_recv_wr *n, *tmp;
+	struct ibv_sge *s;
+	unsigned wr_count = 0;
+	unsigned sge_count = 0;
+	int size;
+	int ret = 0;
+
+	for (i = wr; i; i = i->next) {
+		wr_count++;
+		sge_count += i->num_sge;
+	}
+
+	size = sizeof *cmd + wr_count * sizeof *n + sge_count * sizeof *s;
+	cmd = alloca(size);
+
+	IBV_INIT_CMD_RESP(cmd, size, POST_SRQ_RECV, &resp, sizeof resp);
+	cmd->srq_handle = srq->handle;
+	cmd->wr_count = wr_count;
+	cmd->sge_count = sge_count;
+	cmd->wqe_size = sizeof *n;
+
+	n = (struct ibv_kern_recv_wr *) ((void *) cmd + sizeof *cmd);
+	s = (struct ibv_sge *) (n + wr_count);
+
+	tmp = n;
+	for (i = wr; i; i = i->next) {
+		tmp->wr_id = i->wr_id;
+		tmp->num_sge = i->num_sge;
+
+		if (tmp->num_sge) {
+			memcpy(s, i->sg_list, tmp->num_sge * sizeof *s);
+			s += tmp->num_sge;
+		}
+
+		tmp++;
+	}
+
+	resp.bad_wr = 0;
+	/* size is the full command length; sizeof cmd is only a pointer */
+	if (write(srq->context->cmd_fd, cmd, size) != size)
+		ret = errno;
+
+	wr_count = resp.bad_wr;
+	if (wr_count) {
+		i = wr;
+		while (--wr_count)
+			i = i->next;
+		*bad_wr = i;
+	} else if (ret)
+		*bad_wr = wr;
+
+	return ret;
+}
+
+/*
+ * Create an address handle in the kernel from attr on protection domain
+ * pd and store the kernel's handle in ah->handle.  Returns 0 on success
+ * or errno if the command write fails.
+ */
+int ibv_cmd_create_ah(struct ibv_pd *pd, struct ibv_ah *ah,
+		      struct ibv_ah_attr *attr)
+{
+	struct ibv_create_ah cmd;
+	struct ibv_create_ah_resp resp;
+
+	IBV_INIT_CMD_RESP(&cmd, sizeof cmd, CREATE_AH, &resp, sizeof resp);
+	cmd.user_handle = (uintptr_t) ah;
+	cmd.pd_handle = pd->handle;
+	cmd.attr.dlid = attr->dlid;
+	cmd.attr.sl = attr->sl;
+	cmd.attr.src_path_bits = attr->src_path_bits;
+	cmd.attr.static_rate = attr->static_rate;
+	cmd.attr.is_global = attr->is_global;
+	cmd.attr.port_num = attr->port_num;
+	cmd.attr.grh.flow_label = attr->grh.flow_label;
+	cmd.attr.grh.sgid_index = attr->grh.sgid_index;
+	cmd.attr.grh.hop_limit = attr->grh.hop_limit;
+	cmd.attr.grh.traffic_class = attr->grh.traffic_class;
+	memcpy(cmd.attr.grh.dgid, attr->grh.dgid.raw, 16);
+
+	if (write(pd->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
+		return errno;
+
+	ah->handle = resp.handle;
+
+	return 0;
+}
+
+/*
+ * Destroy the kernel address handle identified by ah->handle.  Returns
+ * 0 on success or errno if the command write fails.
+ */
+int ibv_cmd_destroy_ah(struct ibv_ah *ah)
+{
+	struct ibv_destroy_ah cmd;
+
+	IBV_INIT_CMD(&cmd, sizeof cmd, DESTROY_AH);
+	cmd.ah_handle = ah->handle;
+
+	if (write(ah->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
+		return errno;
+
+	return 0;
+}
+
 int ibv_cmd_destroy_qp(struct ibv_qp *qp)
 {
 	struct ibv_destroy_qp cmd;
diff --git a/src/libibverbs.map b/src/libibverbs.map
index 072287e..def6e7f 100644
--- a/src/libibverbs.map
+++ b/src/libibverbs.map
@@ -41,6 +41,8 @@ IBVERBS_1.0 {
 		ibv_cmd_reg_mr;
 		ibv_cmd_dereg_mr;
 		ibv_cmd_create_cq;
+		ibv_cmd_poll_cq;
+		ibv_cmd_req_notify_cq;
 		ibv_cmd_destroy_cq;
 		ibv_cmd_create_srq;
 		ibv_cmd_modify_srq;
@@ -48,6 +50,11 @@ IBVERBS_1.0 {
 		ibv_cmd_create_qp;
 		ibv_cmd_modify_qp;
 		ibv_cmd_destroy_qp;
+		ibv_cmd_post_send;
+		ibv_cmd_post_recv;
+		ibv_cmd_post_srq_recv;
+		ibv_cmd_create_ah;
+		ibv_cmd_destroy_ah;
 		ibv_cmd_attach_mcast;
 		ibv_cmd_detach_mcast;
 	local: *;