aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRoland Dreier <roland@topspin.com>2005-04-07 21:26:32 +0000
committerRoland Dreier <rolandd@cisco.com>2006-11-09 11:35:56 -0800
commit91fc39561d04903cd5b1665d9215a184baa66ba9 (patch)
tree968bc59755e0271f44576e4f8907abf9c380988d
parentbdb426aada13a0fd2cca920b9cd140008aa8a1eb (diff)
downloadlibibverbs-91fc39561d04903cd5b1665d9215a184baa66ba9.tar.gz
Commit libibverbs code from roland-uverbs branch back onto trunk
-rw-r--r--Makefile.am31
-rw-r--r--examples/device_list.c6
-rw-r--r--examples/pingpong.c639
-rw-r--r--examples/ud-pingpong.c677
-rw-r--r--include/infiniband/arch.h78
-rw-r--r--include/infiniband/driver.h29
-rw-r--r--include/infiniband/kern-abi.h299
-rw-r--r--include/infiniband/opcode.h149
-rw-r--r--include/infiniband/verbs.h422
-rw-r--r--src/cmd.c298
-rw-r--r--src/device.c76
-rw-r--r--src/ibverbs.h18
-rw-r--r--src/init.c32
-rw-r--r--src/libibverbs.map20
-rw-r--r--src/verbs.c175
15 files changed, 2845 insertions, 104 deletions
diff --git a/Makefile.am b/Makefile.am
index 01a3f6e..5b3fb8c 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -4,7 +4,9 @@ INCLUDES = -I$(srcdir)/include
lib_LTLIBRARIES = src/libibverbs.la
-src_libibverbs_la_CFLAGS = -Wall -D_GNU_SOURCE -DDRIVER_PATH=\"$(libdir)/infiniband\"
+AM_CFLAGS = -g -Wall -D_GNU_SOURCE
+
+src_libibverbs_la_CFLAGS = -g -Wall -D_GNU_SOURCE -DDRIVER_PATH=\"$(libdir)/infiniband\"
if HAVE_LD_VERSION_SCRIPT
libibverbs_version_script = -Wl,--version-script=$(srcdir)/src/libibverbs.map
@@ -12,24 +14,31 @@ else
libibverbs_version_script =
endif
-src_libibverbs_la_SOURCES = src/init.c src/device.c src/memory.c
+src_libibverbs_la_SOURCES = src/cmd.c src/device.c src/init.c src/memory.c src/verbs.c
src_libibverbs_la_LDFLAGS = -version-info 1 -export-dynamic \
$(libibverbs_version_script)
src_libibverbs_la_DEPENDENCIES = $(srcdir)/src/libibverbs.map
-bin_PROGRAMS = examples/ib_devices examples/asyncwatch examples/pingpong
-examples_ib_devices_SOURCES = examples/device_list.c
-examples_ib_devices_LDADD = $(top_builddir)/src/libibverbs.la
-examples_pingpong_SOURCES = examples/pingpong.c
-examples_pingpong_LDADD = $(top_builddir)/src/libibverbs.la
-examples_asyncwatch_SOURCES = examples/asyncwatch.c
-examples_asyncwatch_LDADD = $(top_builddir)/src/libibverbs.la
+bin_PROGRAMS = examples/ibv_devices examples/ibv_asyncwatch \
+ examples/ibv_pingpong examples/ibv_ud_pingpong
+examples_ibv_devices_SOURCES = examples/device_list.c
+examples_ibv_devices_LDADD = $(top_builddir)/src/libibverbs.la
+examples_ibv_pingpong_SOURCES = examples/pingpong.c
+examples_ibv_pingpong_LDADD = $(top_builddir)/src/libibverbs.la
+examples_ibv_ud_pingpong_SOURCES = examples/ud-pingpong.c
+examples_ibv_ud_pingpong_LDADD = $(top_builddir)/src/libibverbs.la
+examples_ibv_asyncwatch_SOURCES = examples/asyncwatch.c
+examples_ibv_asyncwatch_LDADD = $(top_builddir)/src/libibverbs.la
libibverbsincludedir = $(includedir)/infiniband
-libibverbsinclude_HEADERS = include/infiniband/verbs.h include/infiniband/driver.h
+libibverbsinclude_HEADERS = include/infiniband/arch.h include/infiniband/driver.h \
+ include/infiniband/kern-abi.h include/infiniband/opcode.h include/infiniband/verbs.h
-EXTRA_DIST = include/infiniband/verbs.h include/infiniband/driver.h \
+EXTRA_DIST = include/infiniband/driver.h include/infiniband/kern-abi.h \
+ include/infiniband/opcode.h include/infiniband/verbs.h \
+ src/ibverbs.h \
+ src/libibverbs.map \
libibverbs.spec.in
dist-hook: libibverbs.spec
diff --git a/examples/device_list.c b/examples/device_list.c
index a5395d5..414612e 100644
--- a/examples/device_list.c
+++ b/examples/device_list.c
@@ -36,6 +36,8 @@
# include <config.h>
#endif /* HAVE_CONFIG_H */
+#include <stdio.h>
+
#include <endian.h>
#include <byteswap.h>
@@ -60,5 +62,7 @@ int main(int argc, char *argv[])
dlist_for_each_data(dev_list, ib_dev, struct ibv_device)
printf(" %-16s\t%016llx\n",
ibv_get_device_name(ib_dev),
- be64_to_cpu(ibv_get_device_guid(ib_dev)));
+ (unsigned long long) be64_to_cpu(ibv_get_device_guid(ib_dev)));
+
+ return 0;
}
diff --git a/examples/pingpong.c b/examples/pingpong.c
index c8e6502..9d3adcb 100644
--- a/examples/pingpong.c
+++ b/examples/pingpong.c
@@ -29,7 +29,7 @@
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
- * $Id: device_list.c 1393 2004-12-28 02:15:24Z roland $
+ * $Id$
*/
#if HAVE_CONFIG_H
@@ -37,38 +37,641 @@
#endif /* HAVE_CONFIG_H */
#include <stdio.h>
-#include <endian.h>
-#include <byteswap.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <netdb.h>
+#include <malloc.h>
+#include <getopt.h>
+#include <arpa/inet.h>
+#include <time.h>
+
+#include <sysfs/libsysfs.h>
#include <infiniband/verbs.h>
-#if __BYTE_ORDER == __LITTLE_ENDIAN
-static inline uint64_t be64_to_cpu(uint64_t x) { return bswap_64(x); }
-#elif __BYTE_ORDER == __BIG_ENDIAN
-static inline uint64_t be64_to_cpu(uint64_t x) { return x; }
-#endif
+enum { /* work request IDs: set by pp_post_recv()/pp_post_send(), switched on in main() */
+ PINGPONG_RECV_WRID = 1, /* wr_id carried by every posted receive */
+ PINGPONG_SEND_WRID = 2, /* wr_id carried by every posted send */
+};
+
+static int page_size; /* set in main() from sysconf(_SC_PAGESIZE); alignment of the work buffer */
+
+struct pingpong_context { /* all state pp_init_ctx() sets up for one ping-pong endpoint */
+ struct ibv_context *context; /* device context returned by ibv_open_device() */
+ struct ibv_pd *pd; /* protection domain from ibv_alloc_pd() */
+ struct ibv_mr *mr; /* memory region registered over buf */
+ struct ibv_cq *cq; /* one CQ used as both send_cq and recv_cq of qp */
+ struct ibv_qp *qp; /* queue pair created with qp_type IBV_QPT_RC */
+ void *buf; /* message buffer, memalign(page_size, size) */
+ int size; /* message size in bytes */
+ int rx_depth; /* max_recv_wr requested for qp; also the receive refill target */
+};
+
+/*
+ * Address triple exchanged between the two sides over TCP as the text
+ * "%04x:%06x:%06x".  The fields are unsigned because they are printed
+ * and scanned with %x, which requires unsigned int arguments.
+ */
+struct pingpong_dest {
+	unsigned int lid;	/* local identifier of the port */
+	unsigned int qpn;	/* queue pair number */
+	unsigned int psn;	/* initial packet sequence number */
+};
+
+/*
+ * Query the given port of the opened device and return its LID.
+ * Returns 0 when the port query fails.
+ */
+static uint16_t pp_get_local_lid(struct pingpong_context *ctx, int port)
+{
+	struct ibv_port_attr port_attr;
+	int err = ibv_query_port(ctx->context, port, &port_attr);
+
+	return err ? 0 : port_attr.lid;
+}
+
+/*
+ * Client side of the out-of-band address exchange.
+ *
+ * Connects over TCP to servername:port, sends the local LID/QPN/PSN as
+ * a fixed-size "%04x:%06x:%06x" string, reads the server's reply in the
+ * same format and finally sends "done".
+ *
+ * Returns a malloc()ed pingpong_dest describing the remote side (owned
+ * by the caller), or NULL on any failure.
+ */
+static struct pingpong_dest *pp_client_exch_dest(const char *servername, int port,
+						 const struct pingpong_dest *my_dest)
+{
+	struct addrinfo *res, *t;
+	struct addrinfo hints = {
+		.ai_family   = AF_UNSPEC,
+		.ai_socktype = SOCK_STREAM
+	};
+	char *service;
+	char msg[sizeof "0000:000000:000000"];
+	unsigned int lid, qpn, psn;
+	int n;
+	int sockfd = -1;
+	struct pingpong_dest *rem_dest = NULL;
+
+	/* On failure asprintf() leaves service undefined, so stop here. */
+	if (asprintf(&service, "%d", port) < 0) {
+		fprintf(stderr, "Couldn't format port %d\n", port);
+		return NULL;
+	}
+
+	n = getaddrinfo(servername, service, &hints, &res);
+	free(service);
+
+	/* getaddrinfo() signals failure with any non-zero EAI_* code. */
+	if (n) {
+		fprintf(stderr, "%s for %s:%d\n", gai_strerror(n), servername, port);
+		return NULL;
+	}
+
+	/* Try each resolved address until one connects. */
+	for (t = res; t; t = t->ai_next) {
+		sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
+		if (sockfd >= 0) {
+			if (!connect(sockfd, t->ai_addr, t->ai_addrlen))
+				break;
+			close(sockfd);
+			sockfd = -1;
+		}
+	}
+
+	freeaddrinfo(res);
+
+	if (sockfd < 0) {
+		fprintf(stderr, "Couldn't connect to %s:%d\n", servername, port);
+		return NULL;
+	}
+
+	snprintf(msg, sizeof msg, "%04x:%06x:%06x",
+		 my_dest->lid, my_dest->qpn, my_dest->psn);
+	if (write(sockfd, msg, sizeof msg) != sizeof msg) {
+		fprintf(stderr, "Couldn't send local address\n");
+		goto out;
+	}
+
+	if (read(sockfd, msg, sizeof msg) != sizeof msg) {
+		perror("client read");
+		fprintf(stderr, "Couldn't read remote address\n");
+		goto out;
+	}
+
+	/* Best-effort acknowledgement; the server ignores the result of its read. */
+	write(sockfd, "done", sizeof "done");
+
+	/* The peer is not trusted to have sent the terminating NUL. */
+	msg[sizeof msg - 1] = '\0';
+	if (sscanf(msg, "%x:%x:%x", &lid, &qpn, &psn) != 3) {
+		fprintf(stderr, "Couldn't parse remote address\n");
+		goto out;
+	}
+
+	rem_dest = malloc(sizeof *rem_dest);
+	if (!rem_dest)
+		goto out;
+
+	rem_dest->lid = lid;
+	rem_dest->qpn = qpn;
+	rem_dest->psn = psn;
+
+out:
+	close(sockfd);
+	return rem_dest;
+}
+
+/*
+ * Server side of the out-of-band address exchange.
+ *
+ * Binds a TCP socket to the given port, accepts one connection, reads
+ * the client's LID/QPN/PSN as a fixed-size "%04x:%06x:%06x" string,
+ * replies with the local triple in the same format and then waits for
+ * the client's final message.
+ *
+ * Returns a malloc()ed pingpong_dest describing the remote side (owned
+ * by the caller), or NULL on any failure.
+ */
+static struct pingpong_dest *pp_server_exch_dest(int port, const struct pingpong_dest *my_dest)
+{
+	struct addrinfo *res, *t;
+	struct addrinfo hints = {
+		.ai_flags    = AI_PASSIVE,
+		.ai_family   = AF_UNSPEC,
+		.ai_socktype = SOCK_STREAM
+	};
+	char *service;
+	char msg[sizeof "0000:000000:000000"];
+	unsigned int lid, qpn, psn;
+	int n;
+	int sockfd = -1, connfd;
+	struct pingpong_dest *rem_dest = NULL;
+
+	/* On failure asprintf() leaves service undefined, so stop here. */
+	if (asprintf(&service, "%d", port) < 0) {
+		fprintf(stderr, "Couldn't format port %d\n", port);
+		return NULL;
+	}
+
+	n = getaddrinfo(NULL, service, &hints, &res);
+	free(service);
+
+	/* getaddrinfo() signals failure with any non-zero EAI_* code. */
+	if (n) {
+		fprintf(stderr, "%s for port %d\n", gai_strerror(n), port);
+		return NULL;
+	}
+
+	/* Try each candidate address until one can be bound. */
+	for (t = res; t; t = t->ai_next) {
+		sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
+		if (sockfd >= 0) {
+			n = 1;
+
+			setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &n, sizeof n);
+
+			if (!bind(sockfd, t->ai_addr, t->ai_addrlen))
+				break;
+			close(sockfd);
+			sockfd = -1;
+		}
+	}
+
+	freeaddrinfo(res);
+
+	if (sockfd < 0) {
+		fprintf(stderr, "Couldn't listen to port %d\n", port);
+		return NULL;
+	}
+
+	/* A listen() failure surfaces through the accept() check below. */
+	listen(sockfd, 1);
+	connfd = accept(sockfd, NULL, NULL);
+	close(sockfd);
+	if (connfd < 0) {
+		fprintf(stderr, "accept() failed\n");
+		return NULL;
+	}
+
+	n = read(connfd, msg, sizeof msg);
+	if (n != sizeof msg) {
+		perror("server read");
+		fprintf(stderr, "%d/%d: Couldn't read remote address\n", n, (int) sizeof msg);
+		goto out;
+	}
+
+	/* The peer is not trusted to have sent the terminating NUL. */
+	msg[sizeof msg - 1] = '\0';
+	if (sscanf(msg, "%x:%x:%x", &lid, &qpn, &psn) != 3) {
+		fprintf(stderr, "Couldn't parse remote address\n");
+		goto out;
+	}
+
+	rem_dest = malloc(sizeof *rem_dest);
+	if (!rem_dest)
+		goto out;
+
+	rem_dest->lid = lid;
+	rem_dest->qpn = qpn;
+	rem_dest->psn = psn;
+
+	snprintf(msg, sizeof msg, "%04x:%06x:%06x",
+		 my_dest->lid, my_dest->qpn, my_dest->psn);
+	if (write(connfd, msg, sizeof msg) != sizeof msg) {
+		fprintf(stderr, "Couldn't send local address\n");
+		free(rem_dest);
+		rem_dest = NULL;
+		goto out;
+	}
+
+	/* Wait for the client's final message; its content is not examined. */
+	read(connfd, msg, sizeof msg);
+
+out:
+	close(connfd);
+	return rem_dest;
+}
+
+/*
+ * Allocate and initialise a ping-pong endpoint on ib_dev.
+ *
+ * Allocates a zeroed, page-aligned buffer of size bytes, opens the
+ * device, allocates a PD, registers the buffer, creates one CQ with
+ * rx_depth + 1 entries and an RC QP using it for both queues, then
+ * moves the QP to INIT on the given port.
+ *
+ * Returns the new context, or NULL after printing a message on stderr.
+ * Failures after the device has been opened return NULL without
+ * releasing the verbs objects already created; main() exits right away
+ * on a NULL result.
+ */
+static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,
+					    int rx_depth, int port)
+{
+	struct pingpong_context *ctx;
+
+	ctx = malloc(sizeof *ctx);
+	if (!ctx)
+		return NULL;
+
+	ctx->size     = size;
+	ctx->rx_depth = rx_depth;
+
+	ctx->buf = memalign(page_size, size);
+	if (!ctx->buf) {
+		fprintf(stderr, "Couldn't allocate work buf.\n");
+		free(ctx);
+		return NULL;
+	}
+
+	memset(ctx->buf, 0, size);
+
+	ctx->context = ibv_open_device(ib_dev);
+	if (!ctx->context) {
+		fprintf(stderr, "Couldn't get context for %s\n",
+			ibv_get_device_name(ib_dev));
+		/* Nothing refers to the buffer yet, so plain free() is safe. */
+		free(ctx->buf);
+		free(ctx);
+		return NULL;
+	}
+
+	ctx->pd = ibv_alloc_pd(ctx->context);
+	if (!ctx->pd) {
+		fprintf(stderr, "Couldn't allocate PD\n");
+		return NULL;
+	}
+
+	ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size, IBV_ACCESS_LOCAL_WRITE);
+	if (!ctx->mr) {
+		fprintf(stderr, "Couldn't allocate MR\n");
+		return NULL;
+	}
+
+	ctx->cq = ibv_create_cq(ctx->context, rx_depth + 1, NULL);
+	if (!ctx->cq) {
+		fprintf(stderr, "Couldn't create CQ\n");
+		return NULL;
+	}
+
+	{
+		struct ibv_qp_init_attr attr = {
+			.send_cq = ctx->cq,
+			.recv_cq = ctx->cq,
+			.cap     = {
+				.max_send_wr  = 1,
+				.max_recv_wr  = rx_depth,
+				.max_send_sge = 1,
+				.max_recv_sge = 1
+			},
+			.qp_type = IBV_QPT_RC
+		};
+
+		ctx->qp = ibv_create_qp(ctx->pd, &attr);
+		if (!ctx->qp) {
+			fprintf(stderr, "Couldn't create QP\n");
+			return NULL;
+		}
+	}
+
+	{
+		/* The initializer zeroes every field not named here. */
+		struct ibv_qp_attr attr = {
+			.qp_state        = IBV_QPS_INIT,
+			.pkey_index      = 0,
+			.port_num        = port,
+			.qp_access_flags = 0
+		};
+
+		if (ibv_modify_qp(ctx->qp, &attr,
+				  IBV_QP_STATE              |
+				  IBV_QP_PKEY_INDEX         |
+				  IBV_QP_PORT               |
+				  IBV_QP_ACCESS_FLAGS)) {
+			fprintf(stderr, "Failed to modify QP to INIT\n");
+			return NULL;
+		}
+	}
+
+	return ctx;
+}
+
+/*
+ * Post up to n receive work requests, each describing the whole work
+ * buffer.  Stops at the first failed post and returns the number of
+ * requests successfully posted.
+ */
+static int pp_post_recv(struct pingpong_context *ctx, int n)
+{
+	struct ibv_sge sge = {
+		.addr	= (uintptr_t) ctx->buf,
+		.length = ctx->size,
+		.lkey	= ctx->mr->lkey
+	};
+	struct ibv_recv_wr recv_wr = {
+		.wr_id	    = PINGPONG_RECV_WRID,
+		.sg_list    = &sge,
+		.num_sge    = 1,
+	};
+	struct ibv_recv_wr *bad_wr;
+	int posted = 0;
+
+	while (posted < n && !ibv_post_recv(ctx->qp, &recv_wr, &bad_wr))
+		++posted;
+
+	return posted;
+}
+
+/*
+ * Post one signaled send of the whole work buffer.
+ * Returns whatever ibv_post_send() returns.
+ */
+static int pp_post_send(struct pingpong_context *ctx)
+{
+	struct ibv_sge sge = {
+		.addr	= (uintptr_t) ctx->buf,
+		.length = ctx->size,
+		.lkey	= ctx->mr->lkey
+	};
+	struct ibv_send_wr send_wr = {
+		.wr_id	    = PINGPONG_SEND_WRID,
+		.sg_list    = &sge,
+		.num_sge    = 1,
+		.opcode     = IBV_WR_SEND,
+		.send_flags = IBV_SEND_SIGNALED,
+	};
+	struct ibv_send_wr *bad_wr;
+	int rc;
+
+	rc = ibv_post_send(ctx->qp, &send_wr, &bad_wr);
+
+	return rc;
+}
+
+/*
+ * Move the QP from INIT through RTR to RTS, targeting the remote
+ * endpoint described by dest.
+ *
+ * port is the local IB port used for the address vector and my_psn the
+ * local send PSN.  Returns 0 on success, 1 after printing a message on
+ * stderr if either transition fails.
+ */
+static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn,
+			  struct pingpong_dest *dest)
+{
+	/*
+	 * The whole ah_attr is consumed through IBV_QP_AV, so every field
+	 * not named here must be zero rather than stack garbage.
+	 */
+	struct ibv_qp_attr attr = {
+		.qp_state		= IBV_QPS_RTR,
+		.path_mtu		= IBV_MTU_1024,
+		.dest_qp_num		= dest->qpn,
+		.rq_psn			= dest->psn,
+		.max_dest_rd_atomic	= 1,
+		.min_rnr_timer		= 12,
+		.ah_attr		= {
+			.is_global	= 0,
+			.dlid		= dest->lid,
+			.sl		= 0,
+			.src_path_bits	= 0,
+			.port_num	= port
+		}
+	};
+
+	if (ibv_modify_qp(ctx->qp, &attr,
+			  IBV_QP_STATE              |
+			  IBV_QP_AV                 |
+			  IBV_QP_PATH_MTU           |
+			  IBV_QP_DEST_QPN           |
+			  IBV_QP_RQ_PSN             |
+			  IBV_QP_MAX_DEST_RD_ATOMIC |
+			  IBV_QP_MIN_RNR_TIMER)) {
+		fprintf(stderr, "Failed to modify QP to RTR\n");
+		return 1;
+	}
+
+	attr.qp_state	    = IBV_QPS_RTS;
+	attr.timeout	    = 14;
+	attr.retry_cnt	    = 7;
+	attr.rnr_retry	    = 7;
+	attr.sq_psn	    = my_psn;
+	attr.max_rd_atomic  = 1;
+	if (ibv_modify_qp(ctx->qp, &attr,
+			  IBV_QP_STATE              |
+			  IBV_QP_TIMEOUT            |
+			  IBV_QP_RETRY_CNT          |
+			  IBV_QP_RNR_RETRY          |
+			  IBV_QP_SQ_PSN             |
+			  IBV_QP_MAX_QP_RD_ATOMIC)) {
+		fprintf(stderr, "Failed to modify QP to RTS\n");
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Print the command-line synopsis and option summary on stdout.
+ * argv0 is the program name shown in the synopsis lines.
+ */
+static void usage(const char *argv0)
+{
+	static const char options_help[] =
+		"\n"
+		"Options:\n"
+		" -p, --port=<port> listen on/connect to port <port> (default 18515)\n"
+		" -d, --ib-dev=<dev> use IB device <dev> (default first device found)\n"
+		" -i, --ib-port=<port> use port <port> of IB device (default 1)\n"
+		" -s, --size=<size> size of message to exchange (default 4096)\n"
+		" -r, --rx-depth=<dep> number of receives to post at a time (default 500)\n"
+		" -n, --iters=<iters> number of exchanges (default 1000)\n"
+		" -e, --events sleep on CQ events (default poll)\n";
+
+	printf("Usage:\n"
+	       " %s start a server and wait for connection\n"
+	       " %s <host> connect to server at <host>\n",
+	       argv0, argv0);
+	fputs(options_help, stdout);
+}
+/*
+ * RC ping-pong example.
+ *
+ * Parses the options, picks an IB device, builds the endpoint with
+ * pp_init_ctx(), posts the initial receives, exchanges LID/QPN/PSN with
+ * the peer over TCP (client when a host argument is given, server
+ * otherwise), connects the QP and then bounces messages until both the
+ * send and receive counts reach iters.  Finally prints throughput and
+ * per-iteration latency.  Returns 0 on success, 1 on any error.
+ */
int main(int argc, char *argv[])
{
- struct dlist *dev_list;
- struct ibv_device *ib_dev;
- struct ibv_context *context;
+	struct dlist		*dev_list;
+	struct ibv_device	*ib_dev;
+	struct pingpong_context *ctx;
+	struct pingpong_dest	 my_dest;
+	struct pingpong_dest	*rem_dest;
+	struct timeval		 start, end;
+	char			*ib_devname = NULL;
+	char			*servername = NULL;
+	int			 port = 18515;
+	int			 ib_port = 1;
+	int			 size = 4096;
+	int			 rx_depth = 500;
+	int			 iters = 1000;
+	int			 use_event = 0;
+	int			 routs;
+	int			 rcnt, scnt;
+
+	srand48(getpid() * time(NULL));
+
+	while (1) {
+		int c;
+
+		/* Every short option in the optstring has a long form here. */
+		static struct option long_options[] = {
+			{ .name = "port",     .has_arg = 1, .val = 'p' },
+			{ .name = "ib-dev",   .has_arg = 1, .val = 'd' },
+			{ .name = "ib-port",  .has_arg = 1, .val = 'i' },
+			{ .name = "size",     .has_arg = 1, .val = 's' },
+			{ .name = "rx-depth", .has_arg = 1, .val = 'r' },
+			{ .name = "iters",    .has_arg = 1, .val = 'n' },
+			{ .name = "events",   .has_arg = 0, .val = 'e' },
+			{ 0 }
+		};
+
+		c = getopt_long(argc, argv, "p:d:i:s:r:n:e", long_options, NULL);
+		if (c == -1)
+			break;
+
+		switch (c) {
+		case 'p':
+			port = strtol(optarg, NULL, 0);
+			if (port < 0 || port > 65535) {
+				usage(argv[0]);
+				return 1;
+			}
+			break;
+
+		case 'd':
+			ib_devname = strdupa(optarg);
+			break;
+
+		case 'i':
+			ib_port = strtol(optarg, NULL, 0);
+			/* Validate the IB port just parsed, not the TCP port. */
+			if (ib_port < 0) {
+				usage(argv[0]);
+				return 1;
+			}
+			break;
+
+		case 's':
+			size = strtol(optarg, NULL, 0);
+			break;
+
+		case 'r':
+			rx_depth = strtol(optarg, NULL, 0);
+			break;
+
+		case 'n':
+			iters = strtol(optarg, NULL, 0);
+			break;
+
+		case 'e':
+			++use_event;
+			break;
+
+		default:
+			usage(argv[0]);
+			return 1;
+		}
+	}
+
+	/* One trailing argument selects client mode; more is an error. */
+	if (optind == argc - 1)
+		servername = strdupa(argv[optind]);
+	else if (optind < argc) {
+		usage(argv[0]);
+		return 1;
+	}
+
+	page_size = sysconf(_SC_PAGESIZE);
dev_list = ibv_get_devices();
dlist_start(dev_list);
- ib_dev = dlist_next(dev_list);
+	if (!ib_devname) {
+		ib_dev = dlist_next(dev_list);
+		if (!ib_dev) {
+			fprintf(stderr, "No IB devices found\n");
+			return 1;
+		}
+	} else {
+		dlist_for_each_data(dev_list, ib_dev, struct ibv_device)
+			if (!strcmp(ibv_get_device_name(ib_dev), ib_devname))
+				break;
+		if (!ib_dev) {
+			fprintf(stderr, "IB device %s not found\n", ib_devname);
+			return 1;
+		}
+	}
+
+	ctx = pp_init_ctx(ib_dev, size, rx_depth, ib_port);
+	if (!ctx)
+		return 1;
- if (!ib_dev) {
- fprintf(stderr, "No IB devices found\n");
+	routs = pp_post_recv(ctx, ctx->rx_depth);
+	if (routs < ctx->rx_depth) {
+		fprintf(stderr, "Couldn't post receive (%d)\n", routs);
return 1;
}
- context = ibv_open_device(ib_dev);
- if (!context) {
- fprintf(stderr, "Couldn't get context for %s\n",
- ibv_get_device_name(ib_dev));
+	my_dest.lid = pp_get_local_lid(ctx, ib_port);
+	my_dest.qpn = ctx->qp->qp_num;
+	my_dest.psn = lrand48() & 0xffffff;
+	if (!my_dest.lid) {
+		fprintf(stderr, "Couldn't get local LID\n");
+		return 1;
+	}
+
+	printf(" local address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x\n",
+	       my_dest.lid, my_dest.qpn, my_dest.psn);
+
+	if (servername)
+		rem_dest = pp_client_exch_dest(servername, port, &my_dest);
+	else
+		rem_dest = pp_server_exch_dest(port, &my_dest);
+
+	if (!rem_dest)
+		return 1;
+
+	printf(" remote address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x\n",
+	       rem_dest->lid, rem_dest->qpn, rem_dest->psn);
+
+	if (pp_connect_ctx(ctx, ib_port, my_dest.psn, rem_dest))
return 1;
+
+	if (use_event)
+		if (ibv_req_notify_cq(ctx->cq, 0)) {
+			fprintf(stderr, "Couldn't request CQ notification\n");
+			return 1;
+		}
+
+	/* The client sends the first message; the server only answers. */
+	if (servername)
+		if (pp_post_send(ctx)) {
+			fprintf(stderr, "Couldn't post send\n");
+			return 1;
+		}
+
+	if (gettimeofday(&start, NULL)) {
+		perror("gettimeofday");
+		return 1;
+	}
+
+	rcnt = scnt = 0;
+	while (rcnt < iters || scnt < iters) {
+		if (use_event) {
+			struct ibv_cq *ev_cq;
+			void          *ev_ctx;
+
+			if (ibv_get_cq_event(ctx->context, 0, &ev_cq, &ev_ctx)) {
+				fprintf(stderr, "Failed to get cq_event\n");
+				return 1;
+			}
+
+			if (ev_cq != ctx->cq) {
+				fprintf(stderr, "CQ event for unknown CQ %p\n",
+					(void *) ev_cq);
+				return 1;
+			}
+
+			if (ibv_req_notify_cq(ctx->cq, 0)) {
+				fprintf(stderr, "Couldn't request CQ notification\n");
+				return 1;
+			}
+		}
+
+		{
+			struct ibv_wc wc[2];
+			int ne, i;
+
+			/* In polling mode spin until at least one completion shows up. */
+			do {
+				ne = ibv_poll_cq(ctx->cq, 2, wc);
+			} while (!use_event && ne < 1);
+
+			if (ne < 0) {
+				fprintf(stderr, "poll CQ failed %d\n", ne);
+				return 1;
+			}
+
+			for (i = 0; i < ne; ++i) {
+				if (wc[i].status != IBV_WC_SUCCESS) {
+					fprintf(stderr, "Failed status %d for wr_id %d\n",
+						wc[i].status, (int) wc[i].wr_id);
+					return 1;
+				}
+
+				switch ((int) wc[i].wr_id) {
+				case PINGPONG_SEND_WRID:
+					++scnt;
+					break;
+
+				case PINGPONG_RECV_WRID:
+					/* Refill the receive queue when it is nearly drained. */
+					if (--routs <= 1) {
+						routs += pp_post_recv(ctx, ctx->rx_depth - routs);
+						if (routs < ctx->rx_depth) {
+							fprintf(stderr,
+								"Couldn't post receive (%d)\n",
+								routs);
+							return 1;
+						}
+					}
+
+					if (scnt < iters)
+						if (pp_post_send(ctx)) {
+							fprintf(stderr, "Couldn't post send\n");
+							return 1;
+						}
+
+					++rcnt;
+					break;
+
+				default:
+					fprintf(stderr, "Completion for unknown wr_id %d\n",
+						(int) wc[i].wr_id);
+					return 1;
+				}
+			}
+		}
+	}
+
+	if (gettimeofday(&end, NULL)) {
+		perror("gettimeofday");
+		return 1;
+	}
+
+	{
+		/* Computed in double: float loses microsecond resolution on long runs. */
+		double usec = (end.tv_sec - start.tv_sec) * 1000000. +
+			(end.tv_usec - start.tv_usec);
+		long long bytes = (long long) size * iters * 2;
+
+		printf("%lld bytes in %.2f seconds = %.2f Mbit/sec\n",
+		       bytes, usec / 1000000., bytes * 8. / usec);
+		printf("%d iters in %.2f seconds = %.2f usec/iter\n",
+		       iters, usec / 1000000., usec / iters);
}
return 0;
diff --git a/examples/ud-pingpong.c b/examples/ud-pingpong.c
new file mode 100644
index 0000000..8aa6075
--- /dev/null
+++ b/examples/ud-pingpong.c
@@ -0,0 +1,677 @@
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id$
+ */
+
+#if HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <netdb.h>
+#include <malloc.h>
+#include <getopt.h>
+#include <arpa/inet.h>
+#include <time.h>
+
+#include <sysfs/libsysfs.h>
+
+#include <infiniband/verbs.h>
+
+enum { /* work request IDs: set by pp_post_recv()/pp_post_send(), switched on in main() */
+ PINGPONG_RECV_WRID = 1, /* wr_id carried by every posted receive */
+ PINGPONG_SEND_WRID = 2, /* wr_id carried by every posted send */
+};
+
+static int page_size; /* set in main() from sysconf(_SC_PAGESIZE); alignment of the work buffer */
+
+struct pingpong_context { /* all state set up for one UD ping-pong endpoint */
+ struct ibv_context *context; /* device context returned by ibv_open_device() */
+ struct ibv_pd *pd; /* protection domain from ibv_alloc_pd() */
+ struct ibv_mr *mr; /* memory region registered over buf */
+ struct ibv_cq *cq; /* one CQ used as both send_cq and recv_cq of qp */
+ struct ibv_qp *qp; /* queue pair created with qp_type IBV_QPT_UD */
+ struct ibv_ah *ah; /* address handle for the peer, created in pp_connect_ctx() */
+ void *buf; /* message buffer, memalign(page_size, size + 40) */
+ int size; /* message size in bytes, excluding the 40 extra buffer bytes */
+ int rx_depth; /* max_recv_wr requested for qp; also the receive refill target */
+};
+
+/*
+ * Address triple exchanged between the two sides over TCP as the text
+ * "%04x:%06x:%06x".  The fields are unsigned because they are printed
+ * and scanned with %x, which requires unsigned int arguments.
+ */
+struct pingpong_dest {
+	unsigned int lid;	/* local identifier of the port */
+	unsigned int qpn;	/* queue pair number */
+	unsigned int psn;	/* initial packet sequence number */
+};
+
+
+/*
+ * Query the given port of the opened device and return its LID.
+ * Returns 0 when the port query fails.
+ */
+static uint16_t pp_get_local_lid(struct pingpong_context *ctx, int port)
+{
+	struct ibv_port_attr port_attr;
+	int err = ibv_query_port(ctx->context, port, &port_attr);
+
+	return err ? 0 : port_attr.lid;
+}
+
+/*
+ * Client side of the out-of-band address exchange.
+ *
+ * Connects over TCP to servername:port, sends the local LID/QPN/PSN as
+ * a fixed-size "%04x:%06x:%06x" string, reads the server's reply in the
+ * same format and finally sends "done".
+ *
+ * Returns a malloc()ed pingpong_dest describing the remote side (owned
+ * by the caller), or NULL on any failure.
+ */
+static struct pingpong_dest *pp_client_exch_dest(const char *servername, int port,
+						 const struct pingpong_dest *my_dest)
+{
+	struct addrinfo *res, *t;
+	struct addrinfo hints = {
+		.ai_family   = AF_UNSPEC,
+		.ai_socktype = SOCK_STREAM
+	};
+	char *service;
+	char msg[sizeof "0000:000000:000000"];
+	unsigned int lid, qpn, psn;
+	int n;
+	int sockfd = -1;
+	struct pingpong_dest *rem_dest = NULL;
+
+	/* On failure asprintf() leaves service undefined, so stop here. */
+	if (asprintf(&service, "%d", port) < 0) {
+		fprintf(stderr, "Couldn't format port %d\n", port);
+		return NULL;
+	}
+
+	n = getaddrinfo(servername, service, &hints, &res);
+	free(service);
+
+	/* getaddrinfo() signals failure with any non-zero EAI_* code. */
+	if (n) {
+		fprintf(stderr, "%s for %s:%d\n", gai_strerror(n), servername, port);
+		return NULL;
+	}
+
+	/* Try each resolved address until one connects. */
+	for (t = res; t; t = t->ai_next) {
+		sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
+		if (sockfd >= 0) {
+			if (!connect(sockfd, t->ai_addr, t->ai_addrlen))
+				break;
+			close(sockfd);
+			sockfd = -1;
+		}
+	}
+
+	freeaddrinfo(res);
+
+	if (sockfd < 0) {
+		fprintf(stderr, "Couldn't connect to %s:%d\n", servername, port);
+		return NULL;
+	}
+
+	snprintf(msg, sizeof msg, "%04x:%06x:%06x",
+		 my_dest->lid, my_dest->qpn, my_dest->psn);
+	if (write(sockfd, msg, sizeof msg) != sizeof msg) {
+		fprintf(stderr, "Couldn't send local address\n");
+		goto out;
+	}
+
+	if (read(sockfd, msg, sizeof msg) != sizeof msg) {
+		perror("client read");
+		fprintf(stderr, "Couldn't read remote address\n");
+		goto out;
+	}
+
+	/* Best-effort acknowledgement; the server ignores the result of its read. */
+	write(sockfd, "done", sizeof "done");
+
+	/* The peer is not trusted to have sent the terminating NUL. */
+	msg[sizeof msg - 1] = '\0';
+	if (sscanf(msg, "%x:%x:%x", &lid, &qpn, &psn) != 3) {
+		fprintf(stderr, "Couldn't parse remote address\n");
+		goto out;
+	}
+
+	rem_dest = malloc(sizeof *rem_dest);
+	if (!rem_dest)
+		goto out;
+
+	rem_dest->lid = lid;
+	rem_dest->qpn = qpn;
+	rem_dest->psn = psn;
+
+out:
+	close(sockfd);
+	return rem_dest;
+}
+
+/*
+ * Server side of the out-of-band address exchange.
+ *
+ * Binds a TCP socket to the given port, accepts one connection, reads
+ * the client's LID/QPN/PSN as a fixed-size "%04x:%06x:%06x" string,
+ * replies with the local triple in the same format and then waits for
+ * the client's final message.
+ *
+ * Returns a malloc()ed pingpong_dest describing the remote side (owned
+ * by the caller), or NULL on any failure.
+ */
+static struct pingpong_dest *pp_server_exch_dest(int port, const struct pingpong_dest *my_dest)
+{
+	struct addrinfo *res, *t;
+	struct addrinfo hints = {
+		.ai_flags    = AI_PASSIVE,
+		.ai_family   = AF_UNSPEC,
+		.ai_socktype = SOCK_STREAM
+	};
+	char *service;
+	char msg[sizeof "0000:000000:000000"];
+	unsigned int lid, qpn, psn;
+	int n;
+	int sockfd = -1, connfd;
+	struct pingpong_dest *rem_dest = NULL;
+
+	/* On failure asprintf() leaves service undefined, so stop here. */
+	if (asprintf(&service, "%d", port) < 0) {
+		fprintf(stderr, "Couldn't format port %d\n", port);
+		return NULL;
+	}
+
+	n = getaddrinfo(NULL, service, &hints, &res);
+	free(service);
+
+	/* getaddrinfo() signals failure with any non-zero EAI_* code. */
+	if (n) {
+		fprintf(stderr, "%s for port %d\n", gai_strerror(n), port);
+		return NULL;
+	}
+
+	/* Try each candidate address until one can be bound. */
+	for (t = res; t; t = t->ai_next) {
+		sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
+		if (sockfd >= 0) {
+			n = 1;
+
+			setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &n, sizeof n);
+
+			if (!bind(sockfd, t->ai_addr, t->ai_addrlen))
+				break;
+			close(sockfd);
+			sockfd = -1;
+		}
+	}
+
+	freeaddrinfo(res);
+
+	if (sockfd < 0) {
+		fprintf(stderr, "Couldn't listen to port %d\n", port);
+		return NULL;
+	}
+
+	/* A listen() failure surfaces through the accept() check below. */
+	listen(sockfd, 1);
+	connfd = accept(sockfd, NULL, NULL);
+	close(sockfd);
+	if (connfd < 0) {
+		fprintf(stderr, "accept() failed\n");
+		return NULL;
+	}
+
+	n = read(connfd, msg, sizeof msg);
+	if (n != sizeof msg) {
+		perror("server read");
+		fprintf(stderr, "%d/%d: Couldn't read remote address\n", n, (int) sizeof msg);
+		goto out;
+	}
+
+	/* The peer is not trusted to have sent the terminating NUL. */
+	msg[sizeof msg - 1] = '\0';
+	if (sscanf(msg, "%x:%x:%x", &lid, &qpn, &psn) != 3) {
+		fprintf(stderr, "Couldn't parse remote address\n");
+		goto out;
+	}
+
+	rem_dest = malloc(sizeof *rem_dest);
+	if (!rem_dest)
+		goto out;
+
+	rem_dest->lid = lid;
+	rem_dest->qpn = qpn;
+	rem_dest->psn = psn;
+
+	snprintf(msg, sizeof msg, "%04x:%06x:%06x",
+		 my_dest->lid, my_dest->qpn, my_dest->psn);
+	if (write(connfd, msg, sizeof msg) != sizeof msg) {
+		fprintf(stderr, "Couldn't send local address\n");
+		free(rem_dest);
+		rem_dest = NULL;
+		goto out;
+	}
+
+	/* Wait for the client's final message; its content is not examined. */
+	read(connfd, msg, sizeof msg);
+
+out:
+	close(connfd);
+	return rem_dest;
+}
+
+/*
+ * Allocate and initialise a UD ping-pong endpoint on ib_dev.
+ *
+ * Allocates a zeroed, page-aligned buffer of size + 40 bytes, opens the
+ * device, allocates a PD, registers the buffer, creates one CQ with
+ * rx_depth + 1 entries and a UD QP using it for both queues, then moves
+ * the QP to INIT on the given port with Q_Key 0x11111111.
+ *
+ * NOTE(review): the extra 40 bytes are presumably room for the header
+ * that precedes UD receive data -- confirm against the verbs docs.
+ *
+ * Returns the new context, or NULL after printing a message on stderr.
+ * Failures after the device has been opened return NULL without
+ * releasing the verbs objects already created; main() exits right away
+ * on a NULL result.
+ */
+static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,
+					    int rx_depth, int port)
+{
+	struct pingpong_context *ctx;
+
+	ctx = malloc(sizeof *ctx);
+	if (!ctx)
+		return NULL;
+
+	ctx->size     = size;
+	ctx->rx_depth = rx_depth;
+
+	ctx->buf = memalign(page_size, size + 40);
+	if (!ctx->buf) {
+		fprintf(stderr, "Couldn't allocate work buf.\n");
+		free(ctx);
+		return NULL;
+	}
+
+	memset(ctx->buf, 0, size + 40);
+
+	ctx->context = ibv_open_device(ib_dev);
+	if (!ctx->context) {
+		fprintf(stderr, "Couldn't get context for %s\n",
+			ibv_get_device_name(ib_dev));
+		/* Nothing refers to the buffer yet, so plain free() is safe. */
+		free(ctx->buf);
+		free(ctx);
+		return NULL;
+	}
+
+	ctx->pd = ibv_alloc_pd(ctx->context);
+	if (!ctx->pd) {
+		fprintf(stderr, "Couldn't allocate PD\n");
+		return NULL;
+	}
+
+	ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size + 40, IBV_ACCESS_LOCAL_WRITE);
+	if (!ctx->mr) {
+		fprintf(stderr, "Couldn't allocate MR\n");
+		return NULL;
+	}
+
+	ctx->cq = ibv_create_cq(ctx->context, rx_depth + 1, NULL);
+	if (!ctx->cq) {
+		fprintf(stderr, "Couldn't create CQ\n");
+		return NULL;
+	}
+
+	{
+		struct ibv_qp_init_attr attr = {
+			.send_cq = ctx->cq,
+			.recv_cq = ctx->cq,
+			.cap     = {
+				.max_send_wr  = 1,
+				.max_recv_wr  = rx_depth,
+				.max_send_sge = 1,
+				.max_recv_sge = 1
+			},
+			.qp_type = IBV_QPT_UD,
+		};
+
+		ctx->qp = ibv_create_qp(ctx->pd, &attr);
+		if (!ctx->qp) {
+			fprintf(stderr, "Couldn't create QP\n");
+			return NULL;
+		}
+	}
+
+	{
+		/* The initializer zeroes every field not named here. */
+		struct ibv_qp_attr attr = {
+			.qp_state   = IBV_QPS_INIT,
+			.pkey_index = 0,
+			.port_num   = port,
+			.qkey       = 0x11111111
+		};
+
+		if (ibv_modify_qp(ctx->qp, &attr,
+				  IBV_QP_STATE              |
+				  IBV_QP_PKEY_INDEX         |
+				  IBV_QP_PORT               |
+				  IBV_QP_QKEY)) {
+			fprintf(stderr, "Failed to modify QP to INIT\n");
+			return NULL;
+		}
+	}
+
+	return ctx;
+}
+
+/*
+ * Post up to n receive work requests, each describing the whole work
+ * buffer (size + 40 bytes).  Stops at the first failed post and returns
+ * the number of requests successfully posted.
+ */
+static int pp_post_recv(struct pingpong_context *ctx, int n)
+{
+	struct ibv_sge sge = {
+		.addr	= (uintptr_t) ctx->buf,
+		.length = ctx->size + 40,
+		.lkey	= ctx->mr->lkey
+	};
+	struct ibv_recv_wr recv_wr = {
+		.wr_id	    = PINGPONG_RECV_WRID,
+		.sg_list    = &sge,
+		.num_sge    = 1,
+	};
+	struct ibv_recv_wr *bad_wr;
+	int posted = 0;
+
+	while (posted < n && !ibv_post_recv(ctx->qp, &recv_wr, &bad_wr))
+		++posted;
+
+	return posted;
+}
+
+/*
+ * Post one signaled UD send of size bytes, starting 40 bytes into the
+ * work buffer, to queue pair qpn through the context's address handle.
+ * Returns whatever ibv_post_send() returns.
+ */
+static int pp_post_send(struct pingpong_context *ctx, uint32_t qpn)
+{
+	struct ibv_sge sge = {
+		.addr	= (uintptr_t) ctx->buf + 40,
+		.length = ctx->size,
+		.lkey	= ctx->mr->lkey
+	};
+	struct ibv_send_wr send_wr = {
+		.wr_id	    = PINGPONG_SEND_WRID,
+		.sg_list    = &sge,
+		.num_sge    = 1,
+		.opcode     = IBV_WR_SEND,
+		.send_flags = IBV_SEND_SIGNALED,
+		.wr         = {
+			.ud = {
+				 .ah          = ctx->ah,
+				 .remote_qpn  = qpn,
+				 .remote_qkey = 0x11111111
+			 }
+		}
+	};
+	struct ibv_send_wr *bad_wr;
+	int rc;
+
+	rc = ibv_post_send(ctx->qp, &send_wr, &bad_wr);
+
+	return rc;
+}
+
+/*
+ * Move the UD QP from INIT through RTR to RTS and create the address
+ * handle used to reach the remote endpoint described by dest.
+ *
+ * port is the local IB port for the address handle and my_psn the local
+ * send PSN.  Returns 0 on success, 1 after printing a message on stderr
+ * if a transition or the AH creation fails.
+ */
+static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn,
+			  struct pingpong_dest *dest)
+{
+	/* The initializers zero every field not named below. */
+	struct ibv_qp_attr attr = {
+		.qp_state = IBV_QPS_RTR
+	};
+	/*
+	 * The whole structure is handed to ibv_create_ah(), so fields not
+	 * named here must be zero rather than stack garbage.
+	 */
+	struct ibv_ah_attr ah_attr = {
+		.is_global     = 0,
+		.dlid          = dest->lid,
+		.sl            = 0,
+		.src_path_bits = 0,
+		.port_num      = port
+	};
+
+	if (ibv_modify_qp(ctx->qp, &attr, IBV_QP_STATE)) {
+		fprintf(stderr, "Failed to modify QP to RTR\n");
+		return 1;
+	}
+
+	attr.qp_state	    = IBV_QPS_RTS;
+	attr.sq_psn	    = my_psn;
+
+	if (ibv_modify_qp(ctx->qp, &attr,
+			  IBV_QP_STATE              |
+			  IBV_QP_SQ_PSN)) {
+		fprintf(stderr, "Failed to modify QP to RTS\n");
+		return 1;
+	}
+
+	ctx->ah = ibv_create_ah(ctx->pd, &ah_attr);
+	if (!ctx->ah) {
+		fprintf(stderr, "Failed to create AH\n");
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Print the command-line synopsis and option summary on stdout.
+ * argv0 is the program name shown in the synopsis lines.
+ */
+static void usage(const char *argv0)
+{
+	static const char options_help[] =
+		"\n"
+		"Options:\n"
+		" -p, --port=<port> listen on/connect to port <port> (default 18515)\n"
+		" -d, --ib-dev=<dev> use IB device <dev> (default first device found)\n"
+		" -i, --ib-port=<port> use port <port> of IB device (default 1)\n"
+		" -s, --size=<size> size of message to exchange (default 2048)\n"
+		" -r, --rx-depth=<dep> number of receives to post at a time (default 500)\n"
+		" -n, --iters=<iters> number of exchanges (default 1000)\n"
+		" -e, --events sleep on CQ events (default poll)\n";
+
+	printf("Usage:\n"
+	       " %s start a server and wait for connection\n"
+	       " %s <host> connect to server at <host>\n",
+	       argv0, argv0);
+	fputs(options_help, stdout);
+}
+
+/*
+ * UD ping-pong example.
+ *
+ * Parses the options, picks an IB device, builds the endpoint with
+ * pp_init_ctx(), posts the initial receives, exchanges LID/QPN/PSN with
+ * the peer over TCP (client when a host argument is given, server
+ * otherwise), brings the QP to RTS and then bounces datagrams until
+ * both the send and receive counts reach iters.  Finally prints
+ * throughput and per-iteration latency.  Returns 0 on success, 1 on any
+ * error.
+ */
+int main(int argc, char *argv[])
+{
+	struct dlist		*dev_list;
+	struct ibv_device	*ib_dev;
+	struct pingpong_context *ctx;
+	struct pingpong_dest	 my_dest;
+	struct pingpong_dest	*rem_dest;
+	struct timeval		 start, end;
+	char			*ib_devname = NULL;
+	char			*servername = NULL;
+	int			 port = 18515;
+	int			 ib_port = 1;
+	int			 size = 2048;
+	int			 rx_depth = 500;
+	int			 iters = 1000;
+	int			 use_event = 0;
+	int			 routs;
+	int			 rcnt, scnt;
+
+	srand48(getpid() * time(NULL));
+
+	while (1) {
+		int c;
+
+		/* Every short option in the optstring has a long form here. */
+		static struct option long_options[] = {
+			{ .name = "port",     .has_arg = 1, .val = 'p' },
+			{ .name = "ib-dev",   .has_arg = 1, .val = 'd' },
+			{ .name = "ib-port",  .has_arg = 1, .val = 'i' },
+			{ .name = "size",     .has_arg = 1, .val = 's' },
+			{ .name = "rx-depth", .has_arg = 1, .val = 'r' },
+			{ .name = "iters",    .has_arg = 1, .val = 'n' },
+			{ .name = "events",   .has_arg = 0, .val = 'e' },
+			{ 0 }
+		};
+
+		c = getopt_long(argc, argv, "p:d:i:s:r:n:e", long_options, NULL);
+		if (c == -1)
+			break;
+
+		switch (c) {
+		case 'p':
+			port = strtol(optarg, NULL, 0);
+			if (port < 0 || port > 65535) {
+				usage(argv[0]);
+				return 1;
+			}
+			break;
+
+		case 'd':
+			ib_devname = strdupa(optarg);
+			break;
+
+		case 'i':
+			ib_port = strtol(optarg, NULL, 0);
+			/* Validate the IB port just parsed, not the TCP port. */
+			if (ib_port < 0) {
+				usage(argv[0]);
+				return 1;
+			}
+			break;
+
+		case 's':
+			size = strtol(optarg, NULL, 0);
+			break;
+
+		case 'r':
+			rx_depth = strtol(optarg, NULL, 0);
+			break;
+
+		case 'n':
+			iters = strtol(optarg, NULL, 0);
+			break;
+
+		case 'e':
+			++use_event;
+			break;
+
+		default:
+			usage(argv[0]);
+			return 1;
+		}
+	}
+
+	/* One trailing argument selects client mode; more is an error. */
+	if (optind == argc - 1)
+		servername = strdupa(argv[optind]);
+	else if (optind < argc) {
+		usage(argv[0]);
+		return 1;
+	}
+
+	page_size = sysconf(_SC_PAGESIZE);
+
+	dev_list = ibv_get_devices();
+
+	dlist_start(dev_list);
+	if (!ib_devname) {
+		ib_dev = dlist_next(dev_list);
+		if (!ib_dev) {
+			fprintf(stderr, "No IB devices found\n");
+			return 1;
+		}
+	} else {
+		dlist_for_each_data(dev_list, ib_dev, struct ibv_device)
+			if (!strcmp(ibv_get_device_name(ib_dev), ib_devname))
+				break;
+		if (!ib_dev) {
+			fprintf(stderr, "IB device %s not found\n", ib_devname);
+			return 1;
+		}
+	}
+
+	ctx = pp_init_ctx(ib_dev, size, rx_depth, ib_port);
+	if (!ctx)
+		return 1;
+
+	routs = pp_post_recv(ctx, ctx->rx_depth);
+	if (routs < ctx->rx_depth) {
+		fprintf(stderr, "Couldn't post receive (%d)\n", routs);
+		return 1;
+	}
+
+	my_dest.lid = pp_get_local_lid(ctx, ib_port);
+	my_dest.qpn = ctx->qp->qp_num;
+	my_dest.psn = lrand48() & 0xffffff;
+	if (!my_dest.lid) {
+		fprintf(stderr, "Couldn't get local LID\n");
+		return 1;
+	}
+
+	printf(" local address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x\n",
+	       my_dest.lid, my_dest.qpn, my_dest.psn);
+
+	if (servername)
+		rem_dest = pp_client_exch_dest(servername, port, &my_dest);
+	else
+		rem_dest = pp_server_exch_dest(port, &my_dest);
+
+	if (!rem_dest)
+		return 1;
+
+	printf(" remote address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x\n",
+	       rem_dest->lid, rem_dest->qpn, rem_dest->psn);
+
+	if (pp_connect_ctx(ctx, ib_port, my_dest.psn, rem_dest))
+		return 1;
+
+	if (use_event)
+		if (ibv_req_notify_cq(ctx->cq, 0)) {
+			fprintf(stderr, "Couldn't request CQ notification\n");
+			return 1;
+		}
+
+	/* The client sends the first message; the server only answers. */
+	if (servername)
+		if (pp_post_send(ctx, rem_dest->qpn)) {
+			fprintf(stderr, "Couldn't post send\n");
+			return 1;
+		}
+
+	if (gettimeofday(&start, NULL)) {
+		perror("gettimeofday");
+		return 1;
+	}
+
+	rcnt = scnt = 0;
+	while (rcnt < iters || scnt < iters) {
+		if (use_event) {
+			struct ibv_cq *ev_cq;
+			void          *ev_ctx;
+
+			if (ibv_get_cq_event(ctx->context, 0, &ev_cq, &ev_ctx)) {
+				fprintf(stderr, "Failed to get cq_event\n");
+				return 1;
+			}
+
+			if (ev_cq != ctx->cq) {
+				fprintf(stderr, "CQ event for unknown CQ %p\n",
+					(void *) ev_cq);
+				return 1;
+			}
+
+			if (ibv_req_notify_cq(ctx->cq, 0)) {
+				fprintf(stderr, "Couldn't request CQ notification\n");
+				return 1;
+			}
+		}
+
+		{
+			struct ibv_wc wc[2];
+			int ne, i;
+
+			/* In polling mode spin until at least one completion shows up. */
+			do {
+				ne = ibv_poll_cq(ctx->cq, 2, wc);
+			} while (!use_event && ne < 1);
+
+			if (ne < 0) {
+				fprintf(stderr, "poll CQ failed %d\n", ne);
+				return 1;
+			}
+
+			for (i = 0; i < ne; ++i) {
+				if (wc[i].status != IBV_WC_SUCCESS) {
+					fprintf(stderr, "Failed status %d for wr_id %d\n",
+						wc[i].status, (int) wc[i].wr_id);
+					return 1;
+				}
+
+				switch ((int) wc[i].wr_id) {
+				case PINGPONG_SEND_WRID:
+					++scnt;
+					break;
+
+				case PINGPONG_RECV_WRID:
+					/* Refill the receive queue when it is nearly drained. */
+					if (--routs <= 1) {
+						routs += pp_post_recv(ctx, ctx->rx_depth - routs);
+						if (routs < ctx->rx_depth) {
+							fprintf(stderr,
+								"Couldn't post receive (%d)\n",
+								routs);
+							return 1;
+						}
+					}
+
+					if (scnt < iters)
+						if (pp_post_send(ctx, rem_dest->qpn)) {
+							fprintf(stderr, "Couldn't post send\n");
+							return 1;
+						}
+
+					++rcnt;
+					break;
+
+				default:
+					fprintf(stderr, "Completion for unknown wr_id %d\n",
+						(int) wc[i].wr_id);
+					return 1;
+				}
+			}
+		}
+	}
+
+	if (gettimeofday(&end, NULL)) {
+		perror("gettimeofday");
+		return 1;
+	}
+
+	{
+		/* Computed in double: float loses microsecond resolution on long runs. */
+		double usec = (end.tv_sec - start.tv_sec) * 1000000. +
+			(end.tv_usec - start.tv_usec);
+		long long bytes = (long long) size * iters * 2;
+
+		printf("%lld bytes in %.2f seconds = %.2f Mbit/sec\n",
+		       bytes, usec / 1000000., bytes * 8. / usec);
+		printf("%d iters in %.2f seconds = %.2f usec/iter\n",
+		       iters, usec / 1000000., usec / iters);
+	}
+
+	return 0;
+}
diff --git a/include/infiniband/arch.h b/include/infiniband/arch.h
new file mode 100644
index 0000000..b582d18
--- /dev/null
+++ b/include/infiniband/arch.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id$
+ */
+
+#ifndef INFINIBAND_ARCH_H
+#define INFINIBAND_ARCH_H
+
+/*
+ * Architecture-specific defines. Currently, an architecture is
+ * required to implement the following operations:
+ *
+ * mb() - full memory barrier. No loads or stores may be reordered
+ * across this macro by either the compiler or the CPU.
+ */
+
+#if defined(__i386__)
+/* A locked read-modify-write is a full barrier and needs no SSE2. */
+#define mb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
+
+#elif defined(__x86_64__)
+/* x86 may move a later load ahead of an earlier store; fence it. */
+#define mb() asm volatile("mfence" ::: "memory")
+
+#elif defined(__PPC64__)
+
+#define mb() asm volatile("sync" ::: "memory")
+
+#elif defined(__ia64__)
+
+#define mb() asm volatile("mf" ::: "memory")
+
+#elif defined(__PPC__)
+
+#define mb() asm volatile("sync" ::: "memory")
+
+#elif defined(__sparc__)
+
+#define mb() asm volatile("membar #LoadLoad | #LoadStore | #StoreStore | #StoreLoad" ::: "memory")
+
+#else
+
+#warning No architecture specific defines found. Using generic implementation.
+/* Compiler builtin: orders the CPU as well, unlike an empty asm. */
+#define mb() __sync_synchronize()
+
+#endif
+
+#endif /* INFINIBAND_ARCH_H */
diff --git a/include/infiniband/driver.h b/include/infiniband/driver.h
index a873621..dde2650 100644
--- a/include/infiniband/driver.h
+++ b/include/infiniband/driver.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -38,6 +38,7 @@
#include <sysfs/libsysfs.h>
#include <infiniband/verbs.h>
+#include <infiniband/kern-abi.h>
#ifdef __cplusplus
# define BEGIN_C_DECLS extern "C" {
@@ -62,4 +63,30 @@
typedef struct ibv_device *(*ibv_driver_init_func)(struct sysfs_class_device *);
+extern int ibv_cmd_get_context(int num_comp, struct ibv_context *context,
+ struct ibv_get_context *cmd, size_t cmd_size);
+extern int ibv_cmd_query_port(struct ibv_context *context, uint8_t port_num,
+ struct ibv_port_attr *port_attr,
+ struct ibv_query_port *cmd, size_t cmd_size);
+extern int ibv_cmd_alloc_pd(struct ibv_context *context, struct ibv_pd *pd,
+ struct ibv_alloc_pd *cmd, size_t cmd_size);
+extern int ibv_cmd_dealloc_pd(struct ibv_pd *pd);
+extern int ibv_cmd_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
+ uint64_t hca_va, enum ibv_access_flags access,
+ struct ibv_mr *mr, struct ibv_reg_mr *cmd,
+ size_t cmd_size);
+extern int ibv_cmd_dereg_mr(struct ibv_mr *mr);
+extern int ibv_cmd_create_cq(struct ibv_context *context, int cqe,
+ struct ibv_cq *cq,
+ struct ibv_create_cq *cmd, size_t cmd_size);
+extern int ibv_cmd_destroy_cq(struct ibv_cq *cq);
+
+extern int ibv_cmd_create_qp(struct ibv_pd *pd,
+ struct ibv_qp *qp, struct ibv_qp_init_attr *attr,
+ struct ibv_create_qp *cmd, size_t cmd_size);
+extern int ibv_cmd_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
+ enum ibv_qp_attr_mask attr_mask,
+ struct ibv_modify_qp *cmd, size_t cmd_size);
+extern int ibv_cmd_destroy_qp(struct ibv_qp *qp);
+
#endif /* INFINIBAND_DRIVER_H */
diff --git a/include/infiniband/kern-abi.h b/include/infiniband/kern-abi.h
new file mode 100644
index 0000000..13a9c68
--- /dev/null
+++ b/include/infiniband/kern-abi.h
@@ -0,0 +1,299 @@
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id$
+ */
+
+#ifndef KERN_ABI_H
+#define KERN_ABI_H
+
+#include <linux/types.h>
+
+/*
+ * This file must be kept in sync with the kernel's version of
+ * drivers/infiniband/include/ib_user_verbs.h
+ */
+
+/*
+ * Increment this value if any changes that break userspace ABI
+ * compatibility are made.
+ */
+#define IB_USER_VERBS_ABI_VERSION 1
+
+enum {
+ IB_USER_VERBS_CMD_QUERY_PARAMS,
+ IB_USER_VERBS_CMD_GET_CONTEXT,
+ IB_USER_VERBS_CMD_QUERY_PORT,
+ IB_USER_VERBS_CMD_ALLOC_PD,
+ IB_USER_VERBS_CMD_DEALLOC_PD,
+ IB_USER_VERBS_CMD_REG_MR,
+ IB_USER_VERBS_CMD_DEREG_MR,
+ IB_USER_VERBS_CMD_CREATE_CQ,
+ IB_USER_VERBS_CMD_DESTROY_CQ,
+ IB_USER_VERBS_CMD_CREATE_QP,
+ IB_USER_VERBS_CMD_MODIFY_QP,
+ IB_USER_VERBS_CMD_DESTROY_QP,
+};
+
+/*
+ * Make sure that all structs defined in this file remain laid out so
+ * that they pack the same way on 32-bit and 64-bit architectures (to
+ * avoid incompatibility between 32-bit userspace and 64-bit kernels).
+ * In particular do not use pointer types -- pass pointers in __u64
+ * instead.
+ */
+
+struct ibv_kern_async_event { /* 16 bytes with no implicit padding, so 32- and 64-bit layouts agree */
+ __u64 element; /* NOTE(review): presumably identifies the object the event is about -- confirm against the kernel header */
+ __u32 event_type;
+ __u32 reserved; /* explicit filler: rounds the size up to a multiple of 8 */
+};
+
+struct ibv_comp_event {
+ __u64 cq_handle;
+};
+
+/*
+ * All commands from userspace should start with a __u32 command field
+ * followed by __u16 in_words and out_words fields (which give the
+ * length of the command block and response buffer if any in 32-bit
+ * words). The kernel driver will read these fields first and read
+ * the rest of the command struct based on these values.
+ */
+
+struct ibv_query_params {
+ __u32 command;
+ __u16 in_words;
+ __u16 out_words;
+ __u64 response;
+};
+
+struct ibv_query_params_resp {
+ __u32 num_cq_events;
+};
+
+struct ibv_get_context {
+ __u32 command;
+ __u16 in_words;
+ __u16 out_words;
+ __u64 response;
+ __u64 driver_data[0];
+};
+
+struct ibv_get_context_resp { /* response buffer used with the GET_CONTEXT command */
+ __u32 async_fd; /* ibv_cmd_get_context() copies this into context->async_fd */
+ __u32 cq_fd[1]; /* first entry of a variable-length tail: ibv_cmd_get_context() allocates extra room past the struct sized by num_comp */
+};
+
+struct ibv_query_port { /* command block for IB_USER_VERBS_CMD_QUERY_PORT */
+ __u32 command; /* command field that every command block starts with */
+ __u16 in_words; /* length of the command block, in 32-bit words */
+ __u16 out_words; /* length of the response buffer, in 32-bit words */
+ __u64 response; /* NOTE(review): presumably the response buffer address carried as __u64 instead of a pointer -- confirm against IBV_INIT_CMD_RESP */
+ __u8 port_num;
+ __u8 reserved[7]; /* explicit filler: brings the offset to 24 so driver_data is 8-byte aligned */
+ __u64 driver_data[0]; /* zero-length marker for data following the fixed fields; presumably driver-specific -- confirm */
+};
+
+struct ibv_query_port_resp {
+ __u32 port_cap_flags;
+ __u32 max_msg_sz;
+ __u32 bad_pkey_cntr;
+ __u32 qkey_viol_cntr;
+ __u32 gid_tbl_len;
+ __u16 pkey_tbl_len;
+ __u16 lid;
+ __u16 sm_lid;
+ __u8 state;
+ __u8 max_mtu;
+ __u8 active_mtu;
+ __u8 lmc;
+ __u8 max_vl_num;
+ __u8 sm_sl;
+ __u8 subnet_timeout;
+ __u8 init_type_reply;
+ __u8 active_width;
+ __u8 active_speed;
+ __u8 phys_state;
+ __u8 reserved[3];
+};
+
+struct ibv_alloc_pd {
+ __u32 command;
+ __u16 in_words;
+ __u16 out_words;
+ __u64 response;
+ __u64 driver_data[0];
+};
+
+struct ibv_alloc_pd_resp {
+ __u32 pd_handle;
+};
+
+struct ibv_dealloc_pd {
+ __u32 command;
+ __u16 in_words;
+ __u16 out_words;
+ __u32 pd_handle;
+};
+
+struct ibv_reg_mr {
+ __u32 command;
+ __u16 in_words;
+ __u16 out_words;
+ __u64 response;
+ __u64 start;
+ __u64 length;
+ __u64 hca_va;
+ __u32 pd_handle;
+ __u32 access_flags;
+ __u64 driver_data[0];
+};
+
+struct ibv_reg_mr_resp {
+ __u32 mr_handle;
+ __u32 lkey;
+ __u32 rkey;
+};
+
+struct ibv_dereg_mr {
+ __u32 command;
+ __u16 in_words;
+ __u16 out_words;
+ __u32 mr_handle;
+};
+
+struct ibv_create_cq {
+ __u32 command;
+ __u16 in_words;
+ __u16 out_words;
+ __u64 response;
+ __u64 user_handle;
+ __u32 cqe;
+ __u32 reserved;
+ __u64 driver_data[0];
+};
+
+struct ibv_create_cq_resp {
+ __u32 cq_handle;
+ __u32 cqe;
+};
+
+struct ibv_destroy_cq {
+ __u32 command;
+ __u16 in_words;
+ __u16 out_words;
+ __u32 cq_handle;
+};
+
+struct ibv_create_qp {
+ __u32 command;
+ __u16 in_words;
+ __u16 out_words;
+ __u64 response;
+ __u64 user_handle;
+ __u32 pd_handle;
+ __u32 send_cq_handle;
+ __u32 recv_cq_handle;
+ __u32 srq_handle;
+ __u32 max_send_wr;
+ __u32 max_recv_wr;
+ __u32 max_send_sge;
+ __u32 max_recv_sge;
+ __u32 max_inline_data;
+ __u8 sq_sig_all;
+ __u8 qp_type;
+ __u8 is_srq;
+ __u8 reserved;
+ __u64 driver_data[0];
+};
+
+struct ibv_create_qp_resp {
+ __u32 qp_handle;
+ __u32 qpn;
+};
+
+struct ibv_qp_dest {
+ __u8 dgid[16];
+ __u32 flow_label;
+ __u16 dlid;
+ __u16 reserved;
+ __u8 sgid_index;
+ __u8 hop_limit;
+ __u8 traffic_class;
+ __u8 sl;
+ __u8 src_path_bits;
+ __u8 static_rate;
+ __u8 is_global;
+ __u8 port_num;
+};
+
+struct ibv_modify_qp {
+ __u32 command;
+ __u16 in_words;
+ __u16 out_words;
+ struct ibv_qp_dest dest;
+ struct ibv_qp_dest alt_dest;
+ __u32 qp_handle;
+ __u32 attr_mask;
+ __u32 qkey;
+ __u32 rq_psn;
+ __u32 sq_psn;
+ __u32 dest_qp_num;
+ __u32 qp_access_flags;
+ __u16 pkey_index;
+ __u16 alt_pkey_index;
+ __u8 qp_state;
+ __u8 cur_qp_state;
+ __u8 path_mtu;
+ __u8 path_mig_state;
+ __u8 en_sqd_async_notify;
+ __u8 max_rd_atomic;
+ __u8 max_dest_rd_atomic;
+ __u8 min_rnr_timer;
+ __u8 port_num;
+ __u8 timeout;
+ __u8 retry_cnt;
+ __u8 rnr_retry;
+ __u8 alt_port_num;
+ __u8 alt_timeout;
+ __u8 reserved[2];
+ __u64 driver_data[0];
+};
+
+struct ibv_destroy_qp {
+ __u32 command;
+ __u16 in_words;
+ __u16 out_words;
+ __u32 qp_handle;
+};
+
+#endif /* KERN_ABI_H */
diff --git a/include/infiniband/opcode.h b/include/infiniband/opcode.h
new file mode 100644
index 0000000..cf2598b
--- /dev/null
+++ b/include/infiniband/opcode.h
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id$
+ */
+
+#ifndef INFINIBAND_OPCODE_H
+#define INFINIBAND_OPCODE_H
+
+/*
+ * This macro cleans up the definitions of constants for BTH opcodes.
+ * It is used to define constants such as IBV_OPCODE_UD_SEND_ONLY,
+ * which becomes IBV_OPCODE_UD + IBV_OPCODE_SEND_ONLY, and this gives
+ * the correct value.
+ *
+ * In short, user code should use the constants defined using the
+ * macro rather than worrying about adding together other constants.
+*/
+#define IBV_OPCODE(transport, op) \
+ IBV_OPCODE_ ## transport ## _ ## op = \
+ IBV_OPCODE_ ## transport + IBV_OPCODE_ ## op
+
+enum {
+ /* transport types -- just used to define real constants */
+ IBV_OPCODE_RC = 0x00,
+ IBV_OPCODE_UC = 0x20,
+ IBV_OPCODE_RD = 0x40,
+ IBV_OPCODE_UD = 0x60,
+
+ /* operations -- just used to define real constants */
+ IBV_OPCODE_SEND_FIRST = 0x00,
+ IBV_OPCODE_SEND_MIDDLE = 0x01,
+ IBV_OPCODE_SEND_LAST = 0x02,
+ IBV_OPCODE_SEND_LAST_WITH_IMMEDIATE = 0x03,
+ IBV_OPCODE_SEND_ONLY = 0x04,
+ IBV_OPCODE_SEND_ONLY_WITH_IMMEDIATE = 0x05,
+ IBV_OPCODE_RDMA_WRITE_FIRST = 0x06,
+ IBV_OPCODE_RDMA_WRITE_MIDDLE = 0x07,
+ IBV_OPCODE_RDMA_WRITE_LAST = 0x08,
+ IBV_OPCODE_RDMA_WRITE_LAST_WITH_IMMEDIATE = 0x09,
+ IBV_OPCODE_RDMA_WRITE_ONLY = 0x0a,
+ IBV_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE = 0x0b,
+ IBV_OPCODE_RDMA_READ_REQUEST = 0x0c,
+ IBV_OPCODE_RDMA_READ_RESPONSE_FIRST = 0x0d,
+ IBV_OPCODE_RDMA_READ_RESPONSE_MIDDLE = 0x0e,
+ IBV_OPCODE_RDMA_READ_RESPONSE_LAST = 0x0f,
+ IBV_OPCODE_RDMA_READ_RESPONSE_ONLY = 0x10,
+ IBV_OPCODE_ACKNOWLEDGE = 0x11,
+ IBV_OPCODE_ATOMIC_ACKNOWLEDGE = 0x12,
+ IBV_OPCODE_COMPARE_SWAP = 0x13,
+ IBV_OPCODE_FETCH_ADD = 0x14,
+
+ /* real constants follow -- see the comment above the IBV_OPCODE()
+ macro for more details */
+
+ /* RC */
+ IBV_OPCODE(RC, SEND_FIRST),
+ IBV_OPCODE(RC, SEND_MIDDLE),
+ IBV_OPCODE(RC, SEND_LAST),
+ IBV_OPCODE(RC, SEND_LAST_WITH_IMMEDIATE),
+ IBV_OPCODE(RC, SEND_ONLY),
+ IBV_OPCODE(RC, SEND_ONLY_WITH_IMMEDIATE),
+ IBV_OPCODE(RC, RDMA_WRITE_FIRST),
+ IBV_OPCODE(RC, RDMA_WRITE_MIDDLE),
+ IBV_OPCODE(RC, RDMA_WRITE_LAST),
+ IBV_OPCODE(RC, RDMA_WRITE_LAST_WITH_IMMEDIATE),
+ IBV_OPCODE(RC, RDMA_WRITE_ONLY),
+ IBV_OPCODE(RC, RDMA_WRITE_ONLY_WITH_IMMEDIATE),
+ IBV_OPCODE(RC, RDMA_READ_REQUEST),
+ IBV_OPCODE(RC, RDMA_READ_RESPONSE_FIRST),
+ IBV_OPCODE(RC, RDMA_READ_RESPONSE_MIDDLE),
+ IBV_OPCODE(RC, RDMA_READ_RESPONSE_LAST),
+ IBV_OPCODE(RC, RDMA_READ_RESPONSE_ONLY),
+ IBV_OPCODE(RC, ACKNOWLEDGE),
+ IBV_OPCODE(RC, ATOMIC_ACKNOWLEDGE),
+ IBV_OPCODE(RC, COMPARE_SWAP),
+ IBV_OPCODE(RC, FETCH_ADD),
+
+ /* UC */
+ IBV_OPCODE(UC, SEND_FIRST),
+ IBV_OPCODE(UC, SEND_MIDDLE),
+ IBV_OPCODE(UC, SEND_LAST),
+ IBV_OPCODE(UC, SEND_LAST_WITH_IMMEDIATE),
+ IBV_OPCODE(UC, SEND_ONLY),
+ IBV_OPCODE(UC, SEND_ONLY_WITH_IMMEDIATE),
+ IBV_OPCODE(UC, RDMA_WRITE_FIRST),
+ IBV_OPCODE(UC, RDMA_WRITE_MIDDLE),
+ IBV_OPCODE(UC, RDMA_WRITE_LAST),
+ IBV_OPCODE(UC, RDMA_WRITE_LAST_WITH_IMMEDIATE),
+ IBV_OPCODE(UC, RDMA_WRITE_ONLY),
+ IBV_OPCODE(UC, RDMA_WRITE_ONLY_WITH_IMMEDIATE),
+
+ /* RD */
+ IBV_OPCODE(RD, SEND_FIRST),
+ IBV_OPCODE(RD, SEND_MIDDLE),
+ IBV_OPCODE(RD, SEND_LAST),
+ IBV_OPCODE(RD, SEND_LAST_WITH_IMMEDIATE),
+ IBV_OPCODE(RD, SEND_ONLY),
+ IBV_OPCODE(RD, SEND_ONLY_WITH_IMMEDIATE),
+ IBV_OPCODE(RD, RDMA_WRITE_FIRST),
+ IBV_OPCODE(RD, RDMA_WRITE_MIDDLE),
+ IBV_OPCODE(RD, RDMA_WRITE_LAST),
+ IBV_OPCODE(RD, RDMA_WRITE_LAST_WITH_IMMEDIATE),
+ IBV_OPCODE(RD, RDMA_WRITE_ONLY),
+ IBV_OPCODE(RD, RDMA_WRITE_ONLY_WITH_IMMEDIATE),
+ IBV_OPCODE(RD, RDMA_READ_REQUEST),
+ IBV_OPCODE(RD, RDMA_READ_RESPONSE_FIRST),
+ IBV_OPCODE(RD, RDMA_READ_RESPONSE_MIDDLE),
+ IBV_OPCODE(RD, RDMA_READ_RESPONSE_LAST),
+ IBV_OPCODE(RD, RDMA_READ_RESPONSE_ONLY),
+ IBV_OPCODE(RD, ACKNOWLEDGE),
+ IBV_OPCODE(RD, ATOMIC_ACKNOWLEDGE),
+ IBV_OPCODE(RD, COMPARE_SWAP),
+ IBV_OPCODE(RD, FETCH_ADD),
+
+ /* UD */
+ IBV_OPCODE(UD, SEND_ONLY),
+ IBV_OPCODE(UD, SEND_ONLY_WITH_IMMEDIATE)
+};
+
+#endif /* INFINIBAND_OPCODE_H */
diff --git a/include/infiniband/verbs.h b/include/infiniband/verbs.h
index f5fa340..5128c41 100644
--- a/include/infiniband/verbs.h
+++ b/include/infiniband/verbs.h
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2004 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -49,7 +50,54 @@
BEGIN_C_DECLS
-enum ib_event_type {
+union ibv_gid {
+ uint8_t raw[16];
+ struct {
+ uint64_t subnet_prefix;
+ uint64_t interface_id;
+ } global;
+};
+
+enum ibv_mtu { /* enumerator value n carries the size suffix 128 << n (1 -> 256 ... 5 -> 4096) */
+ IBV_MTU_256 = 1, /* numbering starts at 1; no enumerator has value 0 */
+ IBV_MTU_512 = 2,
+ IBV_MTU_1024 = 3,
+ IBV_MTU_2048 = 4,
+ IBV_MTU_4096 = 5
+};
+
+enum ibv_port_state {
+ IBV_PORT_NOP = 0,
+ IBV_PORT_DOWN = 1,
+ IBV_PORT_INIT = 2,
+ IBV_PORT_ARMED = 3,
+ IBV_PORT_ACTIVE = 4,
+ IBV_PORT_ACTIVE_DEFER = 5
+};
+
+struct ibv_port_attr {
+ enum ibv_port_state state;
+ enum ibv_mtu max_mtu;
+ enum ibv_mtu active_mtu;
+ int gid_tbl_len;
+ uint32_t port_cap_flags;
+ uint32_t max_msg_sz;
+ uint32_t bad_pkey_cntr;
+ uint32_t qkey_viol_cntr;
+ uint16_t pkey_tbl_len;
+ uint16_t lid;
+ uint16_t sm_lid;
+ uint8_t lmc;
+ uint8_t max_vl_num;
+ uint8_t sm_sl;
+ uint8_t subnet_timeout;
+ uint8_t init_type_reply;
+ uint8_t active_width;
+ uint8_t active_speed;
+ uint8_t phys_state;
+};
+
+enum ibv_event_type {
IBV_EVENT_CQ_ERR,
IBV_EVENT_QP_FATAL,
IBV_EVENT_QP_REQ_ERR,
@@ -72,7 +120,68 @@ struct ibv_async_event {
struct ibv_qp *qp;
int port_num;
} element;
- enum ib_event_type event_type;
+ enum ibv_event_type event_type;
+};
+
+enum ibv_wc_status {
+ IBV_WC_SUCCESS,
+ IBV_WC_LOC_LEN_ERR,
+ IBV_WC_LOC_QP_OP_ERR,
+ IBV_WC_LOC_EEC_OP_ERR,
+ IBV_WC_LOC_PROT_ERR,
+ IBV_WC_WR_FLUSH_ERR,
+ IBV_WC_MW_BIND_ERR,
+ IBV_WC_BAD_RESP_ERR,
+ IBV_WC_LOC_ACCESS_ERR,
+ IBV_WC_REM_INV_REQ_ERR,
+ IBV_WC_REM_ACCESS_ERR,
+ IBV_WC_REM_OP_ERR,
+ IBV_WC_RETRY_EXC_ERR,
+ IBV_WC_RNR_RETRY_EXC_ERR,
+ IBV_WC_LOC_RDD_VIOL_ERR,
+ IBV_WC_REM_INV_RD_REQ_ERR,
+ IBV_WC_REM_ABORT_ERR,
+ IBV_WC_INV_EECN_ERR,
+ IBV_WC_INV_EEC_STATE_ERR,
+ IBV_WC_FATAL_ERR,
+ IBV_WC_RESP_TIMEOUT_ERR,
+ IBV_WC_GENERAL_ERR
+};
+
+enum ibv_wc_opcode { /* type of the opcode member of struct ibv_wc */
+ IBV_WC_SEND, /* this and the next five enumerators take implicit values 0..5, all below bit 7 */
+ IBV_WC_RDMA_WRITE,
+ IBV_WC_RDMA_READ,
+ IBV_WC_COMP_SWAP,
+ IBV_WC_FETCH_ADD,
+ IBV_WC_BIND_MW,
+/*
+ * Set value of IBV_WC_RECV so consumers can test if a completion is a
+ * receive by testing (opcode & IBV_WC_RECV).
+ */
+ IBV_WC_RECV = 1 << 7,
+ IBV_WC_RECV_RDMA_WITH_IMM /* implicit value (1 << 7) + 1, so the IBV_WC_RECV bit is set here as well */
+};
+
+enum ibv_wc_flags {
+ IBV_WC_GRH = 1 << 0,
+ IBV_WC_WITH_IMM = 1 << 1
+};
+
+struct ibv_wc {
+ uint64_t wr_id;
+ enum ibv_wc_status status;
+ enum ibv_wc_opcode opcode;
+ uint32_t vendor_err;
+ uint32_t byte_len;
+ uint32_t imm_data; /* in network byte order */
+ uint32_t qp_num;
+ uint32_t src_qp;
+ enum ibv_wc_flags wc_flags;
+ uint16_t pkey_index;
+ uint16_t slid;
+ uint8_t sl;
+ uint8_t dlid_path_bits;
};
enum ibv_access_flags {
@@ -84,23 +193,215 @@ enum ibv_access_flags {
};
struct ibv_pd {
-
+ struct ibv_context *context;
+ uint32_t handle;
};
struct ibv_mr {
+ struct ibv_context *context;
+ struct ibv_pd *pd;
+ uint32_t handle;
+ uint32_t lkey;
+ uint32_t rkey;
+};
+struct ibv_global_route {
+ union ibv_gid dgid;
+ uint32_t flow_label;
+ uint8_t sgid_index;
+ uint8_t hop_limit;
+ uint8_t traffic_class;
};
-struct ibv_qp {
+struct ibv_ah_attr {
+ struct ibv_global_route grh;
+ uint16_t dlid;
+ uint8_t sl;
+ uint8_t src_path_bits;
+ uint8_t static_rate;
+ uint8_t is_global;
+ uint8_t port_num;
+};
+
+enum ibv_qp_type {
+ IBV_QPT_RC = 2, /* numbering starts at 2; values 0 and 1 are not defined by this enum */
+ IBV_QPT_UC, /* implicit value 3 */
+ IBV_QPT_UD /* implicit value 4 */
+};
+
+struct ibv_qp_cap {
+ uint32_t max_send_wr;
+ uint32_t max_recv_wr;
+ uint32_t max_send_sge;
+ uint32_t max_recv_sge;
+ uint32_t max_inline_data;
+};
+
+struct ibv_qp_init_attr {
+ void *qp_context;
+ struct ibv_cq *send_cq;
+ struct ibv_cq *recv_cq;
+ struct ibv_srq *srq;
+ struct ibv_qp_cap cap;
+ enum ibv_qp_type qp_type;
+ int sq_sig_all;
+};
+
+enum ibv_qp_attr_mask {
+ IBV_QP_STATE = 1 << 0,
+ IBV_QP_CUR_STATE = 1 << 1,
+ IBV_QP_EN_SQD_ASYNC_NOTIFY = 1 << 2,
+ IBV_QP_ACCESS_FLAGS = 1 << 3,
+ IBV_QP_PKEY_INDEX = 1 << 4,
+ IBV_QP_PORT = 1 << 5,
+ IBV_QP_QKEY = 1 << 6,
+ IBV_QP_AV = 1 << 7,
+ IBV_QP_PATH_MTU = 1 << 8,
+ IBV_QP_TIMEOUT = 1 << 9,
+ IBV_QP_RETRY_CNT = 1 << 10,
+ IBV_QP_RNR_RETRY = 1 << 11,
+ IBV_QP_RQ_PSN = 1 << 12,
+ IBV_QP_MAX_QP_RD_ATOMIC = 1 << 13,
+ IBV_QP_ALT_PATH = 1 << 14,
+ IBV_QP_MIN_RNR_TIMER = 1 << 15,
+ IBV_QP_SQ_PSN = 1 << 16,
+ IBV_QP_MAX_DEST_RD_ATOMIC = 1 << 17,
+ IBV_QP_PATH_MIG_STATE = 1 << 18,
+ IBV_QP_CAP = 1 << 19,
+ IBV_QP_DEST_QPN = 1 << 20
+};
+
+enum ibv_qp_state {
+ IBV_QPS_RESET,
+ IBV_QPS_INIT,
+ IBV_QPS_RTR,
+ IBV_QPS_RTS,
+ IBV_QPS_SQD,
+ IBV_QPS_SQE,
+ IBV_QPS_ERR
+};
+
+enum ibv_mig_state {
+ IBV_MIG_MIGRATED,
+ IBV_MIG_REARM,
+ IBV_MIG_ARMED
+};
+
+struct ibv_qp_attr {
+ enum ibv_qp_state qp_state;
+ enum ibv_qp_state cur_qp_state;
+ enum ibv_mtu path_mtu;
+ enum ibv_mig_state path_mig_state;
+ uint32_t qkey;
+ uint32_t rq_psn;
+ uint32_t sq_psn;
+ uint32_t dest_qp_num;
+ int qp_access_flags;
+ struct ibv_qp_cap cap;
+ struct ibv_ah_attr ah_attr;
+ struct ibv_ah_attr alt_ah_attr;
+ uint16_t pkey_index;
+ uint16_t alt_pkey_index;
+ uint8_t en_sqd_async_notify;
+ uint8_t sq_draining;
+ uint8_t max_rd_atomic;
+ uint8_t max_dest_rd_atomic;
+ uint8_t min_rnr_timer;
+ uint8_t port_num;
+ uint8_t timeout;
+ uint8_t retry_cnt;
+ uint8_t rnr_retry;
+ uint8_t alt_port_num;
+ uint8_t alt_timeout;
+};
+
+enum ibv_wr_opcode {
+ IBV_WR_RDMA_WRITE,
+ IBV_WR_RDMA_WRITE_WITH_IMM,
+ IBV_WR_SEND,
+ IBV_WR_SEND_WITH_IMM,
+ IBV_WR_RDMA_READ,
+ IBV_WR_ATOMIC_CMP_AND_SWP,
+ IBV_WR_ATOMIC_FETCH_AND_ADD
+};
+
+enum ibv_send_flags {
+ IBV_SEND_FENCE = 1 << 0,
+ IBV_SEND_SIGNALED = 1 << 1,
+ IBV_SEND_SOLICITED = 1 << 2,
+ IBV_SEND_INLINE = 1 << 3
+};
+
+struct ibv_sge {
+ uint64_t addr;
+ uint32_t length;
+ uint32_t lkey;
+};
+
+struct ibv_send_wr {
+ struct ibv_send_wr *next;
+ uint64_t wr_id;
+ struct ibv_sge *sg_list;
+ int num_sge;
+ enum ibv_wr_opcode opcode;
+ enum ibv_send_flags send_flags;
+ uint32_t imm_data;
+ union {
+ struct {
+ uint64_t remote_addr;
+ uint32_t rkey;
+ } rdma;
+ struct {
+ uint64_t remote_addr;
+ uint64_t compare_add;
+ uint64_t swap;
+ uint32_t rkey;
+ } atomic;
+ struct {
+ struct ibv_ah *ah;
+ uint32_t remote_qpn;
+ uint32_t remote_qkey;
+ } ud;
+ } wr;
+};
+
+struct ibv_recv_wr {
+ struct ibv_recv_wr *next;
+ uint64_t wr_id;
+ struct ibv_sge *sg_list;
+ int num_sge;
+};
+struct ibv_qp {
+ struct ibv_context *context;
+ void *qp_context;
+ struct ibv_pd *pd;
+ struct ibv_cq *send_cq;
+ struct ibv_cq *recv_cq;
+ uint32_t handle;
+ uint32_t qp_num;
+ enum ibv_qp_state state;
};
struct ibv_cq {
+ struct ibv_context *context;
+ void *cq_context;
+ uint32_t handle;
+ int cqe;
+};
+struct ibv_ah {
+ struct ibv_context *context;
+ struct ibv_pd *pd;
};
-struct ibv_device_ops {
+struct ibv_device;
+struct ibv_context;
+struct ibv_device_ops {
+ struct ibv_context * (*alloc_context)(struct ibv_device *device,
+ int num_comp, int cmd_fd);
+ void (*free_context)(struct ibv_context *context);
};
struct ibv_device {
@@ -110,8 +411,34 @@ struct ibv_device {
struct ibv_device_ops ops;
};
+struct ibv_context_ops {
+ int (*query_port)(struct ibv_context *context, uint8_t port_num,
+ struct ibv_port_attr *port_attr);
+ struct ibv_pd * (*alloc_pd)(struct ibv_context *context);
+ int (*dealloc_pd)(struct ibv_pd *pd);
+ struct ibv_mr * (*reg_mr)(struct ibv_pd *pd, void *addr, size_t length,
+ enum ibv_access_flags access);
+ int (*dereg_mr)(struct ibv_mr *mr);
+ struct ibv_cq * (*create_cq)(struct ibv_context *context, int cqe);
+ int (*poll_cq)(struct ibv_cq *cq, int num_entries, struct ibv_wc *wc);
+ int (*req_notify_cq)(struct ibv_cq *cq, int solicited);
+ void (*cq_event)(struct ibv_cq *cq);
+ int (*destroy_cq)(struct ibv_cq *cq);
+ struct ibv_qp * (*create_qp)(struct ibv_pd *pd, struct ibv_qp_init_attr *attr);
+ int (*modify_qp)(struct ibv_qp *qp, struct ibv_qp_attr *attr,
+ enum ibv_qp_attr_mask attr_mask);
+ int (*destroy_qp)(struct ibv_qp *qp);
+ int (*post_send)(struct ibv_qp *qp, struct ibv_send_wr *wr,
+ struct ibv_send_wr **bad_wr);
+ int (*post_recv)(struct ibv_qp *qp, struct ibv_recv_wr *wr,
+ struct ibv_recv_wr **bad_wr);
+ struct ibv_ah * (*create_ah)(struct ibv_pd *pd, struct ibv_ah_attr *attr);
+ int (*destroy_ah)(struct ibv_ah *ah);
+};
+
struct ibv_context {
struct ibv_device *device;
+ struct ibv_context_ops ops;
int cmd_fd;
int async_fd;
int num_comp;
@@ -150,6 +477,12 @@ extern int ibv_get_async_event(struct ibv_context *context,
struct ibv_async_event *event);
/**
+ * ibv_query_port - Get port properties
+ */
+extern int ibv_query_port(struct ibv_context *context, uint8_t port_num,
+ struct ibv_port_attr *port_attr);
+
+/**
* ibv_alloc_pd - Allocate a protection domain
*/
extern struct ibv_pd *ibv_alloc_pd(struct ibv_context *context);
@@ -170,6 +503,85 @@ extern struct ibv_mr *ibv_reg_mr(struct ibv_pd *pd, void *addr,
*/
extern int ibv_dereg_mr(struct ibv_mr *mr);
+/**
+ * ibv_create_cq - Create a completion queue
+ */
+extern struct ibv_cq *ibv_create_cq(struct ibv_context *context, int cqe,
+ void *cq_context);
+
+/**
+ * ibv_destroy_cq - Destroy a completion queue
+ */
+extern int ibv_destroy_cq(struct ibv_cq *cq);
+
+/**
+ * ibv_get_cq_event - Read next CQ event
+ */
+extern int ibv_get_cq_event(struct ibv_context *context, int comp_num,
+ struct ibv_cq **cq, void **cq_context);
+
+
+/**
+ * ibv_poll_cq - Poll a CQ for work completions
+ */
+static inline int ibv_poll_cq(struct ibv_cq *cq, int num_entries, struct ibv_wc *wc)
+{
+ return cq->context->ops.poll_cq(cq, num_entries, wc);
+}
+
+/**
+ * ibv_req_notify_cq - Request completion notification on a CQ.
+ */
+static inline int ibv_req_notify_cq(struct ibv_cq *cq, int solicited)
+{
+ return cq->context->ops.req_notify_cq(cq, solicited);
+}
+
+/**
+ * ibv_create_qp - Create a queue pair.
+ */
+extern struct ibv_qp *ibv_create_qp(struct ibv_pd *pd,
+ struct ibv_qp_init_attr *qp_init_attr);
+
+/**
+ * ibv_modify_qp - Modify a queue pair.
+ */
+extern int ibv_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
+ enum ibv_qp_attr_mask attr_mask);
+
+/**
+ * ibv_destroy_qp - Destroy a queue pair.
+ */
+extern int ibv_destroy_qp(struct ibv_qp *qp);
+
+/**
+ * ibv_post_send - Post a list of work requests to a send queue.
+ */
+static inline int ibv_post_send(struct ibv_qp *qp, struct ibv_send_wr *wr,
+ struct ibv_send_wr **bad_wr)
+{
+ return qp->context->ops.post_send(qp, wr, bad_wr);
+}
+
+/**
+ * ibv_post_recv - Post a list of work requests to a receive queue.
+ */
+static inline int ibv_post_recv(struct ibv_qp *qp, struct ibv_recv_wr *wr,
+ struct ibv_recv_wr **bad_wr)
+{
+ return qp->context->ops.post_recv(qp, wr, bad_wr);
+}
+
+/**
+ * ibv_create_ah - Create an address handle.
+ */
+extern struct ibv_ah *ibv_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr);
+
+/**
+ * ibv_destroy_ah - Destroy an address handle.
+ */
+extern int ibv_destroy_ah(struct ibv_ah *ah);
+
END_C_DECLS
#endif /* INFINIBAND_VERBS_H */
diff --git a/src/cmd.c b/src/cmd.c
new file mode 100644
index 0000000..44e149f
--- /dev/null
+++ b/src/cmd.c
@@ -0,0 +1,298 @@
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id$
+ */
+
+#if HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <errno.h>
+#include <alloca.h>
+#include <string.h>
+
+#include "ibverbs.h"
+
+/**
+ * ibv_cmd_get_context - Issue the GET_CONTEXT command
+ * @num_comp: number of cq_fd entries to copy from the response
+ * @context: context whose cmd_fd is written to; its async_fd and
+ *   cq_fd[] members are filled in from the response
+ * @cmd: caller-provided command buffer; the header is set up here
+ * @cmd_size: size of @cmd in bytes
+ *
+ * Returns 0 if write() reports that @cmd_size bytes were consumed,
+ * otherwise the current errno.
+ */
+int ibv_cmd_get_context(int num_comp, struct ibv_context *context,
+			struct ibv_get_context *cmd, size_t cmd_size)
+{
+	struct ibv_get_context_resp *resp;
+	size_t resp_size;
+	int i;
+
+	resp_size = sizeof *resp + num_comp * sizeof (int);
+	resp = alloca(resp_size);
+	IBV_INIT_CMD_RESP(cmd, cmd_size, GET_CONTEXT, resp);
+
+	/*
+	 * IBV_INIT_CMD_RESP only accounts for sizeof *resp.  Advertise
+	 * the whole buffer so that the num_comp cq_fd entries read
+	 * below are covered by out_words.
+	 */
+	cmd->out_words = resp_size / 4;
+
+	if (write(context->cmd_fd, cmd, cmd_size) != cmd_size)
+		return errno;
+
+	context->async_fd = resp->async_fd;
+	for (i = 0; i < num_comp; ++i)
+		context->cq_fd[i] = resp->cq_fd[i];
+
+	return 0;
+}
+
+/**
+ * ibv_cmd_query_port - Issue the QUERY_PORT command
+ * @context: context whose cmd_fd the command is written to
+ * @port_num: stored in the command's port_num field
+ * @port_attr: filled in from the response on success
+ * @cmd: caller-provided command buffer; the header is set up here
+ * @cmd_size: size of @cmd in bytes
+ *
+ * Returns 0 if write() reports that @cmd_size bytes were consumed,
+ * otherwise the current errno (and @port_attr is left untouched).
+ */
+int ibv_cmd_query_port(struct ibv_context *context, uint8_t port_num,
+		       struct ibv_port_attr *port_attr,
+		       struct ibv_query_port *cmd, size_t cmd_size)
+{
+	struct ibv_query_port_resp resp;
+	int fd = context->cmd_fd;
+
+	IBV_INIT_CMD_RESP(cmd, cmd_size, QUERY_PORT, &resp);
+	cmd->port_num = port_num;
+
+	if (write(fd, cmd, cmd_size) != cmd_size)
+		return errno;
+
+	/* State, width and speed */
+	port_attr->state           = resp.state;
+	port_attr->phys_state      = resp.phys_state;
+	port_attr->active_width    = resp.active_width;
+	port_attr->active_speed    = resp.active_speed;
+
+	/* MTU, message size and VL limits */
+	port_attr->max_mtu         = resp.max_mtu;
+	port_attr->active_mtu      = resp.active_mtu;
+	port_attr->max_msg_sz      = resp.max_msg_sz;
+	port_attr->max_vl_num      = resp.max_vl_num;
+
+	/* LID and SM fields */
+	port_attr->lid             = resp.lid;
+	port_attr->lmc             = resp.lmc;
+	port_attr->sm_lid          = resp.sm_lid;
+	port_attr->sm_sl           = resp.sm_sl;
+
+	/* Table lengths and counters */
+	port_attr->gid_tbl_len     = resp.gid_tbl_len;
+	port_attr->pkey_tbl_len    = resp.pkey_tbl_len;
+	port_attr->bad_pkey_cntr   = resp.bad_pkey_cntr;
+	port_attr->qkey_viol_cntr  = resp.qkey_viol_cntr;
+
+	/* Remaining fields */
+	port_attr->port_cap_flags  = resp.port_cap_flags;
+	port_attr->subnet_timeout  = resp.subnet_timeout;
+	port_attr->init_type_reply = resp.init_type_reply;
+
+	return 0;
+}
+
+/**
+ * ibv_cmd_alloc_pd - Issue the ALLOC_PD command
+ * @context: context whose cmd_fd the command is written to
+ * @pd: on success, its handle is set from the response's pd_handle
+ * @cmd: caller-provided command buffer; the header is set up here
+ * @cmd_size: size of @cmd in bytes
+ *
+ * Returns 0 if write() reports that @cmd_size bytes were consumed,
+ * otherwise the current errno.
+ */
+int ibv_cmd_alloc_pd(struct ibv_context *context, struct ibv_pd *pd,
+		     struct ibv_alloc_pd *cmd, size_t cmd_size)
+{
+	struct ibv_alloc_pd_resp resp;
+	int fd = context->cmd_fd;
+
+	IBV_INIT_CMD_RESP(cmd, cmd_size, ALLOC_PD, &resp);
+
+	if (write(fd, cmd, cmd_size) == cmd_size) {
+		pd->handle = resp.pd_handle;
+		return 0;
+	}
+
+	return errno;
+}
+
+/**
+ * ibv_cmd_dealloc_pd - Issue the DEALLOC_PD command
+ * @pd: protection domain whose handle is sent; the command is written
+ *   to the cmd_fd of the context stored in @pd
+ *
+ * Returns 0 if write() reports that the whole command was consumed,
+ * otherwise the current errno.
+ */
+int ibv_cmd_dealloc_pd(struct ibv_pd *pd)
+{
+	struct ibv_dealloc_pd req;
+	int fd = pd->context->cmd_fd;
+
+	IBV_INIT_CMD(&req, sizeof req, DEALLOC_PD);
+	req.pd_handle = pd->handle;
+
+	return write(fd, &req, sizeof req) != sizeof req ? errno : 0;
+}
+
+/**
+ * ibv_cmd_reg_mr - Issue the REG_MR command
+ * @pd: protection domain; supplies pd_handle and the context whose
+ *   cmd_fd the command is written to
+ * @addr: stored (as an integer) in the command's start field
+ * @length: stored in the command's length field
+ * @hca_va: stored in the command's hca_va field
+ * @access: stored in the command's access_flags field
+ * @mr: on success, its handle, lkey and rkey are set from the response
+ * @cmd: caller-provided command buffer; the header is set up here
+ * @cmd_size: size of @cmd in bytes
+ *
+ * Returns 0 if write() reports that @cmd_size bytes were consumed,
+ * otherwise the current errno.
+ */
+int ibv_cmd_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
+		   uint64_t hca_va, enum ibv_access_flags access,
+		   struct ibv_mr *mr, struct ibv_reg_mr *cmd,
+		   size_t cmd_size)
+{
+	struct ibv_reg_mr_resp resp;
+	int fd = pd->context->cmd_fd;
+
+	IBV_INIT_CMD_RESP(cmd, cmd_size, REG_MR, &resp);
+
+	cmd->pd_handle    = pd->handle;
+	cmd->start        = (uintptr_t) addr;
+	cmd->length       = length;
+	cmd->hca_va       = hca_va;
+	cmd->access_flags = access;
+
+	if (write(fd, cmd, cmd_size) != cmd_size)
+		return errno;
+
+	mr->handle = resp.mr_handle;
+	mr->lkey   = resp.lkey;
+	mr->rkey   = resp.rkey;
+
+	return 0;
+}
+
+/**
+ * ibv_cmd_dereg_mr - Issue the DEREG_MR command
+ * @mr: memory region whose handle is sent; the command is written to
+ *   the cmd_fd of the context stored in @mr
+ *
+ * Returns 0 if write() reports that the whole command was consumed,
+ * otherwise the current errno.
+ */
+int ibv_cmd_dereg_mr(struct ibv_mr *mr)
+{
+	struct ibv_dereg_mr req;
+	int fd = mr->context->cmd_fd;
+
+	IBV_INIT_CMD(&req, sizeof req, DEREG_MR);
+	req.mr_handle = mr->handle;
+
+	return write(fd, &req, sizeof req) != sizeof req ? errno : 0;
+}
+
+/**
+ * ibv_cmd_create_cq - Issue the CREATE_CQ command
+ * @context: context whose cmd_fd the command is written to
+ * @cqe: stored in the command's cqe field
+ * @cq: its address is sent as user_handle; on success its handle and
+ *   cqe members are set from the response
+ * @cmd: caller-provided command buffer; the header is set up here
+ * @cmd_size: size of @cmd in bytes
+ *
+ * Returns 0 if write() reports that @cmd_size bytes were consumed,
+ * otherwise the current errno.
+ */
+int ibv_cmd_create_cq(struct ibv_context *context, int cqe,
+		      struct ibv_cq *cq,
+		      struct ibv_create_cq *cmd, size_t cmd_size)
+{
+	struct ibv_create_cq_resp resp;
+	int fd = context->cmd_fd;
+
+	IBV_INIT_CMD_RESP(cmd, cmd_size, CREATE_CQ, &resp);
+	cmd->cqe         = cqe;
+	cmd->user_handle = (uintptr_t) cq;
+
+	if (write(fd, cmd, cmd_size) == cmd_size) {
+		cq->handle = resp.cq_handle;
+		cq->cqe    = resp.cqe;
+		return 0;
+	}
+
+	return errno;
+}
+
+/**
+ * ibv_cmd_destroy_cq - Issue the DESTROY_CQ command
+ * @cq: completion queue whose handle is sent; the command is written
+ *   to the cmd_fd of the context stored in @cq
+ *
+ * Returns 0 if write() reports that the whole command was consumed,
+ * otherwise the current errno.
+ */
+int ibv_cmd_destroy_cq(struct ibv_cq *cq)
+{
+	struct ibv_destroy_cq req;
+	int fd = cq->context->cmd_fd;
+
+	IBV_INIT_CMD(&req, sizeof req, DESTROY_CQ);
+	req.cq_handle = cq->handle;
+
+	return write(fd, &req, sizeof req) != sizeof req ? errno : 0;
+}
+
+/**
+ * ibv_cmd_create_qp - Issue the CREATE_QP command
+ * @pd: protection domain; supplies pd_handle and the context whose
+ *   cmd_fd the command is written to
+ * @qp: its address is sent as user_handle; on success its handle and
+ *   qp_num members are set from the response
+ * @attr: supplies the CQ handles, capabilities, sq_sig_all and qp_type
+ * @cmd: caller-provided command buffer; the header is set up here
+ * @cmd_size: size of @cmd in bytes
+ *
+ * The command's is_srq field is always set to 0.
+ *
+ * Returns 0 if write() reports that @cmd_size bytes were consumed,
+ * otherwise the current errno.
+ */
+int ibv_cmd_create_qp(struct ibv_pd *pd,
+		      struct ibv_qp *qp, struct ibv_qp_init_attr *attr,
+		      struct ibv_create_qp *cmd, size_t cmd_size)
+{
+	struct ibv_create_qp_resp resp;
+	int fd = pd->context->cmd_fd;
+
+	IBV_INIT_CMD_RESP(cmd, cmd_size, CREATE_QP, &resp);
+
+	/* Handles tying the QP to its owner objects */
+	cmd->user_handle     = (uintptr_t) qp;
+	cmd->pd_handle       = pd->handle;
+	cmd->send_cq_handle  = attr->send_cq->handle;
+	cmd->recv_cq_handle  = attr->recv_cq->handle;
+
+	/* Requested capabilities */
+	cmd->max_send_wr     = attr->cap.max_send_wr;
+	cmd->max_recv_wr     = attr->cap.max_recv_wr;
+	cmd->max_send_sge    = attr->cap.max_send_sge;
+	cmd->max_recv_sge    = attr->cap.max_recv_sge;
+	cmd->max_inline_data = attr->cap.max_inline_data;
+
+	/* Remaining attributes */
+	cmd->sq_sig_all      = attr->sq_sig_all;
+	cmd->qp_type         = attr->qp_type;
+	cmd->is_srq          = 0;
+
+	if (write(fd, cmd, cmd_size) != cmd_size)
+		return errno;
+
+	qp->handle = resp.qp_handle;
+	qp->qp_num = resp.qpn;
+
+	return 0;
+}
+
+/**
+ * ibv_cmd_modify_qp - Issue the MODIFY_QP command
+ * @qp: queue pair whose handle is sent; the command is written to the
+ *   cmd_fd of the context stored in @qp
+ * @attr: every member listed below is copied into @cmd, whether or
+ *   not the matching bit is set in @attr_mask
+ * @attr_mask: stored in the command's attr_mask field
+ * @cmd: caller-provided command buffer; the header is set up here
+ * @cmd_size: size of @cmd in bytes
+ *
+ * No response buffer is used for this command.
+ *
+ * Returns 0 if write() reports that @cmd_size bytes were consumed,
+ * otherwise the current errno.
+ */
+int ibv_cmd_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
+		      enum ibv_qp_attr_mask attr_mask,
+		      struct ibv_modify_qp *cmd, size_t cmd_size)
+{
+	int fd = qp->context->cmd_fd;
+
+	IBV_INIT_CMD(cmd, cmd_size, MODIFY_QP);
+
+	/* Scalar QP attributes */
+	cmd->qp_handle           = qp->handle;
+	cmd->attr_mask           = attr_mask;
+	cmd->qkey                = attr->qkey;
+	cmd->rq_psn              = attr->rq_psn;
+	cmd->sq_psn              = attr->sq_psn;
+	cmd->dest_qp_num         = attr->dest_qp_num;
+	cmd->qp_access_flags     = attr->qp_access_flags;
+	cmd->pkey_index          = attr->pkey_index;
+	cmd->alt_pkey_index      = attr->alt_pkey_index;
+	cmd->qp_state            = attr->qp_state;
+	cmd->cur_qp_state        = attr->cur_qp_state;
+	cmd->path_mtu            = attr->path_mtu;
+	cmd->path_mig_state      = attr->path_mig_state;
+	cmd->en_sqd_async_notify = attr->en_sqd_async_notify;
+	cmd->max_rd_atomic       = attr->max_rd_atomic;
+	cmd->max_dest_rd_atomic  = attr->max_dest_rd_atomic;
+	cmd->min_rnr_timer       = attr->min_rnr_timer;
+	cmd->port_num            = attr->port_num;
+	cmd->timeout             = attr->timeout;
+	cmd->retry_cnt           = attr->retry_cnt;
+	cmd->rnr_retry           = attr->rnr_retry;
+	cmd->alt_port_num        = attr->alt_port_num;
+	cmd->alt_timeout         = attr->alt_timeout;
+
+	/* Primary path, taken from attr->ah_attr */
+	memcpy(cmd->dest.dgid, attr->ah_attr.grh.dgid.raw, 16);
+	cmd->dest.flow_label    = attr->ah_attr.grh.flow_label;
+	cmd->dest.dlid          = attr->ah_attr.dlid;
+	cmd->dest.sgid_index    = attr->ah_attr.grh.sgid_index;
+	cmd->dest.hop_limit     = attr->ah_attr.grh.hop_limit;
+	cmd->dest.traffic_class = attr->ah_attr.grh.traffic_class;
+	cmd->dest.sl            = attr->ah_attr.sl;
+	cmd->dest.src_path_bits = attr->ah_attr.src_path_bits;
+	cmd->dest.static_rate   = attr->ah_attr.static_rate;
+	cmd->dest.is_global     = attr->ah_attr.is_global;
+	cmd->dest.port_num      = attr->ah_attr.port_num;
+
+	/* Alternate path, taken from attr->alt_ah_attr */
+	memcpy(cmd->alt_dest.dgid, attr->alt_ah_attr.grh.dgid.raw, 16);
+	cmd->alt_dest.flow_label    = attr->alt_ah_attr.grh.flow_label;
+	cmd->alt_dest.dlid          = attr->alt_ah_attr.dlid;
+	cmd->alt_dest.sgid_index    = attr->alt_ah_attr.grh.sgid_index;
+	cmd->alt_dest.hop_limit     = attr->alt_ah_attr.grh.hop_limit;
+	cmd->alt_dest.traffic_class = attr->alt_ah_attr.grh.traffic_class;
+	cmd->alt_dest.sl            = attr->alt_ah_attr.sl;
+	cmd->alt_dest.src_path_bits = attr->alt_ah_attr.src_path_bits;
+	cmd->alt_dest.static_rate   = attr->alt_ah_attr.static_rate;
+	cmd->alt_dest.is_global     = attr->alt_ah_attr.is_global;
+	cmd->alt_dest.port_num      = attr->alt_ah_attr.port_num;
+
+	return write(fd, cmd, cmd_size) != cmd_size ? errno : 0;
+}
+
+/**
+ * ibv_cmd_destroy_qp - Issue the DESTROY_QP command
+ * @qp: queue pair whose handle is sent; the command is written to the
+ *   cmd_fd of the context stored in @qp
+ *
+ * Returns 0 if write() reports that the whole command was consumed,
+ * otherwise the current errno.
+ */
+int ibv_cmd_destroy_qp(struct ibv_qp *qp)
+{
+	struct ibv_destroy_qp req;
+	int fd = qp->context->cmd_fd;
+
+	IBV_INIT_CMD(&req, sizeof req, DESTROY_QP);
+	req.qp_handle = qp->handle;
+
+	return write(fd, &req, sizeof req) != sizeof req ? errno : 0;
+}
diff --git a/src/device.c b/src/device.c
index cf32602..3b38b85 100644
--- a/src/device.c
+++ b/src/device.c
@@ -78,65 +78,39 @@ uint64_t ibv_get_device_guid(struct ibv_device *device)
struct ibv_context *ibv_open_device(struct ibv_device *device)
{
- struct ibv_context *context, *tmp;
char *devpath;
- struct ibv_get_context context_cmd;
- struct ibv_get_context_resp context_resp;
- struct ibv_get_event_fds event_fds_cmd;
- struct ibv_get_event_fds_resp *event_fds_resp;
- int i;
-
- context = malloc(sizeof *context);
- if (!context)
- return NULL;
-
- context->device = device;
+ int cmd_fd;
+ struct ibv_context *context;
+ struct ibv_query_params cmd;
+ struct ibv_query_params_resp resp;
asprintf(&devpath, "/dev/infiniband/%s", device->dev->name);
- context->cmd_fd = open(devpath, O_WRONLY);
-
- if (context->cmd_fd < 0)
- goto err;
-
- context_cmd.command = IB_USER_VERBS_CMD_GET_CONTEXT;
- context_cmd.in_words = sizeof context_cmd / 4;
- context_cmd.out_words = sizeof context_resp / 4;
- context_cmd.response = (unsigned long) &context_resp;
-
- if (write(context->cmd_fd, &context_cmd, sizeof context_cmd) != sizeof context_cmd)
- goto err_close;
-
- context->num_comp = context_resp.num_cq_events;
- if (context->num_comp > 1) {
- tmp = realloc(context, sizeof *context + context->num_comp * sizeof (int));
- if (!tmp)
- goto err_close;
- context = tmp;
- }
-
- event_fds_resp = alloca(sizeof *event_fds_resp + context->num_comp * 4);
+ /*
+ * We'll only be doing writes, but we need O_RDWR in case the
+ * provider needs to mmap() the file.
+ */
+ cmd_fd = open(devpath, O_RDWR);
+ if (cmd_fd < 0)
+ return NULL;
- event_fds_cmd.command = IB_USER_VERBS_CMD_GET_EVENT_FDS;
- event_fds_cmd.in_words = sizeof event_fds_cmd / 4;
- event_fds_cmd.out_words = sizeof *event_fds_resp / 4 + context->num_comp;
- event_fds_cmd.response = (unsigned long) event_fds_resp;
+ IBV_INIT_CMD_RESP(&cmd, sizeof cmd, QUERY_PARAMS, &resp);
+ if (write(cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
+ goto err;
- if (write(context->cmd_fd, &event_fds_cmd, sizeof event_fds_cmd) !=
- sizeof event_fds_cmd)
- goto err_close;
+ context = device->ops.alloc_context(device, resp.num_cq_events, cmd_fd);
+ if (!context)
+ goto err;
- context->async_fd = event_fds_resp->async_fd;
- for (i = 0; i < context->num_comp; ++i)
- context->cq_fd[i] = event_fds_resp->cq_fd[i];
+ context->device = device;
+ context->cmd_fd = cmd_fd;
+ context->num_comp = resp.num_cq_events;
return context;
-err_close:
- close(context->cmd_fd);
-
err:
- free(context);
+ close(cmd_fd);
+
return NULL;
}
@@ -149,7 +123,7 @@ int ibv_close_device(struct ibv_context *context)
close(context->cq_fd[i]);
close(context->cmd_fd);
- free(context);
+ context->device->ops.free_context(context);
return 0;
}
@@ -159,9 +133,7 @@ int ibv_get_async_event(struct ibv_context *context,
{
struct ibv_kern_async_event ev;
- int ret = read(context->async_fd, &ev, sizeof ev);
-
- if (ret != sizeof ev)
+ if (read(context->async_fd, &ev, sizeof ev) != sizeof ev)
return -1;
/* XXX convert CQ/QP handles back to pointers */
diff --git a/src/ibverbs.h b/src/ibverbs.h
index 678dec9..0d0fec1 100644
--- a/src/ibverbs.h
+++ b/src/ibverbs.h
@@ -37,11 +37,8 @@
#include <pthread.h>
-#include <infiniband/verbs.h>
#include <infiniband/driver.h>
-#include "kern_abi.h"
-
#define HIDDEN __attribute__((visibility ("hidden")))
#define INIT __attribute__((constructor))
@@ -59,4 +56,19 @@ extern int ibv_init_mem_map(void);
extern int ibv_lock_range(void *base, size_t size);
extern int ibv_unlock_range(void *base, size_t size);
+/*
+ * IBV_INIT_CMD - set up the header of a command that has no response.
+ *
+ * "cmd" is a pointer to the command structure, "size" its size and
+ * "opcode" the suffix of an IB_USER_VERBS_CMD_* constant.  Sets
+ * command to that constant, in_words to size / 4 (presumably a byte
+ * count converted to 32-bit words) and out_words to 0.
+ */
+#define IBV_INIT_CMD(cmd, size, opcode) \
+ do { \
+ (cmd)->command = IB_USER_VERBS_CMD_##opcode; \
+ (cmd)->in_words = (size) / 4; \
+ (cmd)->out_words = 0; \
+ } while (0)
+
+/*
+ * IBV_INIT_CMD_RESP - set up the header of a command with a response.
+ *
+ * Like IBV_INIT_CMD, but also records the response buffer: "out" is a
+ * pointer to it, its address is stored in response and out_words is
+ * set to sizeof (*out) / 4.  Note that only the static size of *out
+ * is counted; any variable-length data following it is not.
+ */
+#define IBV_INIT_CMD_RESP(cmd, size, opcode, out) \
+ do { \
+ (cmd)->command = IB_USER_VERBS_CMD_##opcode; \
+ (cmd)->in_words = (size) / 4; \
+ (cmd)->out_words = sizeof (*(out)) / 4; \
+ (cmd)->response = (uintptr_t) (out); \
+ } while (0)
+
#endif /* IB_VERBS_H */
diff --git a/src/init.c b/src/init.c
index 1bcd570..87f9557 100644
--- a/src/init.c
+++ b/src/init.c
@@ -74,7 +74,8 @@ static void load_driver(char *so_path)
driver = malloc(sizeof *driver);
if (!driver) {
fprintf(stderr, PFX "Fatal: couldn't allocate driver for %s\n", so_path);
- abort();
+ dlclose(dlhandle);
+ return;
}
driver->init_func = init_func;
@@ -148,7 +149,7 @@ static void init_drivers(struct sysfs_class_device *verbs_dev)
fprintf(stderr, PFX "Warning: no driver for %s\n", verbs_dev->name);
}
-static void check_abi_version(void)
+static int check_abi_version(void)
{
char path[256];
char val[16];
@@ -156,14 +157,14 @@ static void check_abi_version(void)
if (sysfs_get_mnt_path(path, sizeof path)) {
fprintf(stderr, PFX "Fatal: couldn't find sysfs mount.\n");
- abort();
+ return -1;
}
strncat(path, "/class/infiniband_verbs/abi_version", sizeof path);
if (sysfs_read_attribute_value(path, val, sizeof val)) {
fprintf(stderr, PFX "Fatal: couldn't read uverbs ABI version.\n");
- abort();
+ return -1;
}
ver = strtol(val, NULL, 10);
@@ -172,8 +173,10 @@ static void check_abi_version(void)
fprintf(stderr, PFX "Fatal: kernel ABI version %d "
"doesn't match library version %d.\n",
ver, IB_USER_VERBS_ABI_VERSION);
- abort();
+ return -1;
}
+
+ return 0;
}
@@ -185,15 +188,15 @@ static void INIT ibverbs_init(void)
Dlist *verbs_dev_list;
struct sysfs_class_device *verbs_dev;
- check_abi_version();
-
- if (ibv_init_mem_map())
- abort();
-
driver_list = dlist_new(sizeof (struct ibv_driver));
device_list = dlist_new(sizeof (struct ibv_device));
- if (!driver_list || !device_list)
+ if (!driver_list || !device_list) {
+ fprintf(stderr, PFX "Fatal: couldn't allocate device/driver list.\n");
abort();
+ }
+
+ if (ibv_init_mem_map())
+ return;
user_path = getenv(OPENIB_DRIVER_PATH_ENV);
if (user_path) {
@@ -207,13 +210,16 @@ static void INIT ibverbs_init(void)
cls = sysfs_open_class("infiniband_verbs");
if (!cls) {
fprintf(stderr, PFX "Fatal: couldn't open infiniband sysfs class.\n");
- abort();
+ return;
}
+ if (check_abi_version())
+ return;
+
verbs_dev_list = sysfs_get_class_devices(cls);
if (!verbs_dev_list) {
fprintf(stderr, PFX "Fatal: no infiniband class devices found.\n");
- abort();
+ return;
}
dlist_for_each_data(verbs_dev_list, verbs_dev, struct sysfs_class_device)
diff --git a/src/libibverbs.map b/src/libibverbs.map
index 941b2bf..b0690eb 100644
--- a/src/libibverbs.map
+++ b/src/libibverbs.map
@@ -6,9 +6,29 @@ IBVERBS_1.0 {
ibv_open_device;
ibv_close_device;
ibv_get_async_event;
+ ibv_query_port;
ibv_alloc_pd;
ibv_dealloc_pd;
ibv_reg_mr;
ibv_dereg_mr;
+ ibv_create_cq;
+ ibv_destroy_cq;
+ ibv_get_cq_event;
+ ibv_create_qp;
+ ibv_modify_qp;
+ ibv_destroy_qp;
+ ibv_create_ah;
+ ibv_destroy_ah;
+ ibv_cmd_get_context;
+ ibv_cmd_query_port;
+ ibv_cmd_alloc_pd;
+ ibv_cmd_dealloc_pd;
+ ibv_cmd_reg_mr;
+ ibv_cmd_dereg_mr;
+ ibv_cmd_create_cq;
+ ibv_cmd_destroy_cq;
+ ibv_cmd_create_qp;
+ ibv_cmd_modify_qp;
+ ibv_cmd_destroy_qp;
local: *;
};
diff --git a/src/verbs.c b/src/verbs.c
new file mode 100644
index 0000000..087de30
--- /dev/null
+++ b/src/verbs.c
@@ -0,0 +1,175 @@
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id$
+ */
+
+#if HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include "ibverbs.h"
+
+/**
+ * ibv_query_port - Query a port through the provider
+ * @context: context whose query_port method is invoked
+ * @port_num: forwarded unchanged to the provider
+ * @port_attr: forwarded unchanged to the provider
+ *
+ * Returns the provider method's result as is.
+ */
+int ibv_query_port(struct ibv_context *context, uint8_t port_num,
+		   struct ibv_port_attr *port_attr)
+{
+	int rc = context->ops.query_port(context, port_num, port_attr);
+
+	return rc;
+}
+
+/**
+ * ibv_alloc_pd - Allocate a protection domain through the provider
+ * @context: context whose alloc_pd method is invoked
+ *
+ * On success the new PD's context member is set to @context.  Returns
+ * the PD, or NULL if the provider returned a null pointer.
+ */
+struct ibv_pd *ibv_alloc_pd(struct ibv_context *context)
+{
+	struct ibv_pd *new_pd = context->ops.alloc_pd(context);
+
+	if (!new_pd)
+		return NULL;
+
+	new_pd->context = context;
+	return new_pd;
+}
+
+/**
+ * ibv_dealloc_pd - Free a protection domain through the provider
+ * @pd: protection domain to free
+ *
+ * Returns the result of the dealloc_pd method of the context stored
+ * in @pd.
+ */
+int ibv_dealloc_pd(struct ibv_pd *pd)
+{
+	struct ibv_context *ctx = pd->context;
+
+	return ctx->ops.dealloc_pd(pd);
+}
+
+/**
+ * ibv_reg_mr - Register a memory region through the provider
+ * @pd: protection domain the region is registered on
+ * @addr: forwarded unchanged to the provider
+ * @length: forwarded unchanged to the provider
+ * @access: forwarded unchanged to the provider
+ *
+ * On success the new MR's context and pd members are filled in from
+ * @pd.  Returns the MR, or NULL if the provider returned a null
+ * pointer.
+ */
+struct ibv_mr *ibv_reg_mr(struct ibv_pd *pd, void *addr,
+			  size_t length, enum ibv_access_flags access)
+{
+	struct ibv_mr *new_mr = pd->context->ops.reg_mr(pd, addr, length, access);
+
+	if (!new_mr)
+		return NULL;
+
+	new_mr->context = pd->context;
+	new_mr->pd      = pd;
+	return new_mr;
+}
+
+/**
+ * ibv_dereg_mr - Deregister a memory region through the provider
+ * @mr: memory region to deregister
+ *
+ * Returns the result of the dereg_mr method of the context stored in
+ * @mr.
+ */
+int ibv_dereg_mr(struct ibv_mr *mr)
+{
+	struct ibv_context *ctx = mr->context;
+
+	return ctx->ops.dereg_mr(mr);
+}
+
+/**
+ * ibv_create_cq - Create a completion queue through the provider
+ * @context: context whose create_cq method is invoked
+ * @cqe: forwarded unchanged to the provider
+ * @cq_context: opaque pointer stored in the new CQ's cq_context member
+ *
+ * On success the new CQ's context member is set to @context.  Returns
+ * the CQ, or NULL if the provider returned a null pointer.
+ */
+struct ibv_cq *ibv_create_cq(struct ibv_context *context, int cqe,
+			     void *cq_context)
+{
+	struct ibv_cq *new_cq;
+
+	new_cq = context->ops.create_cq(context, cqe);
+	if (!new_cq)
+		return NULL;
+
+	new_cq->context    = context;
+	new_cq->cq_context = cq_context;
+	return new_cq;
+}
+
+/**
+ * ibv_destroy_cq - Destroy a completion queue through the provider
+ * @cq: completion queue to destroy
+ *
+ * Returns the result of the destroy_cq method of the context stored
+ * in @cq.
+ */
+int ibv_destroy_cq(struct ibv_cq *cq)
+{
+	struct ibv_context *ctx = cq->context;
+
+	return ctx->ops.destroy_cq(cq);
+}
+
+
+/**
+ * ibv_get_cq_event - Read the next completion event
+ * @context: context whose cq_fd[] descriptors are read
+ * @comp_num: index into cq_fd[]; must be >= 0 and less than num_comp
+ * @cq: on success, set to the CQ recovered from the event's cq_handle
+ * @cq_context: on success, set to that CQ's cq_context
+ *
+ * After the outputs are stored, the cq_event method of the CQ's
+ * context is called if the provider supplies one.
+ *
+ * Returns 0 on success, or -1 if @comp_num is out of range or read()
+ * does not return a complete event.
+ */
+int ibv_get_cq_event(struct ibv_context *context, int comp_num,
+		     struct ibv_cq **cq, void **cq_context)
+{
+	struct ibv_comp_event ev;
+	struct ibv_cq *event_cq;
+
+	if (comp_num < 0 || comp_num >= context->num_comp)
+		return -1;
+
+	if (read(context->cq_fd[comp_num], &ev, sizeof ev) != sizeof ev)
+		return -1;
+
+	/* The event carries the CQ pointer back as an integer handle. */
+	event_cq = (struct ibv_cq *) (uintptr_t) ev.cq_handle;
+
+	*cq         = event_cq;
+	*cq_context = event_cq->cq_context;
+
+	if (event_cq->context->ops.cq_event)
+		event_cq->context->ops.cq_event(event_cq);
+
+	return 0;
+}
+
+/**
+ * ibv_create_qp - Create a queue pair through the provider
+ * @pd: protection domain the QP is created on
+ * @qp_init_attr: creation attributes, forwarded to the provider
+ *
+ * On success the new QP's context, qp_context, pd, send_cq and recv_cq
+ * members are filled in from @pd and @qp_init_attr.  Returns the QP,
+ * or NULL if the provider returned a null pointer.
+ */
+struct ibv_qp *ibv_create_qp(struct ibv_pd *pd,
+			     struct ibv_qp_init_attr *qp_init_attr)
+{
+	struct ibv_qp *new_qp;
+
+	new_qp = pd->context->ops.create_qp(pd, qp_init_attr);
+	if (!new_qp)
+		return NULL;
+
+	new_qp->context    = pd->context;
+	new_qp->qp_context = qp_init_attr->qp_context;
+	new_qp->pd         = pd;
+	new_qp->send_cq    = qp_init_attr->send_cq;
+	new_qp->recv_cq    = qp_init_attr->recv_cq;
+
+	return new_qp;
+}
+
+/**
+ * ibv_modify_qp - Modify a queue pair through the provider
+ * @qp: queue pair to modify
+ * @attr: new attribute values, forwarded to the provider
+ * @attr_mask: forwarded to the provider
+ *
+ * If the provider succeeds and IBV_QP_STATE is set in @attr_mask, the
+ * state cached in @qp is updated to @attr->qp_state.
+ *
+ * Returns 0 on success, or the provider's non-zero result.
+ */
+int ibv_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
+		  enum ibv_qp_attr_mask attr_mask)
+{
+	int ret = qp->context->ops.modify_qp(qp, attr, attr_mask);
+
+	if (!ret && (attr_mask & IBV_QP_STATE))
+		qp->state = attr->qp_state;
+
+	return ret;
+}
+
+/**
+ * ibv_destroy_qp - Destroy a queue pair through the provider
+ * @qp: queue pair to destroy
+ *
+ * Returns the result of the destroy_qp method of the context stored
+ * in @qp.
+ */
+int ibv_destroy_qp(struct ibv_qp *qp)
+{
+	struct ibv_context *ctx = qp->context;
+
+	return ctx->ops.destroy_qp(qp);
+}
+
+/**
+ * ibv_create_ah - Create an address handle through the provider
+ * @pd: protection domain the address handle is created on
+ * @attr: forwarded unchanged to the provider
+ *
+ * On success the new AH's context and pd members are filled in from
+ * @pd.  Returns the AH, or NULL if the provider returned a null
+ * pointer.
+ */
+struct ibv_ah *ibv_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr)
+{
+	struct ibv_ah *new_ah;
+
+	new_ah = pd->context->ops.create_ah(pd, attr);
+	if (!new_ah)
+		return NULL;
+
+	new_ah->context = pd->context;
+	new_ah->pd      = pd;
+	return new_ah;
+}
+
+/**
+ * ibv_destroy_ah - Destroy an address handle through the provider
+ * @ah: address handle to destroy
+ *
+ * Returns the result of the destroy_ah method of the context stored
+ * in @ah.
+ */
+int ibv_destroy_ah(struct ibv_ah *ah)
+{
+	struct ibv_context *ctx = ah->context;
+
+	return ctx->ops.destroy_ah(ah);
+}