diff options
author | Roland Dreier <roland@topspin.com> | 2005-04-07 21:26:32 +0000 |
---|---|---|
committer | Roland Dreier <rolandd@cisco.com> | 2006-11-09 11:35:56 -0800 |
commit | 91fc39561d04903cd5b1665d9215a184baa66ba9 (patch) | |
tree | 968bc59755e0271f44576e4f8907abf9c380988d | |
parent | bdb426aada13a0fd2cca920b9cd140008aa8a1eb (diff) | |
download | libibverbs-91fc39561d04903cd5b1665d9215a184baa66ba9.tar.gz |
Commit libibverbs code from roland-uverbs branch back onto trunk
-rw-r--r-- | Makefile.am | 31 | ||||
-rw-r--r-- | examples/device_list.c | 6 | ||||
-rw-r--r-- | examples/pingpong.c | 639 | ||||
-rw-r--r-- | examples/ud-pingpong.c | 677 | ||||
-rw-r--r-- | include/infiniband/arch.h | 78 | ||||
-rw-r--r-- | include/infiniband/driver.h | 29 | ||||
-rw-r--r-- | include/infiniband/kern-abi.h | 299 | ||||
-rw-r--r-- | include/infiniband/opcode.h | 149 | ||||
-rw-r--r-- | include/infiniband/verbs.h | 422 | ||||
-rw-r--r-- | src/cmd.c | 298 | ||||
-rw-r--r-- | src/device.c | 76 | ||||
-rw-r--r-- | src/ibverbs.h | 18 | ||||
-rw-r--r-- | src/init.c | 32 | ||||
-rw-r--r-- | src/libibverbs.map | 20 | ||||
-rw-r--r-- | src/verbs.c | 175 |
15 files changed, 2845 insertions, 104 deletions
diff --git a/Makefile.am b/Makefile.am index 01a3f6e..5b3fb8c 100644 --- a/Makefile.am +++ b/Makefile.am @@ -4,7 +4,9 @@ INCLUDES = -I$(srcdir)/include lib_LTLIBRARIES = src/libibverbs.la -src_libibverbs_la_CFLAGS = -Wall -D_GNU_SOURCE -DDRIVER_PATH=\"$(libdir)/infiniband\" +AM_CFLAGS = -g -Wall -D_GNU_SOURCE + +src_libibverbs_la_CFLAGS = -g -Wall -D_GNU_SOURCE -DDRIVER_PATH=\"$(libdir)/infiniband\" if HAVE_LD_VERSION_SCRIPT libibverbs_version_script = -Wl,--version-script=$(srcdir)/src/libibverbs.map @@ -12,24 +14,31 @@ else libibverbs_version_script = endif -src_libibverbs_la_SOURCES = src/init.c src/device.c src/memory.c +src_libibverbs_la_SOURCES = src/cmd.c src/device.c src/init.c src/memory.c src/verbs.c src_libibverbs_la_LDFLAGS = -version-info 1 -export-dynamic \ $(libibverbs_version_script) src_libibverbs_la_DEPENDENCIES = $(srcdir)/src/libibverbs.map -bin_PROGRAMS = examples/ib_devices examples/asyncwatch examples/pingpong -examples_ib_devices_SOURCES = examples/device_list.c -examples_ib_devices_LDADD = $(top_builddir)/src/libibverbs.la -examples_pingpong_SOURCES = examples/pingpong.c -examples_pingpong_LDADD = $(top_builddir)/src/libibverbs.la -examples_asyncwatch_SOURCES = examples/asyncwatch.c -examples_asyncwatch_LDADD = $(top_builddir)/src/libibverbs.la +bin_PROGRAMS = examples/ibv_devices examples/ibv_asyncwatch \ + examples/ibv_pingpong examples/ibv_ud_pingpong +examples_ibv_devices_SOURCES = examples/device_list.c +examples_ibv_devices_LDADD = $(top_builddir)/src/libibverbs.la +examples_ibv_pingpong_SOURCES = examples/pingpong.c +examples_ibv_pingpong_LDADD = $(top_builddir)/src/libibverbs.la +examples_ibv_ud_pingpong_SOURCES = examples/ud-pingpong.c +examples_ibv_ud_pingpong_LDADD = $(top_builddir)/src/libibverbs.la +examples_ibv_asyncwatch_SOURCES = examples/asyncwatch.c +examples_ibv_asyncwatch_LDADD = $(top_builddir)/src/libibverbs.la libibverbsincludedir = $(includedir)/infiniband -libibverbsinclude_HEADERS = include/infiniband/verbs.h 
include/infiniband/driver.h +libibverbsinclude_HEADERS = include/infiniband/arch.h include/infiniband/driver.h \ + include/infiniband/kern-abi.h include/infiniband/opcode.h include/infiniband/verbs.h -EXTRA_DIST = include/infiniband/verbs.h include/infiniband/driver.h \ +EXTRA_DIST = include/infiniband/driver.h include/infiniband/kern-abi.h \ + include/infiniband/opcode.h include/infiniband/verbs.h \ + src/ibverbs.h \ + src/libibverbs.map \ libibverbs.spec.in dist-hook: libibverbs.spec diff --git a/examples/device_list.c b/examples/device_list.c index a5395d5..414612e 100644 --- a/examples/device_list.c +++ b/examples/device_list.c @@ -36,6 +36,8 @@ # include <config.h> #endif /* HAVE_CONFIG_H */ +#include <stdio.h> + #include <endian.h> #include <byteswap.h> @@ -60,5 +62,7 @@ int main(int argc, char *argv[]) dlist_for_each_data(dev_list, ib_dev, struct ibv_device) printf(" %-16s\t%016llx\n", ibv_get_device_name(ib_dev), - be64_to_cpu(ibv_get_device_guid(ib_dev))); + (unsigned long long) be64_to_cpu(ibv_get_device_guid(ib_dev))); + + return 0; } diff --git a/examples/pingpong.c b/examples/pingpong.c index c8e6502..9d3adcb 100644 --- a/examples/pingpong.c +++ b/examples/pingpong.c @@ -29,7 +29,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* - * $Id: device_list.c 1393 2004-12-28 02:15:24Z roland $ + * $Id$ */ #if HAVE_CONFIG_H @@ -37,38 +37,641 @@ #endif /* HAVE_CONFIG_H */ #include <stdio.h> -#include <endian.h> -#include <byteswap.h> +#include <stdlib.h> +#include <unistd.h> +#include <string.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/time.h> +#include <netdb.h> +#include <malloc.h> +#include <getopt.h> +#include <arpa/inet.h> +#include <time.h> + +#include <sysfs/libsysfs.h> #include <infiniband/verbs.h> -#if __BYTE_ORDER == __LITTLE_ENDIAN -static inline uint64_t be64_to_cpu(uint64_t x) { return bswap_64(x); } -#elif __BYTE_ORDER == __BIG_ENDIAN -static inline uint64_t be64_to_cpu(uint64_t x) { return x; } -#endif +enum { + PINGPONG_RECV_WRID = 1, + PINGPONG_SEND_WRID = 2, +}; + +static int page_size; + +struct pingpong_context { + struct ibv_context *context; + struct ibv_pd *pd; + struct ibv_mr *mr; + struct ibv_cq *cq; + struct ibv_qp *qp; + void *buf; + int size; + int rx_depth; +}; + +struct pingpong_dest { + int lid; + int qpn; + int psn; +}; + +static uint16_t pp_get_local_lid(struct pingpong_context *ctx, int port) +{ + struct ibv_port_attr attr; + + if (ibv_query_port(ctx->context, port, &attr)) + return 0; + + return attr.lid; +} + +static struct pingpong_dest *pp_client_exch_dest(const char *servername, int port, + const struct pingpong_dest *my_dest) +{ + struct addrinfo *res, *t; + struct addrinfo hints = { + .ai_family = AF_UNSPEC, + .ai_socktype = SOCK_STREAM + }; + char *service; + char msg[sizeof "0000:000000:000000"]; + int n; + int sockfd = -1; + struct pingpong_dest *rem_dest = NULL; + + asprintf(&service, "%d", port); + n = getaddrinfo(servername, service, &hints, &res); + + if (n < 0) { + fprintf(stderr, "%s for %s:%d\n", gai_strerror(n), servername, port); + return NULL; + } + + for (t = res; t; t = t->ai_next) { + sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol); + if (sockfd >= 0) { + if (!connect(sockfd, t->ai_addr, t->ai_addrlen)) + 
break; + close(sockfd); + sockfd = -1; + } + } + + freeaddrinfo(res); + + if (sockfd < 0) { + fprintf(stderr, "Couldn't connect to %s:%d\n", servername, port); + return NULL; + } + + sprintf(msg, "%04x:%06x:%06x", my_dest->lid, my_dest->qpn, my_dest->psn); + if (write(sockfd, msg, sizeof msg) != sizeof msg) { + fprintf(stderr, "Couldn't send local address\n"); + goto out; + } + + if (read(sockfd, msg, sizeof msg) != sizeof msg) { + perror("client read"); + fprintf(stderr, "Couldn't read remote address\n"); + goto out; + } + + write(sockfd, "done", sizeof "done"); + + rem_dest = malloc(sizeof *rem_dest); + if (!rem_dest) + goto out; + + sscanf(msg, "%x:%x:%x", &rem_dest->lid, &rem_dest->qpn, &rem_dest->psn); + +out: + close(sockfd); + return rem_dest; +} + +static struct pingpong_dest *pp_server_exch_dest(int port, const struct pingpong_dest *my_dest) +{ + struct addrinfo *res, *t; + struct addrinfo hints = { + .ai_flags = AI_PASSIVE, + .ai_family = AF_UNSPEC, + .ai_socktype = SOCK_STREAM + }; + char *service; + char msg[sizeof "0000:000000:000000"]; + int n; + int sockfd = -1, connfd; + struct pingpong_dest *rem_dest = NULL; + + asprintf(&service, "%d", port); + n = getaddrinfo(NULL, service, &hints, &res); + + if (n < 0) { + fprintf(stderr, "%s for port %d\n", gai_strerror(n), port); + return NULL; + } + + for (t = res; t; t = t->ai_next) { + sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol); + if (sockfd >= 0) { + n = 1; + + setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &n, sizeof n); + + if (!bind(sockfd, t->ai_addr, t->ai_addrlen)) + break; + close(sockfd); + sockfd = -1; + } + } + + freeaddrinfo(res); + + if (sockfd < 0) { + fprintf(stderr, "Couldn't listen to port %d\n", port); + return NULL; + } + + listen(sockfd, 1); + connfd = accept(sockfd, NULL, 0); + close(sockfd); + if (connfd < 0) { + fprintf(stderr, "accept() failed\n"); + return NULL; + } + + n = read(connfd, msg, sizeof msg); + if (n != sizeof msg) { + perror("server read"); + 
fprintf(stderr, "%d/%d: Couldn't read remote address\n", n, (int) sizeof msg); + goto out; + } + + rem_dest = malloc(sizeof *rem_dest); + if (!rem_dest) + goto out; + + sscanf(msg, "%x:%x:%x", &rem_dest->lid, &rem_dest->qpn, &rem_dest->psn); + + sprintf(msg, "%04x:%06x:%06x", my_dest->lid, my_dest->qpn, my_dest->psn); + if (write(connfd, msg, sizeof msg) != sizeof msg) { + fprintf(stderr, "Couldn't send local address\n"); + free(rem_dest); + rem_dest = NULL; + goto out; + } + + read(connfd, msg, sizeof msg); + +out: + close(connfd); + return rem_dest; +} + +static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size, + int rx_depth, int port) +{ + struct pingpong_context *ctx; + + ctx = malloc(sizeof *ctx); + if (!ctx) + return NULL; + + ctx->size = size; + ctx->rx_depth = rx_depth; + + ctx->buf = memalign(page_size, size); + if (!ctx->buf) { + fprintf(stderr, "Couldn't allocate work buf.\n"); + return NULL; + } + + memset(ctx->buf, 0, size); + + ctx->context = ibv_open_device(ib_dev); + if (!ctx->context) { + fprintf(stderr, "Couldn't get context for %s\n", + ibv_get_device_name(ib_dev)); + return NULL; + } + + ctx->pd = ibv_alloc_pd(ctx->context); + if (!ctx->pd) { + fprintf(stderr, "Couldn't allocate PD\n"); + return NULL; + } + + ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size, IBV_ACCESS_LOCAL_WRITE); + if (!ctx->mr) { + fprintf(stderr, "Couldn't allocate MR\n"); + return NULL; + } + + ctx->cq = ibv_create_cq(ctx->context, rx_depth + 1, NULL); + if (!ctx->cq) { + fprintf(stderr, "Couldn't create CQ\n"); + return NULL; + } + + { + struct ibv_qp_init_attr attr = { + .send_cq = ctx->cq, + .recv_cq = ctx->cq, + .cap = { + .max_send_wr = 1, + .max_recv_wr = rx_depth, + .max_send_sge = 1, + .max_recv_sge = 1 + }, + .qp_type = IBV_QPT_RC + }; + + ctx->qp = ibv_create_qp(ctx->pd, &attr); + if (!ctx->qp) { + fprintf(stderr, "Couldn't create QP\n"); + return NULL; + } + } + + { + struct ibv_qp_attr attr; + + attr.qp_state = IBV_QPS_INIT; + 
attr.pkey_index = 0; + attr.port_num = port; + attr.qp_access_flags = 0; + + if (ibv_modify_qp(ctx->qp, &attr, + IBV_QP_STATE | + IBV_QP_PKEY_INDEX | + IBV_QP_PORT | + IBV_QP_ACCESS_FLAGS)) { + fprintf(stderr, "Failed to modify QP to INIT\n"); + return NULL; + } + } + + return ctx; +} + +static int pp_post_recv(struct pingpong_context *ctx, int n) +{ + struct ibv_sge list = { + .addr = (uintptr_t) ctx->buf, + .length = ctx->size, + .lkey = ctx->mr->lkey + }; + struct ibv_recv_wr wr = { + .wr_id = PINGPONG_RECV_WRID, + .sg_list = &list, + .num_sge = 1, + }; + struct ibv_recv_wr *bad_wr; + int i; + + for (i = 0; i < n; ++i) + if (ibv_post_recv(ctx->qp, &wr, &bad_wr)) + break; + + return i; +} + +static int pp_post_send(struct pingpong_context *ctx) +{ + struct ibv_sge list = { + .addr = (uintptr_t) ctx->buf, + .length = ctx->size, + .lkey = ctx->mr->lkey + }; + struct ibv_send_wr wr = { + .wr_id = PINGPONG_SEND_WRID, + .sg_list = &list, + .num_sge = 1, + .opcode = IBV_WR_SEND, + .send_flags = IBV_SEND_SIGNALED, + }; + struct ibv_send_wr *bad_wr; + + return ibv_post_send(ctx->qp, &wr, &bad_wr); +} + +static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn, + struct pingpong_dest *dest) +{ + struct ibv_qp_attr attr; + + attr.qp_state = IBV_QPS_RTR; + attr.path_mtu = IBV_MTU_1024; + attr.dest_qp_num = dest->qpn; + attr.rq_psn = dest->psn; + attr.max_dest_rd_atomic = 1; + attr.min_rnr_timer = 12; + attr.ah_attr.is_global = 0; + attr.ah_attr.dlid = dest->lid; + attr.ah_attr.sl = 0; + attr.ah_attr.src_path_bits = 0; + attr.ah_attr.port_num = port; + if (ibv_modify_qp(ctx->qp, &attr, + IBV_QP_STATE | + IBV_QP_AV | + IBV_QP_PATH_MTU | + IBV_QP_DEST_QPN | + IBV_QP_RQ_PSN | + IBV_QP_MAX_DEST_RD_ATOMIC | + IBV_QP_MIN_RNR_TIMER)) { + fprintf(stderr, "Failed to modify QP to RTR\n"); + return 1; + } + + attr.qp_state = IBV_QPS_RTS; + attr.timeout = 14; + attr.retry_cnt = 7; + attr.rnr_retry = 7; + attr.sq_psn = my_psn; + attr.max_rd_atomic = 1; + if 
(ibv_modify_qp(ctx->qp, &attr, + IBV_QP_STATE | + IBV_QP_TIMEOUT | + IBV_QP_RETRY_CNT | + IBV_QP_RNR_RETRY | + IBV_QP_SQ_PSN | + IBV_QP_MAX_QP_RD_ATOMIC)) { + fprintf(stderr, "Failed to modify QP to RTS\n"); + return 1; + } + + return 0; +} + +static void usage(const char *argv0) +{ + printf("Usage:\n"); + printf(" %s start a server and wait for connection\n", argv0); + printf(" %s <host> connect to server at <host>\n", argv0); + printf("\n"); + printf("Options:\n"); + printf(" -p, --port=<port> listen on/connect to port <port> (default 18515)\n"); + printf(" -d, --ib-dev=<dev> use IB device <dev> (default first device found)\n"); + printf(" -i, --ib-port=<port> use port <port> of IB device (default 1)\n"); + printf(" -s, --size=<size> size of message to exchange (default 4096)\n"); + printf(" -r, --rx-depth=<dep> number of receives to post at a time (default 500)\n"); + printf(" -n, --iters=<iters> number of exchanges (default 1000)\n"); + printf(" -e, --events sleep on CQ events (default poll)\n"); +} int main(int argc, char *argv[]) { - struct dlist *dev_list; - struct ibv_device *ib_dev; - struct ibv_context *context; + struct dlist *dev_list; + struct ibv_device *ib_dev; + struct pingpong_context *ctx; + struct pingpong_dest my_dest; + struct pingpong_dest *rem_dest; + struct timeval start, end; + char *ib_devname = NULL; + char *servername = NULL; + int port = 18515; + int ib_port = 1; + int size = 4096; + int rx_depth = 500; + int iters = 1000; + int use_event = 0; + int routs; + int rcnt, scnt; + + srand48(getpid() * time(NULL)); + + while (1) { + int c; + + static struct option long_options[] = { + { .name = "port", .has_arg = 1, .val = 'p' }, + { .name = "ib-dev", .has_arg = 1, .val = 'd' }, + { .name = "ib-port", .has_arg = 1, .val = 'i' }, + { .name = "size", .has_arg = 1, .val = 's' }, + { .name = "iters", .has_arg = 1, .val = 'n' }, + { .name = "events", .has_arg = 0, .val = 'e' }, + { 0 } + }; + + c = getopt_long(argc, argv, "p:d:i:s:r:n:e", 
long_options, NULL); + if (c == -1) + break; + + switch (c) { + case 'p': + port = strtol(optarg, NULL, 0); + if (port < 0 || port > 65535) { + usage(argv[0]); + return 1; + } + break; + + case 'd': + ib_devname = strdupa(optarg); + break; + + case 'i': + ib_port = strtol(optarg, NULL, 0); + if (port < 0) { + usage(argv[0]); + return 1; + } + break; + + case 's': + size = strtol(optarg, NULL, 0); + break; + + case 'r': + rx_depth = strtol(optarg, NULL, 0); + break; + + case 'n': + iters = strtol(optarg, NULL, 0); + break; + + case 'e': + ++use_event; + break; + + default: + usage(argv[0]); + return 1; + } + } + + if (optind == argc - 1) + servername = strdupa(argv[optind]); + else if (optind < argc) { + usage(argv[0]); + return 1; + } + + page_size = sysconf(_SC_PAGESIZE); dev_list = ibv_get_devices(); dlist_start(dev_list); - ib_dev = dlist_next(dev_list); + if (!ib_devname) { + ib_dev = dlist_next(dev_list); + if (!ib_dev) { + fprintf(stderr, "No IB devices found\n"); + return 1; + } + } else { + dlist_for_each_data(dev_list, ib_dev, struct ibv_device) + if (!strcmp(ibv_get_device_name(ib_dev), ib_devname)) + break; + if (!ib_dev) { + fprintf(stderr, "IB device %s not found\n", ib_devname); + return 1; + } + } + + ctx = pp_init_ctx(ib_dev, size, rx_depth, ib_port); + if (!ctx) + return 1; - if (!ib_dev) { - fprintf(stderr, "No IB devices found\n"); + routs = pp_post_recv(ctx, ctx->rx_depth); + if (routs < ctx->rx_depth) { + fprintf(stderr, "Couldn't post receive (%d)\n", routs); return 1; } - context = ibv_open_device(ib_dev); - if (!context) { - fprintf(stderr, "Couldn't get context for %s\n", - ibv_get_device_name(ib_dev)); + my_dest.lid = pp_get_local_lid(ctx, ib_port); + my_dest.qpn = ctx->qp->qp_num; + my_dest.psn = lrand48() & 0xffffff; + if (!my_dest.lid) { + fprintf(stderr, "Couldn't get local LID\n"); + return 1; + } + + printf(" local address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x\n", + my_dest.lid, my_dest.qpn, my_dest.psn); + + if (servername) + rem_dest 
= pp_client_exch_dest(servername, port, &my_dest); + else + rem_dest = pp_server_exch_dest(port, &my_dest); + + if (!rem_dest) + return 1; + + printf(" remote address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x\n", + rem_dest->lid, rem_dest->qpn, rem_dest->psn); + + if (pp_connect_ctx(ctx, ib_port, my_dest.psn, rem_dest)) return 1; + + if (use_event) + if (ibv_req_notify_cq(ctx->cq, 0)) { + fprintf(stderr, "Couldn't request CQ notification\n"); + return 1; + } + + if (servername) + if (pp_post_send(ctx)) { + fprintf(stderr, "Couldn't post send\n"); + return 1; + } + + if (gettimeofday(&start, NULL)) { + perror("gettimeofday"); + return 1; + } + + rcnt = scnt = 0; + while (rcnt < iters || scnt < iters) { + if (use_event) { + struct ibv_cq *ev_cq; + void *ev_ctx; + + if (ibv_get_cq_event(ctx->context, 0, &ev_cq, &ev_ctx)) { + fprintf(stderr, "Failed to get cq_event\n"); + return 1; + } + + if (ev_cq != ctx->cq) { + fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq); + return 1; + } + + if (ibv_req_notify_cq(ctx->cq, 0)) { + fprintf(stderr, "Couldn't request CQ notification\n"); + return 1; + } + } + + { + struct ibv_wc wc[2]; + int ne, i; + + do { + ne = ibv_poll_cq(ctx->cq, 2, wc); + } while (!use_event && ne < 1); + + if (ne < 0) { + fprintf(stderr, "poll CQ failed %d\n", ne); + return 1; + } + + for (i = 0; i < ne; ++i) { + if (wc[i].status != IBV_WC_SUCCESS) { + fprintf(stderr, "Failed status %d for wr_id %d\n", + wc[i].status, (int) wc[i].wr_id); + return 1; + } + + switch ((int) wc[i].wr_id) { + case PINGPONG_SEND_WRID: + ++scnt; + break; + + case PINGPONG_RECV_WRID: + if (--routs <= 1) { + routs += pp_post_recv(ctx, ctx->rx_depth - routs); + if (routs < ctx->rx_depth) { + fprintf(stderr, + "Couldn't post receive (%d)\n", + routs); + return 1; + } + } + + if (scnt < iters) + if (pp_post_send(ctx)) { + fprintf(stderr, "Couldn't post send\n"); + return 1; + } + + ++rcnt; + break; + + default: + fprintf(stderr, "Completion for unknown wr_id %d\n", + (int) 
wc[i].wr_id); + return 1; + } + } + } + } + + if (gettimeofday(&end, NULL)) { + perror("gettimeofday"); + return 1; + } + + { + float usec = (end.tv_sec - start.tv_sec) * 1000000 + + (end.tv_usec - start.tv_usec); + long long bytes = (long long) size * iters * 2; + + printf("%lld bytes in %.2f seconds = %.2f Mbit/sec\n", + bytes, usec / 1000000., bytes * 8. / usec); + printf("%d iters in %.2f seconds = %.2f usec/iter\n", + iters, usec / 1000000., usec / iters); } return 0; diff --git a/examples/ud-pingpong.c b/examples/ud-pingpong.c new file mode 100644 index 0000000..8aa6075 --- /dev/null +++ b/examples/ud-pingpong.c @@ -0,0 +1,677 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + +#if HAVE_CONFIG_H +# include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <string.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/time.h> +#include <netdb.h> +#include <malloc.h> +#include <getopt.h> +#include <arpa/inet.h> +#include <time.h> + +#include <sysfs/libsysfs.h> + +#include <infiniband/verbs.h> + +enum { + PINGPONG_RECV_WRID = 1, + PINGPONG_SEND_WRID = 2, +}; + +static int page_size; + +struct pingpong_context { + struct ibv_context *context; + struct ibv_pd *pd; + struct ibv_mr *mr; + struct ibv_cq *cq; + struct ibv_qp *qp; + struct ibv_ah *ah; + void *buf; + int size; + int rx_depth; +}; + +struct pingpong_dest { + int lid; + int qpn; + int psn; +}; + + +static uint16_t pp_get_local_lid(struct pingpong_context *ctx, int port) +{ + struct ibv_port_attr attr; + + if (ibv_query_port(ctx->context, port, &attr)) + return 0; + + return attr.lid; +} + +static struct pingpong_dest *pp_client_exch_dest(const char *servername, int port, + const struct pingpong_dest *my_dest) +{ + struct addrinfo *res, *t; + struct addrinfo hints = { + .ai_family = AF_UNSPEC, + .ai_socktype = SOCK_STREAM + }; + char *service; + char msg[sizeof "0000:000000:000000"]; + int n; + int sockfd = -1; + struct pingpong_dest *rem_dest = NULL; + + asprintf(&service, "%d", port); + n = getaddrinfo(servername, service, &hints, &res); + + if (n < 0) { + fprintf(stderr, "%s for %s:%d\n", gai_strerror(n), servername, port); + return NULL; + } + + for (t = res; t; t = t->ai_next) { + sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol); + if (sockfd >= 0) { + if (!connect(sockfd, t->ai_addr, t->ai_addrlen)) + 
break; + close(sockfd); + sockfd = -1; + } + } + + freeaddrinfo(res); + + if (sockfd < 0) { + fprintf(stderr, "Couldn't connect to %s:%d\n", servername, port); + return NULL; + } + + sprintf(msg, "%04x:%06x:%06x", my_dest->lid, my_dest->qpn, my_dest->psn); + if (write(sockfd, msg, sizeof msg) != sizeof msg) { + fprintf(stderr, "Couldn't send local address\n"); + goto out; + } + + if (read(sockfd, msg, sizeof msg) != sizeof msg) { + perror("client read"); + fprintf(stderr, "Couldn't read remote address\n"); + goto out; + } + + write(sockfd, "done", sizeof "done"); + + rem_dest = malloc(sizeof *rem_dest); + if (!rem_dest) + goto out; + + sscanf(msg, "%x:%x:%x", &rem_dest->lid, &rem_dest->qpn, &rem_dest->psn); + +out: + close(sockfd); + return rem_dest; +} + +static struct pingpong_dest *pp_server_exch_dest(int port, const struct pingpong_dest *my_dest) +{ + struct addrinfo *res, *t; + struct addrinfo hints = { + .ai_flags = AI_PASSIVE, + .ai_family = AF_UNSPEC, + .ai_socktype = SOCK_STREAM + }; + char *service; + char msg[sizeof "0000:000000:000000"]; + int n; + int sockfd = -1, connfd; + struct pingpong_dest *rem_dest = NULL; + + asprintf(&service, "%d", port); + n = getaddrinfo(NULL, service, &hints, &res); + + if (n < 0) { + fprintf(stderr, "%s for port %d\n", gai_strerror(n), port); + return NULL; + } + + for (t = res; t; t = t->ai_next) { + sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol); + if (sockfd >= 0) { + n = 1; + + setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &n, sizeof n); + + if (!bind(sockfd, t->ai_addr, t->ai_addrlen)) + break; + close(sockfd); + sockfd = -1; + } + } + + freeaddrinfo(res); + + if (sockfd < 0) { + fprintf(stderr, "Couldn't listen to port %d\n", port); + return NULL; + } + + listen(sockfd, 1); + connfd = accept(sockfd, NULL, 0); + close(sockfd); + if (connfd < 0) { + fprintf(stderr, "accept() failed\n"); + return NULL; + } + + n = read(connfd, msg, sizeof msg); + if (n != sizeof msg) { + perror("server read"); + 
fprintf(stderr, "%d/%d: Couldn't read remote address\n", n, (int) sizeof msg); + goto out; + } + + rem_dest = malloc(sizeof *rem_dest); + if (!rem_dest) + goto out; + + sscanf(msg, "%x:%x:%x", &rem_dest->lid, &rem_dest->qpn, &rem_dest->psn); + + sprintf(msg, "%04x:%06x:%06x", my_dest->lid, my_dest->qpn, my_dest->psn); + if (write(connfd, msg, sizeof msg) != sizeof msg) { + fprintf(stderr, "Couldn't send local address\n"); + free(rem_dest); + rem_dest = NULL; + goto out; + } + + read(connfd, msg, sizeof msg); + +out: + close(connfd); + return rem_dest; +} + +static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size, + int rx_depth, int port) +{ + struct pingpong_context *ctx; + + ctx = malloc(sizeof *ctx); + if (!ctx) + return NULL; + + ctx->size = size; + ctx->rx_depth = rx_depth; + + ctx->buf = memalign(page_size, size + 40); + if (!ctx->buf) { + fprintf(stderr, "Couldn't allocate work buf.\n"); + return NULL; + } + + memset(ctx->buf, 0, size + 40); + + ctx->context = ibv_open_device(ib_dev); + if (!ctx->context) { + fprintf(stderr, "Couldn't get context for %s\n", + ibv_get_device_name(ib_dev)); + return NULL; + } + + ctx->pd = ibv_alloc_pd(ctx->context); + if (!ctx->pd) { + fprintf(stderr, "Couldn't allocate PD\n"); + return NULL; + } + + ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size + 40, IBV_ACCESS_LOCAL_WRITE); + if (!ctx->mr) { + fprintf(stderr, "Couldn't allocate MR\n"); + return NULL; + } + + ctx->cq = ibv_create_cq(ctx->context, rx_depth + 1, NULL); + if (!ctx->cq) { + fprintf(stderr, "Couldn't create CQ\n"); + return NULL; + } + + { + struct ibv_qp_init_attr attr = { + .send_cq = ctx->cq, + .recv_cq = ctx->cq, + .cap = { + .max_send_wr = 1, + .max_recv_wr = rx_depth, + .max_send_sge = 1, + .max_recv_sge = 1 + }, + .qp_type = IBV_QPT_UD, + }; + + ctx->qp = ibv_create_qp(ctx->pd, &attr); + if (!ctx->qp) { + fprintf(stderr, "Couldn't create QP\n"); + return NULL; + } + } + + { + struct ibv_qp_attr attr; + + attr.qp_state = IBV_QPS_INIT; 
+ attr.pkey_index = 0; + attr.port_num = port; + attr.qkey = 0x11111111; + + if (ibv_modify_qp(ctx->qp, &attr, + IBV_QP_STATE | + IBV_QP_PKEY_INDEX | + IBV_QP_PORT | + IBV_QP_QKEY)) { + fprintf(stderr, "Failed to modify QP to INIT\n"); + return NULL; + } + } + + return ctx; +} + +static int pp_post_recv(struct pingpong_context *ctx, int n) +{ + struct ibv_sge list = { + .addr = (uintptr_t) ctx->buf, + .length = ctx->size + 40, + .lkey = ctx->mr->lkey + }; + struct ibv_recv_wr wr = { + .wr_id = PINGPONG_RECV_WRID, + .sg_list = &list, + .num_sge = 1, + }; + struct ibv_recv_wr *bad_wr; + int i; + + for (i = 0; i < n; ++i) + if (ibv_post_recv(ctx->qp, &wr, &bad_wr)) + break; + + return i; +} + +static int pp_post_send(struct pingpong_context *ctx, uint32_t qpn) +{ + struct ibv_sge list = { + .addr = (uintptr_t) ctx->buf + 40, + .length = ctx->size, + .lkey = ctx->mr->lkey + }; + struct ibv_send_wr wr = { + .wr_id = PINGPONG_SEND_WRID, + .sg_list = &list, + .num_sge = 1, + .opcode = IBV_WR_SEND, + .send_flags = IBV_SEND_SIGNALED, + .wr = { + .ud = { + .ah = ctx->ah, + .remote_qpn = qpn, + .remote_qkey = 0x11111111 + } + } + }; + struct ibv_send_wr *bad_wr; + + return ibv_post_send(ctx->qp, &wr, &bad_wr); +} + +static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn, + struct pingpong_dest *dest) +{ + struct ibv_qp_attr attr; + struct ibv_ah_attr ah_attr; + + attr.qp_state = IBV_QPS_RTR; + + if (ibv_modify_qp(ctx->qp, &attr, IBV_QP_STATE)) { + fprintf(stderr, "Failed to modify QP to RTR\n"); + return 1; + } + + attr.qp_state = IBV_QPS_RTS; + attr.sq_psn = my_psn; + + if (ibv_modify_qp(ctx->qp, &attr, + IBV_QP_STATE | + IBV_QP_SQ_PSN)) { + fprintf(stderr, "Failed to modify QP to RTS\n"); + return 1; + } + + ah_attr.is_global = 0; + ah_attr.dlid = dest->lid; + ah_attr.sl = 0; + ah_attr.src_path_bits = 0; + ah_attr.port_num = port; + + ctx->ah = ibv_create_ah(ctx->pd, &ah_attr); + if (!ctx->ah) { + fprintf(stderr, "Failed to create AH\n"); + return 1; + 
}
+
+	return 0;
+}
+
+/* Print the command-line synopsis and option list to stdout. */
+static void usage(const char *argv0)
+{
+	printf("Usage:\n");
+	printf(" %s start a server and wait for connection\n", argv0);
+	printf(" %s <host> connect to server at <host>\n", argv0);
+	printf("\n");
+	printf("Options:\n");
+	printf(" -p, --port=<port> listen on/connect to port <port> (default 18515)\n");
+	printf(" -d, --ib-dev=<dev> use IB device <dev> (default first device found)\n");
+	printf(" -i, --ib-port=<port> use port <port> of IB device (default 1)\n");
+	printf(" -s, --size=<size> size of message to exchange (default 2048)\n");
+	printf(" -r, --rx-depth=<dep> number of receives to post at a time (default 500)\n");
+	printf(" -n, --iters=<iters> number of exchanges (default 1000)\n");
+	printf(" -e, --events sleep on CQ events (default poll)\n");
+}
+
+/*
+ * Parse the options, pick an IB device, set up the pingpong context,
+ * swap destination info with the peer (client if a host name was
+ * given, server otherwise), then run the send/receive loop until both
+ * counters reach `iters` and print throughput and per-iteration time.
+ *
+ * Returns 0 on success, 1 on a usage error or any failed step.
+ */
+int main(int argc, char *argv[])
+{
+	struct dlist *dev_list;
+	struct ibv_device *ib_dev;
+	struct pingpong_context *ctx;
+	struct pingpong_dest my_dest;
+	struct pingpong_dest *rem_dest;
+	struct timeval start, end;
+	char *ib_devname = NULL;
+	char *servername = NULL;
+	int port = 18515;
+	int ib_port = 1;
+	int size = 2048;
+	int rx_depth = 500;
+	int iters = 1000;
+	int use_event = 0;
+	int routs;
+	int rcnt, scnt;
+
+	srand48(getpid() * time(NULL));
+
+	while (1) {
+		int c;
+
+		/* Every short option below must have its long form here. */
+		static struct option long_options[] = {
+			{ .name = "port", .has_arg = 1, .val = 'p' },
+			{ .name = "ib-dev", .has_arg = 1, .val = 'd' },
+			{ .name = "ib-port", .has_arg = 1, .val = 'i' },
+			{ .name = "size", .has_arg = 1, .val = 's' },
+			{ .name = "rx-depth", .has_arg = 1, .val = 'r' },
+			{ .name = "iters", .has_arg = 1, .val = 'n' },
+			{ .name = "events", .has_arg = 0, .val = 'e' },
+			{ 0 }
+		};
+
+		c = getopt_long(argc, argv, "p:d:i:s:r:n:e", long_options, NULL);
+		if (c == -1)
+			break;
+
+		switch (c) {
+		case 'p':
+			port = strtol(optarg, NULL, 0);
+			if (port < 0 || port > 65535) {
+				usage(argv[0]);
+				return 1;
+			}
+			break;
+
+		case 'd':
+			ib_devname = strdupa(optarg);
+			break;
+
+		case 'i':
+			ib_port = strtol(optarg, NULL, 0);
+			/* Validate the value just parsed, not the TCP port. */
+			if (ib_port < 0) {
+				usage(argv[0]);
+				return 1;
+			}
+			break;
+
+		case 's':
+			size = strtol(optarg, NULL, 0);
+			break;
+
+		case 'r':
+			rx_depth = strtol(optarg, NULL, 0);
+			break;
+
+		case 'n':
+			iters = strtol(optarg, NULL, 0);
+			break;
+
+		case 'e':
+			++use_event;
+			break;
+
+		default:
+			usage(argv[0]);
+			return 1;
+		}
+	}
+
+	/* At most one positional argument: the server to connect to. */
+	if (optind == argc - 1)
+		servername = strdupa(argv[optind]);
+	else if (optind < argc) {
+		usage(argv[0]);
+		return 1;
+	}
+
+	page_size = sysconf(_SC_PAGESIZE);
+
+	dev_list = ibv_get_devices();
+
+	dlist_start(dev_list);
+	if (!ib_devname) {
+		ib_dev = dlist_next(dev_list);
+		if (!ib_dev) {
+			fprintf(stderr, "No IB devices found\n");
+			return 1;
+		}
+	} else {
+		dlist_for_each_data(dev_list, ib_dev, struct ibv_device)
+			if (!strcmp(ibv_get_device_name(ib_dev), ib_devname))
+				break;
+		if (!ib_dev) {
+			fprintf(stderr, "IB device %s not found\n", ib_devname);
+			return 1;
+		}
+	}
+
+	ctx = pp_init_ctx(ib_dev, size, rx_depth, ib_port);
+	if (!ctx)
+		return 1;
+
+	routs = pp_post_recv(ctx, ctx->rx_depth);
+	if (routs < ctx->rx_depth) {
+		fprintf(stderr, "Couldn't post receive (%d)\n", routs);
+		return 1;
+	}
+
+	my_dest.lid = pp_get_local_lid(ctx, ib_port);
+	my_dest.qpn = ctx->qp->qp_num;
+	my_dest.psn = lrand48() & 0xffffff;
+	if (!my_dest.lid) {
+		fprintf(stderr, "Couldn't get local LID\n");
+		return 1;
+	}
+
+	printf(" local address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x\n",
+	       my_dest.lid, my_dest.qpn, my_dest.psn);
+
+	if (servername)
+		rem_dest = pp_client_exch_dest(servername, port, &my_dest);
+	else
+		rem_dest = pp_server_exch_dest(port, &my_dest);
+
+	if (!rem_dest)
+		return 1;
+
+	printf(" remote address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x\n",
+	       rem_dest->lid, rem_dest->qpn, rem_dest->psn);
+
+	if (pp_connect_ctx(ctx, ib_port, my_dest.psn, rem_dest))
+		return 1;
+
+	if (use_event)
+		if (ibv_req_notify_cq(ctx->cq, 0)) {
+			fprintf(stderr, "Couldn't request CQ notification\n");
+			return 1;
+		}
+
+	/* The client sends first; the server only answers receives. */
+	if (servername)
+		if (pp_post_send(ctx, rem_dest->qpn)) {
+			fprintf(stderr, "Couldn't post send\n");
+			return 1;
+		}
+
+	if (gettimeofday(&start, NULL)) {
+		perror("gettimeofday");
+		return 1;
+	}
+
+	rcnt = scnt = 0;
+	while (rcnt < iters || scnt < iters) {
+		if (use_event) {
+			struct ibv_cq *ev_cq;
+			void *ev_ctx;
+
+			if (ibv_get_cq_event(ctx->context, 0, &ev_cq, &ev_ctx)) {
+				fprintf(stderr, "Failed to get cq_event\n");
+				return 1;
+			}
+
+			if (ev_cq != ctx->cq) {
+				fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq);
+				return 1;
+			}
+
+			if (ibv_req_notify_cq(ctx->cq, 0)) {
+				fprintf(stderr, "Couldn't request CQ notification\n");
+				return 1;
+			}
+		}
+
+		{
+			struct ibv_wc wc[2];
+			int ne, i;
+
+			/* In polling mode spin until a completion shows up. */
+			do {
+				ne = ibv_poll_cq(ctx->cq, 2, wc);
+			} while (!use_event && ne < 1);
+
+			if (ne < 0) {
+				fprintf(stderr, "poll CQ failed %d\n", ne);
+				return 1;
+			}
+
+			for (i = 0; i < ne; ++i) {
+				if (wc[i].status != IBV_WC_SUCCESS) {
+					fprintf(stderr, "Failed status %d for wr_id %d\n",
+						wc[i].status, (int) wc[i].wr_id);
+					return 1;
+				}
+
+				switch ((int) wc[i].wr_id) {
+				case PINGPONG_SEND_WRID:
+					++scnt;
+					break;
+
+				case PINGPONG_RECV_WRID:
+					/* Refill the receive queue when nearly empty. */
+					if (--routs <= 1) {
+						routs += pp_post_recv(ctx, ctx->rx_depth - routs);
+						if (routs < ctx->rx_depth) {
+							fprintf(stderr,
+								"Couldn't post receive (%d)\n",
+								routs);
+							return 1;
+						}
+					}
+
+					if (scnt < iters)
+						if (pp_post_send(ctx, rem_dest->qpn)) {
+							fprintf(stderr, "Couldn't post send\n");
+							return 1;
+						}
+
+					++rcnt;
+					break;
+
+				default:
+					fprintf(stderr, "Completion for unknown wr_id %d\n",
+						(int) wc[i].wr_id);
+					return 1;
+				}
+			}
+		}
+	}
+
+	if (gettimeofday(&end, NULL)) {
+		perror("gettimeofday");
+		return 1;
+	}
+
+	{
+		float usec = (end.tv_sec - start.tv_sec) * 1000000 +
+			(end.tv_usec - start.tv_usec);
+		long long bytes = (long long) size * iters * 2;
+
+		printf("%lld bytes in %.2f seconds = %.2f Mbit/sec\n",
+		       bytes, usec / 1000000., bytes * 8. / usec);
+		printf("%d iters in %.2f seconds = %.2f usec/iter\n",
+		       iters, usec / 1000000., usec / iters);
+	}
+
+	return 0;
+}
diff --git a/include/infiniband/arch.h b/include/infiniband/arch.h
new file mode 100644
index 0000000..b582d18
--- /dev/null
+++ b/include/infiniband/arch.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id$
+ */
+
+#ifndef INFINIBAND_ARCH_H
+#define INFINIBAND_ARCH_H
+
+/*
+ * Architecture-specific defines. Currently, an architecture is
+ * required to implement the following operations:
+ *
+ * mb() - memory barrier.
No loads or stores may be reordered across
+ * this macro by either the compiler or the CPU.
+ */
+
+#if defined(__i386__)
+
+/* A locked read-modify-write orders all earlier loads and stores. */
+#define mb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
+
+#elif defined(__x86_64__)
+
+/* x86 may let a later load pass an earlier store; mfence forbids that. */
+#define mb() asm volatile("mfence" ::: "memory")
+
+#elif defined(__PPC64__)
+
+#define mb() asm volatile("sync" ::: "memory")
+
+#elif defined(__ia64__)
+
+#define mb() asm volatile("mf" ::: "memory")
+
+#elif defined(__PPC__)
+
+#define mb() asm volatile("sync" ::: "memory")
+
+#elif defined(__sparc__)
+
+#define mb() asm volatile("membar #LoadLoad | #LoadStore | #StoreStore | #StoreLoad" ::: "memory")
+
+#else
+
+#warning No architecture specific defines found. Using generic implementation.
+
+/* Compiler builtin full barrier; a bare "memory" clobber would not fence the CPU. */
+#define mb() __sync_synchronize()
+
+#endif
+
+#endif /* INFINIBAND_ARCH_H */
diff --git a/include/infiniband/driver.h b/include/infiniband/driver.h
index a873621..dde2650 100644
--- a/include/infiniband/driver.h
+++ b/include/infiniband/driver.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.
You may choose to be licensed under the terms of the GNU
@@ -38,6 +38,7 @@
 #include <sysfs/libsysfs.h>
 
 #include <infiniband/verbs.h>
+#include <infiniband/kern-abi.h>
 
 #ifdef __cplusplus
 # define BEGIN_C_DECLS extern "C" {
@@ -62,4 +63,30 @@
 
 typedef struct ibv_device *(*ibv_driver_init_func)(struct sysfs_class_device *);
 
+/*
+ * Command helpers exported to provider drivers. Where a helper takes
+ * cmd/cmd_size, the caller supplies the command buffer (one of the
+ * structs from <infiniband/kern-abi.h>) and its total size in bytes.
+ * NOTE(review): presumably cmd_size may exceed sizeof *cmd so that a
+ * driver can append private data after the common part -- confirm
+ * against the driver_data[] members in kern-abi.h.
+ */
+extern int ibv_cmd_get_context(int num_comp, struct ibv_context *context,
+			       struct ibv_get_context *cmd, size_t cmd_size);
+extern int ibv_cmd_query_port(struct ibv_context *context, uint8_t port_num,
+			      struct ibv_port_attr *port_attr,
+			      struct ibv_query_port *cmd, size_t cmd_size);
+extern int ibv_cmd_alloc_pd(struct ibv_context *context, struct ibv_pd *pd,
+			    struct ibv_alloc_pd *cmd, size_t cmd_size);
+extern int ibv_cmd_dealloc_pd(struct ibv_pd *pd);
+extern int ibv_cmd_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
+			  uint64_t hca_va, enum ibv_access_flags access,
+			  struct ibv_mr *mr, struct ibv_reg_mr *cmd,
+			  size_t cmd_size);
+extern int ibv_cmd_dereg_mr(struct ibv_mr *mr);
+extern int ibv_cmd_create_cq(struct ibv_context *context, int cqe,
+			     struct ibv_cq *cq,
+			     struct ibv_create_cq *cmd, size_t cmd_size);
+extern int ibv_cmd_destroy_cq(struct ibv_cq *cq);
+
+extern int ibv_cmd_create_qp(struct ibv_pd *pd,
+			     struct ibv_qp *qp, struct ibv_qp_init_attr *attr,
+			     struct ibv_create_qp *cmd, size_t cmd_size);
+extern int ibv_cmd_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
+			     enum ibv_qp_attr_mask attr_mask,
+			     struct ibv_modify_qp *cmd, size_t cmd_size);
+extern int ibv_cmd_destroy_qp(struct ibv_qp *qp);
+
 #endif /* INFINIBAND_DRIVER_H */
diff --git a/include/infiniband/kern-abi.h b/include/infiniband/kern-abi.h
new file mode 100644
index 0000000..13a9c68
--- /dev/null
+++ b/include/infiniband/kern-abi.h
@@ -0,0 +1,299 @@
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + +#ifndef KERN_ABI_H +#define KERN_ABI_H + +#include <linux/types.h> + +/* + * This file must be kept in sync with the kernel's version of + * drivers/infiniband/include/ib_user_verbs.h + */ + +/* + * Increment this value if any changes that break userspace ABI + * compatibility are made. 
+ */
+#define IB_USER_VERBS_ABI_VERSION 1
+
+/*
+ * Command numbers. No explicit values are given, so they count up
+ * from 0 in declaration order; inserting or reordering entries
+ * changes every following number.
+ */
+enum {
+	IB_USER_VERBS_CMD_QUERY_PARAMS,
+	IB_USER_VERBS_CMD_GET_CONTEXT,
+	IB_USER_VERBS_CMD_QUERY_PORT,
+	IB_USER_VERBS_CMD_ALLOC_PD,
+	IB_USER_VERBS_CMD_DEALLOC_PD,
+	IB_USER_VERBS_CMD_REG_MR,
+	IB_USER_VERBS_CMD_DEREG_MR,
+	IB_USER_VERBS_CMD_CREATE_CQ,
+	IB_USER_VERBS_CMD_DESTROY_CQ,
+	IB_USER_VERBS_CMD_CREATE_QP,
+	IB_USER_VERBS_CMD_MODIFY_QP,
+	IB_USER_VERBS_CMD_DESTROY_QP,
+};
+
+/*
+ * Make sure that all structs defined in this file remain laid out so
+ * that they pack the same way on 32-bit and 64-bit architectures (to
+ * avoid incompatibility between 32-bit userspace and 64-bit kernels).
+ * In particular do not use pointer types -- pass pointers in __u64
+ * instead.
+ */
+
+struct ibv_kern_async_event {
+	__u64 element;
+	__u32 event_type;
+	__u32 reserved;		/* pads the struct to 16 bytes */
+};
+
+struct ibv_comp_event {
+	__u64 cq_handle;
+};
+
+/*
+ * All commands from userspace should start with a __u32 command field
+ * followed by __u16 in_words and out_words fields (which give the
+ * length of the command block and response buffer if any in 32-bit
+ * words). The kernel driver will read these fields first and read
+ * the rest of the command struct based on these values.
+ */
+
+struct ibv_query_params {
+	__u32 command;
+	__u16 in_words;
+	__u16 out_words;
+	__u64 response;		/* presumably the user address of the *_resp buffer -- confirm */
+};
+
+struct ibv_query_params_resp {
+	__u32 num_cq_events;
+};
+
+struct ibv_get_context {
+	__u32 command;
+	__u16 in_words;
+	__u16 out_words;
+	__u64 response;
+	__u64 driver_data[0];	/* zero-length tail: adds nothing to sizeof */
+};
+
+struct ibv_get_context_resp {
+	__u32 async_fd;
+	__u32 cq_fd[1];		/* ibv_cmd_get_context() reads cq_fd[0..num_comp-1] */
+};
+
+struct ibv_query_port {
+	__u32 command;
+	__u16 in_words;
+	__u16 out_words;
+	__u64 response;
+	__u8 port_num;
+	__u8 reserved[7];	/* keeps driver_data 8-byte aligned */
+	__u64 driver_data[0];
+};
+
+struct ibv_query_port_resp {
+	__u32 port_cap_flags;
+	__u32 max_msg_sz;
+	__u32 bad_pkey_cntr;
+	__u32 qkey_viol_cntr;
+	__u32 gid_tbl_len;
+	__u16 pkey_tbl_len;
+	__u16 lid;
+	__u16 sm_lid;
+	__u8 state;
+	__u8 max_mtu;
+	__u8 active_mtu;
+	__u8 lmc;
+	__u8 max_vl_num;
+	__u8 sm_sl;
+	__u8 subnet_timeout;
+	__u8 init_type_reply;
+	__u8 active_width;
+	__u8 active_speed;
+	__u8 phys_state;
+	__u8 reserved[3];
+};
+
+struct ibv_alloc_pd {
+	__u32 command;
+	__u16 in_words;
+	__u16 out_words;
+	__u64 response;
+	__u64 driver_data[0];
+};
+
+struct ibv_alloc_pd_resp {
+	__u32 pd_handle;
+};
+
+struct ibv_dealloc_pd {
+	__u32 command;
+	__u16 in_words;
+	__u16 out_words;
+	__u32 pd_handle;	/* no response field: this command has no reply struct */
+};
+
+struct ibv_reg_mr {
+	__u32 command;
+	__u16 in_words;
+	__u16 out_words;
+	__u64 response;
+	__u64 start;		/* ibv_cmd_reg_mr() stores the buffer address here */
+	__u64 length;
+	__u64 hca_va;
+	__u32 pd_handle;
+	__u32 access_flags;
+	__u64 driver_data[0];
+};
+
+struct ibv_reg_mr_resp {
+	__u32 mr_handle;
+	__u32 lkey;
+	__u32 rkey;
+};
+
+struct ibv_dereg_mr {
+	__u32 command;
+	__u16 in_words;
+	__u16 out_words;
+	__u32 mr_handle;
+};
+
+struct ibv_create_cq {
+	__u32 command;
+	__u16 in_words;
+	__u16 out_words;
+	__u64 response;
+	__u64 user_handle;	/* ibv_cmd_create_cq() stores the struct ibv_cq pointer here */
+	__u32 cqe;
+	__u32 reserved;
+	__u64 driver_data[0];
+};
+
+struct ibv_create_cq_resp {
+	__u32 cq_handle;
+	__u32 cqe;
+};
+
+struct ibv_destroy_cq {
+	__u32 command;
+	__u16 in_words;
+	__u16 out_words;
+	__u32 cq_handle;
+};
+
+struct ibv_create_qp {
+	__u32 command;
+	__u16 in_words;
+	__u16 out_words;
+	__u64 response;
+	__u64 user_handle;
+	__u32 pd_handle;
+	__u32 send_cq_handle;
+	__u32 recv_cq_handle;
+	__u32 srq_handle;
+	__u32 max_send_wr;
+	__u32 max_recv_wr;
+	__u32 max_send_sge;
+	__u32 max_recv_sge;
+	__u32 max_inline_data;
+	__u8 sq_sig_all;
+	__u8 qp_type;
+	__u8 is_srq;
+	__u8 reserved;
+	__u64 driver_data[0];
+};
+
+struct ibv_create_qp_resp {
+	__u32 qp_handle;
+	__u32 qpn;
+};
+
+struct ibv_qp_dest {
+	__u8 dgid[16];
+	__u32 flow_label;
+	__u16 dlid;
+	__u16 reserved;
+	__u8 sgid_index;
+	__u8 hop_limit;
+	__u8 traffic_class;
+	__u8 sl;
+	__u8 src_path_bits;
+	__u8 static_rate;
+	__u8 is_global;
+	__u8 port_num;
+};
+
+struct ibv_modify_qp {
+	__u32 command;
+	__u16 in_words;
+	__u16 out_words;
+	struct ibv_qp_dest dest;
+	struct ibv_qp_dest alt_dest;
+	__u32 qp_handle;
+	__u32 attr_mask;
+	__u32 qkey;
+	__u32 rq_psn;
+	__u32 sq_psn;
+	__u32 dest_qp_num;
+	__u32 qp_access_flags;
+	__u16 pkey_index;
+	__u16 alt_pkey_index;
+	__u8 qp_state;
+	__u8 cur_qp_state;
+	__u8 path_mtu;
+	__u8 path_mig_state;
+	__u8 en_sqd_async_notify;
+	__u8 max_rd_atomic;
+	__u8 max_dest_rd_atomic;
+	__u8 min_rnr_timer;
+	__u8 port_num;
+	__u8 timeout;
+	__u8 retry_cnt;
+	__u8 rnr_retry;
+	__u8 alt_port_num;
+	__u8 alt_timeout;
+	__u8 reserved[2];
+	__u64 driver_data[0];
+};
+
+struct ibv_destroy_qp {
+	__u32 command;
+	__u16 in_words;
+	__u16 out_words;
+	__u32 qp_handle;
+};
+
+#endif /* KERN_ABI_H */
diff --git a/include/infiniband/opcode.h b/include/infiniband/opcode.h
new file mode 100644
index 0000000..cf2598b
--- /dev/null
+++ b/include/infiniband/opcode.h
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + +#ifndef INFINIBAND_OPCODE_H +#define INFINIBAND_OPCODE_H + +/* + * This macro cleans up the definitions of constants for BTH opcodes. + * It is used to define constants such as IBV_OPCODE_UD_SEND_ONLY, + * which becomes IBV_OPCODE_UD + IBV_OPCODE_SEND_ONLY, and this gives + * the correct value. + * + * In short, user code should use the constants defined using the + * macro rather than worrying about adding together other constants. 
+*/
+#define IBV_OPCODE(transport, op) \
+	IBV_OPCODE_ ## transport ## _ ## op = \
+		IBV_OPCODE_ ## transport + IBV_OPCODE_ ## op
+
+enum {
+	/* transport types -- just used to define real constants */
+	IBV_OPCODE_RC = 0x00,
+	IBV_OPCODE_UC = 0x20,
+	IBV_OPCODE_RD = 0x40,
+	IBV_OPCODE_UD = 0x60,
+
+	/* operations -- just used to define real constants */
+	IBV_OPCODE_SEND_FIRST = 0x00,
+	IBV_OPCODE_SEND_MIDDLE = 0x01,
+	IBV_OPCODE_SEND_LAST = 0x02,
+	IBV_OPCODE_SEND_LAST_WITH_IMMEDIATE = 0x03,
+	IBV_OPCODE_SEND_ONLY = 0x04,
+	IBV_OPCODE_SEND_ONLY_WITH_IMMEDIATE = 0x05,
+	IBV_OPCODE_RDMA_WRITE_FIRST = 0x06,
+	IBV_OPCODE_RDMA_WRITE_MIDDLE = 0x07,
+	IBV_OPCODE_RDMA_WRITE_LAST = 0x08,
+	IBV_OPCODE_RDMA_WRITE_LAST_WITH_IMMEDIATE = 0x09,
+	IBV_OPCODE_RDMA_WRITE_ONLY = 0x0a,
+	IBV_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE = 0x0b,
+	IBV_OPCODE_RDMA_READ_REQUEST = 0x0c,
+	IBV_OPCODE_RDMA_READ_RESPONSE_FIRST = 0x0d,
+	IBV_OPCODE_RDMA_READ_RESPONSE_MIDDLE = 0x0e,
+	IBV_OPCODE_RDMA_READ_RESPONSE_LAST = 0x0f,
+	IBV_OPCODE_RDMA_READ_RESPONSE_ONLY = 0x10,
+	IBV_OPCODE_ACKNOWLEDGE = 0x11,
+	IBV_OPCODE_ATOMIC_ACKNOWLEDGE = 0x12,
+	IBV_OPCODE_COMPARE_SWAP = 0x13,
+	IBV_OPCODE_FETCH_ADD = 0x14,
+
+	/* real constants follow -- see the comment above the IBV_OPCODE()
+	   macro for more details */
+
+	/* RC */
+	IBV_OPCODE(RC, SEND_FIRST),
+	IBV_OPCODE(RC, SEND_MIDDLE),
+	IBV_OPCODE(RC, SEND_LAST),
+	IBV_OPCODE(RC, SEND_LAST_WITH_IMMEDIATE),
+	IBV_OPCODE(RC, SEND_ONLY),
+	IBV_OPCODE(RC, SEND_ONLY_WITH_IMMEDIATE),
+	IBV_OPCODE(RC, RDMA_WRITE_FIRST),
+	IBV_OPCODE(RC, RDMA_WRITE_MIDDLE),
+	IBV_OPCODE(RC, RDMA_WRITE_LAST),
+	IBV_OPCODE(RC, RDMA_WRITE_LAST_WITH_IMMEDIATE),
+	IBV_OPCODE(RC, RDMA_WRITE_ONLY),
+	IBV_OPCODE(RC, RDMA_WRITE_ONLY_WITH_IMMEDIATE),
+	IBV_OPCODE(RC, RDMA_READ_REQUEST),
+	IBV_OPCODE(RC, RDMA_READ_RESPONSE_FIRST),
+	IBV_OPCODE(RC, RDMA_READ_RESPONSE_MIDDLE),
+	IBV_OPCODE(RC, RDMA_READ_RESPONSE_LAST),
+	IBV_OPCODE(RC, RDMA_READ_RESPONSE_ONLY),
+	IBV_OPCODE(RC, ACKNOWLEDGE),
+	IBV_OPCODE(RC, ATOMIC_ACKNOWLEDGE),
+	IBV_OPCODE(RC, COMPARE_SWAP),
+	IBV_OPCODE(RC, FETCH_ADD),
+
+	/* UC */
+	IBV_OPCODE(UC, SEND_FIRST),
+	IBV_OPCODE(UC, SEND_MIDDLE),
+	IBV_OPCODE(UC, SEND_LAST),
+	IBV_OPCODE(UC, SEND_LAST_WITH_IMMEDIATE),
+	IBV_OPCODE(UC, SEND_ONLY),
+	IBV_OPCODE(UC, SEND_ONLY_WITH_IMMEDIATE),
+	IBV_OPCODE(UC, RDMA_WRITE_FIRST),
+	IBV_OPCODE(UC, RDMA_WRITE_MIDDLE),
+	IBV_OPCODE(UC, RDMA_WRITE_LAST),
+	IBV_OPCODE(UC, RDMA_WRITE_LAST_WITH_IMMEDIATE),
+	IBV_OPCODE(UC, RDMA_WRITE_ONLY),
+	IBV_OPCODE(UC, RDMA_WRITE_ONLY_WITH_IMMEDIATE),
+
+	/* RD */
+	IBV_OPCODE(RD, SEND_FIRST),
+	IBV_OPCODE(RD, SEND_MIDDLE),
+	IBV_OPCODE(RD, SEND_LAST),
+	IBV_OPCODE(RD, SEND_LAST_WITH_IMMEDIATE),
+	IBV_OPCODE(RD, SEND_ONLY),
+	IBV_OPCODE(RD, SEND_ONLY_WITH_IMMEDIATE),
+	IBV_OPCODE(RD, RDMA_WRITE_FIRST),
+	IBV_OPCODE(RD, RDMA_WRITE_MIDDLE),
+	IBV_OPCODE(RD, RDMA_WRITE_LAST),
+	IBV_OPCODE(RD, RDMA_WRITE_LAST_WITH_IMMEDIATE),
+	IBV_OPCODE(RD, RDMA_WRITE_ONLY),
+	IBV_OPCODE(RD, RDMA_WRITE_ONLY_WITH_IMMEDIATE),
+	IBV_OPCODE(RD, RDMA_READ_REQUEST),
+	IBV_OPCODE(RD, RDMA_READ_RESPONSE_FIRST),
+	IBV_OPCODE(RD, RDMA_READ_RESPONSE_MIDDLE),
+	IBV_OPCODE(RD, RDMA_READ_RESPONSE_LAST),
+	IBV_OPCODE(RD, RDMA_READ_RESPONSE_ONLY),
+	IBV_OPCODE(RD, ACKNOWLEDGE),
+	IBV_OPCODE(RD, ATOMIC_ACKNOWLEDGE),
+	IBV_OPCODE(RD, COMPARE_SWAP),
+	IBV_OPCODE(RD, FETCH_ADD),
+
+	/* UD */
+	IBV_OPCODE(UD, SEND_ONLY),
+	IBV_OPCODE(UD, SEND_ONLY_WITH_IMMEDIATE)
+};
+
+#endif /* INFINIBAND_OPCODE_H */
diff --git a/include/infiniband/verbs.h b/include/infiniband/verbs.h
index f5fa340..5128c41 100644
--- a/include/infiniband/verbs.h
+++ b/include/infiniband/verbs.h
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2004 Intel Corporation. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.
You may choose to be licensed under the terms of the GNU
@@ -49,7 +50,54 @@
 
 BEGIN_C_DECLS
 
-enum ib_event_type {
+union ibv_gid {
+	uint8_t raw[16];
+	struct {
+		uint64_t subnet_prefix;
+		uint64_t interface_id;
+	} global;		/* the same 16 bytes viewed as two 64-bit halves */
+};
+
+enum ibv_mtu {
+	IBV_MTU_256 = 1,
+	IBV_MTU_512 = 2,
+	IBV_MTU_1024 = 3,
+	IBV_MTU_2048 = 4,
+	IBV_MTU_4096 = 5
+};
+
+enum ibv_port_state {
+	IBV_PORT_NOP = 0,
+	IBV_PORT_DOWN = 1,
+	IBV_PORT_INIT = 2,
+	IBV_PORT_ARMED = 3,
+	IBV_PORT_ACTIVE = 4,
+	IBV_PORT_ACTIVE_DEFER = 5
+};
+
+struct ibv_port_attr {
+	enum ibv_port_state state;
+	enum ibv_mtu max_mtu;
+	enum ibv_mtu active_mtu;
+	int gid_tbl_len;
+	uint32_t port_cap_flags;
+	uint32_t max_msg_sz;
+	uint32_t bad_pkey_cntr;
+	uint32_t qkey_viol_cntr;
+	uint16_t pkey_tbl_len;
+	uint16_t lid;
+	uint16_t sm_lid;
+	uint8_t lmc;
+	uint8_t max_vl_num;
+	uint8_t sm_sl;
+	uint8_t subnet_timeout;
+	uint8_t init_type_reply;
+	uint8_t active_width;
+	uint8_t active_speed;
+	uint8_t phys_state;
+};
+
+enum ibv_event_type {
 	IBV_EVENT_CQ_ERR,
 	IBV_EVENT_QP_FATAL,
 	IBV_EVENT_QP_REQ_ERR,
@@ -72,7 +120,68 @@ struct ibv_async_event {
 		struct ibv_qp *qp;
 		int port_num;
 	} element;
-	enum ib_event_type event_type;
+	enum ibv_event_type event_type;
+};
+
+enum ibv_wc_status {
+	IBV_WC_SUCCESS,
+	IBV_WC_LOC_LEN_ERR,
+	IBV_WC_LOC_QP_OP_ERR,
+	IBV_WC_LOC_EEC_OP_ERR,
+	IBV_WC_LOC_PROT_ERR,
+	IBV_WC_WR_FLUSH_ERR,
+	IBV_WC_MW_BIND_ERR,
+	IBV_WC_BAD_RESP_ERR,
+	IBV_WC_LOC_ACCESS_ERR,
+	IBV_WC_REM_INV_REQ_ERR,
+	IBV_WC_REM_ACCESS_ERR,
+	IBV_WC_REM_OP_ERR,
+	IBV_WC_RETRY_EXC_ERR,
+	IBV_WC_RNR_RETRY_EXC_ERR,
+	IBV_WC_LOC_RDD_VIOL_ERR,
+	IBV_WC_REM_INV_RD_REQ_ERR,
+	IBV_WC_REM_ABORT_ERR,
+	IBV_WC_INV_EECN_ERR,
+	IBV_WC_INV_EEC_STATE_ERR,
+	IBV_WC_FATAL_ERR,
+	IBV_WC_RESP_TIMEOUT_ERR,
+	IBV_WC_GENERAL_ERR
+};
+
+enum ibv_wc_opcode {
+	IBV_WC_SEND,
+	IBV_WC_RDMA_WRITE,
+	IBV_WC_RDMA_READ,
+	IBV_WC_COMP_SWAP,
+	IBV_WC_FETCH_ADD,
+	IBV_WC_BIND_MW,
+/*
+ * Set value of IBV_WC_RECV so consumers can test if a completion is a
+ * receive by testing (opcode & IBV_WC_RECV).
+ */
+	IBV_WC_RECV = 1 << 7,
+	IBV_WC_RECV_RDMA_WITH_IMM	/* 129, so the IBV_WC_RECV bit is set here too */
+};
+
+enum ibv_wc_flags {
+	IBV_WC_GRH = 1 << 0,
+	IBV_WC_WITH_IMM = 1 << 1
+};
+
+struct ibv_wc {
+	uint64_t wr_id;
+	enum ibv_wc_status status;
+	enum ibv_wc_opcode opcode;
+	uint32_t vendor_err;
+	uint32_t byte_len;
+	uint32_t imm_data; /* in network byte order */
+	uint32_t qp_num;
+	uint32_t src_qp;
+	enum ibv_wc_flags wc_flags;
+	uint16_t pkey_index;
+	uint16_t slid;
+	uint8_t sl;
+	uint8_t dlid_path_bits;
 };
 
 enum ibv_access_flags {
@@ -84,23 +193,215 @@ enum ibv_access_flags {
 };
 
 struct ibv_pd {
-
+	struct ibv_context *context;
+	uint32_t handle;	/* filled in by ibv_cmd_alloc_pd() from the reply */
 };
 
 struct ibv_mr {
+	struct ibv_context *context;
+	struct ibv_pd *pd;
+	uint32_t handle;
+	uint32_t lkey;
+	uint32_t rkey;
+};
 
+struct ibv_global_route {
+	union ibv_gid dgid;
+	uint32_t flow_label;
+	uint8_t sgid_index;
+	uint8_t hop_limit;
+	uint8_t traffic_class;
 };
 
-struct ibv_qp {
+struct ibv_ah_attr {
+	struct ibv_global_route grh;
+	uint16_t dlid;
+	uint8_t sl;
+	uint8_t src_path_bits;
+	uint8_t static_rate;
+	uint8_t is_global;
+	uint8_t port_num;
+};
+
+enum ibv_qp_type {
+	IBV_QPT_RC = 2,		/* numbering starts at 2; UC is 3 and UD is 4 */
+	IBV_QPT_UC,
+	IBV_QPT_UD
+};
+
+struct ibv_qp_cap {
+	uint32_t max_send_wr;
+	uint32_t max_recv_wr;
+	uint32_t max_send_sge;
+	uint32_t max_recv_sge;
+	uint32_t max_inline_data;
+};
+
+struct ibv_qp_init_attr {
+	void *qp_context;
+	struct ibv_cq *send_cq;
+	struct ibv_cq *recv_cq;
+	struct ibv_srq *srq;
+	struct ibv_qp_cap cap;
+	enum ibv_qp_type qp_type;
+	int sq_sig_all;
+};
+
+enum ibv_qp_attr_mask {
+	IBV_QP_STATE = 1 << 0,
+	IBV_QP_CUR_STATE = 1 << 1,
+	IBV_QP_EN_SQD_ASYNC_NOTIFY = 1 << 2,
+	IBV_QP_ACCESS_FLAGS = 1 << 3,
+	IBV_QP_PKEY_INDEX = 1 << 4,
+	IBV_QP_PORT = 1 << 5,
+	IBV_QP_QKEY = 1 << 6,
+	IBV_QP_AV = 1 << 7,
+	IBV_QP_PATH_MTU = 1 << 8,
+	IBV_QP_TIMEOUT = 1 << 9,
+	IBV_QP_RETRY_CNT = 1 << 10,
+	IBV_QP_RNR_RETRY = 1 << 11,
+	IBV_QP_RQ_PSN = 1 << 12,
+	IBV_QP_MAX_QP_RD_ATOMIC = 1 << 13,
+	IBV_QP_ALT_PATH = 1 << 14,
+	IBV_QP_MIN_RNR_TIMER = 1 << 15,
+	IBV_QP_SQ_PSN = 1 << 16,
+	IBV_QP_MAX_DEST_RD_ATOMIC = 1 << 17,
+	IBV_QP_PATH_MIG_STATE = 1 << 18,
+	IBV_QP_CAP = 1 << 19,
+	IBV_QP_DEST_QPN = 1 << 20
+};
+
+enum ibv_qp_state {
+	IBV_QPS_RESET,
+	IBV_QPS_INIT,
+	IBV_QPS_RTR,
+	IBV_QPS_RTS,
+	IBV_QPS_SQD,
+	IBV_QPS_SQE,
+	IBV_QPS_ERR
+};
+
+enum ibv_mig_state {
+	IBV_MIG_MIGRATED,
+	IBV_MIG_REARM,
+	IBV_MIG_ARMED
+};
+
+struct ibv_qp_attr {
+	enum ibv_qp_state qp_state;
+	enum ibv_qp_state cur_qp_state;
+	enum ibv_mtu path_mtu;
+	enum ibv_mig_state path_mig_state;
+	uint32_t qkey;
+	uint32_t rq_psn;
+	uint32_t sq_psn;
+	uint32_t dest_qp_num;
+	int qp_access_flags;
+	struct ibv_qp_cap cap;
+	struct ibv_ah_attr ah_attr;
+	struct ibv_ah_attr alt_ah_attr;
+	uint16_t pkey_index;
+	uint16_t alt_pkey_index;
+	uint8_t en_sqd_async_notify;
+	uint8_t sq_draining;
+	uint8_t max_rd_atomic;
+	uint8_t max_dest_rd_atomic;
+	uint8_t min_rnr_timer;
+	uint8_t port_num;
+	uint8_t timeout;
+	uint8_t retry_cnt;
+	uint8_t rnr_retry;
+	uint8_t alt_port_num;
+	uint8_t alt_timeout;
+};
+
+enum ibv_wr_opcode {
+	IBV_WR_RDMA_WRITE,
+	IBV_WR_RDMA_WRITE_WITH_IMM,
+	IBV_WR_SEND,
+	IBV_WR_SEND_WITH_IMM,
+	IBV_WR_RDMA_READ,
+	IBV_WR_ATOMIC_CMP_AND_SWP,
+	IBV_WR_ATOMIC_FETCH_AND_ADD
+};
+
+enum ibv_send_flags {
+	IBV_SEND_FENCE = 1 << 0,
+	IBV_SEND_SIGNALED = 1 << 1,
+	IBV_SEND_SOLICITED = 1 << 2,
+	IBV_SEND_INLINE = 1 << 3
+};
+
+struct ibv_sge {
+	uint64_t addr;
+	uint32_t length;
+	uint32_t lkey;
+};
+
+struct ibv_send_wr {
+	struct ibv_send_wr *next;
+	uint64_t wr_id;
+	struct ibv_sge *sg_list;
+	int num_sge;
+	enum ibv_wr_opcode opcode;
+	enum ibv_send_flags send_flags;
+	uint32_t imm_data;
+	union {
+		struct {
+			uint64_t remote_addr;
+			uint32_t rkey;
+		} rdma;
+		struct {
+			uint64_t remote_addr;
+			uint64_t compare_add;
+			uint64_t swap;
+			uint32_t rkey;
+		} atomic;
+		struct {
+			struct ibv_ah *ah;
+			uint32_t remote_qpn;
+			uint32_t remote_qkey;
+		} ud;
+	} wr;
+};
+
+struct ibv_recv_wr {
+	struct ibv_recv_wr *next;
+	uint64_t wr_id;
+	struct ibv_sge *sg_list;
+	int num_sge;
+};
 
+struct ibv_qp {
+	struct ibv_context *context;
+	void *qp_context;
+	struct ibv_pd *pd;
+	struct ibv_cq *send_cq;
+	struct ibv_cq *recv_cq;
+	uint32_t handle;
+	uint32_t qp_num;
+	enum ibv_qp_state state;
 };
 
 struct ibv_cq {
+	struct ibv_context *context;
+	void *cq_context;
+	uint32_t handle;	/* filled in by ibv_cmd_create_cq() from the reply */
+	int cqe;		/* overwritten with the value returned in the reply */
+};
 
+struct ibv_ah {
+	struct ibv_context *context;
+	struct ibv_pd *pd;
 };
 
-struct ibv_device_ops {
+struct ibv_device;
+struct ibv_context;
 
+struct ibv_device_ops {
+	struct ibv_context * (*alloc_context)(struct ibv_device *device,
+					      int num_comp, int cmd_fd);
+	void (*free_context)(struct ibv_context *context);
 };
 
 struct ibv_device {
@@ -110,8 +411,34 @@ struct ibv_device {
 	struct ibv_device_ops ops;
 };
 
+struct ibv_context_ops {
+	int (*query_port)(struct ibv_context *context, uint8_t port_num,
+			  struct ibv_port_attr *port_attr);
+	struct ibv_pd * (*alloc_pd)(struct ibv_context *context);
+	int (*dealloc_pd)(struct ibv_pd *pd);
+	struct ibv_mr * (*reg_mr)(struct ibv_pd *pd, void *addr, size_t length,
+				  enum ibv_access_flags access);
+	int (*dereg_mr)(struct ibv_mr *mr);
+	struct ibv_cq * (*create_cq)(struct ibv_context *context, int cqe);
+	int (*poll_cq)(struct ibv_cq *cq, int num_entries, struct ibv_wc *wc);
+	int (*req_notify_cq)(struct ibv_cq *cq, int solicited);
+	void (*cq_event)(struct ibv_cq *cq);
+	int (*destroy_cq)(struct ibv_cq *cq);
+	struct ibv_qp * (*create_qp)(struct ibv_pd *pd, struct ibv_qp_init_attr *attr);
+	int (*modify_qp)(struct ibv_qp *qp, struct ibv_qp_attr *attr,
+			 enum ibv_qp_attr_mask attr_mask);
+	int (*destroy_qp)(struct ibv_qp *qp);
+	int (*post_send)(struct ibv_qp *qp, struct ibv_send_wr *wr,
+			 struct ibv_send_wr **bad_wr);
+	int (*post_recv)(struct ibv_qp *qp, struct ibv_recv_wr *wr,
+			 struct ibv_recv_wr **bad_wr);
+	struct ibv_ah * (*create_ah)(struct ibv_pd *pd, struct ibv_ah_attr *attr);
+	int (*destroy_ah)(struct ibv_ah *ah);
+};
+
 struct ibv_context {
 	struct ibv_device *device;
+	struct ibv_context_ops ops;
 	int cmd_fd;
 	int async_fd;
 	int num_comp;
@@ -150,6 +477,12 @@ extern int ibv_get_async_event(struct ibv_context *context,
 			       struct ibv_async_event *event);
 
 /**
+ * ibv_query_port - Get port properties
+ */
+extern int ibv_query_port(struct ibv_context *context, uint8_t port_num,
+			  struct ibv_port_attr *port_attr);
+
+/**
  * ibv_alloc_pd - Allocate a protection domain
  */
 extern struct ibv_pd *ibv_alloc_pd(struct ibv_context *context);
@@ -170,6 +503,85 @@ extern struct ibv_mr *ibv_reg_mr(struct ibv_pd *pd, void *addr,
  */
 extern int ibv_dereg_mr(struct ibv_mr *mr);
 
+/**
+ * ibv_create_cq - Create a completion queue
+ */
+extern struct ibv_cq *ibv_create_cq(struct ibv_context *context, int cqe,
+				    void *cq_context);
+
+/**
+ * ibv_destroy_cq - Destroy a completion queue
+ */
+extern int ibv_destroy_cq(struct ibv_cq *cq);
+
+/**
+ * ibv_get_cq_event - Read next CQ event
+ */
+extern int ibv_get_cq_event(struct ibv_context *context, int comp_num,
+			    struct ibv_cq **cq, void **cq_context);
+
+
+/**
+ * ibv_poll_cq - Poll a CQ for work completions
+ *
+ * Forwards to the poll_cq method in cq->context->ops and returns its result.
+ */
+static inline int ibv_poll_cq(struct ibv_cq *cq, int num_entries, struct ibv_wc *wc)
+{
+	return cq->context->ops.poll_cq(cq, num_entries, wc);
+}
+
+/**
+ * ibv_req_notify_cq - Request completion notification on a CQ.
+ *
+ * Forwards to the req_notify_cq method in cq->context->ops and returns its result.
+ */
+static inline int ibv_req_notify_cq(struct ibv_cq *cq, int solicited)
+{
+	return cq->context->ops.req_notify_cq(cq, solicited);
+}
+
+/**
+ * ibv_create_qp - Create a queue pair.
+ */
+extern struct ibv_qp *ibv_create_qp(struct ibv_pd *pd,
+				    struct ibv_qp_init_attr *qp_init_attr);
+
+/**
+ * ibv_modify_qp - Modify a queue pair.
+ */
+extern int ibv_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
+			 enum ibv_qp_attr_mask attr_mask);
+
+/**
+ * ibv_destroy_qp - Destroy a queue pair.
+ */
+extern int ibv_destroy_qp(struct ibv_qp *qp);
+
+/**
+ * ibv_post_send - Post a list of work requests to a send queue.
+ *
+ * Forwards to the post_send method in qp->context->ops and returns its result.
+ */
+static inline int ibv_post_send(struct ibv_qp *qp, struct ibv_send_wr *wr,
+				struct ibv_send_wr **bad_wr)
+{
+	return qp->context->ops.post_send(qp, wr, bad_wr);
+}
+
+/**
+ * ibv_post_recv - Post a list of work requests to a receive queue.
+ *
+ * Forwards to the post_recv method in qp->context->ops and returns its result.
+ */
+static inline int ibv_post_recv(struct ibv_qp *qp, struct ibv_recv_wr *wr,
+				struct ibv_recv_wr **bad_wr)
+{
+	return qp->context->ops.post_recv(qp, wr, bad_wr);
+}
+
+/**
+ * ibv_create_ah - Create an address handle.
+ */
+extern struct ibv_ah *ibv_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr);
+
+/**
+ * ibv_destroy_ah - Destroy an address handle.
+ */
+extern int ibv_destroy_ah(struct ibv_ah *ah);
+
 END_C_DECLS
 
 #endif /* INFINIBAND_VERBS_H */
diff --git a/src/cmd.c b/src/cmd.c
new file mode 100644
index 0000000..44e149f
--- /dev/null
+++ b/src/cmd.c
@@ -0,0 +1,298 @@
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id$
+ */
+
+#if HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <errno.h>
+#include <alloca.h>
+
+#include "ibverbs.h"
+
+/*
+ * Write one command block to the command fd.
+ *
+ * Returns 0 when the whole block was accepted, errno when write()
+ * failed, and EIO on a short write. write() does not set errno for a
+ * short write, so returning errno there could report stale state or
+ * 0 and let the caller read a response that was never filled in.
+ */
+static int ibv_cmd_write(int fd, const void *cmd, size_t cmd_size)
+{
+	ssize_t ret = write(fd, cmd, cmd_size);
+
+	if (ret < 0)
+		return errno;
+	if ((size_t) ret != cmd_size)
+		return EIO;
+
+	return 0;
+}
+
+/*
+ * Issue GET_CONTEXT on context->cmd_fd and store the returned async
+ * fd and the num_comp completion fds in the context.
+ *
+ * Returns 0 on success or a nonzero errno-style code on failure.
+ */
+int ibv_cmd_get_context(int num_comp, struct ibv_context *context,
+			struct ibv_get_context *cmd, size_t cmd_size)
+{
+	struct ibv_get_context_resp *resp;
+	int i;
+	int ret;
+
+	/* The response ends in a cq_fd[] array with one entry per completion fd. */
+	resp = alloca(sizeof *resp + num_comp * sizeof (int));
+	IBV_INIT_CMD_RESP(cmd, cmd_size, GET_CONTEXT, resp);
+
+	ret = ibv_cmd_write(context->cmd_fd, cmd, cmd_size);
+	if (ret)
+		return ret;
+
+	context->async_fd = resp->async_fd;
+	for (i = 0; i < num_comp; ++i)
+		context->cq_fd[i] = resp->cq_fd[i];
+
+	return 0;
+}
+
+/*
+ * Issue QUERY_PORT for port_num and copy every field of the reply
+ * into *port_attr.
+ *
+ * Returns 0 on success or a nonzero errno-style code on failure, in
+ * which case *port_attr is left untouched.
+ */
+int ibv_cmd_query_port(struct ibv_context *context, uint8_t port_num,
+		       struct ibv_port_attr *port_attr,
+		       struct ibv_query_port *cmd, size_t cmd_size)
+{
+	struct ibv_query_port_resp resp;
+	int ret;
+
+	IBV_INIT_CMD_RESP(cmd, cmd_size, QUERY_PORT, &resp);
+	cmd->port_num = port_num;
+
+	ret = ibv_cmd_write(context->cmd_fd, cmd, cmd_size);
+	if (ret)
+		return ret;
+
+	port_attr->state = resp.state;
+	port_attr->max_mtu = resp.max_mtu;
+	port_attr->active_mtu = resp.active_mtu;
+	port_attr->gid_tbl_len = resp.gid_tbl_len;
+	port_attr->port_cap_flags = resp.port_cap_flags;
+	port_attr->max_msg_sz = resp.max_msg_sz;
+	port_attr->bad_pkey_cntr = resp.bad_pkey_cntr;
+	port_attr->qkey_viol_cntr = resp.qkey_viol_cntr;
+	port_attr->pkey_tbl_len = resp.pkey_tbl_len;
+	port_attr->lid = resp.lid;
+	port_attr->sm_lid = resp.sm_lid;
+	port_attr->lmc = resp.lmc;
+	port_attr->max_vl_num = resp.max_vl_num;
+	port_attr->sm_sl = resp.sm_sl;
+	port_attr->subnet_timeout = resp.subnet_timeout;
+	port_attr->init_type_reply = resp.init_type_reply;
+	port_attr->active_width = resp.active_width;
+	port_attr->active_speed = resp.active_speed;
+	port_attr->phys_state = resp.phys_state;
+
+	return 0;
+}
+
+/*
+ * Issue ALLOC_PD and store the returned handle in pd->handle.
+ *
+ * Returns 0 on success or a nonzero errno-style code on failure.
+ */
+int ibv_cmd_alloc_pd(struct ibv_context *context, struct ibv_pd *pd,
+		     struct ibv_alloc_pd *cmd, size_t cmd_size)
+{
+	struct ibv_alloc_pd_resp resp;
+	int ret;
+
+	IBV_INIT_CMD_RESP(cmd, cmd_size, ALLOC_PD, &resp);
+
+	ret = ibv_cmd_write(context->cmd_fd, cmd, cmd_size);
+	if (ret)
+		return ret;
+
+	pd->handle = resp.pd_handle;
+
+	return 0;
+}
+
+/*
+ * Issue DEALLOC_PD for pd->handle on the PD's context.
+ *
+ * Returns 0 on success or a nonzero errno-style code on failure.
+ */
+int ibv_cmd_dealloc_pd(struct ibv_pd *pd)
+{
+	struct ibv_dealloc_pd cmd;
+
+	IBV_INIT_CMD(&cmd, sizeof cmd, DEALLOC_PD);
+	cmd.pd_handle = pd->handle;
+
+	return ibv_cmd_write(pd->context->cmd_fd, &cmd, sizeof cmd);
+}
+
+/*
+ * Issue REG_MR for [addr, addr + length) on pd and store the returned
+ * handle, lkey and rkey in *mr.
+ *
+ * Returns 0 on success or a nonzero errno-style code on failure.
+ */
+int ibv_cmd_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
+		   uint64_t hca_va, enum ibv_access_flags access,
+		   struct ibv_mr *mr, struct ibv_reg_mr *cmd,
+		   size_t cmd_size)
+{
+	struct ibv_reg_mr_resp resp;
+	int ret;
+
+	IBV_INIT_CMD_RESP(cmd, cmd_size, REG_MR, &resp);
+
+	cmd->start = (uintptr_t) addr;
+	cmd->length = length;
+	cmd->hca_va = hca_va;
+	cmd->pd_handle = pd->handle;
+	cmd->access_flags = access;
+
+	ret = ibv_cmd_write(pd->context->cmd_fd, cmd, cmd_size);
+	if (ret)
+		return ret;
+
+	mr->handle = resp.mr_handle;
+	mr->lkey = resp.lkey;
+	mr->rkey = resp.rkey;
+
+	return 0;
+}
+
+/*
+ * Issue DEREG_MR for mr->handle on the MR's context.
+ *
+ * Returns 0 on success or a nonzero errno-style code on failure.
+ */
+int ibv_cmd_dereg_mr(struct ibv_mr *mr)
+{
+	struct ibv_dereg_mr cmd;
+
+	IBV_INIT_CMD(&cmd, sizeof cmd, DEREG_MR);
+	cmd.mr_handle = mr->handle;
+
+	return ibv_cmd_write(mr->context->cmd_fd, &cmd, sizeof cmd);
+}
+
+int ibv_cmd_create_cq(struct ibv_context *context, int cqe,
+		      struct ibv_cq *cq,
+		      struct ibv_create_cq *cmd, size_t cmd_size)
+{
+	struct ibv_create_cq_resp resp;
+
+	IBV_INIT_CMD_RESP(cmd, cmd_size, CREATE_CQ, &resp);
+	cmd->user_handle = (uintptr_t) cq;
+	cmd->cqe = cqe;
+
+	if (write(context->cmd_fd, cmd, cmd_size) != cmd_size)
+		return errno;
+
+	cq->handle = resp.cq_handle;
+	cq->cqe = resp.cqe;
+
+	return
0; +} + +int ibv_cmd_destroy_cq(struct ibv_cq *cq) +{ + struct ibv_destroy_cq cmd; + + IBV_INIT_CMD(&cmd, sizeof cmd, DESTROY_CQ); + cmd.cq_handle = cq->handle; + + if (write(cq->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd) + return errno; + + return 0; +} + +int ibv_cmd_create_qp(struct ibv_pd *pd, + struct ibv_qp *qp, struct ibv_qp_init_attr *attr, + struct ibv_create_qp *cmd, size_t cmd_size) +{ + struct ibv_create_qp_resp resp; + + IBV_INIT_CMD_RESP(cmd, cmd_size, CREATE_QP, &resp); + cmd->user_handle = (uintptr_t) qp; + cmd->pd_handle = pd->handle; + cmd->send_cq_handle = attr->send_cq->handle; + cmd->recv_cq_handle = attr->recv_cq->handle; + cmd->max_send_wr = attr->cap.max_send_wr; + cmd->max_recv_wr = attr->cap.max_recv_wr; + cmd->max_send_sge = attr->cap.max_send_sge; + cmd->max_recv_sge = attr->cap.max_recv_sge; + cmd->max_inline_data = attr->cap.max_inline_data; + cmd->sq_sig_all = attr->sq_sig_all; + cmd->qp_type = attr->qp_type; + cmd->is_srq = 0; + + if (write(pd->context->cmd_fd, cmd, cmd_size) != cmd_size) + return errno; + + qp->handle = resp.qp_handle; + qp->qp_num = resp.qpn; + + return 0; +} + +int ibv_cmd_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, + enum ibv_qp_attr_mask attr_mask, + struct ibv_modify_qp *cmd, size_t cmd_size) +{ + IBV_INIT_CMD(cmd, cmd_size, MODIFY_QP); + + cmd->qp_handle = qp->handle; + cmd->attr_mask = attr_mask; + cmd->qkey = attr->qkey; + cmd->rq_psn = attr->rq_psn; + cmd->sq_psn = attr->sq_psn; + cmd->dest_qp_num = attr->dest_qp_num; + cmd->qp_access_flags = attr->qp_access_flags; + cmd->pkey_index = attr->pkey_index; + cmd->alt_pkey_index = attr->alt_pkey_index; + cmd->qp_state = attr->qp_state; + cmd->cur_qp_state = attr->cur_qp_state; + cmd->path_mtu = attr->path_mtu; + cmd->path_mig_state = attr->path_mig_state; + cmd->en_sqd_async_notify = attr->en_sqd_async_notify; + cmd->max_rd_atomic = attr->max_rd_atomic; + cmd->max_dest_rd_atomic = attr->max_dest_rd_atomic; + cmd->min_rnr_timer = 
attr->min_rnr_timer; + cmd->port_num = attr->port_num; + cmd->timeout = attr->timeout; + cmd->retry_cnt = attr->retry_cnt; + cmd->rnr_retry = attr->rnr_retry; + cmd->alt_port_num = attr->alt_port_num; + cmd->alt_timeout = attr->alt_timeout; + + memcpy(cmd->dest.dgid, attr->ah_attr.grh.dgid.raw, 16); + cmd->dest.flow_label = attr->ah_attr.grh.flow_label; + cmd->dest.dlid = attr->ah_attr.dlid; + cmd->dest.sgid_index = attr->ah_attr.grh.sgid_index; + cmd->dest.hop_limit = attr->ah_attr.grh.hop_limit; + cmd->dest.traffic_class = attr->ah_attr.grh.traffic_class; + cmd->dest.sl = attr->ah_attr.sl; + cmd->dest.src_path_bits = attr->ah_attr.src_path_bits; + cmd->dest.static_rate = attr->ah_attr.static_rate; + cmd->dest.is_global = attr->ah_attr.is_global; + cmd->dest.port_num = attr->ah_attr.port_num; + + memcpy(cmd->alt_dest.dgid, attr->alt_ah_attr.grh.dgid.raw, 16); + cmd->alt_dest.flow_label = attr->alt_ah_attr.grh.flow_label; + cmd->alt_dest.dlid = attr->alt_ah_attr.dlid; + cmd->alt_dest.sgid_index = attr->alt_ah_attr.grh.sgid_index; + cmd->alt_dest.hop_limit = attr->alt_ah_attr.grh.hop_limit; + cmd->alt_dest.traffic_class = attr->alt_ah_attr.grh.traffic_class; + cmd->alt_dest.sl = attr->alt_ah_attr.sl; + cmd->alt_dest.src_path_bits = attr->alt_ah_attr.src_path_bits; + cmd->alt_dest.static_rate = attr->alt_ah_attr.static_rate; + cmd->alt_dest.is_global = attr->alt_ah_attr.is_global; + cmd->alt_dest.port_num = attr->alt_ah_attr.port_num; + + if (write(qp->context->cmd_fd, cmd, cmd_size) != cmd_size) + return errno; + + return 0; +} + +int ibv_cmd_destroy_qp(struct ibv_qp *qp) +{ + struct ibv_destroy_qp cmd; + + IBV_INIT_CMD(&cmd, sizeof cmd, DESTROY_QP); + cmd.qp_handle = qp->handle; + + if (write(qp->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd) + return errno; + + return 0; +} diff --git a/src/device.c b/src/device.c index cf32602..3b38b85 100644 --- a/src/device.c +++ b/src/device.c @@ -78,65 +78,39 @@ uint64_t ibv_get_device_guid(struct ibv_device *device) 
struct ibv_context *ibv_open_device(struct ibv_device *device) { - struct ibv_context *context, *tmp; char *devpath; - struct ibv_get_context context_cmd; - struct ibv_get_context_resp context_resp; - struct ibv_get_event_fds event_fds_cmd; - struct ibv_get_event_fds_resp *event_fds_resp; - int i; - - context = malloc(sizeof *context); - if (!context) - return NULL; - - context->device = device; + int cmd_fd; + struct ibv_context *context; + struct ibv_query_params cmd; + struct ibv_query_params_resp resp; asprintf(&devpath, "/dev/infiniband/%s", device->dev->name); - context->cmd_fd = open(devpath, O_WRONLY); - - if (context->cmd_fd < 0) - goto err; - - context_cmd.command = IB_USER_VERBS_CMD_GET_CONTEXT; - context_cmd.in_words = sizeof context_cmd / 4; - context_cmd.out_words = sizeof context_resp / 4; - context_cmd.response = (unsigned long) &context_resp; - - if (write(context->cmd_fd, &context_cmd, sizeof context_cmd) != sizeof context_cmd) - goto err_close; - - context->num_comp = context_resp.num_cq_events; - if (context->num_comp > 1) { - tmp = realloc(context, sizeof *context + context->num_comp * sizeof (int)); - if (!tmp) - goto err_close; - context = tmp; - } - - event_fds_resp = alloca(sizeof *event_fds_resp + context->num_comp * 4); + /* + * We'll only be doing writes, but we need O_RDWR in case the + * provider needs to mmap() the file. 
+ */ + cmd_fd = open(devpath, O_RDWR); + if (cmd_fd < 0) + return NULL; - event_fds_cmd.command = IB_USER_VERBS_CMD_GET_EVENT_FDS; - event_fds_cmd.in_words = sizeof event_fds_cmd / 4; - event_fds_cmd.out_words = sizeof *event_fds_resp / 4 + context->num_comp; - event_fds_cmd.response = (unsigned long) event_fds_resp; + IBV_INIT_CMD_RESP(&cmd, sizeof cmd, QUERY_PARAMS, &resp); + if (write(cmd_fd, &cmd, sizeof cmd) != sizeof cmd) + goto err; - if (write(context->cmd_fd, &event_fds_cmd, sizeof event_fds_cmd) != - sizeof event_fds_cmd) - goto err_close; + context = device->ops.alloc_context(device, resp.num_cq_events, cmd_fd); + if (!context) + goto err; - context->async_fd = event_fds_resp->async_fd; - for (i = 0; i < context->num_comp; ++i) - context->cq_fd[i] = event_fds_resp->cq_fd[i]; + context->device = device; + context->cmd_fd = cmd_fd; + context->num_comp = resp.num_cq_events; return context; -err_close: - close(context->cmd_fd); - err: - free(context); + close(cmd_fd); + return NULL; } @@ -149,7 +123,7 @@ int ibv_close_device(struct ibv_context *context) close(context->cq_fd[i]); close(context->cmd_fd); - free(context); + context->device->ops.free_context(context); return 0; } @@ -159,9 +133,7 @@ int ibv_get_async_event(struct ibv_context *context, { struct ibv_kern_async_event ev; - int ret = read(context->async_fd, &ev, sizeof ev); - - if (ret != sizeof ev) + if (read(context->async_fd, &ev, sizeof ev) != sizeof ev) return -1; /* XXX convert CQ/QP handles back to pointers */ diff --git a/src/ibverbs.h b/src/ibverbs.h index 678dec9..0d0fec1 100644 --- a/src/ibverbs.h +++ b/src/ibverbs.h @@ -37,11 +37,8 @@ #include <pthread.h> -#include <infiniband/verbs.h> #include <infiniband/driver.h> -#include "kern_abi.h" - #define HIDDEN __attribute__((visibility ("hidden"))) #define INIT __attribute__((constructor)) @@ -59,4 +56,19 @@ extern int ibv_init_mem_map(void); extern int ibv_lock_range(void *base, size_t size); extern int ibv_unlock_range(void *base, size_t 
size); +#define IBV_INIT_CMD(cmd, size, opcode) \ + do { \ + (cmd)->command = IB_USER_VERBS_CMD_##opcode; \ + (cmd)->in_words = (size) / 4; \ + (cmd)->out_words = 0; \ + } while (0) + +#define IBV_INIT_CMD_RESP(cmd, size, opcode, out) \ + do { \ + (cmd)->command = IB_USER_VERBS_CMD_##opcode; \ + (cmd)->in_words = (size) / 4; \ + (cmd)->out_words = sizeof (*(out)) / 4; \ + (cmd)->response = (uintptr_t) (out); \ + } while (0) + #endif /* IB_VERBS_H */ @@ -74,7 +74,8 @@ static void load_driver(char *so_path) driver = malloc(sizeof *driver); if (!driver) { fprintf(stderr, PFX "Fatal: couldn't allocate driver for %s\n", so_path); - abort(); + dlclose(dlhandle); + return; } driver->init_func = init_func; @@ -148,7 +149,7 @@ static void init_drivers(struct sysfs_class_device *verbs_dev) fprintf(stderr, PFX "Warning: no driver for %s\n", verbs_dev->name); } -static void check_abi_version(void) +static int check_abi_version(void) { char path[256]; char val[16]; @@ -156,14 +157,14 @@ static void check_abi_version(void) if (sysfs_get_mnt_path(path, sizeof path)) { fprintf(stderr, PFX "Fatal: couldn't find sysfs mount.\n"); - abort(); + return -1; } strncat(path, "/class/infiniband_verbs/abi_version", sizeof path); if (sysfs_read_attribute_value(path, val, sizeof val)) { fprintf(stderr, PFX "Fatal: couldn't read uverbs ABI version.\n"); - abort(); + return -1; } ver = strtol(val, NULL, 10); @@ -172,8 +173,10 @@ static void check_abi_version(void) fprintf(stderr, PFX "Fatal: kernel ABI version %d " "doesn't match library version %d.\n", ver, IB_USER_VERBS_ABI_VERSION); - abort(); + return -1; } + + return 0; } @@ -185,15 +188,15 @@ static void INIT ibverbs_init(void) Dlist *verbs_dev_list; struct sysfs_class_device *verbs_dev; - check_abi_version(); - - if (ibv_init_mem_map()) - abort(); - driver_list = dlist_new(sizeof (struct ibv_driver)); device_list = dlist_new(sizeof (struct ibv_device)); - if (!driver_list || !device_list) + if (!driver_list || !device_list) { + 
fprintf(stderr, PFX "Fatal: couldn't allocate device/driver list.\n"); abort(); + } + + if (ibv_init_mem_map()) + return; user_path = getenv(OPENIB_DRIVER_PATH_ENV); if (user_path) { @@ -207,13 +210,16 @@ static void INIT ibverbs_init(void) cls = sysfs_open_class("infiniband_verbs"); if (!cls) { fprintf(stderr, PFX "Fatal: couldn't open infiniband sysfs class.\n"); - abort(); + return; } + if (check_abi_version()) + return; + verbs_dev_list = sysfs_get_class_devices(cls); if (!verbs_dev_list) { fprintf(stderr, PFX "Fatal: no infiniband class devices found.\n"); - abort(); + return; } dlist_for_each_data(verbs_dev_list, verbs_dev, struct sysfs_class_device) diff --git a/src/libibverbs.map b/src/libibverbs.map index 941b2bf..b0690eb 100644 --- a/src/libibverbs.map +++ b/src/libibverbs.map @@ -6,9 +6,29 @@ IBVERBS_1.0 { ibv_open_device; ibv_close_device; ibv_get_async_event; + ibv_query_port; ibv_alloc_pd; ibv_dealloc_pd; ibv_reg_mr; ibv_dereg_mr; + ibv_create_cq; + ibv_destroy_cq; + ibv_get_cq_event; + ibv_create_qp; + ibv_modify_qp; + ibv_destroy_qp; + ibv_create_ah; + ibv_destroy_ah; + ibv_cmd_get_context; + ibv_cmd_query_port; + ibv_cmd_alloc_pd; + ibv_cmd_dealloc_pd; + ibv_cmd_reg_mr; + ibv_cmd_dereg_mr; + ibv_cmd_create_cq; + ibv_cmd_destroy_cq; + ibv_cmd_create_qp; + ibv_cmd_modify_qp; + ibv_cmd_destroy_qp; local: *; }; diff --git a/src/verbs.c b/src/verbs.c new file mode 100644 index 0000000..087de30 --- /dev/null +++ b/src/verbs.c @@ -0,0 +1,175 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id$ + */ + +#if HAVE_CONFIG_H +# include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <stdio.h> +#include <unistd.h> +#include <errno.h> + +#include "ibverbs.h" + +int ibv_query_port(struct ibv_context *context, uint8_t port_num, + struct ibv_port_attr *port_attr) +{ + return context->ops.query_port(context, port_num, port_attr); +} + +struct ibv_pd *ibv_alloc_pd(struct ibv_context *context) +{ + struct ibv_pd *pd; + + pd = context->ops.alloc_pd(context); + if (pd) + pd->context = context; + + return pd; +} + +int ibv_dealloc_pd(struct ibv_pd *pd) +{ + return pd->context->ops.dealloc_pd(pd); +} + +struct ibv_mr *ibv_reg_mr(struct ibv_pd *pd, void *addr, + size_t length, enum ibv_access_flags access) +{ + struct ibv_mr *mr; + + mr = pd->context->ops.reg_mr(pd, addr, length, access); + if (mr) { + mr->context = pd->context; + mr->pd = pd; + } + + return mr; +} + +int ibv_dereg_mr(struct ibv_mr *mr) +{ + return mr->context->ops.dereg_mr(mr); +} + +struct ibv_cq *ibv_create_cq(struct ibv_context *context, int cqe, + void *cq_context) +{ + struct ibv_cq *cq = context->ops.create_cq(context, cqe); + + if (cq) { + cq->context = context; + cq->cq_context = cq_context; + } + + return cq; +} + +int ibv_destroy_cq(struct ibv_cq *cq) +{ + return cq->context->ops.destroy_cq(cq); +} + + +int ibv_get_cq_event(struct ibv_context *context, int comp_num, + struct ibv_cq **cq, void **cq_context) +{ + struct ibv_comp_event ev; + + if (comp_num < 0 || comp_num >= context->num_comp) + return -1; + + if (read(context->cq_fd[comp_num], &ev, sizeof ev) != sizeof ev) + return -1; + + *cq = (struct ibv_cq *) (uintptr_t) ev.cq_handle; + *cq_context = (*cq)->cq_context; + + if ((*cq)->context->ops.cq_event) + (*cq)->context->ops.cq_event(*cq); + + return 0; +} + +struct ibv_qp *ibv_create_qp(struct ibv_pd *pd, + struct ibv_qp_init_attr *qp_init_attr) +{ + struct ibv_qp *qp = pd->context->ops.create_qp(pd, qp_init_attr); + + if (qp) { + qp->context = pd->context; + qp->qp_context 
= qp_init_attr->qp_context; + qp->pd = pd; + qp->send_cq = qp_init_attr->send_cq; + qp->recv_cq = qp_init_attr->recv_cq; + } + + return qp; +} +int ibv_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, + enum ibv_qp_attr_mask attr_mask) +{ + int ret; + + ret = qp->context->ops.modify_qp(qp, attr, attr_mask); + if (ret) + return ret; + + if (attr_mask & IBV_QP_STATE) + qp->state = attr->qp_state; + + return 0; +} + +int ibv_destroy_qp(struct ibv_qp *qp) +{ + return qp->context->ops.destroy_qp(qp); +} + +struct ibv_ah *ibv_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr) +{ + struct ibv_ah *ah = pd->context->ops.create_ah(pd, attr); + + if (ah) { + ah->context = pd->context; + ah->pd = pd; + } + + return ah; +} + +int ibv_destroy_ah(struct ibv_ah *ah) +{ + return ah->context->ops.destroy_ah(ah); +} |