aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorStephen Rothwell <sfr@canb.auug.org.au>2021-09-30 11:34:07 +1000
committerStephen Rothwell <sfr@canb.auug.org.au>2021-09-30 11:34:07 +1000
commit88bdd2a7e3bbfc4154904eaf7102cbed2cb59b28 (patch)
treed83763db578e82fd38be25ced4d8102130fe61f7
parent1bf79833be18b4278cffbec81aecd9376c823339 (diff)
parentd30ef6d5c013c19e907f2a3a3d6eee04fcd3de0d (diff)
downloaddevel-88bdd2a7e3bbfc4154904eaf7102cbed2cb59b28.tar.gz
Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git
# Conflicts: # drivers/infiniband/hw/hfi1/ipoib_tx.c
-rw-r--r--drivers/infiniband/hw/bnxt_re/bnxt_re.h19
-rw-r--r--drivers/infiniband/hw/bnxt_re/hw_counters.c277
-rw-r--r--drivers/infiniband/hw/bnxt_re/hw_counters.h30
-rw-r--r--drivers/infiniband/hw/bnxt_re/ib_verbs.c35
-rw-r--r--drivers/infiniband/hw/bnxt_re/main.c13
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_fp.c15
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_rcfw.c6
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_rcfw.h2
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_res.c5
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_res.h9
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_sp.c51
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_sp.h28
-rw-r--r--drivers/infiniband/hw/bnxt_re/roce_hsi.h85
-rw-r--r--drivers/infiniband/hw/hfi1/ipoib.h76
-rw-r--r--drivers/infiniband/hw/hfi1/ipoib_tx.c314
-rw-r--r--drivers/infiniband/hw/hfi1/trace_tx.h71
-rw-r--r--drivers/infiniband/hw/hfi1/user_exp_rcv.c5
-rw-r--r--drivers/infiniband/hw/mlx5/cmd.c26
-rw-r--r--drivers/infiniband/hw/mlx5/cmd.h2
-rw-r--r--drivers/infiniband/hw/mlx5/main.c55
-rw-r--r--drivers/infiniband/sw/rxe/rxe_comp.c12
-rw-r--r--drivers/infiniband/sw/rxe/rxe_cq.c25
-rw-r--r--drivers/infiniband/sw/rxe/rxe_loc.h2
-rw-r--r--drivers/infiniband/sw/rxe/rxe_mr.c267
-rw-r--r--drivers/infiniband/sw/rxe/rxe_mw.c36
-rw-r--r--drivers/infiniband/sw/rxe/rxe_opcode.h6
-rw-r--r--drivers/infiniband/sw/rxe/rxe_param.h2
-rw-r--r--drivers/infiniband/sw/rxe/rxe_qp.c14
-rw-r--r--drivers/infiniband/sw/rxe/rxe_queue.c30
-rw-r--r--drivers/infiniband/sw/rxe/rxe_queue.h292
-rw-r--r--drivers/infiniband/sw/rxe/rxe_req.c57
-rw-r--r--drivers/infiniband/sw/rxe/rxe_resp.c46
-rw-r--r--drivers/infiniband/sw/rxe/rxe_srq.c2
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.c97
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.h49
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/uar.c14
-rw-r--r--include/linux/mlx5/driver.h2
-rw-r--r--include/linux/mlx5/mlx5_ifc.h4
38 files changed, 1204 insertions, 877 deletions
diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
index ba26d8e6a9c2ef..79401e6c6aa9c8 100644
--- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h
+++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
@@ -39,22 +39,13 @@
#ifndef __BNXT_RE_H__
#define __BNXT_RE_H__
+#include "hw_counters.h"
#define ROCE_DRV_MODULE_NAME "bnxt_re"
#define BNXT_RE_DESC "Broadcom NetXtreme-C/E RoCE Driver"
-#define BNXT_RE_PAGE_SHIFT_4K (12)
-#define BNXT_RE_PAGE_SHIFT_8K (13)
-#define BNXT_RE_PAGE_SHIFT_64K (16)
-#define BNXT_RE_PAGE_SHIFT_2M (21)
-#define BNXT_RE_PAGE_SHIFT_8M (23)
-#define BNXT_RE_PAGE_SHIFT_1G (30)
-#define BNXT_RE_PAGE_SIZE_4K BIT(BNXT_RE_PAGE_SHIFT_4K)
-#define BNXT_RE_PAGE_SIZE_8K BIT(BNXT_RE_PAGE_SHIFT_8K)
-#define BNXT_RE_PAGE_SIZE_64K BIT(BNXT_RE_PAGE_SHIFT_64K)
-#define BNXT_RE_PAGE_SIZE_2M BIT(BNXT_RE_PAGE_SHIFT_2M)
-#define BNXT_RE_PAGE_SIZE_8M BIT(BNXT_RE_PAGE_SHIFT_8M)
-#define BNXT_RE_PAGE_SIZE_1G BIT(BNXT_RE_PAGE_SHIFT_1G)
+#define BNXT_RE_PAGE_SHIFT_1G (30)
+#define BNXT_RE_PAGE_SIZE_SUPPORTED 0x7FFFF000 /* 4kb - 1G */
#define BNXT_RE_MAX_MR_SIZE_LOW BIT_ULL(BNXT_RE_PAGE_SHIFT_1G)
#define BNXT_RE_MAX_MR_SIZE_HIGH BIT_ULL(39)
@@ -177,15 +168,17 @@ struct bnxt_re_dev {
atomic_t srq_count;
atomic_t mr_count;
atomic_t mw_count;
+ atomic_t ah_count;
+ atomic_t pd_count;
/* Max of 2 lossless traffic class supported per port */
u16 cosq[2];
/* QP for for handling QP1 packets */
struct bnxt_re_gsi_context gsi_ctx;
+ struct bnxt_re_stats stats;
atomic_t nq_alloc_cnt;
u32 is_virtfn;
u32 num_vfs;
- struct bnxt_qplib_roce_stats stats;
};
#define to_bnxt_re_dev(ptr, member) \
diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.c b/drivers/infiniband/hw/bnxt_re/hw_counters.c
index 7ba07797845c03..1c06c9cf234ba6 100644
--- a/drivers/infiniband/hw/bnxt_re/hw_counters.c
+++ b/drivers/infiniband/hw/bnxt_re/hw_counters.c
@@ -58,6 +58,8 @@
#include "hw_counters.h"
static const char * const bnxt_re_stat_name[] = {
+ [BNXT_RE_ACTIVE_PD] = "active_pds",
+ [BNXT_RE_ACTIVE_AH] = "active_ahs",
[BNXT_RE_ACTIVE_QP] = "active_qps",
[BNXT_RE_ACTIVE_SRQ] = "active_srqs",
[BNXT_RE_ACTIVE_CQ] = "active_cqs",
@@ -68,7 +70,7 @@ static const char * const bnxt_re_stat_name[] = {
[BNXT_RE_TX_PKTS] = "tx_pkts",
[BNXT_RE_TX_BYTES] = "tx_bytes",
[BNXT_RE_RECOVERABLE_ERRORS] = "recoverable_errors",
- [BNXT_RE_RX_DROPS] = "rx_roce_drops",
+ [BNXT_RE_RX_ERRORS] = "rx_roce_errors",
[BNXT_RE_RX_DISCARDS] = "rx_roce_discards",
[BNXT_RE_TO_RETRANSMITS] = "to_retransmits",
[BNXT_RE_SEQ_ERR_NAKS_RCVD] = "seq_err_naks_rcvd",
@@ -109,17 +111,154 @@ static const char * const bnxt_re_stat_name[] = {
[BNXT_RE_RES_SRQ_LOAD_ERR] = "res_srq_load_err",
[BNXT_RE_RES_TX_PCI_ERR] = "res_tx_pci_err",
[BNXT_RE_RES_RX_PCI_ERR] = "res_rx_pci_err",
- [BNXT_RE_OUT_OF_SEQ_ERR] = "oos_drop_count"
+ [BNXT_RE_OUT_OF_SEQ_ERR] = "oos_drop_count",
+ [BNXT_RE_TX_ATOMIC_REQ] = "tx_atomic_req",
+ [BNXT_RE_TX_READ_REQ] = "tx_read_req",
+ [BNXT_RE_TX_READ_RES] = "tx_read_resp",
+ [BNXT_RE_TX_WRITE_REQ] = "tx_write_req",
+ [BNXT_RE_TX_SEND_REQ] = "tx_send_req",
+ [BNXT_RE_RX_ATOMIC_REQ] = "rx_atomic_req",
+ [BNXT_RE_RX_READ_REQ] = "rx_read_req",
+ [BNXT_RE_RX_READ_RESP] = "rx_read_resp",
+ [BNXT_RE_RX_WRITE_REQ] = "rx_write_req",
+ [BNXT_RE_RX_SEND_REQ] = "rx_send_req",
+ [BNXT_RE_RX_ROCE_GOOD_PKTS] = "rx_roce_good_pkts",
+ [BNXT_RE_RX_ROCE_GOOD_BYTES] = "rx_roce_good_bytes",
+ [BNXT_RE_OOB] = "rx_out_of_buffer"
};
+static void bnxt_re_copy_ext_stats(struct bnxt_re_dev *rdev,
+ struct rdma_hw_stats *stats,
+ struct bnxt_qplib_ext_stat *s)
+{
+ stats->value[BNXT_RE_TX_ATOMIC_REQ] = s->tx_atomic_req;
+ stats->value[BNXT_RE_TX_READ_REQ] = s->tx_read_req;
+ stats->value[BNXT_RE_TX_READ_RES] = s->tx_read_res;
+ stats->value[BNXT_RE_TX_WRITE_REQ] = s->tx_write_req;
+ stats->value[BNXT_RE_TX_SEND_REQ] = s->tx_send_req;
+ stats->value[BNXT_RE_RX_ATOMIC_REQ] = s->rx_atomic_req;
+ stats->value[BNXT_RE_RX_READ_REQ] = s->rx_read_req;
+ stats->value[BNXT_RE_RX_READ_RESP] = s->rx_read_res;
+ stats->value[BNXT_RE_RX_WRITE_REQ] = s->rx_write_req;
+ stats->value[BNXT_RE_RX_SEND_REQ] = s->rx_send_req;
+ stats->value[BNXT_RE_RX_ROCE_GOOD_PKTS] = s->rx_roce_good_pkts;
+ stats->value[BNXT_RE_RX_ROCE_GOOD_BYTES] = s->rx_roce_good_bytes;
+ stats->value[BNXT_RE_OOB] = s->rx_out_of_buffer;
+}
+
+static int bnxt_re_get_ext_stat(struct bnxt_re_dev *rdev,
+ struct rdma_hw_stats *stats)
+{
+ struct bnxt_qplib_ext_stat *estat = &rdev->stats.rstat.ext_stat;
+ u32 fid;
+ int rc;
+
+ fid = PCI_FUNC(rdev->en_dev->pdev->devfn);
+ rc = bnxt_qplib_qext_stat(&rdev->rcfw, fid, estat);
+ if (rc)
+ goto done;
+ bnxt_re_copy_ext_stats(rdev, stats, estat);
+
+done:
+ return rc;
+}
+
+static void bnxt_re_copy_err_stats(struct bnxt_re_dev *rdev,
+ struct rdma_hw_stats *stats,
+ struct bnxt_qplib_roce_stats *err_s)
+{
+ stats->value[BNXT_RE_TO_RETRANSMITS] =
+ err_s->to_retransmits;
+ stats->value[BNXT_RE_SEQ_ERR_NAKS_RCVD] =
+ err_s->seq_err_naks_rcvd;
+ stats->value[BNXT_RE_MAX_RETRY_EXCEEDED] =
+ err_s->max_retry_exceeded;
+ stats->value[BNXT_RE_RNR_NAKS_RCVD] =
+ err_s->rnr_naks_rcvd;
+ stats->value[BNXT_RE_MISSING_RESP] =
+ err_s->missing_resp;
+ stats->value[BNXT_RE_UNRECOVERABLE_ERR] =
+ err_s->unrecoverable_err;
+ stats->value[BNXT_RE_BAD_RESP_ERR] =
+ err_s->bad_resp_err;
+ stats->value[BNXT_RE_LOCAL_QP_OP_ERR] =
+ err_s->local_qp_op_err;
+ stats->value[BNXT_RE_LOCAL_PROTECTION_ERR] =
+ err_s->local_protection_err;
+ stats->value[BNXT_RE_MEM_MGMT_OP_ERR] =
+ err_s->mem_mgmt_op_err;
+ stats->value[BNXT_RE_REMOTE_INVALID_REQ_ERR] =
+ err_s->remote_invalid_req_err;
+ stats->value[BNXT_RE_REMOTE_ACCESS_ERR] =
+ err_s->remote_access_err;
+ stats->value[BNXT_RE_REMOTE_OP_ERR] =
+ err_s->remote_op_err;
+ stats->value[BNXT_RE_DUP_REQ] =
+ err_s->dup_req;
+ stats->value[BNXT_RE_RES_EXCEED_MAX] =
+ err_s->res_exceed_max;
+ stats->value[BNXT_RE_RES_LENGTH_MISMATCH] =
+ err_s->res_length_mismatch;
+ stats->value[BNXT_RE_RES_EXCEEDS_WQE] =
+ err_s->res_exceeds_wqe;
+ stats->value[BNXT_RE_RES_OPCODE_ERR] =
+ err_s->res_opcode_err;
+ stats->value[BNXT_RE_RES_RX_INVALID_RKEY] =
+ err_s->res_rx_invalid_rkey;
+ stats->value[BNXT_RE_RES_RX_DOMAIN_ERR] =
+ err_s->res_rx_domain_err;
+ stats->value[BNXT_RE_RES_RX_NO_PERM] =
+ err_s->res_rx_no_perm;
+ stats->value[BNXT_RE_RES_RX_RANGE_ERR] =
+ err_s->res_rx_range_err;
+ stats->value[BNXT_RE_RES_TX_INVALID_RKEY] =
+ err_s->res_tx_invalid_rkey;
+ stats->value[BNXT_RE_RES_TX_DOMAIN_ERR] =
+ err_s->res_tx_domain_err;
+ stats->value[BNXT_RE_RES_TX_NO_PERM] =
+ err_s->res_tx_no_perm;
+ stats->value[BNXT_RE_RES_TX_RANGE_ERR] =
+ err_s->res_tx_range_err;
+ stats->value[BNXT_RE_RES_IRRQ_OFLOW] =
+ err_s->res_irrq_oflow;
+ stats->value[BNXT_RE_RES_UNSUP_OPCODE] =
+ err_s->res_unsup_opcode;
+ stats->value[BNXT_RE_RES_UNALIGNED_ATOMIC] =
+ err_s->res_unaligned_atomic;
+ stats->value[BNXT_RE_RES_REM_INV_ERR] =
+ err_s->res_rem_inv_err;
+ stats->value[BNXT_RE_RES_MEM_ERROR] =
+ err_s->res_mem_error;
+ stats->value[BNXT_RE_RES_SRQ_ERR] =
+ err_s->res_srq_err;
+ stats->value[BNXT_RE_RES_CMP_ERR] =
+ err_s->res_cmp_err;
+ stats->value[BNXT_RE_RES_INVALID_DUP_RKEY] =
+ err_s->res_invalid_dup_rkey;
+ stats->value[BNXT_RE_RES_WQE_FORMAT_ERR] =
+ err_s->res_wqe_format_err;
+ stats->value[BNXT_RE_RES_CQ_LOAD_ERR] =
+ err_s->res_cq_load_err;
+ stats->value[BNXT_RE_RES_SRQ_LOAD_ERR] =
+ err_s->res_srq_load_err;
+ stats->value[BNXT_RE_RES_TX_PCI_ERR] =
+ err_s->res_tx_pci_err;
+ stats->value[BNXT_RE_RES_RX_PCI_ERR] =
+ err_s->res_rx_pci_err;
+ stats->value[BNXT_RE_OUT_OF_SEQ_ERR] =
+ err_s->res_oos_drop_count;
+}
+
int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev,
struct rdma_hw_stats *stats,
u32 port, int index)
{
struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
- struct ctx_hw_stats *bnxt_re_stats = rdev->qplib_ctx.stats.dma;
+ struct ctx_hw_stats *hw_stats = NULL;
+ struct bnxt_qplib_roce_stats *err_s = NULL;
int rc = 0;
+ hw_stats = rdev->qplib_ctx.stats.dma;
if (!port || !stats)
return -EINVAL;
@@ -128,118 +267,62 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev,
stats->value[BNXT_RE_ACTIVE_CQ] = atomic_read(&rdev->cq_count);
stats->value[BNXT_RE_ACTIVE_MR] = atomic_read(&rdev->mr_count);
stats->value[BNXT_RE_ACTIVE_MW] = atomic_read(&rdev->mw_count);
- if (bnxt_re_stats) {
+ stats->value[BNXT_RE_ACTIVE_PD] = atomic_read(&rdev->pd_count);
+ stats->value[BNXT_RE_ACTIVE_AH] = atomic_read(&rdev->ah_count);
+
+ if (hw_stats) {
stats->value[BNXT_RE_RECOVERABLE_ERRORS] =
- le64_to_cpu(bnxt_re_stats->tx_bcast_pkts);
- stats->value[BNXT_RE_RX_DROPS] =
- le64_to_cpu(bnxt_re_stats->rx_error_pkts);
+ le64_to_cpu(hw_stats->tx_bcast_pkts);
+ stats->value[BNXT_RE_RX_ERRORS] =
+ le64_to_cpu(hw_stats->rx_error_pkts);
stats->value[BNXT_RE_RX_DISCARDS] =
- le64_to_cpu(bnxt_re_stats->rx_discard_pkts);
+ le64_to_cpu(hw_stats->rx_discard_pkts);
stats->value[BNXT_RE_RX_PKTS] =
- le64_to_cpu(bnxt_re_stats->rx_ucast_pkts);
+ le64_to_cpu(hw_stats->rx_ucast_pkts);
stats->value[BNXT_RE_RX_BYTES] =
- le64_to_cpu(bnxt_re_stats->rx_ucast_bytes);
+ le64_to_cpu(hw_stats->rx_ucast_bytes);
stats->value[BNXT_RE_TX_PKTS] =
- le64_to_cpu(bnxt_re_stats->tx_ucast_pkts);
+ le64_to_cpu(hw_stats->tx_ucast_pkts);
stats->value[BNXT_RE_TX_BYTES] =
- le64_to_cpu(bnxt_re_stats->tx_ucast_bytes);
+ le64_to_cpu(hw_stats->tx_ucast_bytes);
}
+ err_s = &rdev->stats.rstat.errs;
if (test_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags)) {
- rc = bnxt_qplib_get_roce_stats(&rdev->rcfw, &rdev->stats);
- if (rc)
+ rc = bnxt_qplib_get_roce_stats(&rdev->rcfw, err_s);
+ if (rc) {
clear_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS,
&rdev->flags);
- stats->value[BNXT_RE_TO_RETRANSMITS] =
- rdev->stats.to_retransmits;
- stats->value[BNXT_RE_SEQ_ERR_NAKS_RCVD] =
- rdev->stats.seq_err_naks_rcvd;
- stats->value[BNXT_RE_MAX_RETRY_EXCEEDED] =
- rdev->stats.max_retry_exceeded;
- stats->value[BNXT_RE_RNR_NAKS_RCVD] =
- rdev->stats.rnr_naks_rcvd;
- stats->value[BNXT_RE_MISSING_RESP] =
- rdev->stats.missing_resp;
- stats->value[BNXT_RE_UNRECOVERABLE_ERR] =
- rdev->stats.unrecoverable_err;
- stats->value[BNXT_RE_BAD_RESP_ERR] =
- rdev->stats.bad_resp_err;
- stats->value[BNXT_RE_LOCAL_QP_OP_ERR] =
- rdev->stats.local_qp_op_err;
- stats->value[BNXT_RE_LOCAL_PROTECTION_ERR] =
- rdev->stats.local_protection_err;
- stats->value[BNXT_RE_MEM_MGMT_OP_ERR] =
- rdev->stats.mem_mgmt_op_err;
- stats->value[BNXT_RE_REMOTE_INVALID_REQ_ERR] =
- rdev->stats.remote_invalid_req_err;
- stats->value[BNXT_RE_REMOTE_ACCESS_ERR] =
- rdev->stats.remote_access_err;
- stats->value[BNXT_RE_REMOTE_OP_ERR] =
- rdev->stats.remote_op_err;
- stats->value[BNXT_RE_DUP_REQ] =
- rdev->stats.dup_req;
- stats->value[BNXT_RE_RES_EXCEED_MAX] =
- rdev->stats.res_exceed_max;
- stats->value[BNXT_RE_RES_LENGTH_MISMATCH] =
- rdev->stats.res_length_mismatch;
- stats->value[BNXT_RE_RES_EXCEEDS_WQE] =
- rdev->stats.res_exceeds_wqe;
- stats->value[BNXT_RE_RES_OPCODE_ERR] =
- rdev->stats.res_opcode_err;
- stats->value[BNXT_RE_RES_RX_INVALID_RKEY] =
- rdev->stats.res_rx_invalid_rkey;
- stats->value[BNXT_RE_RES_RX_DOMAIN_ERR] =
- rdev->stats.res_rx_domain_err;
- stats->value[BNXT_RE_RES_RX_NO_PERM] =
- rdev->stats.res_rx_no_perm;
- stats->value[BNXT_RE_RES_RX_RANGE_ERR] =
- rdev->stats.res_rx_range_err;
- stats->value[BNXT_RE_RES_TX_INVALID_RKEY] =
- rdev->stats.res_tx_invalid_rkey;
- stats->value[BNXT_RE_RES_TX_DOMAIN_ERR] =
- rdev->stats.res_tx_domain_err;
- stats->value[BNXT_RE_RES_TX_NO_PERM] =
- rdev->stats.res_tx_no_perm;
- stats->value[BNXT_RE_RES_TX_RANGE_ERR] =
- rdev->stats.res_tx_range_err;
- stats->value[BNXT_RE_RES_IRRQ_OFLOW] =
- rdev->stats.res_irrq_oflow;
- stats->value[BNXT_RE_RES_UNSUP_OPCODE] =
- rdev->stats.res_unsup_opcode;
- stats->value[BNXT_RE_RES_UNALIGNED_ATOMIC] =
- rdev->stats.res_unaligned_atomic;
- stats->value[BNXT_RE_RES_REM_INV_ERR] =
- rdev->stats.res_rem_inv_err;
- stats->value[BNXT_RE_RES_MEM_ERROR] =
- rdev->stats.res_mem_error;
- stats->value[BNXT_RE_RES_SRQ_ERR] =
- rdev->stats.res_srq_err;
- stats->value[BNXT_RE_RES_CMP_ERR] =
- rdev->stats.res_cmp_err;
- stats->value[BNXT_RE_RES_INVALID_DUP_RKEY] =
- rdev->stats.res_invalid_dup_rkey;
- stats->value[BNXT_RE_RES_WQE_FORMAT_ERR] =
- rdev->stats.res_wqe_format_err;
- stats->value[BNXT_RE_RES_CQ_LOAD_ERR] =
- rdev->stats.res_cq_load_err;
- stats->value[BNXT_RE_RES_SRQ_LOAD_ERR] =
- rdev->stats.res_srq_load_err;
- stats->value[BNXT_RE_RES_TX_PCI_ERR] =
- rdev->stats.res_tx_pci_err;
- stats->value[BNXT_RE_RES_RX_PCI_ERR] =
- rdev->stats.res_rx_pci_err;
- stats->value[BNXT_RE_OUT_OF_SEQ_ERR] =
- rdev->stats.res_oos_drop_count;
+ goto done;
+ }
+ if (_is_ext_stats_supported(rdev->dev_attr.dev_cap_flags) &&
+ !rdev->is_virtfn) {
+ rc = bnxt_re_get_ext_stat(rdev, stats);
+ if (rc) {
+ clear_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS,
+ &rdev->flags);
+ goto done;
+ }
+ }
+ bnxt_re_copy_err_stats(rdev, stats, err_s);
}
- return ARRAY_SIZE(bnxt_re_stat_name);
+done:
+ return bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) ?
+ BNXT_RE_NUM_EXT_COUNTERS : BNXT_RE_NUM_STD_COUNTERS;
}
struct rdma_hw_stats *bnxt_re_ib_alloc_hw_port_stats(struct ib_device *ibdev,
u32 port_num)
{
- BUILD_BUG_ON(ARRAY_SIZE(bnxt_re_stat_name) != BNXT_RE_NUM_COUNTERS);
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+ int num_counters = 0;
+
+ if (bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx))
+ num_counters = BNXT_RE_NUM_EXT_COUNTERS;
+ else
+ num_counters = BNXT_RE_NUM_STD_COUNTERS;
return rdma_alloc_hw_stats_struct(bnxt_re_stat_name,
- ARRAY_SIZE(bnxt_re_stat_name),
+ num_counters,
RDMA_HW_STATS_DEFAULT_LIFESPAN);
}
diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.h b/drivers/infiniband/hw/bnxt_re/hw_counters.h
index 6f2d2f91d9ff09..7943b2c393e422 100644
--- a/drivers/infiniband/hw/bnxt_re/hw_counters.h
+++ b/drivers/infiniband/hw/bnxt_re/hw_counters.h
@@ -41,6 +41,8 @@
#define __BNXT_RE_HW_STATS_H__
enum bnxt_re_hw_stats {
+ BNXT_RE_ACTIVE_PD,
+ BNXT_RE_ACTIVE_AH,
BNXT_RE_ACTIVE_QP,
BNXT_RE_ACTIVE_SRQ,
BNXT_RE_ACTIVE_CQ,
@@ -51,7 +53,7 @@ enum bnxt_re_hw_stats {
BNXT_RE_TX_PKTS,
BNXT_RE_TX_BYTES,
BNXT_RE_RECOVERABLE_ERRORS,
- BNXT_RE_RX_DROPS,
+ BNXT_RE_RX_ERRORS,
BNXT_RE_RX_DISCARDS,
BNXT_RE_TO_RETRANSMITS,
BNXT_RE_SEQ_ERR_NAKS_RCVD,
@@ -93,7 +95,31 @@ enum bnxt_re_hw_stats {
BNXT_RE_RES_TX_PCI_ERR,
BNXT_RE_RES_RX_PCI_ERR,
BNXT_RE_OUT_OF_SEQ_ERR,
- BNXT_RE_NUM_COUNTERS
+ BNXT_RE_TX_ATOMIC_REQ,
+ BNXT_RE_TX_READ_REQ,
+ BNXT_RE_TX_READ_RES,
+ BNXT_RE_TX_WRITE_REQ,
+ BNXT_RE_TX_SEND_REQ,
+ BNXT_RE_RX_ATOMIC_REQ,
+ BNXT_RE_RX_READ_REQ,
+ BNXT_RE_RX_READ_RESP,
+ BNXT_RE_RX_WRITE_REQ,
+ BNXT_RE_RX_SEND_REQ,
+ BNXT_RE_RX_ROCE_GOOD_PKTS,
+ BNXT_RE_RX_ROCE_GOOD_BYTES,
+ BNXT_RE_OOB,
+ BNXT_RE_NUM_EXT_COUNTERS
+};
+
+#define BNXT_RE_NUM_STD_COUNTERS (BNXT_RE_OUT_OF_SEQ_ERR + 1)
+
+struct bnxt_re_rstat {
+ struct bnxt_qplib_roce_stats errs;
+ struct bnxt_qplib_ext_stat ext_stat;
+};
+
+struct bnxt_re_stats {
+ struct bnxt_re_rstat rstat;
};
struct rdma_hw_stats *bnxt_re_ib_alloc_hw_port_stats(struct ib_device *ibdev,
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 408dfbcc47b5e6..67ca794f64e897 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -133,7 +133,7 @@ int bnxt_re_query_device(struct ib_device *ibdev,
bnxt_qplib_get_guid(rdev->netdev->dev_addr,
(u8 *)&ib_attr->sys_image_guid);
ib_attr->max_mr_size = BNXT_RE_MAX_MR_SIZE;
- ib_attr->page_size_cap = BNXT_RE_PAGE_SIZE_4K | BNXT_RE_PAGE_SIZE_2M;
+ ib_attr->page_size_cap = BNXT_RE_PAGE_SIZE_SUPPORTED;
ib_attr->vendor_id = rdev->en_dev->pdev->vendor;
ib_attr->vendor_part_id = rdev->en_dev->pdev->device;
@@ -541,9 +541,12 @@ int bnxt_re_dealloc_pd(struct ib_pd *ib_pd, struct ib_udata *udata)
bnxt_re_destroy_fence_mr(pd);
- if (pd->qplib_pd.id)
- bnxt_qplib_dealloc_pd(&rdev->qplib_res, &rdev->qplib_res.pd_tbl,
- &pd->qplib_pd);
+ if (pd->qplib_pd.id) {
+ if (!bnxt_qplib_dealloc_pd(&rdev->qplib_res,
+ &rdev->qplib_res.pd_tbl,
+ &pd->qplib_pd))
+ atomic_dec(&rdev->pd_count);
+ }
return 0;
}
@@ -595,6 +598,8 @@ int bnxt_re_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
if (bnxt_re_create_fence_mr(pd))
ibdev_warn(&rdev->ibdev,
"Failed to create Fence-MR\n");
+ atomic_inc(&rdev->pd_count);
+
return 0;
dbfail:
bnxt_qplib_dealloc_pd(&rdev->qplib_res, &rdev->qplib_res.pd_tbl,
@@ -611,6 +616,8 @@ int bnxt_re_destroy_ah(struct ib_ah *ib_ah, u32 flags)
bnxt_qplib_destroy_ah(&rdev->qplib_res, &ah->qplib_ah,
!(flags & RDMA_DESTROY_AH_SLEEPABLE));
+ atomic_dec(&rdev->ah_count);
+
return 0;
}
@@ -695,6 +702,7 @@ int bnxt_re_create_ah(struct ib_ah *ib_ah, struct rdma_ah_init_attr *init_attr,
wmb(); /* make sure cache is updated. */
spin_unlock_irqrestore(&uctx->sh_lock, flag);
}
+ atomic_inc(&rdev->ah_count);
return 0;
}
@@ -760,6 +768,7 @@ static int bnxt_re_destroy_gsi_sqp(struct bnxt_re_qp *qp)
bnxt_qplib_destroy_ah(&rdev->qplib_res,
&gsi_sah->qplib_ah,
true);
+ atomic_dec(&rdev->ah_count);
bnxt_qplib_clean_qp(&qp->qplib_qp);
ibdev_dbg(&rdev->ibdev, "Destroy the shadow QP\n");
@@ -1006,6 +1015,7 @@ static struct bnxt_re_ah *bnxt_re_create_shadow_qp_ah
"Failed to allocate HW AH for Shadow QP");
goto fail;
}
+ atomic_inc(&rdev->ah_count);
return ah;
@@ -2478,7 +2488,8 @@ static int bnxt_re_build_reg_wqe(const struct ib_reg_wr *wr,
wqe->frmr.l_key = wr->key;
wqe->frmr.length = wr->mr->length;
- wqe->frmr.pbl_pg_sz_log = (wr->mr->page_size >> PAGE_SHIFT_4K) - 1;
+ wqe->frmr.pbl_pg_sz_log = ilog2(PAGE_SIZE >> PAGE_SHIFT_4K);
+ wqe->frmr.pg_sz_log = ilog2(wr->mr->page_size >> PAGE_SHIFT_4K);
wqe->frmr.va = wr->mr->iova;
return 0;
}
@@ -3354,8 +3365,11 @@ static void bnxt_re_process_res_ud_wc(struct bnxt_re_qp *qp,
struct ib_wc *wc,
struct bnxt_qplib_cqe *cqe)
{
+ struct bnxt_re_dev *rdev;
+ u16 vlan_id = 0;
u8 nw_type;
+ rdev = qp->rdev;
wc->opcode = IB_WC_RECV;
wc->status = __rc_to_ib_wc_status(cqe->status);
@@ -3367,9 +3381,12 @@ static void bnxt_re_process_res_ud_wc(struct bnxt_re_qp *qp,
memcpy(wc->smac, cqe->smac, ETH_ALEN);
wc->wc_flags |= IB_WC_WITH_SMAC;
if (cqe->flags & CQ_RES_UD_FLAGS_META_FORMAT_VLAN) {
- wc->vlan_id = (cqe->cfa_meta & 0xFFF);
- if (wc->vlan_id < 0x1000)
- wc->wc_flags |= IB_WC_WITH_VLAN;
+ vlan_id = (cqe->cfa_meta & 0xFFF);
+ }
+ /* Mark only if vlan_id is non zero */
+ if (vlan_id && bnxt_re_check_if_vlan_valid(rdev, vlan_id)) {
+ wc->vlan_id = vlan_id;
+ wc->wc_flags |= IB_WC_WITH_VLAN;
}
nw_type = (cqe->flags & CQ_RES_UD_FLAGS_ROCE_IP_VER_MASK) >>
CQ_RES_UD_FLAGS_ROCE_IP_VER_SFT;
@@ -3798,7 +3815,7 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length,
mr->qplib_mr.va = virt_addr;
page_size = ib_umem_find_best_pgsz(
- umem, BNXT_RE_PAGE_SIZE_4K | BNXT_RE_PAGE_SIZE_2M, virt_addr);
+ umem, BNXT_RE_PAGE_SIZE_SUPPORTED, virt_addr);
if (!page_size) {
ibdev_err(&rdev->ibdev, "umem page size unsupported!");
rc = -EFAULT;
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index 66268e41b470e2..4fa3b14b261387 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -127,6 +127,8 @@ static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev, u8 wqe_mode)
rdev->qplib_res.cctx = rdev->chip_ctx;
rdev->rcfw.res = &rdev->qplib_res;
+ rdev->qplib_res.dattr = &rdev->dev_attr;
+ rdev->qplib_res.is_vf = BNXT_VF(bp);
bnxt_re_set_drv_mode(rdev, wqe_mode);
if (bnxt_qplib_determine_atomics(en_dev->pdev))
@@ -523,7 +525,8 @@ static int bnxt_re_net_stats_ctx_free(struct bnxt_re_dev *rdev,
u32 fw_stats_ctx_id)
{
struct bnxt_en_dev *en_dev = rdev->en_dev;
- struct hwrm_stat_ctx_free_input req = {0};
+ struct hwrm_stat_ctx_free_input req = {};
+ struct hwrm_stat_ctx_free_output resp = {};
struct bnxt_fw_msg fw_msg;
int rc = -EINVAL;
@@ -537,8 +540,8 @@ static int bnxt_re_net_stats_ctx_free(struct bnxt_re_dev *rdev,
bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_STAT_CTX_FREE, -1, -1);
req.stat_ctx_id = cpu_to_le32(fw_stats_ctx_id);
- bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&req,
- sizeof(req), DFLT_HWRM_CMD_TIMEOUT);
+ bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
+ sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
if (rc)
ibdev_err(&rdev->ibdev, "Failed to free HW stats context %#x",
@@ -777,6 +780,8 @@ static struct bnxt_re_dev *bnxt_re_dev_add(struct net_device *netdev,
atomic_set(&rdev->srq_count, 0);
atomic_set(&rdev->mr_count, 0);
atomic_set(&rdev->mw_count, 0);
+ atomic_set(&rdev->ah_count, 0);
+ atomic_set(&rdev->pd_count, 0);
rdev->cosq[0] = 0xFFFF;
rdev->cosq[1] = 0xFFFF;
@@ -1725,7 +1730,7 @@ static int bnxt_re_netdev_event(struct notifier_block *notifier,
}
if (sch_work) {
/* Allocate for the deferred task */
- re_work = kzalloc(sizeof(*re_work), GFP_ATOMIC);
+ re_work = kzalloc(sizeof(*re_work), GFP_KERNEL);
if (re_work) {
get_device(&rdev->ibdev.dev);
re_work->rdev = rdev;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
index d4d4959c2434ce..ca88849559bf4f 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
@@ -707,12 +707,13 @@ int bnxt_qplib_query_srq(struct bnxt_qplib_res *res,
int rc = 0;
RCFW_CMD_PREP(req, QUERY_SRQ, cmd_flags);
- req.srq_cid = cpu_to_le32(srq->id);
/* Configure the request */
sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb));
if (!sbuf)
return -ENOMEM;
+ req.resp_size = sizeof(*sb) / BNXT_QPLIB_CMDQE_UNITS;
+ req.srq_cid = cpu_to_le32(srq->id);
sb = sbuf->sb;
rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
(void *)sbuf, 0);
@@ -1049,6 +1050,9 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_FORCE_COMPLETION;
if (qp->wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE)
qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_VARIABLE_SIZED_WQE_ENABLED;
+ if (_is_ext_stats_supported(res->dattr->dev_cap_flags) && !res->is_vf)
+ qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_EXT_STATS_ENABLED;
+
req.qp_flags = cpu_to_le32(qp_flags);
/* ORRQ and IRRQ */
@@ -2851,6 +2855,7 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
struct cq_base *hw_cqe;
u32 sw_cons, raw_cons;
int budget, rc = 0;
+ u8 type;
raw_cons = cq->hwq.cons;
budget = num_cqes;
@@ -2869,7 +2874,8 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
*/
dma_rmb();
/* From the device's respective CQE format to qplib_wc*/
- switch (hw_cqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK) {
+ type = hw_cqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK;
+ switch (type) {
case CQ_BASE_CQE_TYPE_REQ:
rc = bnxt_qplib_cq_process_req(cq,
(struct cq_req *)hw_cqe,
@@ -2916,8 +2922,9 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
/* Error while processing the CQE, just skip to the
* next one
*/
- dev_err(&cq->hwq.pdev->dev,
- "process_cqe error rc = 0x%x\n", rc);
+ if (type != CQ_BASE_CQE_TYPE_TERMINAL)
+ dev_err(&cq->hwq.pdev->dev,
+ "process_cqe error rc = 0x%x\n", rc);
}
raw_cons++;
}
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
index 5d384def5e5fea..3de854727460e4 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
@@ -78,7 +78,7 @@ static int __block_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
if (!test_bit(cbit, cmdq->cmdq_bitmap))
goto done;
do {
- mdelay(1); /* 1m sec */
+ udelay(1);
bnxt_qplib_service_creq(&rcfw->creq.creq_tasklet);
} while (test_bit(cbit, cmdq->cmdq_bitmap) && --count);
done:
@@ -848,13 +848,13 @@ struct bnxt_qplib_rcfw_sbuf *bnxt_qplib_rcfw_alloc_sbuf(
{
struct bnxt_qplib_rcfw_sbuf *sbuf;
- sbuf = kzalloc(sizeof(*sbuf), GFP_ATOMIC);
+ sbuf = kzalloc(sizeof(*sbuf), GFP_KERNEL);
if (!sbuf)
return NULL;
sbuf->size = size;
sbuf->sb = dma_alloc_coherent(&rcfw->pdev->dev, sbuf->size,
- &sbuf->dma_addr, GFP_ATOMIC);
+ &sbuf->dma_addr, GFP_KERNEL);
if (!sbuf->sb)
goto bail;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
index 9474c00465821a..82faa4e4cda84c 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
@@ -96,7 +96,7 @@ static inline void bnxt_qplib_set_cmd_slots(struct cmdq_base *req)
#define RCFW_MAX_COOKIE_VALUE 0x7FFF
#define RCFW_CMD_IS_BLOCKING 0x8000
-#define RCFW_BLOCKED_CMD_WAIT_COUNT 0x4E20
+#define RCFW_BLOCKED_CMD_WAIT_COUNT 20000000UL /* 20 sec */
#define HWRM_VERSION_RCFW_CMDQ_DEPTH_CHECK 0x1000900020011ULL
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c
index 44282a8cdd4f25..bf49363f590e6d 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c
@@ -228,15 +228,16 @@ int bnxt_qplib_alloc_init_hwq(struct bnxt_qplib_hwq *hwq,
npages++;
}
- if (npages == MAX_PBL_LVL_0_PGS) {
+ if (npages == MAX_PBL_LVL_0_PGS && !hwq_attr->sginfo->nopte) {
/* This request is Level 0, map PTE */
rc = __alloc_pbl(res, &hwq->pbl[PBL_LVL_0], hwq_attr->sginfo);
if (rc)
goto fail;
hwq->level = PBL_LVL_0;
+ goto done;
}
- if (npages > MAX_PBL_LVL_0_PGS) {
+ if (npages >= MAX_PBL_LVL_0_PGS) {
if (npages > MAX_PBL_LVL_1_PGS) {
u32 flag = (hwq_attr->type == HWQ_TYPE_L2_CMPL) ?
0 : PTU_PTE_VALID;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h
index 91031502e8f5d6..c39b20236f16d0 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h
@@ -253,14 +253,15 @@ struct bnxt_qplib_ctx {
struct bnxt_qplib_res {
struct pci_dev *pdev;
struct bnxt_qplib_chip_ctx *cctx;
+ struct bnxt_qplib_dev_attr *dattr;
struct net_device *netdev;
-
struct bnxt_qplib_rcfw *rcfw;
struct bnxt_qplib_pd_tbl pd_tbl;
struct bnxt_qplib_sgid_tbl sgid_tbl;
struct bnxt_qplib_pkey_tbl pkey_tbl;
struct bnxt_qplib_dpi_tbl dpi_tbl;
bool prio;
+ bool is_vf;
};
static inline bool bnxt_qplib_is_chip_gen_p5(struct bnxt_qplib_chip_ctx *cctx)
@@ -450,4 +451,10 @@ static inline void bnxt_qplib_ring_nq_db(struct bnxt_qplib_db_info *info,
else
bnxt_qplib_ring_db32(info, arm);
}
+
+static inline bool _is_ext_stats_supported(u16 dev_cap_flags)
+{
+ return dev_cap_flags &
+ CREQ_QUERY_FUNC_RESP_SB_EXT_STATS;
+}
#endif /* __BNXT_QPLIB_RES_H__ */
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
index 3d9259632eb3d6..cbe83e9bce5cfd 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
@@ -161,6 +161,7 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
attr->l2_db_size = (sb->l2_db_space_size + 1) *
(0x01 << RCFW_DBR_BASE_PAGE_SHIFT);
attr->max_sgid = BNXT_QPLIB_NUM_GIDS_SUPPORTED;
+ attr->dev_cap_flags = le16_to_cpu(sb->dev_cap_flags);
bnxt_qplib_query_version(rcfw, attr->fw_ver);
@@ -869,3 +870,53 @@ bail:
bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf);
return rc;
}
+
+int bnxt_qplib_qext_stat(struct bnxt_qplib_rcfw *rcfw, u32 fid,
+ struct bnxt_qplib_ext_stat *estat)
+{
+ struct creq_query_roce_stats_ext_resp resp = {};
+ struct creq_query_roce_stats_ext_resp_sb *sb;
+ struct cmdq_query_roce_stats_ext req = {};
+ struct bnxt_qplib_rcfw_sbuf *sbuf;
+ u16 cmd_flags = 0;
+ int rc;
+
+ sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb));
+ if (!sbuf) {
+ dev_err(&rcfw->pdev->dev,
+ "SP: QUERY_ROCE_STATS_EXT alloc sb failed");
+ return -ENOMEM;
+ }
+
+ RCFW_CMD_PREP(req, QUERY_ROCE_STATS_EXT, cmd_flags);
+
+ req.resp_size = ALIGN(sizeof(*sb), BNXT_QPLIB_CMDQE_UNITS);
+ req.resp_addr = cpu_to_le64(sbuf->dma_addr);
+ req.function_id = cpu_to_le32(fid);
+ req.flags = cpu_to_le16(CMDQ_QUERY_ROCE_STATS_EXT_FLAGS_FUNCTION_ID);
+
+ rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+ (void *)&resp, (void *)sbuf, 0);
+ if (rc)
+ goto bail;
+
+ sb = sbuf->sb;
+ estat->tx_atomic_req = le64_to_cpu(sb->tx_atomic_req_pkts);
+ estat->tx_read_req = le64_to_cpu(sb->tx_read_req_pkts);
+ estat->tx_read_res = le64_to_cpu(sb->tx_read_res_pkts);
+ estat->tx_write_req = le64_to_cpu(sb->tx_write_req_pkts);
+ estat->tx_send_req = le64_to_cpu(sb->tx_send_req_pkts);
+ estat->rx_atomic_req = le64_to_cpu(sb->rx_atomic_req_pkts);
+ estat->rx_read_req = le64_to_cpu(sb->rx_read_req_pkts);
+ estat->rx_read_res = le64_to_cpu(sb->rx_read_res_pkts);
+ estat->rx_write_req = le64_to_cpu(sb->rx_write_req_pkts);
+ estat->rx_send_req = le64_to_cpu(sb->rx_send_req_pkts);
+ estat->rx_roce_good_pkts = le64_to_cpu(sb->rx_roce_good_pkts);
+ estat->rx_roce_good_bytes = le64_to_cpu(sb->rx_roce_good_bytes);
+ estat->rx_out_of_buffer = le64_to_cpu(sb->rx_out_of_buffer_pkts);
+ estat->rx_out_of_sequence = le64_to_cpu(sb->rx_out_of_sequence_pkts);
+
+bail:
+ bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf);
+ return rc;
+}
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
index 26010478369184..3d5c418416682e 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
@@ -71,6 +71,7 @@ struct bnxt_qplib_dev_attr {
u32 l2_db_size;
u8 tqm_alloc_reqs[MAX_TQM_ALLOC_REQ];
bool is_atomic;
+ u16 dev_cap_flags;
};
struct bnxt_qplib_pd {
@@ -219,6 +220,30 @@ struct bnxt_qplib_roce_stats {
/* port 3 active qps */
};
+struct bnxt_qplib_ext_stat {
+ u64 tx_atomic_req;
+ u64 tx_read_req;
+ u64 tx_read_res;
+ u64 tx_write_req;
+ u64 tx_send_req;
+ u64 tx_roce_pkts;
+ u64 tx_roce_bytes;
+ u64 rx_atomic_req;
+ u64 rx_read_req;
+ u64 rx_read_res;
+ u64 rx_write_req;
+ u64 rx_send_req;
+ u64 rx_roce_pkts;
+ u64 rx_roce_bytes;
+ u64 rx_roce_good_pkts;
+ u64 rx_roce_good_bytes;
+ u64 rx_out_of_buffer;
+ u64 rx_out_of_sequence;
+ u64 tx_cnp;
+ u64 rx_cnp;
+ u64 rx_ecn_marked;
+};
+
int bnxt_qplib_get_sgid(struct bnxt_qplib_res *res,
struct bnxt_qplib_sgid_tbl *sgid_tbl, int index,
struct bnxt_qplib_gid *gid);
@@ -263,4 +288,7 @@ int bnxt_qplib_free_fast_reg_page_list(struct bnxt_qplib_res *res,
int bnxt_qplib_map_tc2cos(struct bnxt_qplib_res *res, u16 *cids);
int bnxt_qplib_get_roce_stats(struct bnxt_qplib_rcfw *rcfw,
struct bnxt_qplib_roce_stats *stats);
+int bnxt_qplib_qext_stat(struct bnxt_qplib_rcfw *rcfw, u32 fid,
+ struct bnxt_qplib_ext_stat *estat);
+
#endif /* __BNXT_QPLIB_SP_H__*/
diff --git a/drivers/infiniband/hw/bnxt_re/roce_hsi.h b/drivers/infiniband/hw/bnxt_re/roce_hsi.h
index 3e40e0d76efd74..ecb719098b75f7 100644
--- a/drivers/infiniband/hw/bnxt_re/roce_hsi.h
+++ b/drivers/infiniband/hw/bnxt_re/roce_hsi.h
@@ -1102,6 +1102,7 @@ struct cmdq_base {
#define CMDQ_BASE_OPCODE_MODIFY_CC 0x8cUL
#define CMDQ_BASE_OPCODE_QUERY_CC 0x8dUL
#define CMDQ_BASE_OPCODE_QUERY_ROCE_STATS 0x8eUL
+ #define CMDQ_BASE_OPCODE_QUERY_ROCE_STATS_EXT 0x92UL
u8 cmd_size;
__le16 flags;
__le16 cookie;
@@ -1127,6 +1128,10 @@ struct cmdq_create_qp {
#define CMDQ_CREATE_QP_QP_FLAGS_RESERVED_LKEY_ENABLE 0x4UL
#define CMDQ_CREATE_QP_QP_FLAGS_FR_PMR_ENABLED 0x8UL
#define CMDQ_CREATE_QP_QP_FLAGS_VARIABLE_SIZED_WQE_ENABLED 0x10UL
+ #define CMDQ_CREATE_QP_QP_FLAGS_EXT_STATS_ENABLED 0x80UL
+ #define CMDQ_CREATE_QP_QP_FLAGS_LAST \
+ CMDQ_CREATE_QP_QP_FLAGS_EXT_STATS_ENABLED
+
u8 type;
#define CMDQ_CREATE_QP_TYPE_RC 0x2UL
#define CMDQ_CREATE_QP_TYPE_UD 0x4UL
@@ -2848,6 +2853,7 @@ struct creq_query_func_resp_sb {
__le16 max_qp_wr;
__le16 dev_cap_flags;
#define CREQ_QUERY_FUNC_RESP_SB_DEV_CAP_FLAGS_RESIZE_QP 0x1UL
+ #define CREQ_QUERY_FUNC_RESP_SB_EXT_STATS 0x10UL
__le32 max_cq;
__le32 max_cqe;
__le32 max_pd;
@@ -3087,6 +3093,85 @@ struct creq_query_roce_stats_resp_sb {
__le64 active_qp_count_p3;
};
+/* cmdq_query_roce_stats_ext (size:192b/24B) */
+struct cmdq_query_roce_stats_ext {
+ u8 opcode;
+ #define CMDQ_QUERY_ROCE_STATS_EXT_OPCODE_QUERY_ROCE_STATS 0x92UL
+ #define CMDQ_QUERY_ROCE_STATS_EXT_OPCODE_LAST \
+ CMDQ_QUERY_ROCE_STATS_EXT_OPCODE_QUERY_ROCE_STATS
+ u8 cmd_size;
+ __le16 flags;
+ #define CMDQ_QUERY_ROCE_STATS_EXT_FLAGS_COLLECTION_ID 0x1UL
+ #define CMDQ_QUERY_ROCE_STATS_EXT_FLAGS_FUNCTION_ID 0x2UL
+ __le16 cookie;
+ u8 resp_size;
+ u8 collection_id;
+ __le64 resp_addr;
+ __le32 function_id;
+ #define CMDQ_QUERY_ROCE_STATS_EXT_PF_NUM_MASK 0xffUL
+ #define CMDQ_QUERY_ROCE_STATS_EXT_PF_NUM_SFT 0
+ #define CMDQ_QUERY_ROCE_STATS_EXT_VF_NUM_MASK 0xffff00UL
+ #define CMDQ_QUERY_ROCE_STATS_EXT_VF_NUM_SFT 8
+ #define CMDQ_QUERY_ROCE_STATS_EXT_VF_VALID 0x1000000UL
+ __le32 reserved32;
+};
+
+/* creq_query_roce_stats_ext_resp (size:128b/16B) */
+struct creq_query_roce_stats_ext_resp {
+ u8 type;
+ #define CREQ_QUERY_ROCE_STATS_EXT_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_QUERY_ROCE_STATS_EXT_RESP_TYPE_SFT 0
+ #define CREQ_QUERY_ROCE_STATS_EXT_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_QUERY_ROCE_STATS_EXT_RESP_TYPE_LAST \
+ CREQ_QUERY_ROCE_STATS_EXT_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 size;
+ u8 v;
+ #define CREQ_QUERY_ROCE_STATS_EXT_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_QUERY_ROCE_STATS_EXT_RESP_EVENT_QUERY_ROCE_STATS_EXT 0x92UL
+ #define CREQ_QUERY_ROCE_STATS_EXT_RESP_EVENT_LAST \
+ CREQ_QUERY_ROCE_STATS_EXT_RESP_EVENT_QUERY_ROCE_STATS_EXT
+ u8 reserved48[6];
+};
+
+/* creq_query_roce_stats_ext_resp_sb (size:1536b/192B) */
+struct creq_query_roce_stats_ext_resp_sb {
+ u8 opcode;
+ #define CREQ_QUERY_ROCE_STATS_EXT_RESP_SB_OPCODE_QUERY_ROCE_STATS_EXT 0x92UL
+ #define CREQ_QUERY_ROCE_STATS_EXT_RESP_SB_OPCODE_LAST \
+ CREQ_QUERY_ROCE_STATS_EXT_RESP_SB_OPCODE_QUERY_ROCE_STATS_EXT
+ u8 status;
+ __le16 cookie;
+ __le16 flags;
+ u8 resp_size;
+ u8 rsvd;
+ __le64 tx_atomic_req_pkts;
+ __le64 tx_read_req_pkts;
+ __le64 tx_read_res_pkts;
+ __le64 tx_write_req_pkts;
+ __le64 tx_send_req_pkts;
+ __le64 tx_roce_pkts;
+ __le64 tx_roce_bytes;
+ __le64 rx_atomic_req_pkts;
+ __le64 rx_read_req_pkts;
+ __le64 rx_read_res_pkts;
+ __le64 rx_write_req_pkts;
+ __le64 rx_send_req_pkts;
+ __le64 rx_roce_pkts;
+ __le64 rx_roce_bytes;
+ __le64 rx_roce_good_pkts;
+ __le64 rx_roce_good_bytes;
+ __le64 rx_out_of_buffer_pkts;
+ __le64 rx_out_of_sequence_pkts;
+ __le64 tx_cnp_pkts;
+ __le64 rx_cnp_pkts;
+ __le64 rx_ecn_marked_pkts;
+ __le64 tx_cnp_bytes;
+ __le64 rx_cnp_bytes;
+};
+
/* QP error notification event (16 bytes) */
struct creq_qp_error_notification {
u8 type;
diff --git a/drivers/infiniband/hw/hfi1/ipoib.h b/drivers/infiniband/hw/hfi1/ipoib.h
index 2cff38b105ac32..90912293424648 100644
--- a/drivers/infiniband/hw/hfi1/ipoib.h
+++ b/drivers/infiniband/hw/hfi1/ipoib.h
@@ -44,22 +44,52 @@ union hfi1_ipoib_flow {
};
/**
+ * struct ipoib_txreq - IPOIB transmit descriptor
+ * @txreq: sdma transmit request
+ * @sdma_hdr: 9b ib headers
+ * @sdma_status: status returned by sdma engine
+ * @complete: non-zero implies complete
+ * @priv: ipoib netdev private data
+ * @txq: txq on which skb was output
+ * @skb: skb to send
+ */
+struct ipoib_txreq {
+ struct sdma_txreq txreq;
+ struct hfi1_sdma_header sdma_hdr;
+ int sdma_status;
+ int complete;
+ struct hfi1_ipoib_dev_priv *priv;
+ struct hfi1_ipoib_txq *txq;
+ struct sk_buff *skb;
+};
+
+/**
* struct hfi1_ipoib_circ_buf - List of items to be processed
- * @items: ring of items
- * @head: ring head
- * @tail: ring tail
+ * @items: ring of items each a power of two size
* @max_items: max items + 1 that the ring can contain
- * @producer_lock: producer sync lock
- * @consumer_lock: consumer sync lock
+ * @shift: log2 of size for getting txreq
+ * @sent_txreqs: count of txreqs posted to sdma
+ * @tail: ring tail
+ * @stops: count of stops of queue
+ * @ring_full: ring has been filled
+ * @no_desc: descriptor shortage seen
+ * @complete_txreqs: count of txreqs completed by sdma
+ * @head: ring head
*/
-struct ipoib_txreq;
struct hfi1_ipoib_circ_buf {
- struct ipoib_txreq **items;
- unsigned long head;
- unsigned long tail;
- unsigned long max_items;
- spinlock_t producer_lock; /* head sync lock */
- spinlock_t consumer_lock; /* tail sync lock */
+ void *items;
+ u32 max_items;
+ u32 shift;
+ /* consumer cache line */
+ u64 ____cacheline_aligned_in_smp sent_txreqs;
+ u32 avail;
+ u32 tail;
+ atomic_t stops;
+ atomic_t ring_full;
+ atomic_t no_desc;
+ /* producer cache line */
+ u64 ____cacheline_aligned_in_smp complete_txreqs;
+ u32 head;
};
/**
@@ -68,33 +98,24 @@ struct hfi1_ipoib_circ_buf {
* @sde: sdma engine
* @tx_list: tx request list
* @sent_txreqs: count of txreqs posted to sdma
- * @stops: count of stops of queue
- * @ring_full: ring has been filled
- * @no_desc: descriptor shortage seen
* @flow: tracks when list needs to be flushed for a flow change
* @q_idx: ipoib Tx queue index
* @pkts_sent: indicator packets have been sent from this queue
* @wait: iowait structure
- * @complete_txreqs: count of txreqs completed by sdma
* @napi: pointer to tx napi interface
* @tx_ring: ring of ipoib txreqs to be reaped by napi callback
*/
struct hfi1_ipoib_txq {
+ struct napi_struct napi;
struct hfi1_ipoib_dev_priv *priv;
struct sdma_engine *sde;
struct list_head tx_list;
- u64 sent_txreqs;
- atomic_t stops;
- atomic_t ring_full;
- atomic_t no_desc;
union hfi1_ipoib_flow flow;
u8 q_idx;
bool pkts_sent;
struct iowait wait;
- atomic64_t ____cacheline_aligned_in_smp complete_txreqs;
- struct napi_struct *napi;
- struct hfi1_ipoib_circ_buf tx_ring;
+ struct hfi1_ipoib_circ_buf ____cacheline_aligned_in_smp tx_ring;
};
struct hfi1_ipoib_dev_priv {
@@ -102,15 +123,12 @@ struct hfi1_ipoib_dev_priv {
struct net_device *netdev;
struct ib_device *device;
struct hfi1_ipoib_txq *txqs;
- struct kmem_cache *txreq_cache;
- struct napi_struct *tx_napis;
+ const struct net_device_ops *netdev_ops;
+ struct rvt_qp *qp;
+ u32 qkey;
u16 pkey;
u16 pkey_index;
- u32 qkey;
u8 port_num;
-
- const struct net_device_ops *netdev_ops;
- struct rvt_qp *qp;
};
/* hfi1 ipoib rdma netdev's private data structure */
diff --git a/drivers/infiniband/hw/hfi1/ipoib_tx.c b/drivers/infiniband/hw/hfi1/ipoib_tx.c
index 15b0cb0f363f42..f4010890309f7a 100644
--- a/drivers/infiniband/hw/hfi1/ipoib_tx.c
+++ b/drivers/infiniband/hw/hfi1/ipoib_tx.c
@@ -22,24 +22,6 @@
#define CIRC_NEXT(val, size) CIRC_ADD(val, 1, size)
#define CIRC_PREV(val, size) CIRC_ADD(val, -1, size)
-/**
- * struct ipoib_txreq - IPOIB transmit descriptor
- * @txreq: sdma transmit request
- * @sdma_hdr: 9b ib headers
- * @sdma_status: status returned by sdma engine
- * @priv: ipoib netdev private data
- * @txq: txq on which skb was output
- * @skb: skb to send
- */
-struct ipoib_txreq {
- struct sdma_txreq txreq;
- struct hfi1_sdma_header sdma_hdr;
- int sdma_status;
- struct hfi1_ipoib_dev_priv *priv;
- struct hfi1_ipoib_txq *txq;
- struct sk_buff *skb;
-};
-
struct ipoib_txparms {
struct hfi1_devdata *dd;
struct rdma_ah_attr *ah_attr;
@@ -51,28 +33,34 @@ struct ipoib_txparms {
u8 entropy;
};
-static u64 hfi1_ipoib_txreqs(const u64 sent, const u64 completed)
+static struct ipoib_txreq *
+hfi1_txreq_from_idx(struct hfi1_ipoib_circ_buf *r, u32 idx)
+{
+ return (struct ipoib_txreq *)(r->items + (idx << r->shift));
+}
+
+static u32 hfi1_ipoib_txreqs(const u64 sent, const u64 completed)
{
return sent - completed;
}
static u64 hfi1_ipoib_used(struct hfi1_ipoib_txq *txq)
{
- return hfi1_ipoib_txreqs(txq->sent_txreqs,
- atomic64_read(&txq->complete_txreqs));
+ return hfi1_ipoib_txreqs(txq->tx_ring.sent_txreqs,
+ txq->tx_ring.complete_txreqs);
}
static void hfi1_ipoib_stop_txq(struct hfi1_ipoib_txq *txq)
{
trace_hfi1_txq_stop(txq);
- if (atomic_inc_return(&txq->stops) == 1)
+ if (atomic_inc_return(&txq->tx_ring.stops) == 1)
netif_stop_subqueue(txq->priv->netdev, txq->q_idx);
}
static void hfi1_ipoib_wake_txq(struct hfi1_ipoib_txq *txq)
{
trace_hfi1_txq_wake(txq);
- if (atomic_dec_and_test(&txq->stops))
+ if (atomic_dec_and_test(&txq->tx_ring.stops))
netif_wake_subqueue(txq->priv->netdev, txq->q_idx);
}
@@ -90,9 +78,9 @@ static uint hfi1_ipoib_ring_lwat(struct hfi1_ipoib_txq *txq)
static void hfi1_ipoib_check_queue_depth(struct hfi1_ipoib_txq *txq)
{
- ++txq->sent_txreqs;
+ ++txq->tx_ring.sent_txreqs;
if (hfi1_ipoib_used(txq) >= hfi1_ipoib_ring_hwat(txq) &&
- !atomic_xchg(&txq->ring_full, 1)) {
+ !atomic_xchg(&txq->tx_ring.ring_full, 1)) {
trace_hfi1_txq_full(txq);
hfi1_ipoib_stop_txq(txq);
}
@@ -117,7 +105,7 @@ static void hfi1_ipoib_check_queue_stopped(struct hfi1_ipoib_txq *txq)
* to protect against ring overflow.
*/
if (hfi1_ipoib_used(txq) < hfi1_ipoib_ring_lwat(txq) &&
- atomic_xchg(&txq->ring_full, 0)) {
+ atomic_xchg(&txq->tx_ring.ring_full, 0)) {
trace_hfi1_txq_xmit_unstopped(txq);
hfi1_ipoib_wake_txq(txq);
}
@@ -125,7 +113,7 @@ static void hfi1_ipoib_check_queue_stopped(struct hfi1_ipoib_txq *txq)
static void hfi1_ipoib_free_tx(struct ipoib_txreq *tx, int budget)
{
- struct hfi1_ipoib_dev_priv *priv = tx->priv;
+ struct hfi1_ipoib_dev_priv *priv = tx->txq->priv;
if (likely(!tx->sdma_status)) {
dev_sw_netstats_tx_add(priv->netdev, 1, tx->skb->len);
@@ -139,97 +127,73 @@ static void hfi1_ipoib_free_tx(struct ipoib_txreq *tx, int budget)
}
napi_consume_skb(tx->skb, budget);
+ tx->skb = NULL;
sdma_txclean(priv->dd, &tx->txreq);
- kmem_cache_free(priv->txreq_cache, tx);
}
-static int hfi1_ipoib_drain_tx_ring(struct hfi1_ipoib_txq *txq, int budget)
+static void hfi1_ipoib_drain_tx_ring(struct hfi1_ipoib_txq *txq)
{
struct hfi1_ipoib_circ_buf *tx_ring = &txq->tx_ring;
- unsigned long head;
- unsigned long tail;
- unsigned int max_tx;
- int work_done;
- int tx_count;
-
- spin_lock_bh(&tx_ring->consumer_lock);
-
- /* Read index before reading contents at that index. */
- head = smp_load_acquire(&tx_ring->head);
- tail = tx_ring->tail;
- max_tx = tx_ring->max_items;
-
- work_done = min_t(int, CIRC_CNT(head, tail, max_tx), budget);
+ int i;
+ struct ipoib_txreq *tx;
- for (tx_count = work_done; tx_count; tx_count--) {
- hfi1_ipoib_free_tx(tx_ring->items[tail], budget);
- tail = CIRC_NEXT(tail, max_tx);
+ for (i = 0; i < tx_ring->max_items; i++) {
+ tx = hfi1_txreq_from_idx(tx_ring, i);
+ tx->complete = 0;
+ dev_kfree_skb_any(tx->skb);
+ tx->skb = NULL;
+ sdma_txclean(txq->priv->dd, &tx->txreq);
}
+ tx_ring->head = 0;
+ tx_ring->tail = 0;
+ tx_ring->complete_txreqs = 0;
+ tx_ring->sent_txreqs = 0;
+ tx_ring->avail = hfi1_ipoib_ring_hwat(txq);
+}
- atomic64_add(work_done, &txq->complete_txreqs);
+static int hfi1_ipoib_poll_tx_ring(struct napi_struct *napi, int budget)
+{
+ struct hfi1_ipoib_txq *txq =
+ container_of(napi, struct hfi1_ipoib_txq, napi);
+ struct hfi1_ipoib_circ_buf *tx_ring = &txq->tx_ring;
+ u32 head = tx_ring->head;
+ u32 max_tx = tx_ring->max_items;
+ int work_done;
+ struct ipoib_txreq *tx = hfi1_txreq_from_idx(tx_ring, head);
- /* Finished freeing tx items so store the tail value. */
- smp_store_release(&tx_ring->tail, tail);
+ trace_hfi1_txq_poll(txq);
+ for (work_done = 0; work_done < budget; work_done++) {
+ /* See hfi1_ipoib_sdma_complete() */
+ if (!smp_load_acquire(&tx->complete))
+ break;
+ tx->complete = 0;
+ trace_hfi1_tx_produce(tx, head);
+ hfi1_ipoib_free_tx(tx, budget);
+ head = CIRC_NEXT(head, max_tx);
+ tx = hfi1_txreq_from_idx(tx_ring, head);
+ }
+ tx_ring->complete_txreqs += work_done;
- spin_unlock_bh(&tx_ring->consumer_lock);
+ /* Finished freeing tx items so store the head value. */
+ smp_store_release(&tx_ring->head, head);
hfi1_ipoib_check_queue_stopped(txq);
- return work_done;
-}
-
-static int hfi1_ipoib_process_tx_ring(struct napi_struct *napi, int budget)
-{
- struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(napi->dev);
- struct hfi1_ipoib_txq *txq = &priv->txqs[napi - priv->tx_napis];
-
- int work_done = hfi1_ipoib_drain_tx_ring(txq, budget);
-
if (work_done < budget)
napi_complete_done(napi, work_done);
return work_done;
}
-static void hfi1_ipoib_add_tx(struct ipoib_txreq *tx)
-{
- struct hfi1_ipoib_circ_buf *tx_ring = &tx->txq->tx_ring;
- unsigned long head;
- unsigned long tail;
- size_t max_tx;
-
- spin_lock(&tx_ring->producer_lock);
-
- head = tx_ring->head;
- tail = READ_ONCE(tx_ring->tail);
- max_tx = tx_ring->max_items;
-
- if (likely(CIRC_SPACE(head, tail, max_tx))) {
- tx_ring->items[head] = tx;
-
- /* Finish storing txreq before incrementing head. */
- smp_store_release(&tx_ring->head, CIRC_ADD(head, 1, max_tx));
- napi_schedule_irqoff(tx->txq->napi);
- } else {
- struct hfi1_ipoib_txq *txq = tx->txq;
- struct hfi1_ipoib_dev_priv *priv = tx->priv;
-
- /* Ring was full */
- hfi1_ipoib_free_tx(tx, 0);
- atomic64_inc(&txq->complete_txreqs);
- dd_dev_dbg(priv->dd, "txq %d full.\n", txq->q_idx);
- }
-
- spin_unlock(&tx_ring->producer_lock);
-}
-
static void hfi1_ipoib_sdma_complete(struct sdma_txreq *txreq, int status)
{
struct ipoib_txreq *tx = container_of(txreq, struct ipoib_txreq, txreq);
+ trace_hfi1_txq_complete(tx->txq);
tx->sdma_status = status;
-
- hfi1_ipoib_add_tx(tx);
+ /* see hfi1_ipoib_poll_tx_ring */
+ smp_store_release(&tx->complete, 1);
+ napi_schedule_irqoff(&tx->txq->napi);
}
static int hfi1_ipoib_build_ulp_payload(struct ipoib_txreq *tx,
@@ -291,7 +255,7 @@ static int hfi1_ipoib_build_tx_desc(struct ipoib_txreq *tx,
static void hfi1_ipoib_build_ib_tx_headers(struct ipoib_txreq *tx,
struct ipoib_txparms *txp)
{
- struct hfi1_ipoib_dev_priv *priv = tx->priv;
+ struct hfi1_ipoib_dev_priv *priv = tx->txq->priv;
struct hfi1_sdma_header *sdma_hdr = &tx->sdma_hdr;
struct sk_buff *skb = tx->skb;
struct hfi1_pportdata *ppd = ppd_from_ibp(txp->ibp);
@@ -362,7 +326,7 @@ static void hfi1_ipoib_build_ib_tx_headers(struct ipoib_txreq *tx,
ohdr->bth[0] = cpu_to_be32(bth0);
ohdr->bth[1] = cpu_to_be32(txp->dqpn);
- ohdr->bth[2] = cpu_to_be32(mask_psn((u32)txp->txq->sent_txreqs));
+ ohdr->bth[2] = cpu_to_be32(mask_psn((u32)txp->txq->tx_ring.sent_txreqs));
/* Build the deth */
ohdr->u.ud.deth[0] = cpu_to_be32(priv->qkey);
@@ -385,19 +349,32 @@ static struct ipoib_txreq *hfi1_ipoib_send_dma_common(struct net_device *dev,
struct ipoib_txparms *txp)
{
struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+ struct hfi1_ipoib_txq *txq = txp->txq;
struct ipoib_txreq *tx;
+ struct hfi1_ipoib_circ_buf *tx_ring = &txq->tx_ring;
+ u32 tail = tx_ring->tail;
int ret;
- tx = kmem_cache_alloc_node(priv->txreq_cache,
- GFP_ATOMIC,
- priv->dd->node);
- if (unlikely(!tx))
- return ERR_PTR(-ENOMEM);
+ if (unlikely(!tx_ring->avail)) {
+ u32 head;
+
+ if (hfi1_ipoib_used(txq) >= hfi1_ipoib_ring_hwat(txq))
+ /* This shouldn't happen with a stopped queue */
+ return ERR_PTR(-ENOMEM);
+ /* See hfi1_ipoib_poll_tx_ring() */
+ head = smp_load_acquire(&tx_ring->head);
+ tx_ring->avail =
+ min_t(u32, hfi1_ipoib_ring_hwat(txq),
+ CIRC_CNT(head, tail, tx_ring->max_items));
+ } else {
+ tx_ring->avail--;
+ }
+ tx = hfi1_txreq_from_idx(tx_ring, tail);
+ trace_hfi1_txq_alloc_tx(txq);
/* so that we can test if the sdma descriptors are there */
tx->txreq.num_desc = 0;
- tx->priv = priv;
- tx->txq = txp->txq;
+ tx->txq = txq;
tx->skb = skb;
INIT_LIST_HEAD(&tx->txreq.list);
@@ -405,21 +382,20 @@ static struct ipoib_txreq *hfi1_ipoib_send_dma_common(struct net_device *dev,
ret = hfi1_ipoib_build_tx_desc(tx, txp);
if (likely(!ret)) {
- if (txp->txq->flow.as_int != txp->flow.as_int) {
- txp->txq->flow.tx_queue = txp->flow.tx_queue;
- txp->txq->flow.sc5 = txp->flow.sc5;
- txp->txq->sde =
+ if (txq->flow.as_int != txp->flow.as_int) {
+ txq->flow.tx_queue = txp->flow.tx_queue;
+ txq->flow.sc5 = txp->flow.sc5;
+ txq->sde =
sdma_select_engine_sc(priv->dd,
txp->flow.tx_queue,
txp->flow.sc5);
- trace_hfi1_flow_switch(txp->txq);
+ trace_hfi1_flow_switch(txq);
}
return tx;
}
sdma_txclean(priv->dd, &tx->txreq);
- kmem_cache_free(priv->txreq_cache, tx);
return ERR_PTR(ret);
}
@@ -480,8 +456,8 @@ static int hfi1_ipoib_send_dma_single(struct net_device *dev,
struct sk_buff *skb,
struct ipoib_txparms *txp)
{
- struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
struct hfi1_ipoib_txq *txq = txp->txq;
+ struct hfi1_ipoib_circ_buf *tx_ring;
struct ipoib_txreq *tx;
int ret;
@@ -499,10 +475,14 @@ static int hfi1_ipoib_send_dma_single(struct net_device *dev,
return NETDEV_TX_OK;
}
+ tx_ring = &txq->tx_ring;
+ trace_hfi1_tx_consume(tx, tx_ring->tail);
+ /* consume tx */
+ smp_store_release(&tx_ring->tail, CIRC_NEXT(tx_ring->tail, tx_ring->max_items));
ret = hfi1_ipoib_submit_tx(txq, tx);
if (likely(!ret)) {
tx_ok:
- trace_sdma_output_ibhdr(tx->priv->dd,
+ trace_sdma_output_ibhdr(txq->priv->dd,
&tx->sdma_hdr.hdr,
ib_is_sc5(txp->flow.sc5));
hfi1_ipoib_check_queue_depth(txq);
@@ -514,9 +494,10 @@ tx_ok:
if (ret == -EBUSY || ret == -ECOMM)
goto tx_ok;
- sdma_txclean(priv->dd, &tx->txreq);
- dev_kfree_skb_any(skb);
- kmem_cache_free(priv->txreq_cache, tx);
+ /* mark complete and kick napi tx */
+ smp_store_release(&tx->complete, 1);
+ napi_schedule(&tx->txq->napi);
+
++dev->stats.tx_carrier_errors;
return NETDEV_TX_OK;
@@ -527,6 +508,7 @@ static int hfi1_ipoib_send_dma_list(struct net_device *dev,
struct ipoib_txparms *txp)
{
struct hfi1_ipoib_txq *txq = txp->txq;
+ struct hfi1_ipoib_circ_buf *tx_ring;
struct ipoib_txreq *tx;
/* Has the flow change ? */
@@ -556,11 +538,15 @@ static int hfi1_ipoib_send_dma_list(struct net_device *dev,
return NETDEV_TX_OK;
}
+ tx_ring = &txq->tx_ring;
+ trace_hfi1_tx_consume(tx, tx_ring->tail);
+ /* consume tx */
+ smp_store_release(&tx_ring->tail, CIRC_NEXT(tx_ring->tail, tx_ring->max_items));
list_add_tail(&tx->txreq.list, &txq->tx_list);
hfi1_ipoib_check_queue_depth(txq);
- trace_sdma_output_ibhdr(tx->priv->dd,
+ trace_sdma_output_ibhdr(txq->priv->dd,
&tx->sdma_hdr.hdr,
ib_is_sc5(txp->flow.sc5));
@@ -646,7 +632,7 @@ static int hfi1_ipoib_sdma_sleep(struct sdma_engine *sde,
if (list_empty(&txq->wait.list)) {
struct hfi1_ibport *ibp = &sde->ppd->ibport_data;
- if (!atomic_xchg(&txq->no_desc, 1)) {
+ if (!atomic_xchg(&txq->tx_ring.no_desc, 1)) {
trace_hfi1_txq_queued(txq);
hfi1_ipoib_stop_txq(txq);
}
@@ -689,45 +675,29 @@ static void hfi1_ipoib_flush_txq(struct work_struct *work)
if (likely(dev->reg_state == NETREG_REGISTERED) &&
likely(!hfi1_ipoib_flush_tx_list(dev, txq)))
- if (atomic_xchg(&txq->no_desc, 0))
+ if (atomic_xchg(&txq->tx_ring.no_desc, 0))
hfi1_ipoib_wake_txq(txq);
}
int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv)
{
struct net_device *dev = priv->netdev;
- char buf[HFI1_IPOIB_TXREQ_NAME_LEN];
- unsigned long tx_ring_size;
+ u32 tx_ring_size, tx_item_size;
int i;
/*
* Ring holds 1 less than tx_ring_size
* Round up to next power of 2 in order to hold at least tx_queue_len
*/
- tx_ring_size = roundup_pow_of_two((unsigned long)dev->tx_queue_len + 1);
-
- snprintf(buf, sizeof(buf), "hfi1_%u_ipoib_txreq_cache", priv->dd->unit);
- priv->txreq_cache = kmem_cache_create(buf,
- sizeof(struct ipoib_txreq),
- 0,
- 0,
- NULL);
- if (!priv->txreq_cache)
- return -ENOMEM;
-
- priv->tx_napis = kcalloc_node(dev->num_tx_queues,
- sizeof(struct napi_struct),
- GFP_KERNEL,
- priv->dd->node);
- if (!priv->tx_napis)
- goto free_txreq_cache;
+ tx_ring_size = roundup_pow_of_two(dev->tx_queue_len + 1);
+ tx_item_size = roundup_pow_of_two(sizeof(struct ipoib_txreq));
priv->txqs = kcalloc_node(dev->num_tx_queues,
sizeof(struct hfi1_ipoib_txq),
GFP_KERNEL,
priv->dd->node);
if (!priv->txqs)
- goto free_tx_napis;
+ return -ENOMEM;
for (i = 0; i < dev->num_tx_queues; i++) {
struct hfi1_ipoib_txq *txq = &priv->txqs[i];
@@ -743,10 +713,9 @@ int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv)
txq->priv = priv;
txq->sde = NULL;
INIT_LIST_HEAD(&txq->tx_list);
- atomic64_set(&txq->complete_txreqs, 0);
- atomic_set(&txq->stops, 0);
- atomic_set(&txq->ring_full, 0);
- atomic_set(&txq->no_desc, 0);
+ atomic_set(&txq->tx_ring.stops, 0);
+ atomic_set(&txq->tx_ring.ring_full, 0);
+ atomic_set(&txq->tx_ring.no_desc, 0);
txq->q_idx = i;
txq->flow.tx_queue = 0xff;
txq->flow.sc5 = 0xff;
@@ -756,19 +725,17 @@ int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv)
priv->dd->node);
txq->tx_ring.items =
- kcalloc_node(tx_ring_size,
- sizeof(struct ipoib_txreq *),
+ kcalloc_node(tx_ring_size, tx_item_size,
GFP_KERNEL, priv->dd->node);
if (!txq->tx_ring.items)
goto free_txqs;
- spin_lock_init(&txq->tx_ring.producer_lock);
- spin_lock_init(&txq->tx_ring.consumer_lock);
txq->tx_ring.max_items = tx_ring_size;
+ txq->tx_ring.shift = ilog2(tx_ring_size);
+ txq->tx_ring.avail = hfi1_ipoib_ring_hwat(txq);
- txq->napi = &priv->tx_napis[i];
- netif_tx_napi_add(dev, txq->napi,
- hfi1_ipoib_process_tx_ring,
+ netif_tx_napi_add(dev, &txq->napi,
+ hfi1_ipoib_poll_tx_ring,
NAPI_POLL_WEIGHT);
}
@@ -778,20 +745,12 @@ free_txqs:
for (i--; i >= 0; i--) {
struct hfi1_ipoib_txq *txq = &priv->txqs[i];
- netif_napi_del(txq->napi);
+ netif_napi_del(&txq->napi);
kfree(txq->tx_ring.items);
}
kfree(priv->txqs);
priv->txqs = NULL;
-
-free_tx_napis:
- kfree(priv->tx_napis);
- priv->tx_napis = NULL;
-
-free_txreq_cache:
- kmem_cache_destroy(priv->txreq_cache);
- priv->txreq_cache = NULL;
return -ENOMEM;
}
@@ -799,7 +758,6 @@ static void hfi1_ipoib_drain_tx_list(struct hfi1_ipoib_txq *txq)
{
struct sdma_txreq *txreq;
struct sdma_txreq *txreq_tmp;
- atomic64_t *complete_txreqs = &txq->complete_txreqs;
list_for_each_entry_safe(txreq, txreq_tmp, &txq->tx_list, list) {
struct ipoib_txreq *tx =
@@ -808,16 +766,16 @@ static void hfi1_ipoib_drain_tx_list(struct hfi1_ipoib_txq *txq)
list_del(&txreq->list);
sdma_txclean(txq->priv->dd, &tx->txreq);
dev_kfree_skb_any(tx->skb);
- kmem_cache_free(txq->priv->txreq_cache, tx);
- atomic64_inc(complete_txreqs);
+ tx->skb = NULL;
+ txq->tx_ring.complete_txreqs++;
}
if (hfi1_ipoib_used(txq))
dd_dev_warn(txq->priv->dd,
- "txq %d not empty found %llu requests\n",
+ "txq %d not empty found %u requests\n",
txq->q_idx,
- hfi1_ipoib_txreqs(txq->sent_txreqs,
- atomic64_read(complete_txreqs)));
+ hfi1_ipoib_txreqs(txq->tx_ring.sent_txreqs,
+ txq->tx_ring.complete_txreqs));
}
void hfi1_ipoib_txreq_deinit(struct hfi1_ipoib_dev_priv *priv)
@@ -830,19 +788,13 @@ void hfi1_ipoib_txreq_deinit(struct hfi1_ipoib_dev_priv *priv)
iowait_cancel_work(&txq->wait);
iowait_sdma_drain(&txq->wait);
hfi1_ipoib_drain_tx_list(txq);
- netif_napi_del(txq->napi);
- (void)hfi1_ipoib_drain_tx_ring(txq, txq->tx_ring.max_items);
+ netif_napi_del(&txq->napi);
+ hfi1_ipoib_drain_tx_ring(txq);
kfree(txq->tx_ring.items);
}
kfree(priv->txqs);
priv->txqs = NULL;
-
- kfree(priv->tx_napis);
- priv->tx_napis = NULL;
-
- kmem_cache_destroy(priv->txreq_cache);
- priv->txreq_cache = NULL;
}
void hfi1_ipoib_napi_tx_enable(struct net_device *dev)
@@ -853,7 +805,7 @@ void hfi1_ipoib_napi_tx_enable(struct net_device *dev)
for (i = 0; i < dev->num_tx_queues; i++) {
struct hfi1_ipoib_txq *txq = &priv->txqs[i];
- napi_enable(txq->napi);
+ napi_enable(&txq->napi);
}
}
@@ -865,8 +817,8 @@ void hfi1_ipoib_napi_tx_disable(struct net_device *dev)
for (i = 0; i < dev->num_tx_queues; i++) {
struct hfi1_ipoib_txq *txq = &priv->txqs[i];
- napi_disable(txq->napi);
- (void)hfi1_ipoib_drain_tx_ring(txq, txq->tx_ring.max_items);
+ napi_disable(&txq->napi);
+ hfi1_ipoib_drain_tx_ring(txq);
}
}
@@ -874,23 +826,23 @@ void hfi1_ipoib_tx_timeout(struct net_device *dev, unsigned int q)
{
struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
struct hfi1_ipoib_txq *txq = &priv->txqs[q];
- u64 completed = atomic64_read(&txq->complete_txreqs);
dd_dev_info(priv->dd, "timeout txq %p q %u stopped %u stops %d no_desc %d ring_full %d\n",
txq, q,
__netif_subqueue_stopped(dev, txq->q_idx),
- atomic_read(&txq->stops),
- atomic_read(&txq->no_desc),
- atomic_read(&txq->ring_full));
+ atomic_read(&txq->tx_ring.stops),
+ atomic_read(&txq->tx_ring.no_desc),
+ atomic_read(&txq->tx_ring.ring_full));
dd_dev_info(priv->dd, "sde %p engine %u\n",
txq->sde,
txq->sde ? txq->sde->this_idx : 0);
dd_dev_info(priv->dd, "flow %x\n", txq->flow.as_int);
dd_dev_info(priv->dd, "sent %llu completed %llu used %llu\n",
- txq->sent_txreqs, completed, hfi1_ipoib_used(txq));
- dd_dev_info(priv->dd, "tx_queue_len %u max_items %lu\n",
+ txq->tx_ring.sent_txreqs, txq->tx_ring.complete_txreqs,
+ hfi1_ipoib_used(txq));
+ dd_dev_info(priv->dd, "tx_queue_len %u max_items %u\n",
dev->tx_queue_len, txq->tx_ring.max_items);
- dd_dev_info(priv->dd, "head %lu tail %lu\n",
+ dd_dev_info(priv->dd, "head %u tail %u\n",
txq->tx_ring.head, txq->tx_ring.tail);
dd_dev_info(priv->dd, "wait queued %u\n",
!list_empty(&txq->wait.list));
diff --git a/drivers/infiniband/hw/hfi1/trace_tx.h b/drivers/infiniband/hw/hfi1/trace_tx.h
index 7318aa6150b527..ed1b9e1e4b1731 100644
--- a/drivers/infiniband/hw/hfi1/trace_tx.h
+++ b/drivers/infiniband/hw/hfi1/trace_tx.h
@@ -917,20 +917,22 @@ DECLARE_EVENT_CLASS(/* AIP */
__entry->tail = txq->tx_ring.tail;
__entry->idx = txq->q_idx;
__entry->used =
- txq->sent_txreqs -
- atomic64_read(&txq->complete_txreqs);
+ txq->tx_ring.sent_txreqs -
+ txq->tx_ring.complete_txreqs;
__entry->flow = txq->flow.as_int;
- __entry->stops = atomic_read(&txq->stops);
- __entry->no_desc = atomic_read(&txq->no_desc);
+ __entry->stops = atomic_read(&txq->tx_ring.stops);
+ __entry->no_desc = atomic_read(&txq->tx_ring.no_desc);
__entry->stopped =
__netif_subqueue_stopped(txq->priv->netdev, txq->q_idx);
),
TP_printk(/* print */
- "[%s] txq %llx idx %u sde %llx head %lx tail %lx flow %x used %u stops %d no_desc %d stopped %u",
+ "[%s] txq %llx idx %u sde %llx:%u cpu %d head %lx tail %lx flow %x used %u stops %d no_desc %d stopped %u",
__get_str(dev),
(unsigned long long)__entry->txq,
__entry->idx,
(unsigned long long)__entry->sde,
+ __entry->sde ? __entry->sde->this_idx : 0,
+ __entry->sde ? __entry->sde->cpu : 0,
__entry->head,
__entry->tail,
__entry->flow,
@@ -995,6 +997,65 @@ DEFINE_EVENT(/* xmit_unstopped */
TP_ARGS(txq)
);
+DECLARE_EVENT_CLASS(/* AIP */
+ hfi1_ipoib_tx_template,
+ TP_PROTO(struct ipoib_txreq *tx, u32 idx),
+ TP_ARGS(tx, idx),
+ TP_STRUCT__entry(/* entry */
+ DD_DEV_ENTRY(tx->txq->priv->dd)
+ __field(struct ipoib_txreq *, tx)
+ __field(struct hfi1_ipoib_txq *, txq)
+ __field(struct sk_buff *, skb)
+ __field(ulong, idx)
+ ),
+ TP_fast_assign(/* assign */
+ DD_DEV_ASSIGN(tx->txq->priv->dd);
+ __entry->tx = tx;
+ __entry->skb = tx->skb;
+ __entry->txq = tx->txq;
+ __entry->idx = idx;
+ ),
+ TP_printk(/* print */
+ "[%s] tx %llx txq %llx,%u skb %llx idx %lu",
+ __get_str(dev),
+ (unsigned long long)__entry->tx,
+ (unsigned long long)__entry->txq,
+ __entry->txq ? __entry->txq->q_idx : 0,
+ (unsigned long long)__entry->skb,
+ __entry->idx
+ )
+);
+
+DEFINE_EVENT(/* produce */
+ hfi1_ipoib_tx_template, hfi1_tx_produce,
+ TP_PROTO(struct ipoib_txreq *tx, u32 idx),
+ TP_ARGS(tx, idx)
+);
+
+DEFINE_EVENT(/* consume */
+ hfi1_ipoib_tx_template, hfi1_tx_consume,
+ TP_PROTO(struct ipoib_txreq *tx, u32 idx),
+ TP_ARGS(tx, idx)
+);
+
+DEFINE_EVENT(/* alloc_tx */
+ hfi1_ipoib_txq_template, hfi1_txq_alloc_tx,
+ TP_PROTO(struct hfi1_ipoib_txq *txq),
+ TP_ARGS(txq)
+);
+
+DEFINE_EVENT(/* poll */
+ hfi1_ipoib_txq_template, hfi1_txq_poll,
+ TP_PROTO(struct hfi1_ipoib_txq *txq),
+ TP_ARGS(txq)
+);
+
+DEFINE_EVENT(/* complete */
+ hfi1_ipoib_txq_template, hfi1_txq_complete,
+ TP_PROTO(struct hfi1_ipoib_txq *txq),
+ TP_ARGS(txq)
+);
+
#endif /* __HFI1_TRACE_TX_H */
#undef TRACE_INCLUDE_PATH
diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
index 0c86e9d354f8ea..186d3029126069 100644
--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c
+++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
@@ -692,8 +692,7 @@ static int set_rcvarray_entry(struct hfi1_filedata *fd,
* Allocate the node first so we can handle a potential
* failure before we've programmed anything.
*/
- node = kzalloc(sizeof(*node) + (sizeof(struct page *) * npages),
- GFP_KERNEL);
+ node = kzalloc(struct_size(node, pages, npages), GFP_KERNEL);
if (!node)
return -ENOMEM;
@@ -713,7 +712,7 @@ static int set_rcvarray_entry(struct hfi1_filedata *fd,
node->dma_addr = phys;
node->grp = grp;
node->freed = false;
- memcpy(node->pages, pages, sizeof(struct page *) * npages);
+ memcpy(node->pages, pages, flex_array_size(node, pages, npages));
if (fd->use_mn) {
ret = mmu_interval_notifier_insert(
diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
index a8db8a051170e1..ff3742b0460ac0 100644
--- a/drivers/infiniband/hw/mlx5/cmd.c
+++ b/drivers/infiniband/hw/mlx5/cmd.c
@@ -206,3 +206,29 @@ out:
kfree(in);
return err;
}
+
+int mlx5_cmd_uar_alloc(struct mlx5_core_dev *dev, u32 *uarn, u16 uid)
+{
+ u32 out[MLX5_ST_SZ_DW(alloc_uar_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(alloc_uar_in)] = {};
+ int err;
+
+ MLX5_SET(alloc_uar_in, in, opcode, MLX5_CMD_OP_ALLOC_UAR);
+ MLX5_SET(alloc_uar_in, in, uid, uid);
+ err = mlx5_cmd_exec_inout(dev, alloc_uar, in, out);
+ if (err)
+ return err;
+
+ *uarn = MLX5_GET(alloc_uar_out, out, uar);
+ return 0;
+}
+
+int mlx5_cmd_uar_dealloc(struct mlx5_core_dev *dev, u32 uarn, u16 uid)
+{
+ u32 in[MLX5_ST_SZ_DW(dealloc_uar_in)] = {};
+
+ MLX5_SET(dealloc_uar_in, in, opcode, MLX5_CMD_OP_DEALLOC_UAR);
+ MLX5_SET(dealloc_uar_in, in, uar, uarn);
+ MLX5_SET(dealloc_uar_in, in, uid, uid);
+ return mlx5_cmd_exec_in(dev, dealloc_uar, in);
+}
diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h
index 66c96292ed432e..ee46638db5de31 100644
--- a/drivers/infiniband/hw/mlx5/cmd.h
+++ b/drivers/infiniband/hw/mlx5/cmd.h
@@ -57,4 +57,6 @@ int mlx5_cmd_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn, u16 uid);
int mlx5_cmd_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn, u16 uid);
int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb,
u16 opmod, u8 port);
+int mlx5_cmd_uar_alloc(struct mlx5_core_dev *dev, u32 *uarn, u16 uid);
+int mlx5_cmd_uar_dealloc(struct mlx5_core_dev *dev, u32 uarn, u16 uid);
#endif /* MLX5_IB_CMD_H */
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 8664bcf6d3f590..5ec8bd2f0b2ff3 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1643,7 +1643,8 @@ static int allocate_uars(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *conte
bfregi = &context->bfregi;
for (i = 0; i < bfregi->num_static_sys_pages; i++) {
- err = mlx5_cmd_alloc_uar(dev->mdev, &bfregi->sys_pages[i]);
+ err = mlx5_cmd_uar_alloc(dev->mdev, &bfregi->sys_pages[i],
+ context->devx_uid);
if (err)
goto error;
@@ -1657,7 +1658,8 @@ static int allocate_uars(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *conte
error:
for (--i; i >= 0; i--)
- if (mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]))
+ if (mlx5_cmd_uar_dealloc(dev->mdev, bfregi->sys_pages[i],
+ context->devx_uid))
mlx5_ib_warn(dev, "failed to free uar %d\n", i);
return err;
@@ -1673,7 +1675,8 @@ static void deallocate_uars(struct mlx5_ib_dev *dev,
for (i = 0; i < bfregi->num_sys_pages; i++)
if (i < bfregi->num_static_sys_pages ||
bfregi->sys_pages[i] != MLX5_IB_INVALID_UAR_INDEX)
- mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]);
+ mlx5_cmd_uar_dealloc(dev->mdev, bfregi->sys_pages[i],
+ context->devx_uid);
}
int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp)
@@ -1891,6 +1894,13 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
if (req.num_low_latency_bfregs > req.total_num_bfregs - 1)
return -EINVAL;
+ if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) {
+ err = mlx5_ib_devx_create(dev, true);
+ if (err < 0)
+ goto out_ctx;
+ context->devx_uid = err;
+ }
+
lib_uar_4k = req.lib_caps & MLX5_LIB_CAP_4K_UAR;
lib_uar_dyn = req.lib_caps & MLX5_LIB_CAP_DYN_UAR;
bfregi = &context->bfregi;
@@ -1903,7 +1913,7 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
/* updates req->total_num_bfregs */
err = calc_total_bfregs(dev, lib_uar_4k, &req, bfregi);
if (err)
- goto out_ctx;
+ goto out_devx;
mutex_init(&bfregi->lock);
bfregi->lib_uar_4k = lib_uar_4k;
@@ -1911,7 +1921,7 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
GFP_KERNEL);
if (!bfregi->count) {
err = -ENOMEM;
- goto out_ctx;
+ goto out_devx;
}
bfregi->sys_pages = kcalloc(bfregi->num_sys_pages,
@@ -1927,17 +1937,10 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
goto out_sys_pages;
uar_done:
- if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) {
- err = mlx5_ib_devx_create(dev, true);
- if (err < 0)
- goto out_uars;
- context->devx_uid = err;
- }
-
err = mlx5_ib_alloc_transport_domain(dev, &context->tdn,
context->devx_uid);
if (err)
- goto out_devx;
+ goto out_uars;
INIT_LIST_HEAD(&context->db_page_list);
mutex_init(&context->db_page_mutex);
@@ -1972,9 +1975,6 @@ uar_done:
out_mdev:
mlx5_ib_dealloc_transport_domain(dev, context->tdn, context->devx_uid);
-out_devx:
- if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX)
- mlx5_ib_devx_destroy(dev, context->devx_uid);
out_uars:
deallocate_uars(dev, context);
@@ -1985,6 +1985,10 @@ out_sys_pages:
out_count:
kfree(bfregi->count);
+out_devx:
+ if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX)
+ mlx5_ib_devx_destroy(dev, context->devx_uid);
+
out_ctx:
return err;
}
@@ -2021,12 +2025,12 @@ static void mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
bfregi = &context->bfregi;
mlx5_ib_dealloc_transport_domain(dev, context->tdn, context->devx_uid);
- if (context->devx_uid)
- mlx5_ib_devx_destroy(dev, context->devx_uid);
-
deallocate_uars(dev, context);
kfree(bfregi->sys_pages);
kfree(bfregi->count);
+
+ if (context->devx_uid)
+ mlx5_ib_devx_destroy(dev, context->devx_uid);
}
static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev,
@@ -2119,6 +2123,7 @@ static void mlx5_ib_mmap_free(struct rdma_user_mmap_entry *entry)
struct mlx5_user_mmap_entry *mentry = to_mmmap(entry);
struct mlx5_ib_dev *dev = to_mdev(entry->ucontext->device);
struct mlx5_var_table *var_table = &dev->var_table;
+ struct mlx5_ib_ucontext *context = to_mucontext(entry->ucontext);
switch (mentry->mmap_flag) {
case MLX5_IB_MMAP_TYPE_MEMIC:
@@ -2133,7 +2138,8 @@ static void mlx5_ib_mmap_free(struct rdma_user_mmap_entry *entry)
break;
case MLX5_IB_MMAP_TYPE_UAR_WC:
case MLX5_IB_MMAP_TYPE_UAR_NC:
- mlx5_cmd_free_uar(dev->mdev, mentry->page_idx);
+ mlx5_cmd_uar_dealloc(dev->mdev, mentry->page_idx,
+ context->devx_uid);
kfree(mentry);
break;
default:
@@ -2211,7 +2217,8 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
bfregi->count[bfreg_dyn_idx]++;
mutex_unlock(&bfregi->lock);
- err = mlx5_cmd_alloc_uar(dev->mdev, &uar_index);
+ err = mlx5_cmd_uar_alloc(dev->mdev, &uar_index,
+ context->devx_uid);
if (err) {
mlx5_ib_warn(dev, "UAR alloc failed\n");
goto free_bfreg;
@@ -2240,7 +2247,7 @@ err:
if (!dyn_uar)
return err;
- mlx5_cmd_free_uar(dev->mdev, idx);
+ mlx5_cmd_uar_dealloc(dev->mdev, idx, context->devx_uid);
free_bfreg:
mlx5_ib_free_bfreg(dev, bfregi, bfreg_dyn_idx);
@@ -3489,7 +3496,7 @@ alloc_uar_entry(struct mlx5_ib_ucontext *c,
return ERR_PTR(-ENOMEM);
dev = to_mdev(c->ibucontext.device);
- err = mlx5_cmd_alloc_uar(dev->mdev, &uar_index);
+ err = mlx5_cmd_uar_alloc(dev->mdev, &uar_index, c->devx_uid);
if (err)
goto end;
@@ -3507,7 +3514,7 @@ alloc_uar_entry(struct mlx5_ib_ucontext *c,
return entry;
err_insert:
- mlx5_cmd_free_uar(dev->mdev, uar_index);
+ mlx5_cmd_uar_dealloc(dev->mdev, uar_index, c->devx_uid);
end:
kfree(entry);
return ERR_PTR(err);
diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index d2d802c776fdc7..48a3864ada29a3 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -142,10 +142,7 @@ static inline enum comp_state get_wqe(struct rxe_qp *qp,
/* we come here whether or not we found a response packet to see if
* there are any posted WQEs
*/
- if (qp->is_user)
- wqe = queue_head(qp->sq.queue, QUEUE_TYPE_FROM_USER);
- else
- wqe = queue_head(qp->sq.queue, QUEUE_TYPE_KERNEL);
+ wqe = queue_head(qp->sq.queue, QUEUE_TYPE_FROM_CLIENT);
*wqe_p = wqe;
/* no WQE or requester has not started it yet */
@@ -432,10 +429,7 @@ static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
if (post)
make_send_cqe(qp, wqe, &cqe);
- if (qp->is_user)
- advance_consumer(qp->sq.queue, QUEUE_TYPE_FROM_USER);
- else
- advance_consumer(qp->sq.queue, QUEUE_TYPE_KERNEL);
+ queue_advance_consumer(qp->sq.queue, QUEUE_TYPE_FROM_CLIENT);
if (post)
rxe_cq_post(qp->scq, &cqe, 0);
@@ -539,7 +533,7 @@ static void rxe_drain_resp_pkts(struct rxe_qp *qp, bool notify)
wqe->status = IB_WC_WR_FLUSH_ERR;
do_complete(qp, wqe);
} else {
- advance_consumer(q, q->type);
+ queue_advance_consumer(q, q->type);
}
}
}
diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c
index aef288f164fddd..4eedaa0244b398 100644
--- a/drivers/infiniband/sw/rxe/rxe_cq.c
+++ b/drivers/infiniband/sw/rxe/rxe_cq.c
@@ -25,11 +25,7 @@ int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq,
}
if (cq) {
- if (cq->is_user)
- count = queue_count(cq->queue, QUEUE_TYPE_TO_USER);
- else
- count = queue_count(cq->queue, QUEUE_TYPE_KERNEL);
-
+ count = queue_count(cq->queue, QUEUE_TYPE_TO_CLIENT);
if (cqe < count) {
pr_warn("cqe(%d) < current # elements in queue (%d)",
cqe, count);
@@ -65,7 +61,7 @@ int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe,
int err;
enum queue_type type;
- type = uresp ? QUEUE_TYPE_TO_USER : QUEUE_TYPE_KERNEL;
+ type = QUEUE_TYPE_TO_CLIENT;
cq->queue = rxe_queue_init(rxe, &cqe,
sizeof(struct rxe_cqe), type);
if (!cq->queue) {
@@ -117,11 +113,7 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited)
spin_lock_irqsave(&cq->cq_lock, flags);
- if (cq->is_user)
- full = queue_full(cq->queue, QUEUE_TYPE_TO_USER);
- else
- full = queue_full(cq->queue, QUEUE_TYPE_KERNEL);
-
+ full = queue_full(cq->queue, QUEUE_TYPE_TO_CLIENT);
if (unlikely(full)) {
spin_unlock_irqrestore(&cq->cq_lock, flags);
if (cq->ibcq.event_handler) {
@@ -134,17 +126,10 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited)
return -EBUSY;
}
- if (cq->is_user)
- addr = producer_addr(cq->queue, QUEUE_TYPE_TO_USER);
- else
- addr = producer_addr(cq->queue, QUEUE_TYPE_KERNEL);
-
+ addr = queue_producer_addr(cq->queue, QUEUE_TYPE_TO_CLIENT);
memcpy(addr, cqe, sizeof(*cqe));
- if (cq->is_user)
- advance_producer(cq->queue, QUEUE_TYPE_TO_USER);
- else
- advance_producer(cq->queue, QUEUE_TYPE_KERNEL);
+ queue_advance_producer(cq->queue, QUEUE_TYPE_TO_CLIENT);
spin_unlock_irqrestore(&cq->cq_lock, flags);
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index f0c954575bdec7..1ca43b859d806c 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -86,6 +86,8 @@ struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length);
int advance_dma_data(struct rxe_dma_info *dma, unsigned int length);
int rxe_invalidate_mr(struct rxe_qp *qp, u32 rkey);
+int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe);
+int rxe_mr_set_page(struct ib_mr *ibmr, u64 addr);
int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata);
void rxe_mr_cleanup(struct rxe_pool_entry *arg);
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index 5890a824621616..53271df10e47ea 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -24,17 +24,22 @@ u8 rxe_get_next_key(u32 last_key)
int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
{
+ struct rxe_map_set *set = mr->cur_map_set;
+
switch (mr->type) {
- case RXE_MR_TYPE_DMA:
+ case IB_MR_TYPE_DMA:
return 0;
- case RXE_MR_TYPE_MR:
- if (iova < mr->iova || length > mr->length ||
- iova > mr->iova + mr->length - length)
+ case IB_MR_TYPE_USER:
+ case IB_MR_TYPE_MEM_REG:
+ if (iova < set->iova || length > set->length ||
+ iova > set->iova + set->length - length)
return -EFAULT;
return 0;
default:
+ pr_warn("%s: mr type (%d) not supported\n",
+ __func__, mr->type);
return -EFAULT;
}
}
@@ -48,48 +53,101 @@ static void rxe_mr_init(int access, struct rxe_mr *mr)
u32 lkey = mr->pelem.index << 8 | rxe_get_next_key(-1);
u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;
- mr->ibmr.lkey = lkey;
- mr->ibmr.rkey = rkey;
+ /* set ibmr->l/rkey and also copy into private l/rkey
+ * for user MRs these will always be the same
+ * for cases where caller 'owns' the key portion
+ * they may be different until REG_MR WQE is executed.
+ */
+ mr->lkey = mr->ibmr.lkey = lkey;
+ mr->rkey = mr->ibmr.rkey = rkey;
+
mr->state = RXE_MR_STATE_INVALID;
- mr->type = RXE_MR_TYPE_NONE;
mr->map_shift = ilog2(RXE_BUF_PER_MAP);
}
-static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf)
+static void rxe_mr_free_map_set(int num_map, struct rxe_map_set *set)
{
int i;
- int num_map;
- struct rxe_map **map = mr->map;
- num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP;
+ for (i = 0; i < num_map; i++)
+ kfree(set->map[i]);
- mr->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL);
- if (!mr->map)
- goto err1;
+ kfree(set->map);
+ kfree(set);
+}
+
+static int rxe_mr_alloc_map_set(int num_map, struct rxe_map_set **setp)
+{
+ int i;
+ struct rxe_map_set *set;
+
+ set = kmalloc(sizeof(*set), GFP_KERNEL);
+ if (!set)
+ goto err_out;
+
+ set->map = kmalloc_array(num_map, sizeof(struct rxe_map *), GFP_KERNEL);
+ if (!set->map)
+ goto err_free_set;
for (i = 0; i < num_map; i++) {
- mr->map[i] = kmalloc(sizeof(**map), GFP_KERNEL);
- if (!mr->map[i])
- goto err2;
+ set->map[i] = kmalloc(sizeof(struct rxe_map), GFP_KERNEL);
+ if (!set->map[i])
+ goto err_free_map;
}
+ *setp = set;
+
+ return 0;
+
+err_free_map:
+ for (i--; i >= 0; i--)
+ kfree(set->map[i]);
+
+ kfree(set->map);
+err_free_set:
+ kfree(set);
+err_out:
+ return -ENOMEM;
+}
+
+/**
+ * rxe_mr_alloc() - Allocate memory map array(s) for MR
+ * @mr: Memory region
+ * @num_buf: Number of buffer descriptors to support
+ * @both: If non zero allocate both mr->map and mr->next_map
+ * else just allocate mr->map. Used for fast MRs
+ *
+ * Return: 0 on success else an error
+ */
+static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf, int both)
+{
+ int ret;
+ int num_map;
+
BUILD_BUG_ON(!is_power_of_2(RXE_BUF_PER_MAP));
+ num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP;
mr->map_shift = ilog2(RXE_BUF_PER_MAP);
mr->map_mask = RXE_BUF_PER_MAP - 1;
-
mr->num_buf = num_buf;
- mr->num_map = num_map;
mr->max_buf = num_map * RXE_BUF_PER_MAP;
+ mr->num_map = num_map;
- return 0;
+ ret = rxe_mr_alloc_map_set(num_map, &mr->cur_map_set);
+ if (ret)
+ goto err_out;
-err2:
- for (i--; i >= 0; i--)
- kfree(mr->map[i]);
+ if (both) {
+ ret = rxe_mr_alloc_map_set(num_map, &mr->next_map_set);
+ if (ret) {
+ rxe_mr_free_map_set(mr->num_map, mr->cur_map_set);
+ goto err_out;
+ }
+ }
- kfree(mr->map);
-err1:
+ return 0;
+
+err_out:
return -ENOMEM;
}
@@ -100,12 +158,13 @@ void rxe_mr_init_dma(struct rxe_pd *pd, int access, struct rxe_mr *mr)
mr->ibmr.pd = &pd->ibpd;
mr->access = access;
mr->state = RXE_MR_STATE_VALID;
- mr->type = RXE_MR_TYPE_DMA;
+ mr->type = IB_MR_TYPE_DMA;
}
int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
int access, struct rxe_mr *mr)
{
+ struct rxe_map_set *set;
struct rxe_map **map;
struct rxe_phys_buf *buf = NULL;
struct ib_umem *umem;
@@ -113,7 +172,6 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
int num_buf;
void *vaddr;
int err;
- int i;
umem = ib_umem_get(pd->ibpd.device, start, length, access);
if (IS_ERR(umem)) {
@@ -127,18 +185,20 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
rxe_mr_init(access, mr);
- err = rxe_mr_alloc(mr, num_buf);
+ err = rxe_mr_alloc(mr, num_buf, 0);
if (err) {
pr_warn("%s: Unable to allocate memory for map\n",
__func__);
goto err_release_umem;
}
- mr->page_shift = PAGE_SHIFT;
- mr->page_mask = PAGE_SIZE - 1;
+ set = mr->cur_map_set;
+ set->page_shift = PAGE_SHIFT;
+ set->page_mask = PAGE_SIZE - 1;
+
+ num_buf = 0;
+ map = set->map;
- num_buf = 0;
- map = mr->map;
if (length > 0) {
buf = map[0]->buf;
@@ -161,26 +221,24 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
buf->size = PAGE_SIZE;
num_buf++;
buf++;
-
}
}
mr->ibmr.pd = &pd->ibpd;
mr->umem = umem;
mr->access = access;
- mr->length = length;
- mr->iova = iova;
- mr->va = start;
- mr->offset = ib_umem_offset(umem);
mr->state = RXE_MR_STATE_VALID;
- mr->type = RXE_MR_TYPE_MR;
+ mr->type = IB_MR_TYPE_USER;
+
+ set->length = length;
+ set->iova = iova;
+ set->va = start;
+ set->offset = ib_umem_offset(umem);
return 0;
err_cleanup_map:
- for (i = 0; i < mr->num_map; i++)
- kfree(mr->map[i]);
- kfree(mr->map);
+ rxe_mr_free_map_set(mr->num_map, mr->cur_map_set);
err_release_umem:
ib_umem_release(umem);
err_out:
@@ -191,19 +249,17 @@ int rxe_mr_init_fast(struct rxe_pd *pd, int max_pages, struct rxe_mr *mr)
{
int err;
- rxe_mr_init(0, mr);
+ /* always allow remote access for FMRs */
+ rxe_mr_init(IB_ACCESS_REMOTE, mr);
- /* In fastreg, we also set the rkey */
- mr->ibmr.rkey = mr->ibmr.lkey;
-
- err = rxe_mr_alloc(mr, max_pages);
+ err = rxe_mr_alloc(mr, max_pages, 1);
if (err)
goto err1;
mr->ibmr.pd = &pd->ibpd;
mr->max_buf = max_pages;
mr->state = RXE_MR_STATE_FREE;
- mr->type = RXE_MR_TYPE_MR;
+ mr->type = IB_MR_TYPE_MEM_REG;
return 0;
@@ -214,21 +270,24 @@ err1:
static void lookup_iova(struct rxe_mr *mr, u64 iova, int *m_out, int *n_out,
size_t *offset_out)
{
- size_t offset = iova - mr->iova + mr->offset;
+ struct rxe_map_set *set = mr->cur_map_set;
+ size_t offset = iova - set->iova + set->offset;
int map_index;
int buf_index;
u64 length;
+ struct rxe_map *map;
- if (likely(mr->page_shift)) {
- *offset_out = offset & mr->page_mask;
- offset >>= mr->page_shift;
+ if (likely(set->page_shift)) {
+ *offset_out = offset & set->page_mask;
+ offset >>= set->page_shift;
*n_out = offset & mr->map_mask;
*m_out = offset >> mr->map_shift;
} else {
map_index = 0;
buf_index = 0;
- length = mr->map[map_index]->buf[buf_index].size;
+ map = set->map[map_index];
+ length = map->buf[buf_index].size;
while (offset >= length) {
offset -= length;
@@ -238,7 +297,8 @@ static void lookup_iova(struct rxe_mr *mr, u64 iova, int *m_out, int *n_out,
map_index++;
buf_index = 0;
}
- length = mr->map[map_index]->buf[buf_index].size;
+ map = set->map[map_index];
+ length = map->buf[buf_index].size;
}
*m_out = map_index;
@@ -259,7 +319,7 @@ void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
goto out;
}
- if (!mr->map) {
+ if (!mr->cur_map_set) {
addr = (void *)(uintptr_t)iova;
goto out;
}
@@ -272,13 +332,13 @@ void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
lookup_iova(mr, iova, &m, &n, &offset);
- if (offset + length > mr->map[m]->buf[n].size) {
+ if (offset + length > mr->cur_map_set->map[m]->buf[n].size) {
pr_warn("crosses page boundary\n");
addr = NULL;
goto out;
}
- addr = (void *)(uintptr_t)mr->map[m]->buf[n].addr + offset;
+ addr = (void *)(uintptr_t)mr->cur_map_set->map[m]->buf[n].addr + offset;
out:
return addr;
@@ -302,7 +362,7 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
if (length == 0)
return 0;
- if (mr->type == RXE_MR_TYPE_DMA) {
+ if (mr->type == IB_MR_TYPE_DMA) {
u8 *src, *dest;
src = (dir == RXE_TO_MR_OBJ) ? addr : ((void *)(uintptr_t)iova);
@@ -314,7 +374,7 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
return 0;
}
- WARN_ON_ONCE(!mr->map);
+ WARN_ON_ONCE(!mr->cur_map_set);
err = mr_check_range(mr, iova, length);
if (err) {
@@ -324,7 +384,7 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
lookup_iova(mr, iova, &m, &i, &offset);
- map = mr->map + m;
+ map = mr->cur_map_set->map + m;
buf = map[0]->buf + i;
while (length > 0) {
@@ -507,8 +567,8 @@ struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
if (!mr)
return NULL;
- if (unlikely((type == RXE_LOOKUP_LOCAL && mr_lkey(mr) != key) ||
- (type == RXE_LOOKUP_REMOTE && mr_rkey(mr) != key) ||
+ if (unlikely((type == RXE_LOOKUP_LOCAL && mr->lkey != key) ||
+ (type == RXE_LOOKUP_REMOTE && mr->rkey != key) ||
mr_pd(mr) != pd || (access && !(access & mr->access)) ||
mr->state != RXE_MR_STATE_VALID)) {
rxe_drop_ref(mr);
@@ -531,9 +591,9 @@ int rxe_invalidate_mr(struct rxe_qp *qp, u32 rkey)
goto err;
}
- if (rkey != mr->ibmr.rkey) {
- pr_err("%s: rkey (%#x) doesn't match mr->ibmr.rkey (%#x)\n",
- __func__, rkey, mr->ibmr.rkey);
+ if (rkey != mr->rkey) {
+ pr_err("%s: rkey (%#x) doesn't match mr->rkey (%#x)\n",
+ __func__, rkey, mr->rkey);
ret = -EINVAL;
goto err_drop_ref;
}
@@ -545,6 +605,12 @@ int rxe_invalidate_mr(struct rxe_qp *qp, u32 rkey)
goto err_drop_ref;
}
+ if (unlikely(mr->type != IB_MR_TYPE_MEM_REG)) {
+ pr_warn("%s: mr->type (%d) is wrong type\n", __func__, mr->type);
+ ret = -EINVAL;
+ goto err_drop_ref;
+ }
+
mr->state = RXE_MR_STATE_FREE;
ret = 0;
@@ -554,6 +620,67 @@ err:
return ret;
}
+/* user can (re)register fast MR by executing a REG_MR WQE.
+ * user is expected to hold a reference on the ib mr until the
+ * WQE completes.
+ * Once a fast MR is created this is the only way to change the
+ * private keys. It is the responsibility of the user to maintain
+ * the ib mr keys in sync with rxe mr keys.
+ */
+int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
+{
+ struct rxe_mr *mr = to_rmr(wqe->wr.wr.reg.mr);
+ u32 key = wqe->wr.wr.reg.key & 0xff;
+ u32 access = wqe->wr.wr.reg.access;
+ struct rxe_map_set *set;
+
+ /* user can only register MR in free state */
+ if (unlikely(mr->state != RXE_MR_STATE_FREE)) {
+ pr_warn("%s: mr->lkey = 0x%x not free\n",
+ __func__, mr->lkey);
+ return -EINVAL;
+ }
+
+ /* user can only register mr with qp in same protection domain */
+ if (unlikely(qp->ibqp.pd != mr->ibmr.pd)) {
+ pr_warn("%s: qp->pd and mr->pd don't match\n",
+ __func__);
+ return -EINVAL;
+ }
+
+ mr->access = access;
+ mr->lkey = (mr->lkey & ~0xff) | key;
+ mr->rkey = (access & IB_ACCESS_REMOTE) ? mr->lkey : 0;
+ mr->state = RXE_MR_STATE_VALID;
+
+ set = mr->cur_map_set;
+ mr->cur_map_set = mr->next_map_set;
+ mr->cur_map_set->iova = wqe->wr.wr.reg.mr->iova;
+ mr->next_map_set = set;
+
+ return 0;
+}
+
+int rxe_mr_set_page(struct ib_mr *ibmr, u64 addr)
+{
+ struct rxe_mr *mr = to_rmr(ibmr);
+ struct rxe_map_set *set = mr->next_map_set;
+ struct rxe_map *map;
+ struct rxe_phys_buf *buf;
+
+ if (unlikely(set->nbuf == mr->num_buf))
+ return -ENOMEM;
+
+ map = set->map[set->nbuf / RXE_BUF_PER_MAP];
+ buf = &map->buf[set->nbuf % RXE_BUF_PER_MAP];
+
+ buf->addr = addr;
+ buf->size = ibmr->page_size;
+ set->nbuf++;
+
+ return 0;
+}
+
int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
struct rxe_mr *mr = to_rmr(ibmr);
@@ -564,7 +691,7 @@ int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
return -EINVAL;
}
- mr->state = RXE_MR_STATE_ZOMBIE;
+ mr->state = RXE_MR_STATE_INVALID;
rxe_drop_ref(mr_pd(mr));
rxe_drop_index(mr);
rxe_drop_ref(mr);
@@ -575,14 +702,12 @@ int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
void rxe_mr_cleanup(struct rxe_pool_entry *arg)
{
struct rxe_mr *mr = container_of(arg, typeof(*mr), pelem);
- int i;
ib_umem_release(mr->umem);
- if (mr->map) {
- for (i = 0; i < mr->num_map; i++)
- kfree(mr->map[i]);
+ if (mr->cur_map_set)
+ rxe_mr_free_map_set(mr->num_map, mr->cur_map_set);
- kfree(mr->map);
- }
+ if (mr->next_map_set)
+ rxe_mr_free_map_set(mr->num_map, mr->next_map_set);
}
diff --git a/drivers/infiniband/sw/rxe/rxe_mw.c b/drivers/infiniband/sw/rxe/rxe_mw.c
index 5ba77df7598ed2..9534a7fe1a98d2 100644
--- a/drivers/infiniband/sw/rxe/rxe_mw.c
+++ b/drivers/infiniband/sw/rxe/rxe_mw.c
@@ -21,7 +21,7 @@ int rxe_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
}
rxe_add_index(mw);
- ibmw->rkey = (mw->pelem.index << 8) | rxe_get_next_key(-1);
+ mw->rkey = ibmw->rkey = (mw->pelem.index << 8) | rxe_get_next_key(-1);
mw->state = (mw->ibmw.type == IB_MW_TYPE_2) ?
RXE_MW_STATE_FREE : RXE_MW_STATE_VALID;
spin_lock_init(&mw->lock);
@@ -71,6 +71,8 @@ int rxe_dealloc_mw(struct ib_mw *ibmw)
static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
struct rxe_mw *mw, struct rxe_mr *mr)
{
+ u32 key = wqe->wr.wr.mw.rkey & 0xff;
+
if (mw->ibmw.type == IB_MW_TYPE_1) {
if (unlikely(mw->state != RXE_MW_STATE_VALID)) {
pr_err_once(
@@ -108,7 +110,7 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
}
}
- if (unlikely((wqe->wr.wr.mw.rkey & 0xff) == (mw->ibmw.rkey & 0xff))) {
+ if (unlikely(key == (mw->rkey & 0xff))) {
pr_err_once("attempt to bind MW with same key\n");
return -EINVAL;
}
@@ -140,15 +142,15 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
/* C10-75 */
if (mw->access & IB_ZERO_BASED) {
- if (unlikely(wqe->wr.wr.mw.length > mr->length)) {
+ if (unlikely(wqe->wr.wr.mw.length > mr->cur_map_set->length)) {
pr_err_once(
"attempt to bind a ZB MW outside of the MR\n");
return -EINVAL;
}
} else {
- if (unlikely((wqe->wr.wr.mw.addr < mr->iova) ||
+ if (unlikely((wqe->wr.wr.mw.addr < mr->cur_map_set->iova) ||
((wqe->wr.wr.mw.addr + wqe->wr.wr.mw.length) >
- (mr->iova + mr->length)))) {
+ (mr->cur_map_set->iova + mr->cur_map_set->length)))) {
pr_err_once(
"attempt to bind a VA MW outside of the MR\n");
return -EINVAL;
@@ -161,13 +163,9 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
static void rxe_do_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
struct rxe_mw *mw, struct rxe_mr *mr)
{
- u32 rkey;
- u32 new_rkey;
-
- rkey = mw->ibmw.rkey;
- new_rkey = (rkey & 0xffffff00) | (wqe->wr.wr.mw.rkey & 0x000000ff);
+ u32 key = wqe->wr.wr.mw.rkey & 0xff;
- mw->ibmw.rkey = new_rkey;
+ mw->rkey = (mw->rkey & ~0xff) | key;
mw->access = wqe->wr.wr.mw.access;
mw->state = RXE_MW_STATE_VALID;
mw->addr = wqe->wr.wr.mw.addr;
@@ -197,29 +195,29 @@ int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
struct rxe_mw *mw;
struct rxe_mr *mr;
struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+ u32 mw_rkey = wqe->wr.wr.mw.mw_rkey;
+ u32 mr_lkey = wqe->wr.wr.mw.mr_lkey;
unsigned long flags;
- mw = rxe_pool_get_index(&rxe->mw_pool,
- wqe->wr.wr.mw.mw_rkey >> 8);
+ mw = rxe_pool_get_index(&rxe->mw_pool, mw_rkey >> 8);
if (unlikely(!mw)) {
ret = -EINVAL;
goto err;
}
- if (unlikely(mw->ibmw.rkey != wqe->wr.wr.mw.mw_rkey)) {
+ if (unlikely(mw->rkey != mw_rkey)) {
ret = -EINVAL;
goto err_drop_mw;
}
if (likely(wqe->wr.wr.mw.length)) {
- mr = rxe_pool_get_index(&rxe->mr_pool,
- wqe->wr.wr.mw.mr_lkey >> 8);
+ mr = rxe_pool_get_index(&rxe->mr_pool, mr_lkey >> 8);
if (unlikely(!mr)) {
ret = -EINVAL;
goto err_drop_mw;
}
- if (unlikely(mr->ibmr.lkey != wqe->wr.wr.mw.mr_lkey)) {
+ if (unlikely(mr->lkey != mr_lkey)) {
ret = -EINVAL;
goto err_drop_mr;
}
@@ -292,7 +290,7 @@ int rxe_invalidate_mw(struct rxe_qp *qp, u32 rkey)
goto err;
}
- if (rkey != mw->ibmw.rkey) {
+ if (rkey != mw->rkey) {
ret = -EINVAL;
goto err_drop_ref;
}
@@ -323,7 +321,7 @@ struct rxe_mw *rxe_lookup_mw(struct rxe_qp *qp, int access, u32 rkey)
if (!mw)
return NULL;
- if (unlikely((rxe_mw_rkey(mw) != rkey) || rxe_mw_pd(mw) != pd ||
+ if (unlikely((mw->rkey != rkey) || rxe_mw_pd(mw) != pd ||
(mw->ibmw.type == IB_MW_TYPE_2 && mw->qp != qp) ||
(mw->length == 0) ||
(access && !(access & mw->access)) ||
diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.h b/drivers/infiniband/sw/rxe/rxe_opcode.h
index e02f039b8c444a..8f9aaaf260f293 100644
--- a/drivers/infiniband/sw/rxe/rxe_opcode.h
+++ b/drivers/infiniband/sw/rxe/rxe_opcode.h
@@ -22,7 +22,6 @@ enum rxe_wr_mask {
WR_LOCAL_OP_MASK = BIT(5),
WR_READ_OR_WRITE_MASK = WR_READ_MASK | WR_WRITE_MASK,
- WR_READ_WRITE_OR_SEND_MASK = WR_READ_OR_WRITE_MASK | WR_SEND_MASK,
WR_WRITE_OR_SEND_MASK = WR_WRITE_MASK | WR_SEND_MASK,
WR_ATOMIC_OR_READ_MASK = WR_ATOMIC_MASK | WR_READ_MASK,
};
@@ -82,8 +81,9 @@ enum rxe_hdr_mask {
RXE_LOOPBACK_MASK = BIT(NUM_HDR_TYPES + 12),
- RXE_READ_OR_ATOMIC = (RXE_READ_MASK | RXE_ATOMIC_MASK),
- RXE_WRITE_OR_SEND = (RXE_WRITE_MASK | RXE_SEND_MASK),
+ RXE_READ_OR_ATOMIC_MASK = (RXE_READ_MASK | RXE_ATOMIC_MASK),
+ RXE_WRITE_OR_SEND_MASK = (RXE_WRITE_MASK | RXE_SEND_MASK),
+ RXE_READ_OR_WRITE_MASK = (RXE_READ_MASK | RXE_WRITE_MASK),
};
#define OPCODE_NONE (-1)
diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h
index 742e6ec93686cc..b5a70cbe94aace 100644
--- a/drivers/infiniband/sw/rxe/rxe_param.h
+++ b/drivers/infiniband/sw/rxe/rxe_param.h
@@ -113,7 +113,7 @@ enum rxe_device_param {
/* default/initial rxe port parameters */
enum rxe_port_param {
RXE_PORT_GID_TBL_LEN = 1024,
- RXE_PORT_PORT_CAP_FLAGS = RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP,
+ RXE_PORT_PORT_CAP_FLAGS = IB_PORT_CM_SUP,
RXE_PORT_MAX_MSG_SZ = 0x800000,
RXE_PORT_BAD_PKEY_CNTR = 0,
RXE_PORT_QKEY_VIOL_CNTR = 0,
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index 1ab6af7ddb2549..c8f4790083d235 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -190,8 +190,6 @@ static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp,
INIT_LIST_HEAD(&qp->grp_list);
- skb_queue_head_init(&qp->send_pkts);
-
spin_lock_init(&qp->grp_lock);
spin_lock_init(&qp->state_lock);
@@ -231,7 +229,7 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
qp->sq.max_inline = init->cap.max_inline_data = wqe_size;
wqe_size += sizeof(struct rxe_send_wqe);
- type = uresp ? QUEUE_TYPE_FROM_USER : QUEUE_TYPE_KERNEL;
+ type = QUEUE_TYPE_FROM_CLIENT;
qp->sq.queue = rxe_queue_init(rxe, &qp->sq.max_wr,
wqe_size, type);
if (!qp->sq.queue)
@@ -248,12 +246,8 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
return err;
}
- if (qp->is_user)
- qp->req.wqe_index = producer_index(qp->sq.queue,
- QUEUE_TYPE_FROM_USER);
- else
- qp->req.wqe_index = producer_index(qp->sq.queue,
- QUEUE_TYPE_KERNEL);
+ qp->req.wqe_index = queue_get_producer(qp->sq.queue,
+ QUEUE_TYPE_FROM_CLIENT);
qp->req.state = QP_STATE_RESET;
qp->req.opcode = -1;
@@ -293,7 +287,7 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp,
pr_debug("qp#%d max_wr = %d, max_sge = %d, wqe_size = %d\n",
qp_num(qp), qp->rq.max_wr, qp->rq.max_sge, wqe_size);
- type = uresp ? QUEUE_TYPE_FROM_USER : QUEUE_TYPE_KERNEL;
+ type = QUEUE_TYPE_FROM_CLIENT;
qp->rq.queue = rxe_queue_init(rxe, &qp->rq.max_wr,
wqe_size, type);
if (!qp->rq.queue)
diff --git a/drivers/infiniband/sw/rxe/rxe_queue.c b/drivers/infiniband/sw/rxe/rxe_queue.c
index 72d95398e60410..6e6e023c1b45db 100644
--- a/drivers/infiniband/sw/rxe/rxe_queue.c
+++ b/drivers/infiniband/sw/rxe/rxe_queue.c
@@ -111,17 +111,33 @@ err1:
static int resize_finish(struct rxe_queue *q, struct rxe_queue *new_q,
unsigned int num_elem)
{
- if (!queue_empty(q, q->type) && (num_elem < queue_count(q, q->type)))
+ enum queue_type type = q->type;
+ u32 prod;
+ u32 cons;
+
+ if (!queue_empty(q, q->type) && (num_elem < queue_count(q, type)))
return -EINVAL;
- while (!queue_empty(q, q->type)) {
- memcpy(producer_addr(new_q, new_q->type),
- consumer_addr(q, q->type),
- new_q->elem_size);
- advance_producer(new_q, new_q->type);
- advance_consumer(q, q->type);
+ prod = queue_get_producer(new_q, type);
+ cons = queue_get_consumer(q, type);
+
+ while (!queue_empty(q, type)) {
+ memcpy(queue_addr_from_index(new_q, prod),
+ queue_addr_from_index(q, cons), new_q->elem_size);
+ prod = queue_next_index(new_q, prod);
+ cons = queue_next_index(q, cons);
}
+ new_q->buf->producer_index = prod;
+ q->buf->consumer_index = cons;
+
+ /* update private index copies */
+ if (type == QUEUE_TYPE_TO_CLIENT)
+ new_q->index = new_q->buf->producer_index;
+ else
+ q->index = q->buf->consumer_index;
+
+ /* exchange rxe_queue headers */
swap(*q, *new_q);
return 0;
diff --git a/drivers/infiniband/sw/rxe/rxe_queue.h b/drivers/infiniband/sw/rxe/rxe_queue.h
index 2702b0e55fc330..6227112ef7a2f0 100644
--- a/drivers/infiniband/sw/rxe/rxe_queue.h
+++ b/drivers/infiniband/sw/rxe/rxe_queue.h
@@ -10,34 +10,47 @@
/* for definition of shared struct rxe_queue_buf */
#include <uapi/rdma/rdma_user_rxe.h>
-/* implements a simple circular buffer that can optionally be
- * shared between user space and the kernel and can be resized
- * the requested element size is rounded up to a power of 2
- * and the number of elements in the buffer is also rounded
- * up to a power of 2. Since the queue is empty when the
- * producer and consumer indices match the maximum capacity
- * of the queue is one less than the number of element slots
+/* Implements a simple circular buffer that is shared between user
+ * and the driver and can be resized. The requested element size is
+ * rounded up to a power of 2 and the number of elements in the buffer
+ * is also rounded up to a power of 2. Since the queue is empty when
+ * the producer and consumer indices match the maximum capacity of the
+ * queue is one less than the number of element slots.
*
* Notes:
- * - Kernel space indices are always masked off to q->index_mask
- * before storing so do not need to be checked on reads.
- * - User space indices may be out of range and must be
- * masked before use when read.
- * - The kernel indices for shared queues must not be written
- * by user space so a local copy is used and a shared copy is
- * stored when the local copy changes.
+ * - The driver indices are always masked off to q->index_mask
+ * before storing so do not need to be checked on reads.
+ * - The user whether user space or kernel is generally
+ * not trusted so its parameters are masked to make sure
+ * they do not access the queue out of bounds on reads.
+ * - The driver indices for queues must not be written
+ * by user so a local copy is used and a shared copy is
+ * stored when the local copy is changed.
* - By passing the type in the parameter list separate from q
- * the compiler can eliminate the switch statement when the
- * actual queue type is known when the function is called.
- * In the performance path this is done. In less critical
- * paths just q->type is passed.
+ * the compiler can eliminate the switch statement when the
+ * actual queue type is known when the function is called at
+ * compile time.
+ * - These queues are lock free. The user and driver must protect
+ * changes to their end of the queues with locks if more than one
+ * CPU can be accessing it at the same time.
*/
-/* type of queue */
+/**
+ * enum queue_type - type of queue
+ * @QUEUE_TYPE_TO_CLIENT: Queue is written by rxe driver and
+ * read by client. Used by rxe driver only.
+ * @QUEUE_TYPE_FROM_CLIENT: Queue is written by client and
+ * read by rxe driver. Used by rxe driver only.
+ * @QUEUE_TYPE_TO_DRIVER: Queue is written by client and
+ * read by rxe driver. Used by kernel client only.
+ * @QUEUE_TYPE_FROM_DRIVER: Queue is written by rxe driver and
+ * read by client. Used by kernel client only.
+ */
enum queue_type {
- QUEUE_TYPE_KERNEL,
- QUEUE_TYPE_TO_USER,
- QUEUE_TYPE_FROM_USER,
+ QUEUE_TYPE_TO_CLIENT,
+ QUEUE_TYPE_FROM_CLIENT,
+ QUEUE_TYPE_TO_DRIVER,
+ QUEUE_TYPE_FROM_DRIVER,
};
struct rxe_queue {
@@ -69,238 +82,171 @@ struct rxe_queue *rxe_queue_init(struct rxe_dev *rxe, int *num_elem,
int rxe_queue_resize(struct rxe_queue *q, unsigned int *num_elem_p,
unsigned int elem_size, struct ib_udata *udata,
struct mminfo __user *outbuf,
- /* Protect producers while resizing queue */
- spinlock_t *producer_lock,
- /* Protect consumers while resizing queue */
- spinlock_t *consumer_lock);
+ spinlock_t *producer_lock, spinlock_t *consumer_lock);
void rxe_queue_cleanup(struct rxe_queue *queue);
-static inline int next_index(struct rxe_queue *q, int index)
+static inline u32 queue_next_index(struct rxe_queue *q, int index)
{
- return (index + 1) & q->buf->index_mask;
+ return (index + 1) & q->index_mask;
}
-static inline int queue_empty(struct rxe_queue *q, enum queue_type type)
+static inline u32 queue_get_producer(const struct rxe_queue *q,
+ enum queue_type type)
{
u32 prod;
- u32 cons;
switch (type) {
- case QUEUE_TYPE_FROM_USER:
- /* protect user space index */
+ case QUEUE_TYPE_FROM_CLIENT:
+ /* protect user index */
prod = smp_load_acquire(&q->buf->producer_index);
- cons = q->index;
break;
- case QUEUE_TYPE_TO_USER:
+ case QUEUE_TYPE_TO_CLIENT:
prod = q->index;
- /* protect user space index */
- cons = smp_load_acquire(&q->buf->consumer_index);
break;
- case QUEUE_TYPE_KERNEL:
+ case QUEUE_TYPE_FROM_DRIVER:
+ /* protect driver index */
+ prod = smp_load_acquire(&q->buf->producer_index);
+ break;
+ case QUEUE_TYPE_TO_DRIVER:
prod = q->buf->producer_index;
- cons = q->buf->consumer_index;
break;
}
- return ((prod - cons) & q->index_mask) == 0;
+ return prod;
}
-static inline int queue_full(struct rxe_queue *q, enum queue_type type)
+static inline u32 queue_get_consumer(const struct rxe_queue *q,
+ enum queue_type type)
{
- u32 prod;
u32 cons;
switch (type) {
- case QUEUE_TYPE_FROM_USER:
- /* protect user space index */
- prod = smp_load_acquire(&q->buf->producer_index);
+ case QUEUE_TYPE_FROM_CLIENT:
cons = q->index;
break;
- case QUEUE_TYPE_TO_USER:
- prod = q->index;
- /* protect user space index */
+ case QUEUE_TYPE_TO_CLIENT:
+ /* protect user index */
cons = smp_load_acquire(&q->buf->consumer_index);
break;
- case QUEUE_TYPE_KERNEL:
- prod = q->buf->producer_index;
+ case QUEUE_TYPE_FROM_DRIVER:
cons = q->buf->consumer_index;
break;
+ case QUEUE_TYPE_TO_DRIVER:
+ /* protect driver index */
+ cons = smp_load_acquire(&q->buf->consumer_index);
+ break;
}
- return ((prod + 1 - cons) & q->index_mask) == 0;
+ return cons;
}
-static inline unsigned int queue_count(const struct rxe_queue *q,
- enum queue_type type)
+static inline int queue_empty(struct rxe_queue *q, enum queue_type type)
{
- u32 prod;
- u32 cons;
-
- switch (type) {
- case QUEUE_TYPE_FROM_USER:
- /* protect user space index */
- prod = smp_load_acquire(&q->buf->producer_index);
- cons = q->index;
- break;
- case QUEUE_TYPE_TO_USER:
- prod = q->index;
- /* protect user space index */
- cons = smp_load_acquire(&q->buf->consumer_index);
- break;
- case QUEUE_TYPE_KERNEL:
- prod = q->buf->producer_index;
- cons = q->buf->consumer_index;
- break;
- }
+ u32 prod = queue_get_producer(q, type);
+ u32 cons = queue_get_consumer(q, type);
- return (prod - cons) & q->index_mask;
+ return ((prod - cons) & q->index_mask) == 0;
}
-static inline void advance_producer(struct rxe_queue *q, enum queue_type type)
+static inline int queue_full(struct rxe_queue *q, enum queue_type type)
{
- u32 prod;
+ u32 prod = queue_get_producer(q, type);
+ u32 cons = queue_get_consumer(q, type);
- switch (type) {
- case QUEUE_TYPE_FROM_USER:
- pr_warn_once("Normally kernel should not write user space index\n");
- /* protect user space index */
- prod = smp_load_acquire(&q->buf->producer_index);
- prod = (prod + 1) & q->index_mask;
- /* same */
- smp_store_release(&q->buf->producer_index, prod);
- break;
- case QUEUE_TYPE_TO_USER:
- prod = q->index;
- q->index = (prod + 1) & q->index_mask;
- q->buf->producer_index = q->index;
- break;
- case QUEUE_TYPE_KERNEL:
- prod = q->buf->producer_index;
- q->buf->producer_index = (prod + 1) & q->index_mask;
- break;
- }
+ return ((prod + 1 - cons) & q->index_mask) == 0;
}
-static inline void advance_consumer(struct rxe_queue *q, enum queue_type type)
+static inline u32 queue_count(const struct rxe_queue *q,
+ enum queue_type type)
{
- u32 cons;
+ u32 prod = queue_get_producer(q, type);
+ u32 cons = queue_get_consumer(q, type);
- switch (type) {
- case QUEUE_TYPE_FROM_USER:
- cons = q->index;
- q->index = (cons + 1) & q->index_mask;
- q->buf->consumer_index = q->index;
- break;
- case QUEUE_TYPE_TO_USER:
- pr_warn_once("Normally kernel should not write user space index\n");
- /* protect user space index */
- cons = smp_load_acquire(&q->buf->consumer_index);
- cons = (cons + 1) & q->index_mask;
- /* same */
- smp_store_release(&q->buf->consumer_index, cons);
- break;
- case QUEUE_TYPE_KERNEL:
- cons = q->buf->consumer_index;
- q->buf->consumer_index = (cons + 1) & q->index_mask;
- break;
- }
+ return (prod - cons) & q->index_mask;
}
-static inline void *producer_addr(struct rxe_queue *q, enum queue_type type)
+static inline void queue_advance_producer(struct rxe_queue *q,
+ enum queue_type type)
{
u32 prod;
switch (type) {
- case QUEUE_TYPE_FROM_USER:
- /* protect user space index */
- prod = smp_load_acquire(&q->buf->producer_index);
- prod &= q->index_mask;
+ case QUEUE_TYPE_FROM_CLIENT:
+ pr_warn("%s: attempt to advance client index\n",
+ __func__);
break;
- case QUEUE_TYPE_TO_USER:
+ case QUEUE_TYPE_TO_CLIENT:
prod = q->index;
+ prod = (prod + 1) & q->index_mask;
+ q->index = prod;
+ /* protect user index */
+ smp_store_release(&q->buf->producer_index, prod);
+ break;
+ case QUEUE_TYPE_FROM_DRIVER:
+ pr_warn("%s: attempt to advance driver index\n",
+ __func__);
break;
- case QUEUE_TYPE_KERNEL:
+ case QUEUE_TYPE_TO_DRIVER:
prod = q->buf->producer_index;
+ prod = (prod + 1) & q->index_mask;
+ q->buf->producer_index = prod;
break;
}
-
- return q->buf->data + (prod << q->log2_elem_size);
}
-static inline void *consumer_addr(struct rxe_queue *q, enum queue_type type)
+static inline void queue_advance_consumer(struct rxe_queue *q,
+ enum queue_type type)
{
u32 cons;
switch (type) {
- case QUEUE_TYPE_FROM_USER:
+ case QUEUE_TYPE_FROM_CLIENT:
cons = q->index;
+ cons = (cons + 1) & q->index_mask;
+ q->index = cons;
+ /* protect user index */
+ smp_store_release(&q->buf->consumer_index, cons);
break;
- case QUEUE_TYPE_TO_USER:
- /* protect user space index */
- cons = smp_load_acquire(&q->buf->consumer_index);
- cons &= q->index_mask;
+ case QUEUE_TYPE_TO_CLIENT:
+ pr_warn("%s: attempt to advance client index\n",
+ __func__);
break;
- case QUEUE_TYPE_KERNEL:
+ case QUEUE_TYPE_FROM_DRIVER:
cons = q->buf->consumer_index;
+ cons = (cons + 1) & q->index_mask;
+ q->buf->consumer_index = cons;
+ break;
+ case QUEUE_TYPE_TO_DRIVER:
+ pr_warn("%s: attempt to advance driver index\n",
+ __func__);
break;
}
-
- return q->buf->data + (cons << q->log2_elem_size);
}
-static inline unsigned int producer_index(struct rxe_queue *q,
- enum queue_type type)
+static inline void *queue_producer_addr(struct rxe_queue *q,
+ enum queue_type type)
{
- u32 prod;
+ u32 prod = queue_get_producer(q, type);
- switch (type) {
- case QUEUE_TYPE_FROM_USER:
- /* protect user space index */
- prod = smp_load_acquire(&q->buf->producer_index);
- prod &= q->index_mask;
- break;
- case QUEUE_TYPE_TO_USER:
- prod = q->index;
- break;
- case QUEUE_TYPE_KERNEL:
- prod = q->buf->producer_index;
- break;
- }
-
- return prod;
+ return q->buf->data + (prod << q->log2_elem_size);
}
-static inline unsigned int consumer_index(struct rxe_queue *q,
- enum queue_type type)
+static inline void *queue_consumer_addr(struct rxe_queue *q,
+ enum queue_type type)
{
- u32 cons;
-
- switch (type) {
- case QUEUE_TYPE_FROM_USER:
- cons = q->index;
- break;
- case QUEUE_TYPE_TO_USER:
- /* protect user space index */
- cons = smp_load_acquire(&q->buf->consumer_index);
- cons &= q->index_mask;
- break;
- case QUEUE_TYPE_KERNEL:
- cons = q->buf->consumer_index;
- break;
- }
+ u32 cons = queue_get_consumer(q, type);
- return cons;
+ return q->buf->data + (cons << q->log2_elem_size);
}
-static inline void *addr_from_index(struct rxe_queue *q,
- unsigned int index)
+static inline void *queue_addr_from_index(struct rxe_queue *q, u32 index)
{
return q->buf->data + ((index & q->index_mask)
- << q->buf->log2_elem_size);
+ << q->log2_elem_size);
}
-static inline unsigned int index_from_addr(const struct rxe_queue *q,
+static inline u32 queue_index_from_addr(const struct rxe_queue *q,
const void *addr)
{
return (((u8 *)addr - q->buf->data) >> q->log2_elem_size)
@@ -309,7 +255,7 @@ static inline unsigned int index_from_addr(const struct rxe_queue *q,
static inline void *queue_head(struct rxe_queue *q, enum queue_type type)
{
- return queue_empty(q, type) ? NULL : consumer_addr(q, type);
+ return queue_empty(q, type) ? NULL : queue_consumer_addr(q, type);
}
#endif /* RXE_QUEUE_H */
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 3894197a82f62f..fe275fcaffbd91 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -49,21 +49,16 @@ static void req_retry(struct rxe_qp *qp)
unsigned int cons;
unsigned int prod;
- if (qp->is_user) {
- cons = consumer_index(q, QUEUE_TYPE_FROM_USER);
- prod = producer_index(q, QUEUE_TYPE_FROM_USER);
- } else {
- cons = consumer_index(q, QUEUE_TYPE_KERNEL);
- prod = producer_index(q, QUEUE_TYPE_KERNEL);
- }
+ cons = queue_get_consumer(q, QUEUE_TYPE_FROM_CLIENT);
+ prod = queue_get_producer(q, QUEUE_TYPE_FROM_CLIENT);
qp->req.wqe_index = cons;
qp->req.psn = qp->comp.psn;
qp->req.opcode = -1;
for (wqe_index = cons; wqe_index != prod;
- wqe_index = next_index(q, wqe_index)) {
- wqe = addr_from_index(qp->sq.queue, wqe_index);
+ wqe_index = queue_next_index(q, wqe_index)) {
+ wqe = queue_addr_from_index(qp->sq.queue, wqe_index);
mask = wr_opcode_mask(wqe->wr.opcode, qp);
if (wqe->state == wqe_state_posted)
@@ -121,15 +116,9 @@ static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp)
unsigned int cons;
unsigned int prod;
- if (qp->is_user) {
- wqe = queue_head(q, QUEUE_TYPE_FROM_USER);
- cons = consumer_index(q, QUEUE_TYPE_FROM_USER);
- prod = producer_index(q, QUEUE_TYPE_FROM_USER);
- } else {
- wqe = queue_head(q, QUEUE_TYPE_KERNEL);
- cons = consumer_index(q, QUEUE_TYPE_KERNEL);
- prod = producer_index(q, QUEUE_TYPE_KERNEL);
- }
+ wqe = queue_head(q, QUEUE_TYPE_FROM_CLIENT);
+ cons = queue_get_consumer(q, QUEUE_TYPE_FROM_CLIENT);
+ prod = queue_get_producer(q, QUEUE_TYPE_FROM_CLIENT);
if (unlikely(qp->req.state == QP_STATE_DRAIN)) {
/* check to see if we are drained;
@@ -170,7 +159,7 @@ static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp)
if (index == prod)
return NULL;
- wqe = addr_from_index(q, index);
+ wqe = queue_addr_from_index(q, index);
if (unlikely((qp->req.state == QP_STATE_DRAIN ||
qp->req.state == QP_STATE_DRAINED) &&
@@ -472,7 +461,7 @@ static int finish_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
if (err)
return err;
- if (pkt->mask & RXE_WRITE_OR_SEND) {
+ if (pkt->mask & RXE_WRITE_OR_SEND_MASK) {
if (wqe->wr.send_flags & IB_SEND_INLINE) {
u8 *tmp = &wqe->dma.inline_data[wqe->dma.sge_offset];
@@ -560,7 +549,8 @@ static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
qp->req.opcode = pkt->opcode;
if (pkt->mask & RXE_END_MASK)
- qp->req.wqe_index = next_index(qp->sq.queue, qp->req.wqe_index);
+ qp->req.wqe_index = queue_next_index(qp->sq.queue,
+ qp->req.wqe_index);
qp->need_req_skb = 0;
@@ -572,7 +562,6 @@ static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
static int rxe_do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
{
u8 opcode = wqe->wr.opcode;
- struct rxe_mr *mr;
u32 rkey;
int ret;
@@ -590,14 +579,11 @@ static int rxe_do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
}
break;
case IB_WR_REG_MR:
- mr = to_rmr(wqe->wr.wr.reg.mr);
- rxe_add_ref(mr);
- mr->state = RXE_MR_STATE_VALID;
- mr->access = wqe->wr.wr.reg.access;
- mr->ibmr.lkey = wqe->wr.wr.reg.key;
- mr->ibmr.rkey = wqe->wr.wr.reg.key;
- mr->iova = wqe->wr.wr.reg.mr->iova;
- rxe_drop_ref(mr);
+ ret = rxe_reg_fast_mr(qp, wqe);
+ if (unlikely(ret)) {
+ wqe->status = IB_WC_LOC_QP_OP_ERR;
+ return ret;
+ }
break;
case IB_WR_BIND_MW:
ret = rxe_bind_mw(qp, wqe);
@@ -614,7 +600,7 @@ static int rxe_do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
wqe->state = wqe_state_done;
wqe->status = IB_WC_SUCCESS;
- qp->req.wqe_index = next_index(qp->sq.queue, qp->req.wqe_index);
+ qp->req.wqe_index = queue_next_index(qp->sq.queue, qp->req.wqe_index);
if ((wqe->wr.send_flags & IB_SEND_SIGNALED) ||
qp->sq_sig_type == IB_SIGNAL_ALL_WR)
@@ -645,7 +631,8 @@ next_wqe:
goto exit;
if (unlikely(qp->req.state == QP_STATE_RESET)) {
- qp->req.wqe_index = consumer_index(q, q->type);
+ qp->req.wqe_index = queue_get_consumer(q,
+ QUEUE_TYPE_FROM_CLIENT);
qp->req.opcode = -1;
qp->req.need_rd_atomic = 0;
qp->req.wait_psn = 0;
@@ -691,13 +678,13 @@ next_wqe:
}
mask = rxe_opcode[opcode].mask;
- if (unlikely(mask & RXE_READ_OR_ATOMIC)) {
+ if (unlikely(mask & RXE_READ_OR_ATOMIC_MASK)) {
if (check_init_depth(qp, wqe))
goto exit;
}
mtu = get_mtu(qp);
- payload = (mask & RXE_WRITE_OR_SEND) ? wqe->dma.resid : 0;
+ payload = (mask & RXE_WRITE_OR_SEND_MASK) ? wqe->dma.resid : 0;
if (payload > mtu) {
if (qp_type(qp) == IB_QPT_UD) {
/* C10-93.1.1: If the total sum of all the buffer lengths specified for a
@@ -711,7 +698,7 @@ next_wqe:
wqe->last_psn = qp->req.psn;
qp->req.psn = (qp->req.psn + 1) & BTH_PSN_MASK;
qp->req.opcode = IB_OPCODE_UD_SEND_ONLY;
- qp->req.wqe_index = next_index(qp->sq.queue,
+ qp->req.wqe_index = queue_next_index(qp->sq.queue,
qp->req.wqe_index);
wqe->state = wqe_state_done;
wqe->status = IB_WC_SUCCESS;
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 5501227ddc650f..4af0dc95784a81 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -303,10 +303,7 @@ static enum resp_states get_srq_wqe(struct rxe_qp *qp)
spin_lock_bh(&srq->rq.consumer_lock);
- if (qp->is_user)
- wqe = queue_head(q, QUEUE_TYPE_FROM_USER);
- else
- wqe = queue_head(q, QUEUE_TYPE_KERNEL);
+ wqe = queue_head(q, QUEUE_TYPE_FROM_CLIENT);
if (!wqe) {
spin_unlock_bh(&srq->rq.consumer_lock);
return RESPST_ERR_RNR;
@@ -322,13 +319,8 @@ static enum resp_states get_srq_wqe(struct rxe_qp *qp)
memcpy(&qp->resp.srq_wqe, wqe, size);
qp->resp.wqe = &qp->resp.srq_wqe.wqe;
- if (qp->is_user) {
- advance_consumer(q, QUEUE_TYPE_FROM_USER);
- count = queue_count(q, QUEUE_TYPE_FROM_USER);
- } else {
- advance_consumer(q, QUEUE_TYPE_KERNEL);
- count = queue_count(q, QUEUE_TYPE_KERNEL);
- }
+ queue_advance_consumer(q, QUEUE_TYPE_FROM_CLIENT);
+ count = queue_count(q, QUEUE_TYPE_FROM_CLIENT);
if (srq->limit && srq->ibsrq.event_handler && (count < srq->limit)) {
srq->limit = 0;
@@ -357,12 +349,8 @@ static enum resp_states check_resource(struct rxe_qp *qp,
qp->resp.status = IB_WC_WR_FLUSH_ERR;
return RESPST_COMPLETE;
} else if (!srq) {
- if (qp->is_user)
- qp->resp.wqe = queue_head(qp->rq.queue,
- QUEUE_TYPE_FROM_USER);
- else
- qp->resp.wqe = queue_head(qp->rq.queue,
- QUEUE_TYPE_KERNEL);
+ qp->resp.wqe = queue_head(qp->rq.queue,
+ QUEUE_TYPE_FROM_CLIENT);
if (qp->resp.wqe) {
qp->resp.status = IB_WC_WR_FLUSH_ERR;
return RESPST_COMPLETE;
@@ -374,7 +362,7 @@ static enum resp_states check_resource(struct rxe_qp *qp,
}
}
- if (pkt->mask & RXE_READ_OR_ATOMIC) {
+ if (pkt->mask & RXE_READ_OR_ATOMIC_MASK) {
/* it is the requesters job to not send
* too many read/atomic ops, we just
* recycle the responder resource queue
@@ -389,12 +377,8 @@ static enum resp_states check_resource(struct rxe_qp *qp,
if (srq)
return get_srq_wqe(qp);
- if (qp->is_user)
- qp->resp.wqe = queue_head(qp->rq.queue,
- QUEUE_TYPE_FROM_USER);
- else
- qp->resp.wqe = queue_head(qp->rq.queue,
- QUEUE_TYPE_KERNEL);
+ qp->resp.wqe = queue_head(qp->rq.queue,
+ QUEUE_TYPE_FROM_CLIENT);
return (qp->resp.wqe) ? RESPST_CHK_LENGTH : RESPST_ERR_RNR;
}
@@ -429,7 +413,7 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
enum resp_states state;
int access;
- if (pkt->mask & (RXE_READ_MASK | RXE_WRITE_MASK)) {
+ if (pkt->mask & RXE_READ_OR_WRITE_MASK) {
if (pkt->mask & RXE_RETH_MASK) {
qp->resp.va = reth_va(pkt);
qp->resp.offset = 0;
@@ -450,7 +434,7 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
}
/* A zero-byte op is not required to set an addr or rkey. */
- if ((pkt->mask & (RXE_READ_MASK | RXE_WRITE_OR_SEND)) &&
+ if ((pkt->mask & RXE_READ_OR_WRITE_MASK) &&
(pkt->mask & RXE_RETH_MASK) &&
reth_len(pkt) == 0) {
return RESPST_EXECUTE;
@@ -936,12 +920,8 @@ static enum resp_states do_complete(struct rxe_qp *qp,
}
/* have copy for srq and reference for !srq */
- if (!qp->srq) {
- if (qp->is_user)
- advance_consumer(qp->rq.queue, QUEUE_TYPE_FROM_USER);
- else
- advance_consumer(qp->rq.queue, QUEUE_TYPE_KERNEL);
- }
+ if (!qp->srq)
+ queue_advance_consumer(qp->rq.queue, QUEUE_TYPE_FROM_CLIENT);
qp->resp.wqe = NULL;
@@ -1213,7 +1193,7 @@ static void rxe_drain_req_pkts(struct rxe_qp *qp, bool notify)
return;
while (!qp->srq && q && queue_head(q, q->type))
- advance_consumer(q, q->type);
+ queue_advance_consumer(q, q->type);
}
int rxe_responder(void *arg)
diff --git a/drivers/infiniband/sw/rxe/rxe_srq.c b/drivers/infiniband/sw/rxe/rxe_srq.c
index 610c98d24b5cc7..a9e7817e27325a 100644
--- a/drivers/infiniband/sw/rxe/rxe_srq.c
+++ b/drivers/infiniband/sw/rxe/rxe_srq.c
@@ -93,7 +93,7 @@ int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq,
spin_lock_init(&srq->rq.producer_lock);
spin_lock_init(&srq->rq.consumer_lock);
- type = uresp ? QUEUE_TYPE_FROM_USER : QUEUE_TYPE_KERNEL;
+ type = QUEUE_TYPE_FROM_CLIENT;
q = rxe_queue_init(rxe, &srq->rq.max_wr,
srq_wqe_size, type);
if (!q) {
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 267b5a9c345d08..9d0bb9aa751437 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -29,13 +29,10 @@ static int rxe_query_port(struct ib_device *dev,
u32 port_num, struct ib_port_attr *attr)
{
struct rxe_dev *rxe = to_rdev(dev);
- struct rxe_port *port;
int rc;
- port = &rxe->port;
-
/* *attr being zeroed by the caller, avoid zeroing it here */
- *attr = port->attr;
+ *attr = rxe->port.attr;
mutex_lock(&rxe->usdev_lock);
rc = ib_get_eth_speed(dev, port_num, &attr->active_speed,
@@ -218,11 +215,7 @@ static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr)
int num_sge = ibwr->num_sge;
int full;
- if (rq->is_user)
- full = queue_full(rq->queue, QUEUE_TYPE_FROM_USER);
- else
- full = queue_full(rq->queue, QUEUE_TYPE_KERNEL);
-
+ full = queue_full(rq->queue, QUEUE_TYPE_TO_DRIVER);
if (unlikely(full)) {
err = -ENOMEM;
goto err1;
@@ -237,11 +230,7 @@ static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr)
for (i = 0; i < num_sge; i++)
length += ibwr->sg_list[i].length;
- if (rq->is_user)
- recv_wqe = producer_addr(rq->queue, QUEUE_TYPE_FROM_USER);
- else
- recv_wqe = producer_addr(rq->queue, QUEUE_TYPE_KERNEL);
-
+ recv_wqe = queue_producer_addr(rq->queue, QUEUE_TYPE_TO_DRIVER);
recv_wqe->wr_id = ibwr->wr_id;
recv_wqe->num_sge = num_sge;
@@ -254,10 +243,7 @@ static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr)
recv_wqe->dma.cur_sge = 0;
recv_wqe->dma.sge_offset = 0;
- if (rq->is_user)
- advance_producer(rq->queue, QUEUE_TYPE_FROM_USER);
- else
- advance_producer(rq->queue, QUEUE_TYPE_KERNEL);
+ queue_advance_producer(rq->queue, QUEUE_TYPE_TO_DRIVER);
return 0;
@@ -633,27 +619,17 @@ static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
spin_lock_irqsave(&qp->sq.sq_lock, flags);
- if (qp->is_user)
- full = queue_full(sq->queue, QUEUE_TYPE_FROM_USER);
- else
- full = queue_full(sq->queue, QUEUE_TYPE_KERNEL);
+ full = queue_full(sq->queue, QUEUE_TYPE_TO_DRIVER);
if (unlikely(full)) {
spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
return -ENOMEM;
}
- if (qp->is_user)
- send_wqe = producer_addr(sq->queue, QUEUE_TYPE_FROM_USER);
- else
- send_wqe = producer_addr(sq->queue, QUEUE_TYPE_KERNEL);
-
+ send_wqe = queue_producer_addr(sq->queue, QUEUE_TYPE_TO_DRIVER);
init_send_wqe(qp, ibwr, mask, length, send_wqe);
- if (qp->is_user)
- advance_producer(sq->queue, QUEUE_TYPE_FROM_USER);
- else
- advance_producer(sq->queue, QUEUE_TYPE_KERNEL);
+ queue_advance_producer(sq->queue, QUEUE_TYPE_TO_DRIVER);
spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
@@ -845,18 +821,12 @@ static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
spin_lock_irqsave(&cq->cq_lock, flags);
for (i = 0; i < num_entries; i++) {
- if (cq->is_user)
- cqe = queue_head(cq->queue, QUEUE_TYPE_TO_USER);
- else
- cqe = queue_head(cq->queue, QUEUE_TYPE_KERNEL);
+ cqe = queue_head(cq->queue, QUEUE_TYPE_FROM_DRIVER);
if (!cqe)
break;
memcpy(wc++, &cqe->ibwc, sizeof(*wc));
- if (cq->is_user)
- advance_consumer(cq->queue, QUEUE_TYPE_TO_USER);
- else
- advance_consumer(cq->queue, QUEUE_TYPE_KERNEL);
+ queue_advance_consumer(cq->queue, QUEUE_TYPE_FROM_DRIVER);
}
spin_unlock_irqrestore(&cq->cq_lock, flags);
@@ -868,10 +838,7 @@ static int rxe_peek_cq(struct ib_cq *ibcq, int wc_cnt)
struct rxe_cq *cq = to_rcq(ibcq);
int count;
- if (cq->is_user)
- count = queue_count(cq->queue, QUEUE_TYPE_TO_USER);
- else
- count = queue_count(cq->queue, QUEUE_TYPE_KERNEL);
+ count = queue_count(cq->queue, QUEUE_TYPE_FROM_DRIVER);
return (count > wc_cnt) ? wc_cnt : count;
}
@@ -887,10 +854,7 @@ static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
if (cq->notify != IB_CQ_NEXT_COMP)
cq->notify = flags & IB_CQ_SOLICITED_MASK;
- if (cq->is_user)
- empty = queue_empty(cq->queue, QUEUE_TYPE_TO_USER);
- else
- empty = queue_empty(cq->queue, QUEUE_TYPE_KERNEL);
+ empty = queue_empty(cq->queue, QUEUE_TYPE_FROM_DRIVER);
if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !empty)
ret = 1;
@@ -987,41 +951,26 @@ err1:
return ERR_PTR(err);
}
-static int rxe_set_page(struct ib_mr *ibmr, u64 addr)
-{
- struct rxe_mr *mr = to_rmr(ibmr);
- struct rxe_map *map;
- struct rxe_phys_buf *buf;
-
- if (unlikely(mr->nbuf == mr->num_buf))
- return -ENOMEM;
-
- map = mr->map[mr->nbuf / RXE_BUF_PER_MAP];
- buf = &map->buf[mr->nbuf % RXE_BUF_PER_MAP];
-
- buf->addr = addr;
- buf->size = ibmr->page_size;
- mr->nbuf++;
-
- return 0;
-}
-
+/* build next_map_set from scatterlist
+ * The IB_WR_REG_MR WR will swap map_sets
+ */
static int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
int sg_nents, unsigned int *sg_offset)
{
struct rxe_mr *mr = to_rmr(ibmr);
+ struct rxe_map_set *set = mr->next_map_set;
int n;
- mr->nbuf = 0;
+ set->nbuf = 0;
- n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rxe_set_page);
+ n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rxe_mr_set_page);
- mr->va = ibmr->iova;
- mr->iova = ibmr->iova;
- mr->length = ibmr->length;
- mr->page_shift = ilog2(ibmr->page_size);
- mr->page_mask = ibmr->page_size - 1;
- mr->offset = mr->iova & mr->page_mask;
+ set->va = ibmr->iova;
+ set->iova = ibmr->iova;
+ set->length = ibmr->length;
+ set->page_shift = ilog2(ibmr->page_size);
+ set->page_mask = ibmr->page_size - 1;
+ set->offset = set->iova & set->page_mask;
return n;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index ac2a2148027f47..c807639435ebe4 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -240,7 +240,6 @@ struct rxe_qp {
struct sk_buff_head req_pkts;
struct sk_buff_head resp_pkts;
- struct sk_buff_head send_pkts;
struct rxe_req_info req;
struct rxe_comp_info comp;
@@ -267,18 +266,11 @@ struct rxe_qp {
};
enum rxe_mr_state {
- RXE_MR_STATE_ZOMBIE,
RXE_MR_STATE_INVALID,
RXE_MR_STATE_FREE,
RXE_MR_STATE_VALID,
};
-enum rxe_mr_type {
- RXE_MR_TYPE_NONE,
- RXE_MR_TYPE_DMA,
- RXE_MR_TYPE_MR,
-};
-
enum rxe_mr_copy_dir {
RXE_TO_MR_OBJ,
RXE_FROM_MR_OBJ,
@@ -300,6 +292,17 @@ struct rxe_map {
struct rxe_phys_buf buf[RXE_BUF_PER_MAP];
};
+struct rxe_map_set {
+ struct rxe_map **map;
+ u64 va;
+ u64 iova;
+ size_t length;
+ u32 offset;
+ u32 nbuf;
+ int page_shift;
+ int page_mask;
+};
+
static inline int rkey_is_mw(u32 rkey)
{
u32 index = rkey >> 8;
@@ -313,28 +316,24 @@ struct rxe_mr {
struct ib_umem *umem;
+ u32 lkey;
+ u32 rkey;
enum rxe_mr_state state;
- enum rxe_mr_type type;
- u64 va;
- u64 iova;
- size_t length;
- u32 offset;
+ enum ib_mr_type type;
int access;
- int page_shift;
- int page_mask;
int map_shift;
int map_mask;
u32 num_buf;
- u32 nbuf;
u32 max_buf;
u32 num_map;
atomic_t num_mw;
- struct rxe_map **map;
+ struct rxe_map_set *cur_map_set;
+ struct rxe_map_set *next_map_set;
};
enum rxe_mw_state {
@@ -350,6 +349,7 @@ struct rxe_mw {
enum rxe_mw_state state;
struct rxe_qp *qp; /* Type 2 only */
struct rxe_mr *mr;
+ u32 rkey;
int access;
u64 addr;
u64 length;
@@ -474,26 +474,11 @@ static inline struct rxe_pd *mr_pd(struct rxe_mr *mr)
return to_rpd(mr->ibmr.pd);
}
-static inline u32 mr_lkey(struct rxe_mr *mr)
-{
- return mr->ibmr.lkey;
-}
-
-static inline u32 mr_rkey(struct rxe_mr *mr)
-{
- return mr->ibmr.rkey;
-}
-
static inline struct rxe_pd *rxe_mw_pd(struct rxe_mw *mw)
{
return to_rpd(mw->ibmw.pd);
}
-static inline u32 rxe_mw_rkey(struct rxe_mw *mw)
-{
- return mw->ibmw.rkey;
-}
-
int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name);
void rxe_mc_cleanup(struct rxe_pool_entry *arg);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c b/drivers/net/ethernet/mellanox/mlx5/core/uar.c
index da481a7c12f4e6..01e9c412977cf3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/uar.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/uar.c
@@ -36,7 +36,7 @@
#include <linux/mlx5/driver.h>
#include "mlx5_core.h"
-int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn)
+static int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn)
{
u32 out[MLX5_ST_SZ_DW(alloc_uar_out)] = {};
u32 in[MLX5_ST_SZ_DW(alloc_uar_in)] = {};
@@ -44,13 +44,14 @@ int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn)
MLX5_SET(alloc_uar_in, in, opcode, MLX5_CMD_OP_ALLOC_UAR);
err = mlx5_cmd_exec_inout(dev, alloc_uar, in, out);
- if (!err)
- *uarn = MLX5_GET(alloc_uar_out, out, uar);
- return err;
+ if (err)
+ return err;
+
+ *uarn = MLX5_GET(alloc_uar_out, out, uar);
+ return 0;
}
-EXPORT_SYMBOL(mlx5_cmd_alloc_uar);
-int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn)
+static int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn)
{
u32 in[MLX5_ST_SZ_DW(dealloc_uar_in)] = {};
@@ -58,7 +59,6 @@ int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn)
MLX5_SET(dealloc_uar_in, in, uar, uarn);
return mlx5_cmd_exec_in(dev, dealloc_uar, in);
}
-EXPORT_SYMBOL(mlx5_cmd_free_uar);
static int uars_per_sys_page(struct mlx5_core_dev *mdev)
{
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index e23417424373fc..1b8bae246b2859 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1005,8 +1005,6 @@ void mlx5_cmd_mbox_status(void *out, u8 *status, u32 *syndrome);
bool mlx5_cmd_is_down(struct mlx5_core_dev *dev);
int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type);
-int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn);
-int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn);
void mlx5_health_flush(struct mlx5_core_dev *dev);
void mlx5_health_cleanup(struct mlx5_core_dev *dev);
int mlx5_health_init(struct mlx5_core_dev *dev);
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index f3638d09ba7764..96f5fb2af8118c 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -7569,7 +7569,7 @@ struct mlx5_ifc_dealloc_uar_out_bits {
struct mlx5_ifc_dealloc_uar_in_bits {
u8 opcode[0x10];
- u8 reserved_at_10[0x10];
+ u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
@@ -8416,7 +8416,7 @@ struct mlx5_ifc_alloc_uar_out_bits {
struct mlx5_ifc_alloc_uar_in_bits {
u8 opcode[0x10];
- u8 reserved_at_10[0x10];
+ u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];