-rw-r--r--  drivers/infiniband/Kconfig             |   1
-rw-r--r--  drivers/infiniband/core/umem.c         | 141
-rw-r--r--  drivers/infiniband/core/uverbs.h       |  24
-rw-r--r--  drivers/infiniband/core/uverbs_cmd.c   | 306
-rw-r--r--  drivers/infiniband/core/uverbs_main.c  | 121
-rw-r--r--  include/rdma/ib_umem.h                 |  61
-rw-r--r--  include/uapi/rdma/ib_user_verbs.h      |  36
7 files changed, 581 insertions(+), 109 deletions(-)
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index b899531498eb0..6281d01483d7b 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -24,6 +24,7 @@ config INFINIBAND_USER_MAD
config INFINIBAND_USER_ACCESS
tristate "InfiniBand userspace access (verbs and CM)"
select ANON_INODES
+ select MMU_NOTIFIER
---help---
Userspace InfiniBand access support. This enables the
kernel side of userspace verbs and the userspace
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index aec7a6aa2951d..89430ecabaf85 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -352,3 +352,144 @@ int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
return 0;
}
EXPORT_SYMBOL(ib_umem_copy_from);
+
+void ib_ummunotify_register_range(struct ib_ummunotify_context *context,
+ struct ib_ummunotify_range *range)
+{
+ struct ib_ummunotify_range *trange;
+ struct rb_node **n = &context->reg_tree.rb_node;
+ struct rb_node *pn;
+ unsigned long flags;
+
+ spin_lock_irqsave(&context->lock, flags);
+
+ pn = NULL;
+ while (*n) {
+ pn = *n;
+ trange = rb_entry(pn, struct ib_ummunotify_range, node);
+
+ if (range->start <= trange->start)
+ n = &pn->rb_left;
+ else
+ n = &pn->rb_right;
+ }
+
+ rb_link_node(&range->node, pn, n);
+ rb_insert_color(&range->node, &context->reg_tree);
+
+ spin_unlock_irqrestore(&context->lock, flags);
+}
+EXPORT_SYMBOL(ib_ummunotify_register_range);
+
+void ib_ummunotify_unregister_range(struct ib_ummunotify_context *context,
+ struct ib_ummunotify_range *range)
+{
+ unsigned long flags;
+
+ if (!ib_ummunotify_context_used(context))
+ return;
+
+ if (RB_EMPTY_NODE(&range->node))
+ return;
+
+ spin_lock_irqsave(&context->lock, flags);
+ rb_erase(&range->node, &context->reg_tree);
+ spin_unlock_irqrestore(&context->lock, flags);
+}
+EXPORT_SYMBOL(ib_ummunotify_unregister_range);
+
+static void ib_ummunotify_handle_notify(struct mmu_notifier *mn,
+ unsigned long start, unsigned long end)
+{
+ struct ib_ummunotify_context *context =
+ container_of(mn, struct ib_ummunotify_context, mmu_notifier);
+ unsigned long flags;
+ struct rb_node *n;
+ struct ib_ummunotify_range *range;
+
+ spin_lock_irqsave(&context->lock, flags);
+
+ for (n = rb_first(&context->reg_tree); n; n = rb_next(n)) {
+ range = rb_entry(n, struct ib_ummunotify_range, node);
+
+ /*
+ * Ranges overlap if they're not disjoint; and they're
+ * disjoint if the end of one is before the start of
+ * the other one. So if both disjointness comparisons
+ * fail then the ranges overlap.
+ *
+ * Since we keep the tree of regions we're watching
+ * sorted by start address, we can end this loop as
+ * soon as we hit a region that starts past the end of
+ * the range for the event we're handling.
+ */
+ if (range->start >= end)
+ break;
+
+ /*
+ * Just go to the next region if the start of the
+ * range is after the end of the region -- there
+ * might still be more overlapping ranges that have a
+ * greater start.
+ */
+ if (start >= range->end)
+ continue;
+
+ context->callback(context, range);
+ }
+
+ spin_unlock_irqrestore(&context->lock, flags);
+}
+
+static void ib_ummunotify_invalidate_page(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long addr)
+{
+ ib_ummunotify_handle_notify(mn, addr, addr + PAGE_SIZE);
+}
+
+static void ib_ummunotify_invalidate_range_start(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long start,
+ unsigned long end)
+{
+ ib_ummunotify_handle_notify(mn, start, end);
+}
+
+static const struct mmu_notifier_ops ib_ummunotify_mmu_notifier_ops = {
+ .invalidate_page = ib_ummunotify_invalidate_page,
+ .invalidate_range_start = ib_ummunotify_invalidate_range_start,
+};
+
+int ib_ummunotify_init_context(struct ib_ummunotify_context *context,
+ void (*callback)(struct ib_ummunotify_context *,
+ struct ib_ummunotify_range *))
+{
+ int ret;
+
+ context->callback = callback;
+ context->reg_tree = RB_ROOT;
+ spin_lock_init(&context->lock);
+
+ context->mm = current->mm;
+ atomic_inc(&current->mm->mm_count);
+
+ context->mmu_notifier.ops = &ib_ummunotify_mmu_notifier_ops;
+ ret = mmu_notifier_register(&context->mmu_notifier, context->mm);
+ if (ret) {
+ mmdrop(context->mm);
+ context->mm = NULL;
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL(ib_ummunotify_init_context);
+
+void ib_ummunotify_cleanup_context(struct ib_ummunotify_context *context)
+{
+ if (!ib_ummunotify_context_used(context))
+ return;
+ mmu_notifier_unregister(&context->mmu_notifier, context->mm);
+ mmdrop(context->mm);
+}
+EXPORT_SYMBOL(ib_ummunotify_cleanup_context);
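
[Editorial note: for orientation, a minimal sketch (not part of the patch) of how a kernel-side consumer might drive the new ib_ummunotify API. The my_* names are invented for illustration; the callback runs under the context spinlock with interrupts off, so it must not sleep.]

	#include <rdma/ib_umem.h>

	struct my_region {
		struct ib_ummunotify_range range;	/* embedded; keyed by start address */
	};

	/* Invoked under context->lock with IRQs disabled: must not sleep. */
	static void my_invalidate(struct ib_ummunotify_context *ctx,
				  struct ib_ummunotify_range *range)
	{
		struct my_region *reg = container_of(range, struct my_region, range);

		/* Mark the region stale; defer heavyweight teardown elsewhere. */
		(void) reg;
	}

	static int my_watch(struct ib_ummunotify_context *ctx,
			    struct my_region *reg,
			    unsigned long start, unsigned long length)
	{
		int ret;

		ret = ib_ummunotify_init_context(ctx, my_invalidate);
		if (ret)
			return ret;

		reg->range.start = start;
		reg->range.end   = start + length;
		ib_ummunotify_register_range(ctx, &reg->range);
		return 0;
	}

	static void my_unwatch(struct ib_ummunotify_context *ctx,
			       struct my_region *reg)
	{
		ib_ummunotify_unregister_range(ctx, &reg->range);
		ib_ummunotify_cleanup_context(ctx);
	}
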
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index b716b08156446..dbb2b7f12b0e3 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -96,9 +96,15 @@ struct ib_uverbs_device {
struct mutex xrcd_tree_mutex;
};
+enum ib_uverbs_event_file_type {
+ IB_UVERBS_EVENT_FILE_ASYNC,
+ IB_UVERBS_EVENT_FILE_COMP,
+ IB_UVERBS_EVENT_FILE_MMU_NOTIFY,
+};
+
struct ib_uverbs_event_file {
struct kref ref;
- int is_async;
+ enum ib_uverbs_event_file_type type;
struct ib_uverbs_file *uverbs_file;
spinlock_t lock;
int is_closed;
@@ -113,13 +119,17 @@ struct ib_uverbs_file {
struct ib_uverbs_device *device;
struct ib_ucontext *ucontext;
struct ib_event_handler event_handler;
+ struct ib_ummunotify_context mmu_notify_context;
+ u64 *mmu_notify_counter;
struct ib_uverbs_event_file *async_file;
+ struct ib_uverbs_event_file *mmu_notify_file;
};
struct ib_uverbs_event {
union {
struct ib_uverbs_async_event_desc async;
struct ib_uverbs_comp_event_desc comp;
+ struct ib_uverbs_mmu_notify_event_desc mmu_notify;
} desc;
struct list_head list;
struct list_head obj_list;
@@ -148,6 +158,11 @@ struct ib_usrq_object {
struct ib_uxrcd_object *uxrcd;
};
+struct ib_umr_object {
+ struct ib_uevent_object uevent;
+ struct ib_ummunotify_range range;
+};
+
struct ib_uqp_object {
struct ib_uevent_object uevent;
struct list_head mcast_list;
@@ -177,7 +192,7 @@ extern struct idr ib_uverbs_rule_idr;
void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj);
struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
- int is_async);
+ enum ib_uverbs_event_file_type type);
struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd);
void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
@@ -187,6 +202,8 @@ void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
struct ib_uevent_object *uobj);
void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context);
+void ib_uverbs_mr_event_handler(struct ib_ummunotify_context *context,
+ struct ib_ummunotify_range *range);
void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr);
@@ -250,6 +267,9 @@ IB_UVERBS_DECLARE_CMD(destroy_srq);
IB_UVERBS_DECLARE_CMD(create_xsrq);
IB_UVERBS_DECLARE_CMD(open_xrcd);
IB_UVERBS_DECLARE_CMD(close_xrcd);
+IB_UVERBS_DECLARE_CMD(create_mmu_notify_channel);
+IB_UVERBS_DECLARE_CMD(reg_mmu_notify_mr);
+IB_UVERBS_DECLARE_CMD(dereg_mmu_notify_mr);
#define IB_UVERBS_DECLARE_EX_CMD(name) \
int ib_uverbs_ex_##name(struct ib_uverbs_file *file, \
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index a9f048990dfcd..e3707de3e1b90 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -355,7 +355,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
goto err_free;
resp.async_fd = ret;
- filp = ib_uverbs_alloc_event_file(file, 1);
+ filp = ib_uverbs_alloc_event_file(file, IB_UVERBS_EVENT_FILE_ASYNC);
if (IS_ERR(filp)) {
ret = PTR_ERR(filp);
goto err_fd;
@@ -933,49 +933,38 @@ void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev,
xrcd_table_delete(dev, inode);
}
-ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static ssize_t ib_uverbs_reg_mr_common(struct ib_uverbs_file *file,
+ struct ib_uverbs_reg_mmu_notify_mr *cmd,
+ struct ib_uverbs_reg_mr_resp *resp,
+ struct ib_udata *udata,
+ bool do_notify)
{
- struct ib_uverbs_reg_mr cmd;
- struct ib_uverbs_reg_mr_resp resp;
- struct ib_udata udata;
- struct ib_uobject *uobj;
- struct ib_pd *pd;
- struct ib_mr *mr;
- int ret;
-
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- INIT_UDATA(&udata, buf + sizeof cmd,
- (unsigned long) cmd.response + sizeof resp,
- in_len - sizeof cmd, out_len - sizeof resp);
+ struct ib_umr_object *obj;
+ struct ib_pd *pd;
+ struct ib_mr *mr;
+ int ret;
- if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))
+ if ((cmd->start & ~PAGE_MASK) != (cmd->hca_va & ~PAGE_MASK))
return -EINVAL;
-	ret = ib_check_mr_access(cmd.access_flags);
+	ret = ib_check_mr_access(cmd->access_flags);
if (ret)
return ret;
- uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
- if (!uobj)
+ obj = kmalloc(sizeof *obj, GFP_KERNEL);
+ if (!obj)
return -ENOMEM;
- init_uobj(uobj, 0, file->ucontext, &mr_lock_class);
- down_write(&uobj->mutex);
+ init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, &mr_lock_class);
+ down_write(&obj->uevent.uobject.mutex);
- pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ pd = idr_read_pd(cmd->pd_handle, file->ucontext);
if (!pd) {
ret = -EINVAL;
goto err_free;
}
- if (cmd.access_flags & IB_ACCESS_ON_DEMAND) {
+ if (cmd->access_flags & IB_ACCESS_ON_DEMAND) {
struct ib_device_attr attr;
ret = ib_query_device(pd->device, &attr);
@@ -987,8 +976,8 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
}
}
- mr = pd->device->reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va,
- cmd.access_flags, &udata);
+	mr = pd->device->reg_user_mr(pd, cmd->start, cmd->length, cmd->hca_va,
+				     cmd->access_flags, udata);
if (IS_ERR(mr)) {
ret = PTR_ERR(mr);
goto err_put;
@@ -996,22 +985,22 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
mr->device = pd->device;
mr->pd = pd;
- mr->uobject = uobj;
+ mr->uobject = &obj->uevent.uobject;
atomic_inc(&pd->usecnt);
atomic_set(&mr->usecnt, 0);
- uobj->object = mr;
- ret = idr_add_uobj(&ib_uverbs_mr_idr, uobj);
+ obj->uevent.uobject.object = mr;
+ ret = idr_add_uobj(&ib_uverbs_mr_idr, &obj->uevent.uobject);
if (ret)
goto err_unreg;
-	memset(&resp, 0, sizeof resp);
+	memset(resp, 0, sizeof *resp);
- resp.lkey = mr->lkey;
- resp.rkey = mr->rkey;
- resp.mr_handle = uobj->id;
+ resp->lkey = mr->lkey;
+ resp->rkey = mr->rkey;
+ resp->mr_handle = obj->uevent.uobject.id;
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp)) {
+ if (copy_to_user((void __user *) (unsigned long) cmd->response,
+ resp, sizeof *resp)) {
ret = -EFAULT;
goto err_copy;
}
@@ -1019,17 +1008,23 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
put_pd_read(pd);
mutex_lock(&file->mutex);
- list_add_tail(&uobj->list, &file->ucontext->mr_list);
+ list_add_tail(&obj->uevent.uobject.list, &file->ucontext->mr_list);
mutex_unlock(&file->mutex);
- uobj->live = 1;
+ obj->uevent.uobject.live = 1;
- up_write(&uobj->mutex);
+ if (do_notify)
+ ib_ummunotify_register_range(&file->mmu_notify_context,
+ &obj->range);
+ else
+ ib_ummunotify_clear_range(&obj->range);
- return in_len;
+ up_write(&obj->uevent.uobject.mutex);
+
+ return 0;
err_copy:
- idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
+ idr_remove_uobj(&ib_uverbs_mr_idr, &obj->uevent.uobject);
err_unreg:
ib_dereg_mr(mr);
@@ -1038,7 +1033,7 @@ err_put:
put_pd_read(pd);
err_free:
- put_uobj_write(uobj);
+ put_uobj_write(&obj->uevent.uobject);
return ret;
}
@@ -1135,23 +1130,79 @@ put_uobjs:
return ret;
}
-ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
{
- struct ib_uverbs_dereg_mr cmd;
- struct ib_mr *mr;
- struct ib_uobject *uobj;
- int ret = -EINVAL;
+ struct ib_uverbs_reg_mr cmd;
+ struct ib_uverbs_reg_mmu_notify_mr not_cmd;
+ struct ib_uverbs_reg_mr_resp resp;
+ struct ib_udata udata;
+ int ret;
+
+ if (out_len < sizeof resp)
+ return -ENOSPC;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = idr_write_uobj(&ib_uverbs_mr_idr, cmd.mr_handle, file->ucontext);
+ INIT_UDATA(&udata, buf + sizeof cmd,
+ (unsigned long) cmd.response + sizeof resp,
+ in_len - sizeof cmd, out_len - sizeof resp);
+
+ not_cmd.response = cmd.response;
+ not_cmd.user_handle = 0;
+ not_cmd.start = cmd.start;
+ not_cmd.length = cmd.length;
+ not_cmd.hca_va = cmd.hca_va;
+ not_cmd.pd_handle = cmd.pd_handle;
+ not_cmd.access_flags = cmd.access_flags;
+
+ ret = ib_uverbs_reg_mr_common(file, &not_cmd, &resp, &udata, false);
+ return ret ? ret : in_len;
+}
+
+ssize_t ib_uverbs_reg_mmu_notify_mr(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_reg_mmu_notify_mr cmd;
+ struct ib_uverbs_reg_mr_resp resp;
+ struct ib_udata udata;
+ int ret;
+
+ if (out_len < sizeof resp)
+ return -ENOSPC;
+
+ if (!ib_ummunotify_context_used(&file->mmu_notify_context))
+ return -EINVAL;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ INIT_UDATA(&udata, buf + sizeof cmd,
+ (unsigned long) cmd.response + sizeof resp,
+ in_len - sizeof cmd, out_len - sizeof resp);
+
+ ret = ib_uverbs_reg_mr_common(file, &cmd, &resp, &udata, true);
+ return ret ? ret : in_len;
+}
+
+static ssize_t ib_uverbs_dereg_mr_common(struct ib_uverbs_file *file,
+ int mr_handle,
+ u32 *events_reported)
+{
+ struct ib_uobject *uobj;
+ struct ib_mr *mr;
+ struct ib_umr_object *obj;
+ int ret;
+
+ uobj = idr_write_uobj(&ib_uverbs_mr_idr, mr_handle, file->ucontext);
if (!uobj)
return -EINVAL;
mr = uobj->object;
+ obj = container_of(uobj, struct ib_umr_object, uevent.uobject);
ret = ib_dereg_mr(mr);
if (!ret)
@@ -1162,15 +1213,60 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
if (ret)
return ret;
+ ib_ummunotify_unregister_range(&file->mmu_notify_context,
+ &obj->range);
+
idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
mutex_lock(&file->mutex);
list_del(&uobj->list);
mutex_unlock(&file->mutex);
+ ib_uverbs_release_uevent(file, &obj->uevent);
+
+ if (events_reported)
+ *events_reported = obj->uevent.events_reported;
+
put_uobj(uobj);
- return in_len;
+ return 0;
+}
+
+ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_dereg_mr cmd;
+ int ret;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ ret = ib_uverbs_dereg_mr_common(file, cmd.mr_handle, NULL);
+
+ return ret ? ret : in_len;
+}
+
+ssize_t ib_uverbs_dereg_mmu_notify_mr(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_dereg_mmu_notify_mr cmd;
+ struct ib_uverbs_dereg_mmu_notify_mr_resp resp;
+ int ret;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+	ret = ib_uverbs_dereg_mr_common(file, cmd.mr_handle,
+					&resp.events_reported);
+	if (ret)
+		return ret;
+
+	if (copy_to_user((void __user *) (unsigned long) cmd.response,
+			 &resp, sizeof resp))
+		return -EFAULT;
+
+	return in_len;
}
ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
@@ -1313,7 +1409,7 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
return ret;
resp.fd = ret;
- filp = ib_uverbs_alloc_event_file(file, 0);
+ filp = ib_uverbs_alloc_event_file(file, IB_UVERBS_EVENT_FILE_COMP);
if (IS_ERR(filp)) {
put_unused_fd(resp.fd);
return PTR_ERR(filp);
@@ -3295,63 +3391,73 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
return ret ? ret : in_len;
}
-int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
+ssize_t ib_uverbs_create_mmu_notify_channel(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
{
- struct ib_uverbs_ex_query_device_resp resp;
- struct ib_uverbs_ex_query_device cmd;
- struct ib_device_attr attr;
- struct ib_device *device;
- int err;
+ struct ib_uverbs_create_mmu_notify_channel cmd;
+ struct ib_uverbs_create_mmu_notify_channel_resp resp;
+ struct file *filp;
+ int ret;
- device = file->device->ib_dev;
- if (ucore->inlen < sizeof(cmd))
- return -EINVAL;
+ if (out_len < sizeof resp)
+ return -ENOSPC;
- err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
- if (err)
- return err;
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
- if (cmd.comp_mask)
- return -EINVAL;
+ mutex_lock(&file->mutex);
- if (cmd.reserved)
- return -EINVAL;
+ if (file->mmu_notify_file) {
+ ret = -EINVAL;
+ goto err;
+ }
- resp.response_length = offsetof(typeof(resp), odp_caps);
+ ret = get_unused_fd();
+ if (ret < 0)
+ goto err;
+ resp.fd = ret;
- if (ucore->outlen < resp.response_length)
- return -ENOSPC;
+ filp = ib_uverbs_alloc_event_file(file, IB_UVERBS_EVENT_FILE_MMU_NOTIFY);
+ if (IS_ERR(filp)) {
+ ret = PTR_ERR(filp);
+ goto err_put_fd;
+ }
- err = device->query_device(device, &attr);
- if (err)
- return err;
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp)) {
+ ret = -EFAULT;
+ goto err_fput;
+ }
- copy_query_dev_fields(file, &resp.base, &attr);
- resp.comp_mask = 0;
+ ret = ib_ummunotify_init_context(&file->mmu_notify_context,
+ ib_uverbs_mr_event_handler);
+ if (ret)
+ goto err_fput;
- if (ucore->outlen < resp.response_length + sizeof(resp.odp_caps))
- goto end;
+ file->mmu_notify_counter = (void *) get_zeroed_page(GFP_KERNEL);
+ if (!file->mmu_notify_counter) {
+ ret = -ENOMEM;
+ goto err_context;
+ }
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- resp.odp_caps.general_caps = attr.odp_caps.general_caps;
- resp.odp_caps.per_transport_caps.rc_odp_caps =
- attr.odp_caps.per_transport_caps.rc_odp_caps;
- resp.odp_caps.per_transport_caps.uc_odp_caps =
- attr.odp_caps.per_transport_caps.uc_odp_caps;
- resp.odp_caps.per_transport_caps.ud_odp_caps =
- attr.odp_caps.per_transport_caps.ud_odp_caps;
- resp.odp_caps.reserved = 0;
-#else
- memset(&resp.odp_caps, 0, sizeof(resp.odp_caps));
-#endif
- resp.response_length += sizeof(resp.odp_caps);
+ file->mmu_notify_file = filp->private_data;
+ fd_install(resp.fd, filp);
-end:
- err = ib_copy_to_udata(ucore, &resp, resp.response_length);
- if (err)
- return err;
+ mutex_unlock(&file->mutex);
- return 0;
+ return in_len;
+
+err_context:
+ ib_ummunotify_cleanup_context(&file->mmu_notify_context);
+
+err_fput:
+ fput(filp);
+
+err_put_fd:
+ put_unused_fd(resp.fd);
+
+err:
+ mutex_unlock(&file->mutex);
+ return ret;
}
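
[Editorial note: the new channel behaves like the existing uverbs event files: each read() dequeues one event descriptor. Below is a hypothetical userspace drain loop, assuming notify_fd is the descriptor returned in ib_uverbs_create_mmu_notify_channel_resp.fd; drain_events is invented for illustration.]

	#include <stdint.h>
	#include <stdio.h>
	#include <unistd.h>
	#include <rdma/ib_user_verbs.h>

	static void drain_events(int notify_fd)
	{
		struct ib_uverbs_mmu_notify_event_desc event;

		/* Blocks until an event is queued (or poll() on notify_fd first). */
		while (read(notify_fd, &event, sizeof event) == sizeof event)
			printf("MR invalidated, user_handle=0x%llx\n",
			       (unsigned long long) event.mr_handle);
	}
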
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 259dcc7779f5e..1aae7ed09d2f8 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -116,6 +116,9 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
[IB_USER_VERBS_CMD_CLOSE_XRCD] = ib_uverbs_close_xrcd,
[IB_USER_VERBS_CMD_CREATE_XSRQ] = ib_uverbs_create_xsrq,
[IB_USER_VERBS_CMD_OPEN_QP] = ib_uverbs_open_qp,
+ [IB_USER_VERBS_CMD_CREATE_MMU_NOTIFY_CHANNEL] = ib_uverbs_create_mmu_notify_channel,
+ [IB_USER_VERBS_CMD_REG_MMU_NOTIFY_MR] = ib_uverbs_reg_mmu_notify_mr,
+ [IB_USER_VERBS_CMD_DEREG_MMU_NOTIFY_MR] = ib_uverbs_dereg_mmu_notify_mr,
};
static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
@@ -271,9 +274,15 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) {
struct ib_mr *mr = uobj->object;
+ struct ib_umr_object *umr =
+ container_of(uobj, struct ib_umr_object, uevent.uobject);
idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
+ if (ib_ummunotify_context_used(&file->mmu_notify_context))
+ ib_ummunotify_unregister_range(&file->mmu_notify_context,
+ &umr->range);
ib_dereg_mr(mr);
+ ib_uverbs_release_uevent(file, &umr->uevent);
kfree(uobj);
}
@@ -298,6 +307,8 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
}
put_pid(context->tgid);
+ ib_ummunotify_cleanup_context(&file->mmu_notify_context);
+	free_page((unsigned long) file->mmu_notify_counter);
return context->device->dealloc_ucontext(context);
}
@@ -318,7 +329,7 @@ static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf,
{
struct ib_uverbs_event_file *file = filp->private_data;
struct ib_uverbs_event *event;
- int eventsz;
+ int uninitialized_var(eventsz);
int ret = 0;
spin_lock_irq(&file->lock);
@@ -338,10 +349,17 @@ static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf,
event = list_entry(file->event_list.next, struct ib_uverbs_event, list);
- if (file->is_async)
+ switch (file->type) {
+ case IB_UVERBS_EVENT_FILE_ASYNC:
eventsz = sizeof (struct ib_uverbs_async_event_desc);
- else
+ break;
+ case IB_UVERBS_EVENT_FILE_COMP:
eventsz = sizeof (struct ib_uverbs_comp_event_desc);
+ break;
+ case IB_UVERBS_EVENT_FILE_MMU_NOTIFY:
+ eventsz = sizeof (struct ib_uverbs_mmu_notify_event_desc);
+ break;
+ }
if (eventsz > count) {
ret = -EINVAL;
@@ -368,6 +386,37 @@ static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf,
return ret;
}
+static int uverbs_mmu_notify_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ struct ib_uverbs_file *file = vma->vm_private_data;
+
+ if (vmf->pgoff != 0)
+ return VM_FAULT_SIGBUS;
+
+ vmf->page = virt_to_page(file->mmu_notify_counter);
+ get_page(vmf->page);
+
+ return 0;
+}
+
+static const struct vm_operations_struct uverbs_mmu_notify_vm_ops = {
+ .fault = uverbs_mmu_notify_fault,
+};
+
+static int ib_uverbs_event_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+ struct ib_uverbs_event_file *ev_file = filp->private_data;
+ struct ib_uverbs_file *file = ev_file->uverbs_file;
+
+ if (vma->vm_end - vma->vm_start != PAGE_SIZE || vma->vm_pgoff != 0)
+ return -EINVAL;
+
+ vma->vm_ops = &uverbs_mmu_notify_vm_ops;
+ vma->vm_private_data = file;
+
+ return 0;
+}
+
static unsigned int ib_uverbs_event_poll(struct file *filp,
struct poll_table_struct *wait)
{
@@ -405,10 +454,15 @@ static int ib_uverbs_event_close(struct inode *inode, struct file *filp)
}
spin_unlock_irq(&file->lock);
- if (file->is_async) {
+ if (file->type == IB_UVERBS_EVENT_FILE_ASYNC) {
ib_unregister_event_handler(&file->uverbs_file->event_handler);
kref_put(&file->uverbs_file->ref, ib_uverbs_release_file);
}
+
+ if (file->type == IB_UVERBS_EVENT_FILE_MMU_NOTIFY) {
+ /* XXX */
+ }
+
kref_put(&file->ref, ib_uverbs_release_event_file);
return 0;
@@ -423,6 +477,16 @@ static const struct file_operations uverbs_event_fops = {
.llseek = no_llseek,
};
+static const struct file_operations uverbs_event_mmap_fops = {
+ .owner = THIS_MODULE,
+ .read = ib_uverbs_event_read,
+ .mmap = ib_uverbs_event_mmap,
+ .poll = ib_uverbs_event_poll,
+ .release = ib_uverbs_event_close,
+ .fasync = ib_uverbs_event_fasync,
+ .llseek = no_llseek,
+};
+
void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
{
struct ib_uverbs_event_file *file = cq_context;
@@ -458,6 +522,47 @@ void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
kill_fasync(&file->async_queue, SIGIO, POLL_IN);
}
+void ib_uverbs_mr_event_handler(struct ib_ummunotify_context *context,
+ struct ib_ummunotify_range *range)
+{
+ struct ib_uverbs_event_file *file =
+ container_of(context, struct ib_uverbs_file,
+ mmu_notify_context)->mmu_notify_file;
+ struct ib_umr_object *uobj;
+ struct ib_uverbs_event *entry;
+ unsigned long flags;
+
+ if (!file)
+ return;
+
+ spin_lock_irqsave(&file->lock, flags);
+ if (file->is_closed) {
+ spin_unlock_irqrestore(&file->lock, flags);
+ return;
+ }
+
+ entry = kmalloc(sizeof *entry, GFP_ATOMIC);
+ if (!entry) {
+ spin_unlock_irqrestore(&file->lock, flags);
+ return;
+ }
+
+ uobj = container_of(range, struct ib_umr_object, range);
+
+	entry->desc.mmu_notify.mr_handle = uobj->uevent.uobject.user_handle;
+ entry->counter = &uobj->uevent.events_reported;
+
+ list_add_tail(&entry->list, &file->event_list);
+ list_add_tail(&entry->obj_list, &uobj->uevent.event_list);
+
+ ++(*file->uverbs_file->mmu_notify_counter);
+
+ spin_unlock_irqrestore(&file->lock, flags);
+
+ wake_up_interruptible(&file->poll_wait);
+ kill_fasync(&file->async_queue, SIGIO, POLL_IN);
+}
+
static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
__u64 element, __u64 event,
struct list_head *obj_list,
@@ -541,7 +646,7 @@ void ib_uverbs_event_handler(struct ib_event_handler *handler,
}
struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
- int is_async)
+ enum ib_uverbs_event_file_type type)
{
struct ib_uverbs_event_file *ev_file;
struct file *filp;
@@ -556,7 +661,7 @@ struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
init_waitqueue_head(&ev_file->poll_wait);
ev_file->uverbs_file = uverbs_file;
ev_file->async_queue = NULL;
- ev_file->is_async = is_async;
+ ev_file->type = type;
ev_file->is_closed = 0;
-	filp = anon_inode_getfile("[infinibandevent]", &uverbs_event_fops,
+	filp = anon_inode_getfile("[infinibandevent]",
+				  type == IB_UVERBS_EVENT_FILE_MMU_NOTIFY ?
+				  &uverbs_event_mmap_fops : &uverbs_event_fops,
@@ -584,7 +689,7 @@ struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd)
goto out;
ev_file = f.file->private_data;
- if (ev_file->is_async) {
+ if (ev_file->type != IB_UVERBS_EVENT_FILE_COMP) {
ev_file = NULL;
goto out;
}
@@ -763,6 +868,8 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
file->async_file = NULL;
kref_init(&file->ref);
mutex_init(&file->mutex);
+ ib_ummunotify_clear_context(&file->mmu_notify_context);
+ file->mmu_notify_counter = NULL;
filp->private_data = file;
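
[Editorial note: ib_uverbs_event_mmap and the fault handler above export the per-file counter as a single read-only page, so userspace can detect missed invalidations without a system call. A hypothetical mapping helper, for illustration only:]

	#include <stdint.h>
	#include <sys/mman.h>
	#include <unistd.h>

	static volatile uint64_t *map_notify_counter(int notify_fd)
	{
		void *p = mmap(NULL, sysconf(_SC_PAGESIZE), PROT_READ,
			       MAP_SHARED, notify_fd, 0);

		/*
		 * A change in *counter between two reads means at least one
		 * registered MR range was invalidated in the interval, so
		 * the caller knows to drain the event queue.
		 */
		return p == MAP_FAILED ? NULL : p;
	}
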
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
index 2d83cfd7e6ce2..b2e1c0808056d 100644
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -34,6 +34,8 @@
#define IB_UMEM_H
#include <linux/list.h>
+#include <linux/mmu_notifier.h>
+#include <linux/rbtree.h>
#include <linux/scatterlist.h>
#include <linux/workqueue.h>
@@ -80,8 +82,23 @@ static inline size_t ib_umem_num_pages(struct ib_umem *umem)
return (ib_umem_end(umem) - ib_umem_start(umem)) >> PAGE_SHIFT;
}
+struct ib_ummunotify_range {
+ unsigned long start;
+ unsigned long end;
+ struct rb_node node;
+};
+
#ifdef CONFIG_INFINIBAND_USER_MEM
+struct ib_ummunotify_context {
+ struct mmu_notifier mmu_notifier;
+ void (*callback)(struct ib_ummunotify_context *,
+ struct ib_ummunotify_range *);
+ struct mm_struct *mm;
+ struct rb_root reg_tree;
+ spinlock_t lock;
+};
+
struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
size_t size, int access, int dmasync);
void ib_umem_release(struct ib_umem *umem);
@@ -89,10 +106,37 @@ int ib_umem_page_count(struct ib_umem *umem);
int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
size_t length);
+void ib_ummunotify_register_range(struct ib_ummunotify_context *context,
+ struct ib_ummunotify_range *range);
+void ib_ummunotify_unregister_range(struct ib_ummunotify_context *context,
+ struct ib_ummunotify_range *range);
+
+int ib_ummunotify_init_context(struct ib_ummunotify_context *context,
+ void (*callback)(struct ib_ummunotify_context *,
+ struct ib_ummunotify_range *));
+void ib_ummunotify_cleanup_context(struct ib_ummunotify_context *context);
+
+static inline void ib_ummunotify_clear_range(struct ib_ummunotify_range *range)
+{
+ RB_CLEAR_NODE(&range->node);
+}
+
+static inline void ib_ummunotify_clear_context(struct ib_ummunotify_context *context)
+{
+ context->mm = NULL;
+}
+
+static inline int ib_ummunotify_context_used(struct ib_ummunotify_context *context)
+{
+ return !!context->mm;
+}
+
#else /* CONFIG_INFINIBAND_USER_MEM */
#include <linux/err.h>
+struct ib_ummunotify_context;
+
static inline struct ib_umem *ib_umem_get(struct ib_ucontext *context,
unsigned long addr, size_t size,
int access, int dmasync) {
@@ -104,6 +148,23 @@ static inline int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offs
size_t length) {
return -EINVAL;
}
+
+static inline void ib_ummunotify_register_range(struct ib_ummunotify_context *context,
+ struct ib_ummunotify_range *range) { }
+static inline void ib_ummunotify_unregister_range(struct ib_ummunotify_context *context,
+ struct ib_ummunotify_range *range) { }
+
+static inline int ib_ummunotify_init_context(struct ib_ummunotify_context *context,
+ void (*callback)(struct ib_ummunotify_context *,
+ struct ib_ummunotify_range *)) { return 0; }
+static inline void ib_ummunotify_cleanup_context(struct ib_ummunotify_context *context) { }
+
+static inline void ib_ummunotify_clear_range(struct ib_ummunotify_range *range) { }
+
+static inline void ib_ummunotify_clear_context(struct ib_ummunotify_context *context) { }
+
+static inline int ib_ummunotify_context_used(struct ib_ummunotify_context *context) { return 0; }
+
#endif /* CONFIG_INFINIBAND_USER_MEM */
#endif /* IB_UMEM_H */
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index b513e662d8e49..e3c7f162f97b1 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -87,6 +87,9 @@ enum {
IB_USER_VERBS_CMD_CLOSE_XRCD,
IB_USER_VERBS_CMD_CREATE_XSRQ,
IB_USER_VERBS_CMD_OPEN_QP,
+ IB_USER_VERBS_CMD_CREATE_MMU_NOTIFY_CHANNEL,
+ IB_USER_VERBS_CMD_REG_MMU_NOTIFY_MR,
+ IB_USER_VERBS_CMD_DEREG_MMU_NOTIFY_MR,
};
enum {
@@ -116,6 +119,10 @@ struct ib_uverbs_comp_event_desc {
__u64 cq_handle;
};
+struct ib_uverbs_mmu_notify_event_desc {
+	__u64 mr_handle;	/* user_handle supplied at registration */
+};
+
/*
* All commands from userspace should start with a __u32 command field
* followed by __u16 in_words and out_words fields (which give the
@@ -900,4 +907,33 @@ struct ib_uverbs_destroy_srq_resp {
__u32 events_reported;
};
+struct ib_uverbs_create_mmu_notify_channel {
+ __u64 response;
+};
+
+struct ib_uverbs_create_mmu_notify_channel_resp {
+ __u32 fd;
+};
+
+struct ib_uverbs_reg_mmu_notify_mr {
+ __u64 response;
+ __u64 user_handle;
+ __u64 start;
+ __u64 length;
+ __u64 hca_va;
+ __u32 pd_handle;
+ __u32 access_flags;
+ __u64 driver_data[0];
+};
+
+struct ib_uverbs_dereg_mmu_notify_mr {
+ __u64 response;
+ __u32 mr_handle;
+ __u32 reserved;
+};
+
+struct ib_uverbs_dereg_mmu_notify_mr_resp {
+ __u32 events_reported;
+};
+
#endif /* IB_USER_VERBS_H */
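
[Editorial note: for completeness, a hypothetical sketch of driving the new registration command over the raw uverbs write() ABI; a real application would go through libibverbs, and the reg_notify_mr helper below is invented for illustration.]

	#include <stdint.h>
	#include <string.h>
	#include <unistd.h>
	#include <rdma/ib_user_verbs.h>

	static int reg_notify_mr(int uverbs_fd, uint32_t pd_handle,
				 void *addr, uint64_t length, uint32_t access,
				 uint64_t cookie, struct ib_uverbs_reg_mr_resp *resp)
	{
		struct {
			struct ib_uverbs_cmd_hdr hdr;
			struct ib_uverbs_reg_mmu_notify_mr cmd;
		} req;

		memset(&req, 0, sizeof req);
		req.hdr.command   = IB_USER_VERBS_CMD_REG_MMU_NOTIFY_MR;
		req.hdr.in_words  = sizeof req / 4;	/* whole write, in 32-bit words */
		req.hdr.out_words = sizeof *resp / 4;
		req.cmd.response     = (uintptr_t) resp;
		req.cmd.user_handle  = cookie;		/* echoed back in event descs */
		req.cmd.start        = (uintptr_t) addr;
		req.cmd.hca_va       = (uintptr_t) addr;
		req.cmd.length       = length;
		req.cmd.pd_handle    = pd_handle;
		req.cmd.access_flags = access;

		return write(uverbs_fd, &req, sizeof req) == sizeof req ? 0 : -1;
	}
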