diff options
-rw-r--r-- | drivers/infiniband/Kconfig | 1 | ||||
-rw-r--r-- | drivers/infiniband/core/umem.c | 141 | ||||
-rw-r--r-- | drivers/infiniband/core/uverbs.h | 24 | ||||
-rw-r--r-- | drivers/infiniband/core/uverbs_cmd.c | 306 | ||||
-rw-r--r-- | drivers/infiniband/core/uverbs_main.c | 121 | ||||
-rw-r--r-- | include/rdma/ib_umem.h | 61 | ||||
-rw-r--r-- | include/uapi/rdma/ib_user_verbs.h | 36 |
7 files changed, 581 insertions, 109 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index b899531498eb0..6281d01483d7b 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -24,6 +24,7 @@ config INFINIBAND_USER_MAD config INFINIBAND_USER_ACCESS tristate "InfiniBand userspace access (verbs and CM)" select ANON_INODES + select MMU_NOTIFIER ---help--- Userspace InfiniBand access support. This enables the kernel side of userspace verbs and the userspace diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index aec7a6aa2951d..89430ecabaf85 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -352,3 +352,144 @@ int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset, return 0; } EXPORT_SYMBOL(ib_umem_copy_from); + +void ib_ummunotify_register_range(struct ib_ummunotify_context *context, + struct ib_ummunotify_range *range) +{ + struct ib_ummunotify_range *trange; + struct rb_node **n = &context->reg_tree.rb_node; + struct rb_node *pn; + unsigned long flags; + + spin_lock_irqsave(&context->lock, flags); + + pn = NULL; + while (*n) { + pn = *n; + trange = rb_entry(pn, struct ib_ummunotify_range, node); + + if (range->start <= trange->start) + n = &pn->rb_left; + else + n = &pn->rb_right; + } + + rb_link_node(&range->node, pn, n); + rb_insert_color(&range->node, &context->reg_tree); + + spin_unlock_irqrestore(&context->lock, flags); +} +EXPORT_SYMBOL(ib_ummunotify_register_range); + +void ib_ummunotify_unregister_range(struct ib_ummunotify_context *context, + struct ib_ummunotify_range *range) +{ + unsigned long flags; + + if (!ib_ummunotify_context_used(context)) + return; + + if (RB_EMPTY_NODE(&range->node)) + return; + + spin_lock_irqsave(&context->lock, flags); + rb_erase(&range->node, &context->reg_tree); + spin_unlock_irqrestore(&context->lock, flags); +} +EXPORT_SYMBOL(ib_ummunotify_unregister_range); + +static void ib_ummunotify_handle_notify(struct mmu_notifier *mn, + unsigned long start, 
unsigned long end) +{ + struct ib_ummunotify_context *context = + container_of(mn, struct ib_ummunotify_context, mmu_notifier); + unsigned long flags; + struct rb_node *n; + struct ib_ummunotify_range *range; + + spin_lock_irqsave(&context->lock, flags); + + for (n = rb_first(&context->reg_tree); n; n = rb_next(n)) { + range = rb_entry(n, struct ib_ummunotify_range, node); + + /* + * Ranges overlap if they're not disjoint; and they're + * disjoint if the end of one is before the start of + * the other one. So if both disjointness comparisons + * fail then the ranges overlap. + * + * Since we keep the tree of regions we're watching + * sorted by start address, we can end this loop as + * soon as we hit a region that starts past the end of + * the range for the event we're handling. + */ + if (range->start >= end) + break; + + /* + * Just go to the next region if the start of the + * range is after the end of the region -- there + * might still be more overlapping ranges that have a + * greater start. 
+ */ + if (start >= range->end) + continue; + + context->callback(context, range); + } + + spin_unlock_irqrestore(&context->lock, flags); +} + +static void ib_ummunotify_invalidate_page(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long addr) +{ + ib_ummunotify_handle_notify(mn, addr, addr + PAGE_SIZE); +} + +static void ib_ummunotify_invalidate_range_start(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, + unsigned long end) +{ + ib_ummunotify_handle_notify(mn, start, end); +} + +static const struct mmu_notifier_ops ib_ummunotify_mmu_notifier_ops = { + .invalidate_page = ib_ummunotify_invalidate_page, + .invalidate_range_start = ib_ummunotify_invalidate_range_start, +}; + +int ib_ummunotify_init_context(struct ib_ummunotify_context *context, + void (*callback)(struct ib_ummunotify_context *, + struct ib_ummunotify_range *)) +{ + int ret; + + context->callback = callback; + context->reg_tree = RB_ROOT; + spin_lock_init(&context->lock); + + context->mm = current->mm; + atomic_inc(&current->mm->mm_count); + + context->mmu_notifier.ops = &ib_ummunotify_mmu_notifier_ops; + ret = mmu_notifier_register(&context->mmu_notifier, context->mm); + if (ret) { + mmdrop(context->mm); + context->mm = NULL; + } + + return ret; +} +EXPORT_SYMBOL(ib_ummunotify_init_context); + +void ib_ummunotify_cleanup_context(struct ib_ummunotify_context *context) +{ + if (!ib_ummunotify_context_used(context)) + return; + mmu_notifier_unregister(&context->mmu_notifier, context->mm); + mmdrop(context->mm); +} +EXPORT_SYMBOL(ib_ummunotify_cleanup_context); diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index b716b08156446..dbb2b7f12b0e3 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -96,9 +96,15 @@ struct ib_uverbs_device { struct mutex xrcd_tree_mutex; }; +enum ib_uverbs_event_file_type { + IB_UVERBS_EVENT_FILE_ASYNC, + IB_UVERBS_EVENT_FILE_COMP, + IB_UVERBS_EVENT_FILE_MMU_NOTIFY, +}; + struct 
ib_uverbs_event_file { struct kref ref; - int is_async; + enum ib_uverbs_event_file_type type; struct ib_uverbs_file *uverbs_file; spinlock_t lock; int is_closed; @@ -113,13 +119,17 @@ struct ib_uverbs_file { struct ib_uverbs_device *device; struct ib_ucontext *ucontext; struct ib_event_handler event_handler; + struct ib_ummunotify_context mmu_notify_context; + u64 *mmu_notify_counter; struct ib_uverbs_event_file *async_file; + struct ib_uverbs_event_file *mmu_notify_file; }; struct ib_uverbs_event { union { struct ib_uverbs_async_event_desc async; struct ib_uverbs_comp_event_desc comp; + struct ib_uverbs_mmu_notify_event_desc mmu_notify; } desc; struct list_head list; struct list_head obj_list; @@ -148,6 +158,11 @@ struct ib_usrq_object { struct ib_uxrcd_object *uxrcd; }; +struct ib_umr_object { + struct ib_uevent_object uevent; + struct ib_ummunotify_range range; +}; + struct ib_uqp_object { struct ib_uevent_object uevent; struct list_head mcast_list; @@ -177,7 +192,7 @@ extern struct idr ib_uverbs_rule_idr; void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj); struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, - int is_async); + enum ib_uverbs_event_file_type type); struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd); void ib_uverbs_release_ucq(struct ib_uverbs_file *file, @@ -187,6 +202,8 @@ void ib_uverbs_release_uevent(struct ib_uverbs_file *file, struct ib_uevent_object *uobj); void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context); +void ib_uverbs_mr_event_handler(struct ib_ummunotify_context *context, + struct ib_ummunotify_range *range); void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr); @@ -250,6 +267,9 @@ IB_UVERBS_DECLARE_CMD(destroy_srq); IB_UVERBS_DECLARE_CMD(create_xsrq); IB_UVERBS_DECLARE_CMD(open_xrcd); 
IB_UVERBS_DECLARE_CMD(close_xrcd); +IB_UVERBS_DECLARE_CMD(create_mmu_notify_channel); +IB_UVERBS_DECLARE_CMD(reg_mmu_notify_mr); +IB_UVERBS_DECLARE_CMD(dereg_mmu_notify_mr); #define IB_UVERBS_DECLARE_EX_CMD(name) \ int ib_uverbs_ex_##name(struct ib_uverbs_file *file, \ diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index a9f048990dfcd..e3707de3e1b90 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -355,7 +355,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, goto err_free; resp.async_fd = ret; - filp = ib_uverbs_alloc_event_file(file, 1); + filp = ib_uverbs_alloc_event_file(file, IB_UVERBS_EVENT_FILE_ASYNC); if (IS_ERR(filp)) { ret = PTR_ERR(filp); goto err_fd; @@ -933,49 +933,38 @@ void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, xrcd_table_delete(dev, inode); } -ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +static ssize_t ib_uverbs_reg_mr_common(struct ib_uverbs_file *file, + struct ib_uverbs_reg_mmu_notify_mr *cmd, + struct ib_uverbs_reg_mr_resp *resp, + struct ib_udata *udata, + bool do_notify) { - struct ib_uverbs_reg_mr cmd; - struct ib_uverbs_reg_mr_resp resp; - struct ib_udata udata; - struct ib_uobject *uobj; - struct ib_pd *pd; - struct ib_mr *mr; - int ret; - - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - INIT_UDATA(&udata, buf + sizeof cmd, - (unsigned long) cmd.response + sizeof resp, - in_len - sizeof cmd, out_len - sizeof resp); + struct ib_umr_object *obj; + struct ib_pd *pd; + struct ib_mr *mr; + int ret; - if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)) + if ((cmd->start & ~PAGE_MASK) != (cmd->hca_va & ~PAGE_MASK)) return -EINVAL; ret = ib_check_mr_access(cmd.access_flags); if (ret) return ret; - uobj = kmalloc(sizeof *uobj, GFP_KERNEL); - if (!uobj) + obj = kmalloc(sizeof *obj, GFP_KERNEL); + if 
(!obj) return -ENOMEM; - init_uobj(uobj, 0, file->ucontext, &mr_lock_class); - down_write(&uobj->mutex); + init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, &mr_lock_class); + down_write(&obj->uevent.uobject.mutex); - pd = idr_read_pd(cmd.pd_handle, file->ucontext); + pd = idr_read_pd(cmd->pd_handle, file->ucontext); if (!pd) { ret = -EINVAL; goto err_free; } - if (cmd.access_flags & IB_ACCESS_ON_DEMAND) { + if (cmd->access_flags & IB_ACCESS_ON_DEMAND) { struct ib_device_attr attr; ret = ib_query_device(pd->device, &attr); @@ -987,8 +976,8 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, } } - mr = pd->device->reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va, - cmd.access_flags, &udata); + mr = pd->device->reg_user_mr(pd, cmd->start, cmd->length, cmd->hca_va, + cmd->access_flags, &udata); if (IS_ERR(mr)) { ret = PTR_ERR(mr); goto err_put; @@ -996,22 +985,22 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, mr->device = pd->device; mr->pd = pd; - mr->uobject = uobj; + mr->uobject = &obj->uevent.uobject; atomic_inc(&pd->usecnt); atomic_set(&mr->usecnt, 0); - uobj->object = mr; - ret = idr_add_uobj(&ib_uverbs_mr_idr, uobj); + obj->uevent.uobject.object = mr; + ret = idr_add_uobj(&ib_uverbs_mr_idr, &obj->uevent.uobject); if (ret) goto err_unreg; memset(&resp, 0, sizeof resp); - resp.lkey = mr->lkey; - resp.rkey = mr->rkey; - resp.mr_handle = uobj->id; + resp->lkey = mr->lkey; + resp->rkey = mr->rkey; + resp->mr_handle = obj->uevent.uobject.id; - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) { + if (copy_to_user((void __user *) (unsigned long) cmd->response, + resp, sizeof *resp)) { ret = -EFAULT; goto err_copy; } @@ -1019,17 +1008,23 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, put_pd_read(pd); mutex_lock(&file->mutex); - list_add_tail(&uobj->list, &file->ucontext->mr_list); + list_add_tail(&obj->uevent.uobject.list, &file->ucontext->mr_list); mutex_unlock(&file->mutex); - uobj->live = 
1; + obj->uevent.uobject.live = 1; - up_write(&uobj->mutex); + if (do_notify) + ib_ummunotify_register_range(&file->mmu_notify_context, + &obj->range); + else + ib_ummunotify_clear_range(&obj->range); - return in_len; + up_write(&obj->uevent.uobject.mutex); + + return 0; err_copy: - idr_remove_uobj(&ib_uverbs_mr_idr, uobj); + idr_remove_uobj(&ib_uverbs_mr_idr, &obj->uevent.uobject); err_unreg: ib_dereg_mr(mr); @@ -1038,7 +1033,7 @@ err_put: put_pd_read(pd); err_free: - put_uobj_write(uobj); + put_uobj_write(&obj->uevent.uobject); return ret; } @@ -1135,23 +1130,79 @@ put_uobjs: return ret; } -ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file, - const char __user *buf, int in_len, - int out_len) +ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) { - struct ib_uverbs_dereg_mr cmd; - struct ib_mr *mr; - struct ib_uobject *uobj; - int ret = -EINVAL; + struct ib_uverbs_reg_mr cmd; + struct ib_uverbs_reg_mmu_notify_mr not_cmd; + struct ib_uverbs_reg_mr_resp resp; + struct ib_udata udata; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = idr_write_uobj(&ib_uverbs_mr_idr, cmd.mr_handle, file->ucontext); + INIT_UDATA(&udata, buf + sizeof cmd, + (unsigned long) cmd.response + sizeof resp, + in_len - sizeof cmd, out_len - sizeof resp); + + not_cmd.response = cmd.response; + not_cmd.user_handle = 0; + not_cmd.start = cmd.start; + not_cmd.length = cmd.length; + not_cmd.hca_va = cmd.hca_va; + not_cmd.pd_handle = cmd.pd_handle; + not_cmd.access_flags = cmd.access_flags; + + ret = ib_uverbs_reg_mr_common(file, &not_cmd, &resp, &udata, false); + return ret ? 
ret : in_len; +} + +ssize_t ib_uverbs_reg_mmu_notify_mr(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_reg_mmu_notify_mr cmd; + struct ib_uverbs_reg_mr_resp resp; + struct ib_udata udata; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (!ib_ummunotify_context_used(&file->mmu_notify_context)) + return -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + INIT_UDATA(&udata, buf + sizeof cmd, + (unsigned long) cmd.response + sizeof resp, + in_len - sizeof cmd, out_len - sizeof resp); + + ret = ib_uverbs_reg_mr_common(file, &cmd, &resp, &udata, true); + return ret ? ret : in_len; +} + +static ssize_t ib_uverbs_dereg_mr_common(struct ib_uverbs_file *file, + int mr_handle, + u32 *events_reported) +{ + struct ib_uobject *uobj; + struct ib_mr *mr; + struct ib_umr_object *obj; + int ret; + + uobj = idr_write_uobj(&ib_uverbs_mr_idr, mr_handle, file->ucontext); if (!uobj) return -EINVAL; mr = uobj->object; + obj = container_of(uobj, struct ib_umr_object, uevent.uobject); ret = ib_dereg_mr(mr); if (!ret) @@ -1162,15 +1213,60 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file, if (ret) return ret; + ib_ummunotify_unregister_range(&file->mmu_notify_context, + &obj->range); + idr_remove_uobj(&ib_uverbs_mr_idr, uobj); mutex_lock(&file->mutex); list_del(&uobj->list); mutex_unlock(&file->mutex); + ib_uverbs_release_uevent(file, &obj->uevent); + + if (events_reported) + *events_reported = obj->uevent.events_reported; + put_uobj(uobj); - return in_len; + return 0; +} + +ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_dereg_mr cmd; + int ret; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + ret = ib_uverbs_dereg_mr_common(file, cmd.mr_handle, NULL); + + return ret ? 
ret : in_len; +} + + +ssize_t ib_uverbs_dereg_mmu_notify_mr(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_dereg_mmu_notify_mr cmd; + struct ib_uverbs_dereg_mmu_notify_mr_resp resp; + int ret; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + ret = ib_uverbs_dereg_mr_common(file, cmd.mr_handle, + &resp.events_reported); + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) + return -EFAULT; + + return ret ? ret : in_len; } ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file, @@ -1313,7 +1409,7 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, return ret; resp.fd = ret; - filp = ib_uverbs_alloc_event_file(file, 0); + filp = ib_uverbs_alloc_event_file(file, IB_UVERBS_EVENT_FILE_COMP); if (IS_ERR(filp)) { put_unused_fd(resp.fd); return PTR_ERR(filp); @@ -3295,63 +3391,73 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, return ret ? ret : in_len; } -int ib_uverbs_ex_query_device(struct ib_uverbs_file *file, - struct ib_udata *ucore, - struct ib_udata *uhw) +ssize_t ib_uverbs_create_mmu_notify_channel(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) { - struct ib_uverbs_ex_query_device_resp resp; - struct ib_uverbs_ex_query_device cmd; - struct ib_device_attr attr; - struct ib_device *device; - int err; + struct ib_uverbs_create_mmu_notify_channel cmd; + struct ib_uverbs_create_mmu_notify_channel_resp resp; + struct file *filp; + int ret; - device = file->device->ib_dev; - if (ucore->inlen < sizeof(cmd)) - return -EINVAL; + if (out_len < sizeof resp) + return -ENOSPC; - err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd)); - if (err) - return err; + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; - if (cmd.comp_mask) - return -EINVAL; + mutex_lock(&file->mutex); - if (cmd.reserved) - return -EINVAL; + if (file->mmu_notify_file) { + ret = -EINVAL; + goto err; + } - 
resp.response_length = offsetof(typeof(resp), odp_caps); + ret = get_unused_fd(); + if (ret < 0) + goto err; + resp.fd = ret; - if (ucore->outlen < resp.response_length) - return -ENOSPC; + filp = ib_uverbs_alloc_event_file(file, IB_UVERBS_EVENT_FILE_MMU_NOTIFY); + if (IS_ERR(filp)) { + ret = PTR_ERR(filp); + goto err_put_fd; + } - err = device->query_device(device, &attr); - if (err) - return err; + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) { + ret = -EFAULT; + goto err_fput; + } - copy_query_dev_fields(file, &resp.base, &attr); - resp.comp_mask = 0; + ret = ib_ummunotify_init_context(&file->mmu_notify_context, + ib_uverbs_mr_event_handler); + if (ret) + goto err_fput; - if (ucore->outlen < resp.response_length + sizeof(resp.odp_caps)) - goto end; + file->mmu_notify_counter = (void *) get_zeroed_page(GFP_KERNEL); + if (!file->mmu_notify_counter) { + ret = -ENOMEM; + goto err_context; + } -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - resp.odp_caps.general_caps = attr.odp_caps.general_caps; - resp.odp_caps.per_transport_caps.rc_odp_caps = - attr.odp_caps.per_transport_caps.rc_odp_caps; - resp.odp_caps.per_transport_caps.uc_odp_caps = - attr.odp_caps.per_transport_caps.uc_odp_caps; - resp.odp_caps.per_transport_caps.ud_odp_caps = - attr.odp_caps.per_transport_caps.ud_odp_caps; - resp.odp_caps.reserved = 0; -#else - memset(&resp.odp_caps, 0, sizeof(resp.odp_caps)); -#endif - resp.response_length += sizeof(resp.odp_caps); + file->mmu_notify_file = filp->private_data; + fd_install(resp.fd, filp); -end: - err = ib_copy_to_udata(ucore, &resp, resp.response_length); - if (err) - return err; + mutex_unlock(&file->mutex); - return 0; + return in_len; + +err_context: + ib_ummunotify_cleanup_context(&file->mmu_notify_context); + +err_fput: + fput(filp); + +err_put_fd: + put_unused_fd(resp.fd); + +err: + mutex_unlock(&file->mutex); + return ret; } diff --git a/drivers/infiniband/core/uverbs_main.c 
b/drivers/infiniband/core/uverbs_main.c index 259dcc7779f5e..1aae7ed09d2f8 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -116,6 +116,9 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, [IB_USER_VERBS_CMD_CLOSE_XRCD] = ib_uverbs_close_xrcd, [IB_USER_VERBS_CMD_CREATE_XSRQ] = ib_uverbs_create_xsrq, [IB_USER_VERBS_CMD_OPEN_QP] = ib_uverbs_open_qp, + [IB_USER_VERBS_CMD_CREATE_MMU_NOTIFY_CHANNEL] = ib_uverbs_create_mmu_notify_channel, + [IB_USER_VERBS_CMD_REG_MMU_NOTIFY_MR] = ib_uverbs_reg_mmu_notify_mr, + [IB_USER_VERBS_CMD_DEREG_MMU_NOTIFY_MR] = ib_uverbs_dereg_mmu_notify_mr, }; static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file, @@ -271,9 +274,15 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) { struct ib_mr *mr = uobj->object; + struct ib_umr_object *umr = + container_of(uobj, struct ib_umr_object, uevent.uobject); idr_remove_uobj(&ib_uverbs_mr_idr, uobj); + if (ib_ummunotify_context_used(&file->mmu_notify_context)) + ib_ummunotify_unregister_range(&file->mmu_notify_context, + &umr->range); ib_dereg_mr(mr); + ib_uverbs_release_uevent(file, &umr->uevent); kfree(uobj); } @@ -298,6 +307,8 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, } put_pid(context->tgid); + ib_ummunotify_cleanup_context(&file->mmu_notify_context); + kfree(file->mmu_notify_counter); return context->device->dealloc_ucontext(context); } @@ -318,7 +329,7 @@ static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf, { struct ib_uverbs_event_file *file = filp->private_data; struct ib_uverbs_event *event; - int eventsz; + int uninitialized_var(eventsz); int ret = 0; spin_lock_irq(&file->lock); @@ -338,10 +349,17 @@ static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf, event = list_entry(file->event_list.next, struct ib_uverbs_event, list); - if (file->is_async) + switch (file->type) { + 
case IB_UVERBS_EVENT_FILE_ASYNC: eventsz = sizeof (struct ib_uverbs_async_event_desc); - else + break; + case IB_UVERBS_EVENT_FILE_COMP: eventsz = sizeof (struct ib_uverbs_comp_event_desc); + break; + case IB_UVERBS_EVENT_FILE_MMU_NOTIFY: + eventsz = sizeof (struct ib_uverbs_mmu_notify_event_desc); + break; + } if (eventsz > count) { ret = -EINVAL; @@ -368,6 +386,37 @@ static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf, return ret; } +static int uverbs_mmu_notify_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct ib_uverbs_file *file = vma->vm_private_data; + + if (vmf->pgoff != 0) + return VM_FAULT_SIGBUS; + + vmf->page = virt_to_page(file->mmu_notify_counter); + get_page(vmf->page); + + return 0; +} + +static const struct vm_operations_struct uverbs_mmu_notify_vm_ops = { + .fault = uverbs_mmu_notify_fault, +}; + +static int ib_uverbs_event_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct ib_uverbs_event_file *ev_file = filp->private_data; + struct ib_uverbs_file *file = ev_file->uverbs_file; + + if (vma->vm_end - vma->vm_start != PAGE_SIZE || vma->vm_pgoff != 0) + return -EINVAL; + + vma->vm_ops = &uverbs_mmu_notify_vm_ops; + vma->vm_private_data = file; + + return 0; +} + static unsigned int ib_uverbs_event_poll(struct file *filp, struct poll_table_struct *wait) { @@ -405,10 +454,15 @@ static int ib_uverbs_event_close(struct inode *inode, struct file *filp) } spin_unlock_irq(&file->lock); - if (file->is_async) { + if (file->type == IB_UVERBS_EVENT_FILE_ASYNC) { ib_unregister_event_handler(&file->uverbs_file->event_handler); kref_put(&file->uverbs_file->ref, ib_uverbs_release_file); } + + if (file->type == IB_UVERBS_EVENT_FILE_MMU_NOTIFY) { + /* XXX */ + } + kref_put(&file->ref, ib_uverbs_release_event_file); return 0; @@ -423,6 +477,16 @@ static const struct file_operations uverbs_event_fops = { .llseek = no_llseek, }; +static const struct file_operations uverbs_event_mmap_fops = { + .owner = THIS_MODULE, + 
.read = ib_uverbs_event_read, + .mmap = ib_uverbs_event_mmap, + .poll = ib_uverbs_event_poll, + .release = ib_uverbs_event_close, + .fasync = ib_uverbs_event_fasync, + .llseek = no_llseek, +}; + void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context) { struct ib_uverbs_event_file *file = cq_context; @@ -458,6 +522,47 @@ void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context) kill_fasync(&file->async_queue, SIGIO, POLL_IN); } +void ib_uverbs_mr_event_handler(struct ib_ummunotify_context *context, + struct ib_ummunotify_range *range) +{ + struct ib_uverbs_event_file *file = + container_of(context, struct ib_uverbs_file, + mmu_notify_context)->mmu_notify_file; + struct ib_umr_object *uobj; + struct ib_uverbs_event *entry; + unsigned long flags; + + if (!file) + return; + + spin_lock_irqsave(&file->lock, flags); + if (file->is_closed) { + spin_unlock_irqrestore(&file->lock, flags); + return; + } + + entry = kmalloc(sizeof *entry, GFP_ATOMIC); + if (!entry) { + spin_unlock_irqrestore(&file->lock, flags); + return; + } + + uobj = container_of(range, struct ib_umr_object, range); + + entry->desc.mmu_notify.cq_handle = uobj->uevent.uobject.user_handle; + entry->counter = &uobj->uevent.events_reported; + + list_add_tail(&entry->list, &file->event_list); + list_add_tail(&entry->obj_list, &uobj->uevent.event_list); + + ++(*file->uverbs_file->mmu_notify_counter); + + spin_unlock_irqrestore(&file->lock, flags); + + wake_up_interruptible(&file->poll_wait); + kill_fasync(&file->async_queue, SIGIO, POLL_IN); +} + static void ib_uverbs_async_handler(struct ib_uverbs_file *file, __u64 element, __u64 event, struct list_head *obj_list, @@ -541,7 +646,7 @@ void ib_uverbs_event_handler(struct ib_event_handler *handler, } struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, - int is_async) + enum ib_uverbs_event_file_type type) { struct ib_uverbs_event_file *ev_file; struct file *filp; @@ -556,7 +661,7 @@ struct file 
*ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, init_waitqueue_head(&ev_file->poll_wait); ev_file->uverbs_file = uverbs_file; ev_file->async_queue = NULL; - ev_file->is_async = is_async; + ev_file->type = type; ev_file->is_closed = 0; filp = anon_inode_getfile("[infinibandevent]", &uverbs_event_fops, @@ -584,7 +689,7 @@ struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd) goto out; ev_file = f.file->private_data; - if (ev_file->is_async) { + if (ev_file->type != IB_UVERBS_EVENT_FILE_COMP) { ev_file = NULL; goto out; } @@ -763,6 +868,8 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) file->async_file = NULL; kref_init(&file->ref); mutex_init(&file->mutex); + ib_ummunotify_clear_context(&file->mmu_notify_context); + file->mmu_notify_counter = NULL; filp->private_data = file; diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h index 2d83cfd7e6ce2..b2e1c0808056d 100644 --- a/include/rdma/ib_umem.h +++ b/include/rdma/ib_umem.h @@ -34,6 +34,8 @@ #define IB_UMEM_H #include <linux/list.h> +#include <linux/mmu_notifier.h> +#include <linux/rbtree.h> #include <linux/scatterlist.h> #include <linux/workqueue.h> @@ -80,8 +82,23 @@ static inline size_t ib_umem_num_pages(struct ib_umem *umem) return (ib_umem_end(umem) - ib_umem_start(umem)) >> PAGE_SHIFT; } +struct ib_ummunotify_range { + unsigned long start; + unsigned long end; + struct rb_node node; +}; + #ifdef CONFIG_INFINIBAND_USER_MEM +struct ib_ummunotify_context { + struct mmu_notifier mmu_notifier; + void (*callback)(struct ib_ummunotify_context *, + struct ib_ummunotify_range *); + struct mm_struct *mm; + struct rb_root reg_tree; + spinlock_t lock; +}; + struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, size_t size, int access, int dmasync); void ib_umem_release(struct ib_umem *umem); @@ -89,10 +106,37 @@ int ib_umem_page_count(struct ib_umem *umem); int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset, size_t length); 
+void ib_ummunotify_register_range(struct ib_ummunotify_context *context, + struct ib_ummunotify_range *range); +void ib_ummunotify_unregister_range(struct ib_ummunotify_context *context, + struct ib_ummunotify_range *range); + +int ib_ummunotify_init_context(struct ib_ummunotify_context *context, + void (*callback)(struct ib_ummunotify_context *, + struct ib_ummunotify_range *)); +void ib_ummunotify_cleanup_context(struct ib_ummunotify_context *context); + +static inline void ib_ummunotify_clear_range(struct ib_ummunotify_range *range) +{ + RB_CLEAR_NODE(&range->node); +} + +static inline void ib_ummunotify_clear_context(struct ib_ummunotify_context *context) +{ + context->mm = NULL; +} + +static inline int ib_ummunotify_context_used(struct ib_ummunotify_context *context) +{ + return !!context->mm; +} + #else /* CONFIG_INFINIBAND_USER_MEM */ #include <linux/err.h> +struct ib_ummunotify_context; + static inline struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, size_t size, int access, int dmasync) { @@ -104,6 +148,23 @@ static inline int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offs size_t length) { return -EINVAL; } + +static inline void ib_ummunotify_register_range(struct ib_ummunotify_context *context, + struct ib_ummunotify_range *range) { } +static inline void ib_ummunotify_unregister_range(struct ib_ummunotify_context *context, + struct ib_ummunotify_range *range) { } + +static inline int ib_ummunotify_init_context(struct ib_ummunotify_context *context, + void (*callback)(struct ib_ummunotify_context *, + struct ib_ummunotify_range *)) { return 0; } +static inline void ib_ummunotify_cleanup_context(struct ib_ummunotify_context *context) { } + +static inline void ib_ummunotify_clear_range(struct ib_ummunotify_range *range) { } + +static inline void ib_ummunotify_clear_context(struct ib_ummunotify_context *context) { } + +static inline int ib_ummunotify_context_used(struct ib_ummunotify_context *context) { return 0; } 
+ #endif /* CONFIG_INFINIBAND_USER_MEM */ #endif /* IB_UMEM_H */ diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index b513e662d8e49..e3c7f162f97b1 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -87,6 +87,9 @@ enum { IB_USER_VERBS_CMD_CLOSE_XRCD, IB_USER_VERBS_CMD_CREATE_XSRQ, IB_USER_VERBS_CMD_OPEN_QP, + IB_USER_VERBS_CMD_CREATE_MMU_NOTIFY_CHANNEL, + IB_USER_VERBS_CMD_REG_MMU_NOTIFY_MR, + IB_USER_VERBS_CMD_DEREG_MMU_NOTIFY_MR, }; enum { @@ -116,6 +119,10 @@ struct ib_uverbs_comp_event_desc { __u64 cq_handle; }; +struct ib_uverbs_mmu_notify_event_desc { + __u64 cq_handle; +}; + /* * All commands from userspace should start with a __u32 command field * followed by __u16 in_words and out_words fields (which give the @@ -900,4 +907,33 @@ struct ib_uverbs_destroy_srq_resp { __u32 events_reported; }; +struct ib_uverbs_create_mmu_notify_channel { + __u64 response; +}; + +struct ib_uverbs_create_mmu_notify_channel_resp { + __u32 fd; +}; + +struct ib_uverbs_reg_mmu_notify_mr { + __u64 response; + __u64 user_handle; + __u64 start; + __u64 length; + __u64 hca_va; + __u32 pd_handle; + __u32 access_flags; + __u64 driver_data[0]; +}; + +struct ib_uverbs_dereg_mmu_notify_mr { + __u64 response; + __u32 mr_handle; + __u32 reserved; +}; + +struct ib_uverbs_dereg_mmu_notify_mr_resp { + __u32 events_reported; +}; + #endif /* IB_USER_VERBS_H */ |