€•M–      Œsphinx.addnodes”Œdocument”“”)”}”(Œ	rawsource”Œ ”Œchildren”]”(Œtranslations”ŒLanguagesNode”“”)”}”(hhh]”(h Œpending_xref”“”)”}”(hhh]”Œdocutils.nodes”ŒText”“”ŒChinese (Simplified)”…””}”Œparent”hsbaŒ
attributes”}”(Œids”]”Œclasses”]”Œnames”]”Œdupnames”]”Œbackrefs”]”Œ	refdomain”Œstd”Œreftype”Œdoc”Œ	reftarget”Œ)/translations/zh_CN/gpu/drm-vm-bind-async”Œmodname”NŒ	classname”NŒrefexplicit”ˆuŒtagname”hhhubh)”}”(hhh]”hŒChinese (Traditional)”…””}”hh2sbah}”(h]”h ]”h"]”h$]”h&]”Œ	refdomain”h)Œreftype”h+Œ	reftarget”Œ)/translations/zh_TW/gpu/drm-vm-bind-async”Œmodname”NŒ	classname”NŒrefexplicit”ˆuh1hhhubh)”}”(hhh]”hŒItalian”…””}”hhFsbah}”(h]”h ]”h"]”h$]”h&]”Œ	refdomain”h)Œreftype”h+Œ	reftarget”Œ)/translations/it_IT/gpu/drm-vm-bind-async”Œmodname”NŒ	classname”NŒrefexplicit”ˆuh1hhhubh)”}”(hhh]”hŒJapanese”…””}”hhZsbah}”(h]”h ]”h"]”h$]”h&]”Œ	refdomain”h)Œreftype”h+Œ	reftarget”Œ)/translations/ja_JP/gpu/drm-vm-bind-async”Œmodname”NŒ	classname”NŒrefexplicit”ˆuh1hhhubh)”}”(hhh]”hŒKorean”…””}”hhnsbah}”(h]”h ]”h"]”h$]”h&]”Œ	refdomain”h)Œreftype”h+Œ	reftarget”Œ)/translations/ko_KR/gpu/drm-vm-bind-async”Œmodname”NŒ	classname”NŒrefexplicit”ˆuh1hhhubh)”}”(hhh]”hŒSpanish”…””}”hh‚sbah}”(h]”h ]”h"]”h$]”h&]”Œ	refdomain”h)Œreftype”h+Œ	reftarget”Œ)/translations/sp_SP/gpu/drm-vm-bind-async”Œmodname”NŒ	classname”NŒrefexplicit”ˆuh1hhhubeh}”(h]”h ]”h"]”h$]”h&]”Œcurrent_language”ŒEnglish”uh1h
hhŒ	_document”hŒsource”NŒline”NubhŒcomment”“”)”}”(hŒ*SPDX-License-Identifier: (GPL-2.0+ OR MIT)”h]”hŒ*SPDX-License-Identifier: (GPL-2.0+ OR MIT)”…””}”hh£sbah}”(h]”h ]”h"]”h$]”h&]”Œ	xml:space”Œpreserve”uh1h¡hhhžhhŸŒC/var/lib/git/docbuild/linux/Documentation/gpu/drm-vm-bind-async.rst”h KubhŒsection”“”)”}”(hhh]”(hŒtitle”“”)”}”(hŒAsynchronous VM_BIND”h]”hŒAsynchronous VM_BIND”…””}”(hh»hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h¹hh¶hžhhŸh³h Kubhµ)”}”(hhh]”(hº)”}”(hŒNomenclature:”h]”hŒNomenclature:”…””}”(hhÌhžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h¹hhÉhžhhŸh³h KubhŒbullet_list”“”)”}”(hhh]”(hŒ	list_item”“”)”}”(hŒJ``VRAM``: On-device memory. Sometimes referred to as device local memory.
”h]”hŒ	paragraph”“”)”}”(hŒI``VRAM``: On-device memory. Sometimes referred to as device local memory.”h]”(hŒliteral”“”)”}”(hŒ``VRAM``”h]”hŒVRAM”…””}”(hhíhžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hëhhçubhŒA: On-device memory. Sometimes referred to as device local memory.”…””}”(hhçhžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1håhŸh³h K
hháubah}”(h]”h ]”h"]”h$]”h&]”uh1hßhhÜhžhhŸh³h Nubhà)”}”(hŒi``gpu_vm``: A virtual GPU address space. Typically per process, but
can be shared by multiple processes.
”h]”hæ)”}”(hŒh``gpu_vm``: A virtual GPU address space. Typically per process, but
can be shared by multiple processes.”h]”(hì)”}”(hŒ
``gpu_vm``”h]”hŒgpu_vm”…””}”(hj  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hëhj  ubhŒ^: A virtual GPU address space. Typically per process, but
can be shared by multiple processes.”…””}”(hj  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1håhŸh³h Khj  ubah}”(h]”h ]”h"]”h$]”h&]”uh1hßhhÜhžhhŸh³h Nubhà)”}”(hŒš``VM_BIND``: An operation or a list of operations to modify a gpu_vm using
an IOCTL. The operations include mapping and unmapping system- or
VRAM memory.
”h]”hæ)”}”(hŒ™``VM_BIND``: An operation or a list of operations to modify a gpu_vm using
an IOCTL. The operations include mapping and unmapping system- or
VRAM memory.”h]”(hì)”}”(hŒ``VM_BIND``”h]”hŒVM_BIND”…””}”(hj9  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hëhj5  ubhŒŽ: An operation or a list of operations to modify a gpu_vm using
an IOCTL. The operations include mapping and unmapping system- or
VRAM memory.”…””}”(hj5  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1håhŸh³h Khj1  ubah}”(h]”h ]”h"]”h$]”h&]”uh1hßhhÜhžhhŸh³h Nubhà)”}”(hŒé``syncobj``: A container that abstracts synchronization objects. The
synchronization objects can be either generic, like dma-fences or
driver specific. A syncobj typically indicates the type of the
underlying synchronization object.
”h]”hæ)”}”(hŒè``syncobj``: A container that abstracts synchronization objects. The
synchronization objects can be either generic, like dma-fences or
driver specific. A syncobj typically indicates the type of the
underlying synchronization object.”h]”(hì)”}”(hŒ``syncobj``”h]”hŒsyncobj”…””}”(hj_  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hëhj[  ubhŒÝ: A container that abstracts synchronization objects. The
synchronization objects can be either generic, like dma-fences or
driver specific. A syncobj typically indicates the type of the
underlying synchronization object.”…””}”(hj[  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1håhŸh³h KhjW  ubah}”(h]”h ]”h"]”h$]”h&]”uh1hßhhÜhžhhŸh³h Nubhà)”}”(hŒd``in-syncobj``: Argument to a VM_BIND IOCTL, the VM_BIND operation waits
for these before starting.
”h]”hæ)”}”(hŒc``in-syncobj``: Argument to a VM_BIND IOCTL, the VM_BIND operation waits
for these before starting.”h]”(hì)”}”(hŒ``in-syncobj``”h]”hŒ
in-syncobj”…””}”(hj…  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hëhj  ubhŒU: Argument to a VM_BIND IOCTL, the VM_BIND operation waits
for these before starting.”…””}”(hj  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1håhŸh³h Khj}  ubah}”(h]”h ]”h"]”h$]”h&]”uh1hßhhÜhžhhŸh³h Nubhà)”}”(hŒw``out-syncobj``: Argument to a VM_BIND_IOCTL, the VM_BIND operation
signals these when the bind operation is complete.
”h]”hæ)”}”(hŒv``out-syncobj``: Argument to a VM_BIND_IOCTL, the VM_BIND operation
signals these when the bind operation is complete.”h]”(hì)”}”(hŒ``out-syncobj``”h]”hŒout-syncobj”…””}”(hj«  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hëhj§  ubhŒg: Argument to a VM_BIND_IOCTL, the VM_BIND operation
signals these when the bind operation is complete.”…””}”(hj§  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1håhŸh³h Khj£  ubah}”(h]”h ]”h"]”h$]”h&]”uh1hßhhÜhžhhŸh³h Nubhà)”}”(hŒÛ``dma-fence``: A cross-driver synchronization object. A basic
understanding of dma-fences is required to digest this
document. Please refer to the ``DMA Fences`` section of the
:doc:`dma-buf doc </driver-api/dma-buf>`.
”h]”hæ)”}”(hŒÚ``dma-fence``: A cross-driver synchronization object. A basic
understanding of dma-fences is required to digest this
document. Please refer to the ``DMA Fences`` section of the
:doc:`dma-buf doc </driver-api/dma-buf>`.”h]”(hì)”}”(hŒ``dma-fence``”h]”hŒ	dma-fence”…””}”(hjÑ  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hëhjÍ  ubhŒ†: A cross-driver synchronization object. A basic
understanding of dma-fences is required to digest this
document. Please refer to the ”…””}”(hjÍ  hžhhŸNh Nubhì)”}”(hŒ``DMA Fences``”h]”hŒ
DMA Fences”…””}”(hjã  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hëhjÍ  ubhŒ section of the
”…””}”(hjÍ  hžhhŸNh Nubh)”}”(hŒ(:doc:`dma-buf doc </driver-api/dma-buf>`”h]”hŒinline”“”)”}”(hj÷  h]”hŒdma-buf doc”…””}”(hjû  hžhhŸNh Nubah}”(h]”h ]”(Œxref”Œstd”Œstd-doc”eh"]”h$]”h&]”uh1jù  hjõ  ubah}”(h]”h ]”h"]”h$]”h&]”Œrefdoc”Œgpu/drm-vm-bind-async”Œ	refdomain”j  Œreftype”Œdoc”Œrefexplicit”ˆŒrefwarn”ˆŒ	reftarget”Œ/driver-api/dma-buf”uh1hhŸh³h KhjÍ  ubhŒ.”…””}”(hjÍ  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1håhŸh³h KhjÉ  ubah}”(h]”h ]”h"]”h$]”h&]”uh1hßhhÜhžhhŸh³h Nubhà)”}”(hXé  ``memory fence``: A synchronization object, different from a dma-fence.
A memory fence uses the value of a specified memory location to determine
signaled status. A memory fence can be awaited and signaled by both
the GPU and CPU. Memory fences are sometimes referred to as
user-fences, userspace-fences or gpu futexes and do not necessarily obey
the dma-fence rule of signaling within a "reasonable amount of time".
The kernel should thus avoid waiting for memory fences with locks held.
”h]”hæ)”}”(hXè  ``memory fence``: A synchronization object, different from a dma-fence.
A memory fence uses the value of a specified memory location to determine
signaled status. A memory fence can be awaited and signaled by both
the GPU and CPU. Memory fences are sometimes referred to as
user-fences, userspace-fences or gpu futexes and do not necessarily obey
the dma-fence rule of signaling within a "reasonable amount of time".
The kernel should thus avoid waiting for memory fences with locks held.”h]”(hì)”}”(hŒ``memory fence``”h]”hŒmemory fence”…””}”(hj2  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hëhj.  ubhXÜ  : A synchronization object, different from a dma-fence.
A memory fence uses the value of a specified memory location to determine
signaled status. A memory fence can be awaited and signaled by both
the GPU and CPU. Memory fences are sometimes referred to as
user-fences, userspace-fences or gpu futexes and do not necessarily obey
the dma-fence rule of signaling within a â€œreasonable amount of timeâ€.
The kernel should thus avoid waiting for memory fences with locks held.”…””}”(hj.  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1håhŸh³h K#hj*  ubah}”(h]”h ]”h"]”h$]”h&]”uh1hßhhÜhžhhŸh³h Nubhà)”}”(hX  ``long-running workload``: A workload that may take more than the
current stipulated dma-fence maximum signal delay to complete and
which therefore needs to set the gpu_vm or the GPU execution context in
a certain mode that disallows completion dma-fences.
”h]”hæ)”}”(hX   ``long-running workload``: A workload that may take more than the
current stipulated dma-fence maximum signal delay to complete and
which therefore needs to set the gpu_vm or the GPU execution context in
a certain mode that disallows completion dma-fences.”h]”(hì)”}”(hŒ``long-running workload``”h]”hŒlong-running workload”…””}”(hjX  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hëhjT  ubhŒç: A workload that may take more than the
current stipulated dma-fence maximum signal delay to complete and
which therefore needs to set the gpu_vm or the GPU execution context in
a certain mode that disallows completion dma-fences.”…””}”(hjT  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1håhŸh³h K+hjP  ubah}”(h]”h ]”h"]”h$]”h&]”uh1hßhhÜhžhhŸh³h Nubhà)”}”(hX  ``exec function``: An exec function is a function that revalidates all
affected gpu_vmas, submits a GPU command batch and registers the
dma_fence representing the GPU command's activity with all affected
dma_resvs. For completeness, although not covered by this document,
it's worth mentioning that an exec function may also be the
revalidation worker that is used by some drivers in compute /
long-running mode.
”h]”hæ)”}”(hXœ  ``exec function``: An exec function is a function that revalidates all
affected gpu_vmas, submits a GPU command batch and registers the
dma_fence representing the GPU command's activity with all affected
dma_resvs. For completeness, although not covered by this document,
it's worth mentioning that an exec function may also be the
revalidation worker that is used by some drivers in compute /
long-running mode.”h]”(hì)”}”(hŒ``exec function``”h]”hŒexec function”…””}”(hj~  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hëhjz  ubhX  : An exec function is a function that revalidates all
affected gpu_vmas, submits a GPU command batch and registers the
dma_fence representing the GPU commandâ€™s activity with all affected
dma_resvs. For completeness, although not covered by this document,
itâ€™s worth mentioning that an exec function may also be the
revalidation worker that is used by some drivers in compute /
long-running mode.”…””}”(hjz  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1håhŸh³h K0hjv  ubah}”(h]”h ]”h"]”h$]”h&]”uh1hßhhÜhžhhŸh³h Nubhà)”}”(hX  ``bind context``: A context identifier used for the VM_BIND
operation. VM_BIND operations that use the same bind context can be
assumed, where it matters, to complete in order of submission. No such
assumptions can be made for VM_BIND operations using separate bind contexts.
”h]”hæ)”}”(hX  ``bind context``: A context identifier used for the VM_BIND
operation. VM_BIND operations that use the same bind context can be
assumed, where it matters, to complete in order of submission. No such
assumptions can be made for VM_BIND operations using separate bind contexts.”h]”(hì)”}”(hŒ``bind context``”h]”hŒbind context”…””}”(hj¤  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hëhj   ubhX  : A context identifier used for the VM_BIND
operation. VM_BIND operations that use the same bind context can be
assumed, where it matters, to complete in order of submission. No such
assumptions can be made for VM_BIND operations using separate bind contexts.”…””}”(hj   hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1håhŸh³h K8hjœ  ubah}”(h]”h ]”h"]”h$]”h&]”uh1hßhhÜhžhhŸh³h Nubhà)”}”(hŒ``UMD``: User-mode driver.
”h]”hæ)”}”(hŒ``UMD``: User-mode driver.”h]”(hì)”}”(hŒ``UMD``”h]”hŒUMD”…””}”(hjÊ  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hëhjÆ  ubhŒ: User-mode driver.”…””}”(hjÆ  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1håhŸh³h K=hjÂ  ubah}”(h]”h ]”h"]”h$]”h&]”uh1hßhhÜhžhhŸh³h Nubhà)”}”(hŒ``KMD``: Kernel-mode driver.

”h]”hæ)”}”(hŒ``KMD``: Kernel-mode driver.”h]”(hì)”}”(hŒ``KMD``”h]”hŒKMD”…””}”(hjð  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hëhjì  ubhŒ: Kernel-mode driver.”…””}”(hjì  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1håhŸh³h K?hjè  ubah}”(h]”h ]”h"]”h$]”h&]”uh1hßhhÜhžhhŸh³h Nubeh}”(h]”h ]”h"]”h$]”h&]”Œbullet”Œ*”uh1hÚhŸh³h K
hhÉhžhubeh}”(h]”Œnomenclature”ah ]”h"]”Œnomenclature:”ah$]”h&]”uh1h´hh¶hžhhŸh³h Kubhµ)”}”(hhh]”(hº)”}”(hŒ,Synchronous / Asynchronous VM_BIND operation”h]”hŒ,Synchronous / Asynchronous VM_BIND operation”…””}”(hj!  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h¹hj  hžhhŸh³h KCubhµ)”}”(hhh]”(hº)”}”(hŒSynchronous VM_BIND”h]”hŒSynchronous VM_BIND”…””}”(hj2  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h¹hj/  hžhhŸh³h KFubhæ)”}”(hX	  With Synchronous VM_BIND, the VM_BIND operations all complete before the
IOCTL returns. A synchronous VM_BIND takes neither in-fences nor
out-fences. Synchronous VM_BIND may block and wait for GPU operations;
for example swap-in or clearing, or even previous binds.”h]”hX	  With Synchronous VM_BIND, the VM_BIND operations all complete before the
IOCTL returns. A synchronous VM_BIND takes neither in-fences nor
out-fences. Synchronous VM_BIND may block and wait for GPU operations;
for example swap-in or clearing, or even previous binds.”…””}”(hj@  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1håhŸh³h KGhj/  hžhubeh}”(h]”Œsynchronous-vm-bind”ah ]”h"]”Œsynchronous vm_bind”ah$]”h&]”uh1h´hj  hžhhŸh³h KFubhµ)”}”(hhh]”(hº)”}”(hŒAsynchronous VM_BIND”h]”hŒAsynchronous VM_BIND”…””}”(hjY  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h¹hjV  hžhhŸh³h KMubhæ)”}”(hX?  Asynchronous VM_BIND accepts both in-syncobjs and out-syncobjs. While the
IOCTL may return immediately, the VM_BIND operations wait for the in-syncobjs
before modifying the GPU page-tables, and signal the out-syncobjs when
the modification is done in the sense that the next exec function that
awaits for the out-syncobjs will see the change. Errors are reported
synchronously.
In low-memory situations the implementation may block, performing the
VM_BIND synchronously, because there might not be enough memory
immediately available for preparing the asynchronous operation.”h]”hX?  Asynchronous VM_BIND accepts both in-syncobjs and out-syncobjs. While the
IOCTL may return immediately, the VM_BIND operations wait for the in-syncobjs
before modifying the GPU page-tables, and signal the out-syncobjs when
the modification is done in the sense that the next exec function that
awaits for the out-syncobjs will see the change. Errors are reported
synchronously.
In low-memory situations the implementation may block, performing the
VM_BIND synchronously, because there might not be enough memory
immediately available for preparing the asynchronous operation.”…””}”(hjg  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1håhŸh³h KNhjV  hžhubhæ)”}”(hX2  If the VM_BIND IOCTL takes a list or an array of operations as an argument,
the in-syncobjs needs to signal before the first operation starts to
execute, and the out-syncobjs signal after the last operation
completes. Operations in the operation list can be assumed, where it
matters, to complete in order.”h]”hX2  If the VM_BIND IOCTL takes a list or an array of operations as an argument,
the in-syncobjs needs to signal before the first operation starts to
execute, and the out-syncobjs signal after the last operation
completes. Operations in the operation list can be assumed, where it
matters, to complete in order.”…””}”(hju  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1håhŸh³h KXhjV  hžhubhæ)”}”(hX‰  Since asynchronous VM_BIND operations may use dma-fences embedded in
out-syncobjs and internally in KMD to signal bind completion,  any
memory fences given as VM_BIND in-fences need to be awaited
synchronously before the VM_BIND ioctl returns, since dma-fences,
required to signal in a reasonable amount of time, can never be made
to depend on memory fences that don't have such a restriction.”h]”hX‹  Since asynchronous VM_BIND operations may use dma-fences embedded in
out-syncobjs and internally in KMD to signal bind completion,  any
memory fences given as VM_BIND in-fences need to be awaited
synchronously before the VM_BIND ioctl returns, since dma-fences,
required to signal in a reasonable amount of time, can never be made
to depend on memory fences that donâ€™t have such a restriction.”…””}”(hjƒ  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1håhŸh³h K^hjV  hžhubhæ)”}”(hXP  The purpose of an Asynchronous VM_BIND operation is for user-mode
drivers to be able to pipeline interleaved gpu_vm modifications and
exec functions. For long-running workloads, such pipelining of a bind
operation is not allowed and any in-fences need to be awaited
synchronously. The reason for this is twofold. First, any memory
fences gated by a long-running workload and used as in-syncobjs for the
VM_BIND operation will need to be awaited synchronously anyway (see
above). Second, any dma-fences used as in-syncobjs for VM_BIND
operations for long-running workloads will not allow for pipelining
anyway since long-running workloads don't allow for dma-fences as
out-syncobjs, so while theoretically possible the use of them is
questionable and should be rejected until there is a valuable use-case.
Note that this is not a limitation imposed by dma-fence rules, but
rather a limitation imposed to keep KMD implementation simple. It does
not affect using dma-fences as dependencies for the long-running
workload itself, which is allowed by dma-fence rules, but rather for
the VM_BIND operation only.”h]”hXR  The purpose of an Asynchronous VM_BIND operation is for user-mode
drivers to be able to pipeline interleaved gpu_vm modifications and
exec functions. For long-running workloads, such pipelining of a bind
operation is not allowed and any in-fences need to be awaited
synchronously. The reason for this is twofold. First, any memory
fences gated by a long-running workload and used as in-syncobjs for the
VM_BIND operation will need to be awaited synchronously anyway (see
above). Second, any dma-fences used as in-syncobjs for VM_BIND
operations for long-running workloads will not allow for pipelining
anyway since long-running workloads donâ€™t allow for dma-fences as
out-syncobjs, so while theoretically possible the use of them is
questionable and should be rejected until there is a valuable use-case.
Note that this is not a limitation imposed by dma-fence rules, but
rather a limitation imposed to keep KMD implementation simple. It does
not affect using dma-fences as dependencies for the long-running
workload itself, which is allowed by dma-fence rules, but rather for
the VM_BIND operation only.”…””}”(hj‘  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1håhŸh³h KehjV  hžhubhæ)”}”(hX  An asynchronous VM_BIND operation may take substantial time to
complete and signal the out_fence. In particular if the operation is
deeply pipelined behind other VM_BIND operations and workloads
submitted using exec functions. In that case, UMD might want to avoid a
subsequent VM_BIND operation to be queued behind the first one if
there are no explicit dependencies. In order to circumvent such a queue-up, a
VM_BIND implementation may allow for VM_BIND contexts to be
created. For each context, VM_BIND operations will be guaranteed to
complete in the order they were submitted, but that is not the case
for VM_BIND operations executing on separate VM_BIND contexts. Instead
KMD will attempt to execute such VM_BIND operations in parallel but
leaving no guarantee that they will actually be executed in
parallel. There may be internal implicit dependencies that only KMD knows
about, for example page-table structure changes. A way to attempt
to avoid such internal dependencies is to have different VM_BIND
contexts use separate regions of a VM.”h]”hX  An asynchronous VM_BIND operation may take substantial time to
complete and signal the out_fence. In particular if the operation is
deeply pipelined behind other VM_BIND operations and workloads
submitted using exec functions. In that case, UMD might want to avoid a
subsequent VM_BIND operation to be queued behind the first one if
there are no explicit dependencies. In order to circumvent such a queue-up, a
VM_BIND implementation may allow for VM_BIND contexts to be
created. For each context, VM_BIND operations will be guaranteed to
complete in the order they were submitted, but that is not the case
for VM_BIND operations executing on separate VM_BIND contexts. Instead
KMD will attempt to execute such VM_BIND operations in parallel but
leaving no guarantee that they will actually be executed in
parallel. There may be internal implicit dependencies that only KMD knows
about, for example page-table structure changes. A way to attempt
to avoid such internal dependencies is to have different VM_BIND
contexts use separate regions of a VM.”…””}”(hjŸ  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1håhŸh³h KwhjV  hžhubhæ)”}”(hXã  Also for VM_BINDS for long-running gpu_vms the user-mode driver should typically
select memory fences as out-fences since that gives greater flexibility for
the kernel mode driver to inject other operations into the bind /
unbind operations. Like for example inserting breakpoints into batch
buffers. The workload execution can then easily be pipelined behind
the bind completion using the memory out-fence as the signal condition
for a GPU semaphore embedded by UMD in the workload.”h]”hXã  Also for VM_BINDS for long-running gpu_vms the user-mode driver should typically
select memory fences as out-fences since that gives greater flexibility for
the kernel mode driver to inject other operations into the bind /
unbind operations. Like for example inserting breakpoints into batch
buffers. The workload execution can then easily be pipelined behind
the bind completion using the memory out-fence as the signal condition
for a GPU semaphore embedded by UMD in the workload.”…””}”(hj­  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1håhŸh³h KˆhjV  hžhubhæ)”}”(hŒ†There is no difference in the operations supported or in
multi-operation support between asynchronous VM_BIND and synchronous VM_BIND.”h]”hŒ†There is no difference in the operations supported or in
multi-operation support between asynchronous VM_BIND and synchronous VM_BIND.”…””}”(hj»  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1håhŸh³h KhjV  hžhubeh}”(h]”Œid1”ah ]”h"]”h$]”Œasynchronous vm_bind”ah&]”uh1h´hj  hžhhŸh³h KMŒ
referenced”Kubeh}”(h]”Œ*synchronous-asynchronous-vm-bind-operation”ah ]”h"]”Œ,synchronous / asynchronous vm_bind operation”ah$]”h&]”uh1h´hh¶hžhhŸh³h KCubhµ)”}”(hhh]”(hº)”}”(hŒ;Multi-operation VM_BIND IOCTL error handling and interrupts”h]”hŒ;Multi-operation VM_BIND IOCTL error handling and interrupts”…””}”(hjÝ  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h¹hjÚ  hžhhŸh³h K”ubhæ)”}”(hXB  The VM_BIND operations of the IOCTL may error for various reasons, for
example due to lack of resources to complete and due to interrupted
waits.
In these situations UMD should preferably restart the IOCTL after
taking suitable action.
If UMD has over-committed a memory resource, an -ENOSPC error will be
returned, and UMD may then unbind resources that are not used at the
moment and rerun the IOCTL. On -EINTR, UMD should simply rerun the
IOCTL and on -ENOMEM user-space may either attempt to free known
system memory resources or fail. In case of UMD deciding to fail a
bind operation, due to an error return, no additional action is needed
to clean up the failed operation, and the VM is left in the same state
as it was before the failing IOCTL.
Unbind operations are guaranteed not to return any errors due to
resource constraints, but may return errors due to, for example,
invalid arguments or the gpu_vm being banned.
In the case an unexpected error happens during the asynchronous bind
process, the gpu_vm will be banned, and attempts to use it after banning
will return -ENOENT.”h]”hXB  The VM_BIND operations of the IOCTL may error for various reasons, for
example due to lack of resources to complete and due to interrupted
waits.
In these situations UMD should preferably restart the IOCTL after
taking suitable action.
If UMD has over-committed a memory resource, an -ENOSPC error will be
returned, and UMD may then unbind resources that are not used at the
moment and rerun the IOCTL. On -EINTR, UMD should simply rerun the
IOCTL and on -ENOMEM user-space may either attempt to free known
system memory resources or fail. In case of UMD deciding to fail a
bind operation, due to an error return, no additional action is needed
to clean up the failed operation, and the VM is left in the same state
as it was before the failing IOCTL.
Unbind operations are guaranteed not to return any errors due to
resource constraints, but may return errors due to, for example,
invalid arguments or the gpu_vm being banned.
In the case an unexpected error happens during the asynchronous bind
process, the gpu_vm will be banned, and attempts to use it after banning
will return -ENOENT.”…””}”(hjë  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1håhŸh³h K–hjÚ  hžhubeh}”(h]”Œ;multi-operation-vm-bind-ioctl-error-handling-and-interrupts”ah ]”h"]”Œ;multi-operation vm_bind ioctl error handling and interrupts”ah$]”h&]”uh1h´hh¶hžhhŸh³h K”ubhµ)”}”(hhh]”(hº)”}”(hŒExample: The Xe VM_BIND uAPI”h]”hŒExample: The Xe VM_BIND uAPI”…””}”(hj  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h¹hj  hžhhŸh³h K«ubhæ)”}”(hX2  Starting with the VM_BIND operation struct, the IOCTL call can take
zero, one or many such operations. A zero number means only the
synchronization part of the IOCTL is carried out: an asynchronous
VM_BIND updates the syncobjects, whereas a sync VM_BIND waits for the
implicit dependencies to be fulfilled.”h]”hX2  Starting with the VM_BIND operation struct, the IOCTL call can take
zero, one or many such operations. A zero number means only the
synchronization part of the IOCTL is carried out: an asynchronous
VM_BIND updates the syncobjects, whereas a sync VM_BIND waits for the
implicit dependencies to be fulfilled.”…””}”(hj  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1håhŸh³h K­hj  hžhubhŒliteral_block”“”)”}”(hX(	  struct drm_xe_vm_bind_op {
     /**
      * @obj: GEM object to operate on, MBZ for MAP_USERPTR, MBZ for UNMAP
      */
     __u32 obj;

     /** @pad: MBZ */
     __u32 pad;

     union {
             /**
              * @obj_offset: Offset into the object for MAP.
              */
             __u64 obj_offset;

             /** @userptr: user virtual address for MAP_USERPTR */
             __u64 userptr;
     };

     /**
      * @range: Number of bytes from the object to bind to addr, MBZ for UNMAP_ALL
      */
     __u64 range;

     /** @addr: Address to operate on, MBZ for UNMAP_ALL */
     __u64 addr;

     /**
      * @tile_mask: Mask for which tiles to create binds for, 0 == All tiles,
      * only applies to creating new VMAs
      */
     __u64 tile_mask;

    /* Map (parts of) an object into the GPU virtual address range.
 #define XE_VM_BIND_OP_MAP           0x0
     /* Unmap a GPU virtual address range */
 #define XE_VM_BIND_OP_UNMAP         0x1
     /*
      * Map a CPU virtual address range into a GPU virtual
      * address range.
      */
 #define XE_VM_BIND_OP_MAP_USERPTR   0x2
     /* Unmap a gem object from the VM. */
 #define XE_VM_BIND_OP_UNMAP_ALL     0x3
     /*
      * Make the backing memory of an address range resident if
      * possible. Note that this doesn't pin backing memory.
      */
 #define XE_VM_BIND_OP_PREFETCH      0x4

     /* Make the GPU map readonly. */
 #define XE_VM_BIND_FLAG_READONLY    (0x1 << 16)
     /*
      * Valid on a faulting VM only, do the MAP operation immediately rather
      * than deferring the MAP to the page fault handler.
      */
 #define XE_VM_BIND_FLAG_IMMEDIATE   (0x1 << 17)
     /*
      * When the NULL flag is set, the page tables are setup with a special
      * bit which indicates writes are dropped and all reads return zero.  In
      * the future, the NULL flags will only be valid for XE_VM_BIND_OP_MAP
      * operations, the BO handle MBZ, and the BO offset MBZ. This flag is
      * intended to implement VK sparse bindings.
      */
 #define XE_VM_BIND_FLAG_NULL        (0x1 << 18)
     /** @op: Operation to perform (lower 16 bits) and flags (upper 16 bits) */
     __u32 op;

     /** @mem_region: Memory region to prefetch VMA to, instance not a mask */
     __u32 region;

     /** @reserved: Reserved */
     __u64 reserved[2];
};”h]”hX(	  struct drm_xe_vm_bind_op {
     /**
      * @obj: GEM object to operate on, MBZ for MAP_USERPTR, MBZ for UNMAP
      */
     __u32 obj;

     /** @pad: MBZ */
     __u32 pad;

     union {
             /**
              * @obj_offset: Offset into the object for MAP.
              */
             __u64 obj_offset;

             /** @userptr: user virtual address for MAP_USERPTR */
             __u64 userptr;
     };

     /**
      * @range: Number of bytes from the object to bind to addr, MBZ for UNMAP_ALL
      */
     __u64 range;

     /** @addr: Address to operate on, MBZ for UNMAP_ALL */
     __u64 addr;

     /**
      * @tile_mask: Mask for which tiles to create binds for, 0 == All tiles,
      * only applies to creating new VMAs
      */
     __u64 tile_mask;

    /* Map (parts of) an object into the GPU virtual address range.
 #define XE_VM_BIND_OP_MAP           0x0
     /* Unmap a GPU virtual address range */
 #define XE_VM_BIND_OP_UNMAP         0x1
     /*
      * Map a CPU virtual address range into a GPU virtual
      * address range.
      */
 #define XE_VM_BIND_OP_MAP_USERPTR   0x2
     /* Unmap a gem object from the VM. */
 #define XE_VM_BIND_OP_UNMAP_ALL     0x3
     /*
      * Make the backing memory of an address range resident if
      * possible. Note that this doesn't pin backing memory.
      */
 #define XE_VM_BIND_OP_PREFETCH      0x4

     /* Make the GPU map readonly. */
 #define XE_VM_BIND_FLAG_READONLY    (0x1 << 16)
     /*
      * Valid on a faulting VM only, do the MAP operation immediately rather
      * than deferring the MAP to the page fault handler.
      */
 #define XE_VM_BIND_FLAG_IMMEDIATE   (0x1 << 17)
     /*
      * When the NULL flag is set, the page tables are setup with a special
      * bit which indicates writes are dropped and all reads return zero.  In
      * the future, the NULL flags will only be valid for XE_VM_BIND_OP_MAP
      * operations, the BO handle MBZ, and the BO offset MBZ. This flag is
      * intended to implement VK sparse bindings.
      */
 #define XE_VM_BIND_FLAG_NULL        (0x1 << 18)
     /** @op: Operation to perform (lower 16 bits) and flags (upper 16 bits) */
     __u32 op;

     /** @mem_region: Memory region to prefetch VMA to, instance not a mask */
     __u32 region;

     /** @reserved: Reserved */
     __u64 reserved[2];
};”…””}”hj"  sbah}”(h]”h ]”h"]”h$]”h&]”h±h²Œforce”‰Œlanguage”Œc”Œhighlight_args”}”uh1j   hŸh³h K³hj  hžhubhæ)”}”(hX  The VM_BIND IOCTL argument itself, looks like follows. Note that for
synchronous VM_BIND, the num_syncs and syncs fields must be zero. Here
the ``exec_queue_id`` field is the VM_BIND context discussed previously
that is used to facilitate out-of-order VM_BINDs.”h]”(hŒThe VM_BIND IOCTL argument itself, looks like follows. Note that for
synchronous VM_BIND, the num_syncs and syncs fields must be zero. Here
the ”…””}”(hj5  hžhhŸNh Nubhì)”}”(hŒ``exec_queue_id``”h]”hŒexec_queue_id”…””}”(hj=  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hëhj5  ubhŒd field is the VM_BIND context discussed previously
that is used to facilitate out-of-order VM_BINDs.”…””}”(hj5  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1håhŸh³h Mhj  hžhubj!  )”}”(hXÕ  struct drm_xe_vm_bind {
    /** @extensions: Pointer to the first extension struct, if any */
    __u64 extensions;

    /** @vm_id: The ID of the VM to bind to */
    __u32 vm_id;

    /**
     * @exec_queue_id: exec_queue_id, must be of class DRM_XE_ENGINE_CLASS_VM_BIND
     * and exec queue must have same vm_id. If zero, the default VM bind engine
     * is used.
     */
    __u32 exec_queue_id;

    /** @num_binds: number of binds in this IOCTL */
    __u32 num_binds;

    /* If set, perform an async VM_BIND, if clear a sync VM_BIND */
#define XE_VM_BIND_IOCTL_FLAG_ASYNC (0x1 << 0)

    /** @flag: Flags controlling all operations in this ioctl. */
    __u32 flags;

    union {
            /** @bind: used if num_binds == 1 */
            struct drm_xe_vm_bind_op bind;

            /**
             * @vector_of_binds: userptr to array of struct
             * drm_xe_vm_bind_op if num_binds > 1
             */
            __u64 vector_of_binds;
    };

    /** @num_syncs: amount of syncs to wait for or to signal on completion. */
    __u32 num_syncs;

    /** @pad2: MBZ */
    __u32 pad2;

    /** @syncs: pointer to struct drm_xe_sync array */
    __u64 syncs;

    /** @reserved: Reserved */
    __u64 reserved[2];
};”h]”hXÕ  struct drm_xe_vm_bind {
    /** @extensions: Pointer to the first extension struct, if any */
    __u64 extensions;

    /** @vm_id: The ID of the VM to bind to */
    __u32 vm_id;

    /**
     * @exec_queue_id: exec_queue_id, must be of class DRM_XE_ENGINE_CLASS_VM_BIND
     * and exec queue must have same vm_id. If zero, the default VM bind engine
     * is used.
     */
    __u32 exec_queue_id;

    /** @num_binds: number of binds in this IOCTL */
    __u32 num_binds;

    /* If set, perform an async VM_BIND, if clear a sync VM_BIND */
#define XE_VM_BIND_IOCTL_FLAG_ASYNC (0x1 << 0)

    /** @flag: Flags controlling all operations in this ioctl. */
    __u32 flags;

    union {
            /** @bind: used if num_binds == 1 */
            struct drm_xe_vm_bind_op bind;

            /**
             * @vector_of_binds: userptr to array of struct
             * drm_xe_vm_bind_op if num_binds > 1
             */
            __u64 vector_of_binds;
    };

    /** @num_syncs: amount of syncs to wait for or to signal on completion. */
    __u32 num_syncs;

    /** @pad2: MBZ */
    __u32 pad2;

    /** @syncs: pointer to struct drm_xe_sync array */
    __u64 syncs;

    /** @reserved: Reserved */
    __u64 reserved[2];
};”…””}”hjU  sbah}”(h]”h ]”h"]”h$]”h&]”h±h²j0  ‰j1  j2  j3  }”uh1j   hŸh³h Mhj  hžhubeh}”(h]”Œexample-the-xe-vm-bind-uapi”ah ]”h"]”Œexample: the xe vm_bind uapi”ah$]”h&]”uh1h´hh¶hžhhŸh³h K«ubeh}”(h]”Œasynchronous-vm-bind”ah ]”h"]”h$]”jÏ  ah&]”uh1h´hhhžhhŸh³h KjÑ  Kubeh}”(h]”h ]”h"]”h$]”h&]”Œsource”h³uh1hŒcurrent_source”NŒcurrent_line”NŒsettings”Œdocutils.frontend”ŒValues”“”)”}”(h¹NŒ	generator”NŒ	datestamp”NŒsource_link”NŒ
source_url”NŒtoc_backlinks”Œentry”Œfootnote_backlinks”KŒsectnum_xform”KŒstrip_comments”NŒstrip_elements_with_classes”NŒstrip_classes”NŒreport_level”KŒ
halt_level”KŒexit_status_level”KŒdebug”NŒwarning_stream”NŒ	traceback”ˆŒinput_encoding”Œ	utf-8-sig”Œinput_encoding_error_handler”Œstrict”Œoutput_encoding”Œutf-8”Œoutput_encoding_error_handler”j–  Œerror_encoding”Œutf-8”Œerror_encoding_error_handler”Œbackslashreplace”Œlanguage_code”Œen”Œrecord_dependencies”NŒconfig”NŒ	id_prefix”hŒauto_id_prefix”Œid”Œdump_settings”NŒdump_internals”NŒdump_transforms”NŒdump_pseudo_xml”NŒexpose_internals”NŒstrict_visitor”NŒ_disable_config”NŒ_source”h³Œ_destination”NŒ_config_files”]”Œ7/var/lib/git/docbuild/linux/Documentation/docutils.conf”aŒfile_insertion_enabled”ˆŒraw_enabled”KŒline_length_limit”M'Œpep_references”NŒpep_base_url”Œhttps://peps.python.org/”Œpep_file_url_template”Œpep-%04d”Œrfc_references”NŒrfc_base_url”Œ&https://datatracker.ietf.org/doc/html/”Œ	tab_width”KŒtrim_footnote_reference_space”‰Œsyntax_highlight”Œlong”Œsmart_quotes”ˆŒsmartquotes_locales”]”Œcharacter_level_inline_markup”‰Œdoctitle_xform”‰Œdocinfo_xform”KŒsectsubtitle_xform”‰Œimage_loading”Œlink”Œembed_stylesheet”‰Œcloak_email_addresses”ˆŒsection_self_link”‰Œenv”NubŒreporter”NŒindirect_targets”]”Œsubstitution_defs”}”Œsubstitution_names”}”Œrefnames”}”Œrefids”}”Œnameids”}”(Œasynchronous vm_bind”Nj  j  j×  jÔ  jS  jP  jþ  jû  ji  jf  uŒ	nametypes”}”(jÚ  ‰j  ‰j×  ‰jS  ‰jþ  ‰ji  ‰uh}”(jn  h¶j  hÉjÔ  j  jP  j/  jË  jV  jû  jÚ  jf  j  uŒfootnote_refs”}”Œcitation_refs”}”Œautofootnotes”]”Œautofootnote_refs”]”Œsymbol_footnotes”]”Œsymbol_footnote_refs”]”Œ	footnotes”]”Œ	citations”]”Œautofootnote_start”KŒsymbol_footnote_start”K Œ
id_counter”Œcollections”ŒCounter”“”}”j¤  Ks…”R”Œparse_messages”]”hŒsystem_message”“”)”}”(hhh]”hæ)”}”(hŒ7Duplicate implicit target name: "asynchronous vm_bind".”h]”hŒ;Duplicate implicit target name: â€œasynchronous vm_bindâ€.”…””}”(hjþ  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1håhjû  ubah}”(h]”h ]”h"]”h$]”h&]”jË  aŒlevel”KŒtype”ŒINFO”Œsource”h³Œline”KMuh1jù  hjV  hžhhŸh³h KMubaŒtransform_messages”]”Œtransformer”NŒinclude_log”]”Œ
decoration”Nhžhub.