€•;     Œsphinx.addnodes”Œdocument”“”)”}”(Œ	rawsource”Œ ”Œchildren”]”(Œtranslations”ŒLanguagesNode”“”)”}”(hhh]”(h Œpending_xref”“”)”}”(hhh]”Œdocutils.nodes”ŒText”“”ŒChinese (Simplified)”…””}”Œparent”hsbaŒ
attributes”}”(Œids”]”Œclasses”]”Œnames”]”Œdupnames”]”Œbackrefs”]”Œ	refdomain”Œstd”Œreftype”Œdoc”Œ	reftarget”Œ+/translations/zh_CN/gpu/drm-vm-bind-locking”Œmodname”NŒ	classname”NŒrefexplicit”ˆuŒtagname”hhhubh)”}”(hhh]”hŒChinese (Traditional)”…””}”hh2sbah}”(h]”h ]”h"]”h$]”h&]”Œ	refdomain”h)Œreftype”h+Œ	reftarget”Œ+/translations/zh_TW/gpu/drm-vm-bind-locking”Œmodname”NŒ	classname”NŒrefexplicit”ˆuh1hhhubh)”}”(hhh]”hŒItalian”…””}”hhFsbah}”(h]”h ]”h"]”h$]”h&]”Œ	refdomain”h)Œreftype”h+Œ	reftarget”Œ+/translations/it_IT/gpu/drm-vm-bind-locking”Œmodname”NŒ	classname”NŒrefexplicit”ˆuh1hhhubh)”}”(hhh]”hŒJapanese”…””}”hhZsbah}”(h]”h ]”h"]”h$]”h&]”Œ	refdomain”h)Œreftype”h+Œ	reftarget”Œ+/translations/ja_JP/gpu/drm-vm-bind-locking”Œmodname”NŒ	classname”NŒrefexplicit”ˆuh1hhhubh)”}”(hhh]”hŒKorean”…””}”hhnsbah}”(h]”h ]”h"]”h$]”h&]”Œ	refdomain”h)Œreftype”h+Œ	reftarget”Œ+/translations/ko_KR/gpu/drm-vm-bind-locking”Œmodname”NŒ	classname”NŒrefexplicit”ˆuh1hhhubh)”}”(hhh]”hŒSpanish”…””}”hh‚sbah}”(h]”h ]”h"]”h$]”h&]”Œ	refdomain”h)Œreftype”h+Œ	reftarget”Œ+/translations/sp_SP/gpu/drm-vm-bind-locking”Œmodname”NŒ	classname”NŒrefexplicit”ˆuh1hhhubeh}”(h]”h ]”h"]”h$]”h&]”Œcurrent_language”ŒEnglish”uh1h
hhŒ	_document”hŒsource”NŒline”NubhŒcomment”“”)”}”(hŒ*SPDX-License-Identifier: (GPL-2.0+ OR MIT)”h]”hŒ*SPDX-License-Identifier: (GPL-2.0+ OR MIT)”…””}”hh£sbah}”(h]”h ]”h"]”h$]”h&]”Œ	xml:space”Œpreserve”uh1h¡hhhžhhŸŒE/var/lib/git/docbuild/linux/Documentation/gpu/drm-vm-bind-locking.rst”h KubhŒsection”“”)”}”(hhh]”(hŒtitle”“”)”}”(hŒVM_BIND locking”h]”hŒVM_BIND locking”…””}”(hh»hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h¹hh¶hžhhŸh³h KubhŒ	paragraph”“”)”}”(hXŸ  This document attempts to describe what's needed to get VM_BIND locking right,
including the userptr mmu_notifier locking. It also discusses some
optimizations to get rid of the looping through of all userptr mappings and
external / shared object mappings that is needed in the simplest
implementation. In addition, there is a section describing the VM_BIND locking
required for implementing recoverable pagefaults.”h]”hX¡  This document attempts to describe whatâ€™s needed to get VM_BIND locking right,
including the userptr mmu_notifier locking. It also discusses some
optimizations to get rid of the looping through of all userptr mappings and
external / shared object mappings that is needed in the simplest
implementation. In addition, there is a section describing the VM_BIND locking
required for implementing recoverable pagefaults.”…””}”(hhËhžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÉhŸh³h Khh¶hžhubhµ)”}”(hhh]”(hº)”}”(hŒThe DRM GPUVM set of helpers”h]”hŒThe DRM GPUVM set of helpers”…””}”(hhÜhžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h¹hhÙhžhhŸh³h KubhÊ)”}”(hXü  There is a set of helpers for drivers implementing VM_BIND, and this
set of helpers implements much, but not all of the locking described
in this document. In particular, it is currently lacking a userptr
implementation. This document does not intend to describe the DRM GPUVM
implementation in detail, but it is covered in :ref:`its own
documentation <drm_gpuvm>`. It is highly recommended for any driver
implementing VM_BIND to use the DRM GPUVM helpers and to extend it if
common functionality is missing.”h]”(hXD  There is a set of helpers for drivers implementing VM_BIND, and this
set of helpers implements much, but not all of the locking described
in this document. In particular, it is currently lacking a userptr
implementation. This document does not intend to describe the DRM GPUVM
implementation in detail, but it is covered in ”…””}”(hhêhžhhŸNh Nubh)”}”(hŒ(:ref:`its own
documentation <drm_gpuvm>`”h]”hŒinline”“”)”}”(hhôh]”hŒits own
documentation”…””}”(hhøhžhhŸNh Nubah}”(h]”h ]”(Œxref”Œstd”Œstd-ref”eh"]”h$]”h&]”uh1höhhòubah}”(h]”h ]”h"]”h$]”h&]”Œrefdoc”Œgpu/drm-vm-bind-locking”Œ	refdomain”j  Œreftype”Œref”Œrefexplicit”ˆŒrefwarn”ˆŒ	reftarget”Œ	drm_gpuvm”uh1hhŸh³h KhhêubhŒ. It is highly recommended for any driver
implementing VM_BIND to use the DRM GPUVM helpers and to extend it if
common functionality is missing.”…””}”(hhêhžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1hÉhŸh³h KhhÙhžhubeh}”(h]”Œthe-drm-gpuvm-set-of-helpers”ah ]”h"]”Œthe drm gpuvm set of helpers”ah$]”h&]”uh1h´hh¶hžhhŸh³h Kubhµ)”}”(hhh]”(hº)”}”(hŒNomenclature”h]”hŒNomenclature”…””}”(hj,  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h¹hj)  hžhhŸh³h KubhŒbullet_list”“”)”}”(hhh]”(hŒ	list_item”“”)”}”(hŒ‘``gpu_vm``: Abstraction of a virtual GPU address space with
meta-data. Typically one per client (DRM file-private), or one per
execution context.”h]”hÊ)”}”(hŒ‘``gpu_vm``: Abstraction of a virtual GPU address space with
meta-data. Typically one per client (DRM file-private), or one per
execution context.”h]”(hŒliteral”“”)”}”(hŒ
``gpu_vm``”h]”hŒgpu_vm”…””}”(hjK  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jI  hjE  ubhŒ‡: Abstraction of a virtual GPU address space with
meta-data. Typically one per client (DRM file-private), or one per
execution context.”…””}”(hjE  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1hÉhŸh³h KhjA  ubah}”(h]”h ]”h"]”h$]”h&]”uh1j?  hj<  hžhhŸh³h Nubj@  )”}”(hŒï``gpu_vma``: Abstraction of a GPU address range within a gpu_vm with
associated meta-data. The backing storage of a gpu_vma can either be
a GEM object or anonymous or page-cache pages mapped also into the CPU
address space for the process.”h]”hÊ)”}”(hŒï``gpu_vma``: Abstraction of a GPU address range within a gpu_vm with
associated meta-data. The backing storage of a gpu_vma can either be
a GEM object or anonymous or page-cache pages mapped also into the CPU
address space for the process.”h]”(jJ  )”}”(hŒ``gpu_vma``”h]”hŒgpu_vma”…””}”(hjq  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jI  hjm  ubhŒä: Abstraction of a GPU address range within a gpu_vm with
associated meta-data. The backing storage of a gpu_vma can either be
a GEM object or anonymous or page-cache pages mapped also into the CPU
address space for the process.”…””}”(hjm  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1hÉhŸh³h K hji  ubah}”(h]”h ]”h"]”h$]”h&]”uh1j?  hj<  hžhhŸh³h Nubj@  )”}”(hŒ¤``gpu_vm_bo``: Abstracts the association of a GEM object and
a VM. The GEM object maintains a list of gpu_vm_bos, where each gpu_vm_bo
maintains a list of gpu_vmas.”h]”hÊ)”}”(hŒ¤``gpu_vm_bo``: Abstracts the association of a GEM object and
a VM. The GEM object maintains a list of gpu_vm_bos, where each gpu_vm_bo
maintains a list of gpu_vmas.”h]”(jJ  )”}”(hŒ``gpu_vm_bo``”h]”hŒ	gpu_vm_bo”…””}”(hj—  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jI  hj“  ubhŒ—: Abstracts the association of a GEM object and
a VM. The GEM object maintains a list of gpu_vm_bos, where each gpu_vm_bo
maintains a list of gpu_vmas.”…””}”(hj“  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1hÉhŸh³h K$hj  ubah}”(h]”h ]”h"]”h$]”h&]”uh1j?  hj<  hžhhŸh³h Nubj@  )”}”(hŒx``userptr gpu_vma or just userptr``: A gpu_vma, whose backing store
is anonymous or page-cache pages as described above.”h]”hÊ)”}”(hŒx``userptr gpu_vma or just userptr``: A gpu_vma, whose backing store
is anonymous or page-cache pages as described above.”h]”(jJ  )”}”(hŒ#``userptr gpu_vma or just userptr``”h]”hŒuserptr gpu_vma or just userptr”…””}”(hj½  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jI  hj¹  ubhŒU: A gpu_vma, whose backing store
is anonymous or page-cache pages as described above.”…””}”(hj¹  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1hÉhŸh³h K'hjµ  ubah}”(h]”h ]”h"]”h$]”h&]”uh1j?  hj<  hžhhŸh³h Nubj@  )”}”(hŒ´``revalidating``: Revalidating a gpu_vma means making the latest version
of the backing store resident and making sure the gpu_vma's
page-table entries point to that backing store.”h]”hÊ)”}”(hŒ´``revalidating``: Revalidating a gpu_vma means making the latest version
of the backing store resident and making sure the gpu_vma's
page-table entries point to that backing store.”h]”(jJ  )”}”(hŒ``revalidating``”h]”hŒrevalidating”…””}”(hjã  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jI  hjß  ubhŒ¦: Revalidating a gpu_vma means making the latest version
of the backing store resident and making sure the gpu_vmaâ€™s
page-table entries point to that backing store.”…””}”(hjß  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1hÉhŸh³h K)hjÛ  ubah}”(h]”h ]”h"]”h$]”h&]”uh1j?  hj<  hžhhŸh³h Nubj@  )”}”(hŒý``dma_fence``: A struct dma_fence that is similar to a struct completion
and which tracks GPU activity. When the GPU activity is finished,
the dma_fence signals. Please refer to the ``DMA Fences`` section of
the :doc:`dma-buf doc </driver-api/dma-buf>`.”h]”hÊ)”}”(hŒý``dma_fence``: A struct dma_fence that is similar to a struct completion
and which tracks GPU activity. When the GPU activity is finished,
the dma_fence signals. Please refer to the ``DMA Fences`` section of
the :doc:`dma-buf doc </driver-api/dma-buf>`.”h]”(jJ  )”}”(hŒ``dma_fence``”h]”hŒ	dma_fence”…””}”(hj	  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jI  hj  ubhŒ©: A struct dma_fence that is similar to a struct completion
and which tracks GPU activity. When the GPU activity is finished,
the dma_fence signals. Please refer to the ”…””}”(hj  hžhhŸNh NubjJ  )”}”(hŒ``DMA Fences``”h]”hŒ
DMA Fences”…””}”(hj  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jI  hj  ubhŒ section of
the ”…””}”(hj  hžhhŸNh Nubh)”}”(hŒ(:doc:`dma-buf doc </driver-api/dma-buf>`”h]”h÷)”}”(hj/  h]”hŒdma-buf doc”…””}”(hj1  hžhhŸNh Nubah}”(h]”h ]”(j  Œstd”Œstd-doc”eh"]”h$]”h&]”uh1höhj-  ubah}”(h]”h ]”h"]”h$]”h&]”Œrefdoc”j  Œ	refdomain”j;  Œreftype”Œdoc”Œrefexplicit”ˆŒrefwarn”ˆj  Œ/driver-api/dma-buf”uh1hhŸh³h K,hj  ubhŒ.”…””}”(hj  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1hÉhŸh³h K,hj  ubah}”(h]”h ]”h"]”h$]”h&]”uh1j?  hj<  hžhhŸh³h Nubj@  )”}”(hXì  ``dma_resv``: A struct dma_resv (a.k.a reservation object) that is used
to track GPU activity in the form of multiple dma_fences on a
gpu_vm or a GEM object. The dma_resv contains an array / list
of dma_fences and a lock that needs to be held when adding
additional dma_fences to the dma_resv. The lock is of a type that
allows deadlock-safe locking of multiple dma_resvs in arbitrary
order. Please refer to the ``Reservation Objects`` section of the
:doc:`dma-buf doc </driver-api/dma-buf>`.”h]”hÊ)”}”(hXì  ``dma_resv``: A struct dma_resv (a.k.a reservation object) that is used
to track GPU activity in the form of multiple dma_fences on a
gpu_vm or a GEM object. The dma_resv contains an array / list
of dma_fences and a lock that needs to be held when adding
additional dma_fences to the dma_resv. The lock is of a type that
allows deadlock-safe locking of multiple dma_resvs in arbitrary
order. Please refer to the ``Reservation Objects`` section of the
:doc:`dma-buf doc </driver-api/dma-buf>`.”h]”(jJ  )”}”(hŒ``dma_resv``”h]”hŒdma_resv”…””}”(hje  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jI  hja  ubhX  : A struct dma_resv (a.k.a reservation object) that is used
to track GPU activity in the form of multiple dma_fences on a
gpu_vm or a GEM object. The dma_resv contains an array / list
of dma_fences and a lock that needs to be held when adding
additional dma_fences to the dma_resv. The lock is of a type that
allows deadlock-safe locking of multiple dma_resvs in arbitrary
order. Please refer to the ”…””}”(hja  hžhhŸNh NubjJ  )”}”(hŒ``Reservation Objects``”h]”hŒReservation Objects”…””}”(hjw  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jI  hja  ubhŒ section of the
”…””}”(hja  hžhhŸNh Nubh)”}”(hŒ(:doc:`dma-buf doc </driver-api/dma-buf>`”h]”h÷)”}”(hj‹  h]”hŒdma-buf doc”…””}”(hj  hžhhŸNh Nubah}”(h]”h ]”(j  Œstd”Œstd-doc”eh"]”h$]”h&]”uh1höhj‰  ubah}”(h]”h ]”h"]”h$]”h&]”Œrefdoc”j  Œ	refdomain”j—  Œreftype”Œdoc”Œrefexplicit”ˆŒrefwarn”ˆj  Œ/driver-api/dma-buf”uh1hhŸh³h K0hja  ubhŒ.”…””}”(hja  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1hÉhŸh³h K0hj]  ubah}”(h]”h ]”h"]”h$]”h&]”uh1j?  hj<  hžhhŸh³h Nubj@  )”}”(hXœ  ``exec function``: An exec function is a function that revalidates all
affected gpu_vmas, submits a GPU command batch and registers the
dma_fence representing the GPU command's activity with all affected
dma_resvs. For completeness, although not covered by this document,
it's worth mentioning that an exec function may also be the
revalidation worker that is used by some drivers in compute /
long-running mode.”h]”hÊ)”}”(hXœ  ``exec function``: An exec function is a function that revalidates all
affected gpu_vmas, submits a GPU command batch and registers the
dma_fence representing the GPU command's activity with all affected
dma_resvs. For completeness, although not covered by this document,
it's worth mentioning that an exec function may also be the
revalidation worker that is used by some drivers in compute /
long-running mode.”h]”(jJ  )”}”(hŒ``exec function``”h]”hŒexec function”…””}”(hjÁ  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jI  hj½  ubhX  : An exec function is a function that revalidates all
affected gpu_vmas, submits a GPU command batch and registers the
dma_fence representing the GPU commandâ€™s activity with all affected
dma_resvs. For completeness, although not covered by this document,
itâ€™s worth mentioning that an exec function may also be the
revalidation worker that is used by some drivers in compute /
long-running mode.”…””}”(hj½  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1hÉhŸh³h K8hj¹  ubah}”(h]”h ]”h"]”h$]”h&]”uh1j?  hj<  hžhhŸh³h Nubj@  )”}”(hŒv``local object``: A GEM object which is only mapped within a
single VM. Local GEM objects share the gpu_vm's dma_resv.”h]”hÊ)”}”(hŒv``local object``: A GEM object which is only mapped within a
single VM. Local GEM objects share the gpu_vm's dma_resv.”h]”(jJ  )”}”(hŒ``local object``”h]”hŒlocal object”…””}”(hjç  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jI  hjã  ubhŒh: A GEM object which is only mapped within a
single VM. Local GEM objects share the gpu_vmâ€™s dma_resv.”…””}”(hjã  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1hÉhŸh³h K?hjß  ubah}”(h]”h ]”h"]”h$]”h&]”uh1j?  hj<  hžhhŸh³h Nubj@  )”}”(hŒ›``external object``: a.k.a shared object: A GEM object which may be shared
by multiple gpu_vms and whose backing storage may be shared with
other drivers.
”h]”hÊ)”}”(hŒš``external object``: a.k.a shared object: A GEM object which may be shared
by multiple gpu_vms and whose backing storage may be shared with
other drivers.”h]”(jJ  )”}”(hŒ``external object``”h]”hŒexternal object”…””}”(hj  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jI  hj	  ubhŒ‡: a.k.a shared object: A GEM object which may be shared
by multiple gpu_vms and whose backing storage may be shared with
other drivers.”…””}”(hj	  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1hÉhŸh³h KAhj  ubah}”(h]”h ]”h"]”h$]”h&]”uh1j?  hj<  hžhhŸh³h Nubeh}”(h]”h ]”h"]”h$]”h&]”Œbullet”Œ*”uh1j:  hŸh³h Khj)  hžhubeh}”(h]”Œnomenclature”ah ]”h"]”Œnomenclature”ah$]”h&]”uh1h´hh¶hžhhŸh³h Kubhµ)”}”(hhh]”(hº)”}”(hŒLocks and locking order”h]”hŒLocks and locking order”…””}”(hj>  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h¹hj;  hžhhŸh³h KFubhÊ)”}”(hŒäOne of the benefits of VM_BIND is that local GEM objects share the gpu_vm's
dma_resv object and hence the dma_resv lock. So, even with a huge
number of local GEM objects, only one lock is needed to make the exec
sequence atomic.”h]”hŒæOne of the benefits of VM_BIND is that local GEM objects share the gpu_vmâ€™s
dma_resv object and hence the dma_resv lock. So, even with a huge
number of local GEM objects, only one lock is needed to make the exec
sequence atomic.”…””}”(hjL  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÉhŸh³h KHhj;  hžhubhÊ)”}”(hŒ0The following locks and locking orders are used:”h]”hŒ0The following locks and locking orders are used:”…””}”(hjZ  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÉhŸh³h KMhj;  hžhubj;  )”}”(hhh]”(j@  )”}”(hXt  The ``gpu_vm->lock`` (optionally an rwsem). Protects the gpu_vm's
data structure keeping track of gpu_vmas. It can also protect the
gpu_vm's list of userptr gpu_vmas. With a CPU mm analogy this would
correspond to the mmap_lock. An rwsem allows several readers to walk
the VM tree concurrently, but the benefit of that concurrency most
likely varies from driver to driver.”h]”hÊ)”}”(hXt  The ``gpu_vm->lock`` (optionally an rwsem). Protects the gpu_vm's
data structure keeping track of gpu_vmas. It can also protect the
gpu_vm's list of userptr gpu_vmas. With a CPU mm analogy this would
correspond to the mmap_lock. An rwsem allows several readers to walk
the VM tree concurrently, but the benefit of that concurrency most
likely varies from driver to driver.”h]”(hŒThe ”…””}”(hjo  hžhhŸNh NubjJ  )”}”(hŒ``gpu_vm->lock``”h]”hŒgpu_vm->lock”…””}”(hjw  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jI  hjo  ubhXd   (optionally an rwsem). Protects the gpu_vmâ€™s
data structure keeping track of gpu_vmas. It can also protect the
gpu_vmâ€™s list of userptr gpu_vmas. With a CPU mm analogy this would
correspond to the mmap_lock. An rwsem allows several readers to walk
the VM tree concurrently, but the benefit of that concurrency most
likely varies from driver to driver.”…””}”(hjo  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1hÉhŸh³h KOhjk  ubah}”(h]”h ]”h"]”h$]”h&]”uh1j?  hjh  hžhhŸh³h Nubj@  )”}”(hXw  The ``userptr_seqlock``. This lock is taken in read mode for each
userptr gpu_vma on the gpu_vm's userptr list, and in write mode during mmu
notifier invalidation. This is not a real seqlock but described in
``mm/mmu_notifier.c`` as a "Collision-retry read-side/write-side
'lock' a lot like a seqcount. However this allows multiple
write-sides to hold it at once...". The read side critical section
is enclosed by ``mmu_interval_read_begin() /
mmu_interval_read_retry()`` with ``mmu_interval_read_begin()``
sleeping if the write side is held.
The write side is held by the core mm while calling mmu interval
invalidation notifiers.”h]”hÊ)”}”(hXw  The ``userptr_seqlock``. This lock is taken in read mode for each
userptr gpu_vma on the gpu_vm's userptr list, and in write mode during mmu
notifier invalidation. This is not a real seqlock but described in
``mm/mmu_notifier.c`` as a "Collision-retry read-side/write-side
'lock' a lot like a seqcount. However this allows multiple
write-sides to hold it at once...". The read side critical section
is enclosed by ``mmu_interval_read_begin() /
mmu_interval_read_retry()`` with ``mmu_interval_read_begin()``
sleeping if the write side is held.
The write side is held by the core mm while calling mmu interval
invalidation notifiers.”h]”(hŒThe ”…””}”(hj™  hžhhŸNh NubjJ  )”}”(hŒ``userptr_seqlock``”h]”hŒuserptr_seqlock”…””}”(hj¡  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jI  hj™  ubhŒ». This lock is taken in read mode for each
userptr gpu_vma on the gpu_vmâ€™s userptr list, and in write mode during mmu
notifier invalidation. This is not a real seqlock but described in
”…””}”(hj™  hžhhŸNh NubjJ  )”}”(hŒ``mm/mmu_notifier.c``”h]”hŒmm/mmu_notifier.c”…””}”(hj³  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jI  hj™  ubhŒÁ as a â€œCollision-retry read-side/write-side
â€˜lockâ€™ a lot like a seqcount. However this allows multiple
write-sides to hold it at once...â€. The read side critical section
is enclosed by ”…””}”(hj™  hžhhŸNh NubjJ  )”}”(hŒ9``mmu_interval_read_begin() /
mmu_interval_read_retry()``”h]”hŒ5mmu_interval_read_begin() /
mmu_interval_read_retry()”…””}”(hjÅ  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jI  hj™  ubhŒ with ”…””}”(hj™  hžhhŸNh NubjJ  )”}”(hŒ``mmu_interval_read_begin()``”h]”hŒmmu_interval_read_begin()”…””}”(hj×  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jI  hj™  ubhŒ}
sleeping if the write side is held.
The write side is held by the core mm while calling mmu interval
invalidation notifiers.”…””}”(hj™  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1hÉhŸh³h KUhj•  ubah}”(h]”h ]”h"]”h$]”h&]”uh1j?  hjh  hžhhŸh³h Nubj@  )”}”(hŒôThe ``gpu_vm->resv`` lock. Protects the gpu_vm's list of gpu_vmas needing
rebinding, as well as the residency state of all the gpu_vm's local
GEM objects.
Furthermore, it typically protects the gpu_vm's list of evicted and
external GEM objects.”h]”hÊ)”}”(hŒôThe ``gpu_vm->resv`` lock. Protects the gpu_vm's list of gpu_vmas needing
rebinding, as well as the residency state of all the gpu_vm's local
GEM objects.
Furthermore, it typically protects the gpu_vm's list of evicted and
external GEM objects.”h]”(hŒThe ”…””}”(hjù  hžhhŸNh NubjJ  )”}”(hŒ``gpu_vm->resv``”h]”hŒgpu_vm->resv”…””}”(hj  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jI  hjù  ubhŒæ lock. Protects the gpu_vmâ€™s list of gpu_vmas needing
rebinding, as well as the residency state of all the gpu_vmâ€™s local
GEM objects.
Furthermore, it typically protects the gpu_vmâ€™s list of evicted and
external GEM objects.”…””}”(hjù  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1hÉhŸh³h K`hjõ  ubah}”(h]”h ]”h"]”h$]”h&]”uh1j?  hjh  hžhhŸh³h Nubj@  )”}”(hŒºThe ``gpu_vm->userptr_notifier_lock``. This is an rwsem that is
taken in read mode during exec and write mode during a mmu notifier
invalidation. The userptr notifier lock is per gpu_vm.”h]”hÊ)”}”(hŒºThe ``gpu_vm->userptr_notifier_lock``. This is an rwsem that is
taken in read mode during exec and write mode during a mmu notifier
invalidation. The userptr notifier lock is per gpu_vm.”h]”(hŒThe ”…””}”(hj#  hžhhŸNh NubjJ  )”}”(hŒ!``gpu_vm->userptr_notifier_lock``”h]”hŒgpu_vm->userptr_notifier_lock”…””}”(hj+  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jI  hj#  ubhŒ•. This is an rwsem that is
taken in read mode during exec and write mode during a mmu notifier
invalidation. The userptr notifier lock is per gpu_vm.”…””}”(hj#  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1hÉhŸh³h Kehj  ubah}”(h]”h ]”h"]”h$]”h&]”uh1j?  hjh  hžhhŸh³h Nubj@  )”}”(hŒÎThe ``gem_object->gpuva_lock`` This lock protects the GEM object's
list of gpu_vm_bos. This is usually the same lock as the GEM
object's dma_resv, but some drivers protects this list differently,
see below.”h]”hÊ)”}”(hŒÎThe ``gem_object->gpuva_lock`` This lock protects the GEM object's
list of gpu_vm_bos. This is usually the same lock as the GEM
object's dma_resv, but some drivers protects this list differently,
see below.”h]”(hŒThe ”…””}”(hjM  hžhhŸNh NubjJ  )”}”(hŒ``gem_object->gpuva_lock``”h]”hŒgem_object->gpuva_lock”…””}”(hjU  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jI  hjM  ubhŒ´ This lock protects the GEM objectâ€™s
list of gpu_vm_bos. This is usually the same lock as the GEM
objectâ€™s dma_resv, but some drivers protects this list differently,
see below.”…””}”(hjM  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1hÉhŸh³h KhhjI  ubah}”(h]”h ]”h"]”h$]”h&]”uh1j?  hjh  hžhhŸh³h Nubj@  )”}”(hX_  The ``gpu_vm list spinlocks``. With some implementations they are needed
to be able to update the gpu_vm evicted- and external object
list. For those implementations, the spinlocks are grabbed when the
lists are manipulated. However, to avoid locking order violations
with the dma_resv locks, a special scheme is needed when iterating
over the lists.
”h]”hÊ)”}”(hX^  The ``gpu_vm list spinlocks``. With some implementations they are needed
to be able to update the gpu_vm evicted- and external object
list. For those implementations, the spinlocks are grabbed when the
lists are manipulated. However, to avoid locking order violations
with the dma_resv locks, a special scheme is needed when iterating
over the lists.”h]”(hŒThe ”…””}”(hjw  hžhhŸNh NubjJ  )”}”(hŒ``gpu_vm list spinlocks``”h]”hŒgpu_vm list spinlocks”…””}”(hj  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jI  hjw  ubhXA  . With some implementations they are needed
to be able to update the gpu_vm evicted- and external object
list. For those implementations, the spinlocks are grabbed when the
lists are manipulated. However, to avoid locking order violations
with the dma_resv locks, a special scheme is needed when iterating
over the lists.”…””}”(hjw  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1hÉhŸh³h Klhjs  ubah}”(h]”h ]”h"]”h$]”h&]”uh1j?  hjh  hžhhŸh³h Nubeh}”(h]”h ]”h"]”h$]”h&]”j1  j2  uh1j:  hŸh³h KOhj;  hžhubhŒtarget”“”)”}”(hŒ.. _gpu_vma lifetime:”h]”h}”(h]”h ]”h"]”h$]”h&]”Œrefid”Œgpu-vma-lifetime”uh1j£  h Kshj;  hžhhŸh³ubeh}”(h]”Œlocks-and-locking-order”ah ]”h"]”Œlocks and locking order”ah$]”h&]”uh1h´hh¶hžhhŸh³h KFubhµ)”}”(hhh]”(hº)”}”(hŒ2Protection and lifetime of gpu_vm_bos and gpu_vmas”h]”hŒ2Protection and lifetime of gpu_vm_bos and gpu_vmas”…””}”(hj¼  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h¹hj¹  hžhhŸh³h KvubhÊ)”}”(hXY  The GEM object's list of gpu_vm_bos, and the gpu_vm_bo's list of gpu_vmas
is protected by the ``gem_object->gpuva_lock``, which is typically the
same as the GEM object's dma_resv, but if the driver
needs to access these lists from within a dma_fence signalling
critical section, it can instead choose to protect it with a
separate lock, which can be locked from within the dma_fence signalling
critical section. Such drivers then need to pay additional attention
to what locks need to be taken from within the loop when iterating
over the gpu_vm_bo and gpu_vma lists to avoid locking-order violations.”h]”(hŒbThe GEM objectâ€™s list of gpu_vm_bos, and the gpu_vm_boâ€™s list of gpu_vmas
is protected by the ”…””}”(hjÊ  hžhhŸNh NubjJ  )”}”(hŒ``gem_object->gpuva_lock``”h]”hŒgem_object->gpuva_lock”…””}”(hjÒ  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jI  hjÊ  ubhXã  , which is typically the
same as the GEM objectâ€™s dma_resv, but if the driver
needs to access these lists from within a dma_fence signalling
critical section, it can instead choose to protect it with a
separate lock, which can be locked from within the dma_fence signalling
critical section. Such drivers then need to pay additional attention
to what locks need to be taken from within the loop when iterating
over the gpu_vm_bo and gpu_vma lists to avoid locking-order violations.”…””}”(hjÊ  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1hÉhŸh³h Kxhj¹  hžhubhÊ)”}”(hŒÕThe DRM GPUVM set of helpers provide lockdep asserts that this lock is
held in relevant situations and also provides a means of making itself
aware of which lock is actually used: :c:func:`drm_gem_gpuva_set_lock`.”h]”(hŒ´The DRM GPUVM set of helpers provide lockdep asserts that this lock is
held in relevant situations and also provides a means of making itself
aware of which lock is actually used: ”…””}”(hjê  hžhhŸNh Nubh)”}”(hŒ :c:func:`drm_gem_gpuva_set_lock`”h]”jJ  )”}”(hjô  h]”hŒdrm_gem_gpuva_set_lock()”…””}”(hjö  hžhhŸNh Nubah}”(h]”h ]”(j  Œc”Œc-func”eh"]”h$]”h&]”uh1jI  hjò  ubah}”(h]”h ]”h"]”h$]”h&]”Œrefdoc”j  Œ	refdomain”j   Œreftype”Œfunc”Œrefexplicit”‰Œrefwarn”‰j  Œdrm_gem_gpuva_set_lock”uh1hhŸh³h K‚hjê  ubhŒ.”…””}”(hjê  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1hÉhŸh³h K‚hj¹  hžhubhÊ)”}”(hX'  Each gpu_vm_bo holds a reference counted pointer to the underlying GEM
object, and each gpu_vma holds a reference counted pointer to the
gpu_vm_bo. When iterating over the GEM object's list of gpu_vm_bos and
over the gpu_vm_bo's list of gpu_vmas, the ``gem_object->gpuva_lock`` must
not be dropped, otherwise, gpu_vmas attached to a gpu_vm_bo may
disappear without notice since those are not reference-counted. A
driver may implement its own scheme to allow this at the expense of
additional complexity, but this is outside the scope of this document.”h]”(hŒÿEach gpu_vm_bo holds a reference counted pointer to the underlying GEM
object, and each gpu_vma holds a reference counted pointer to the
gpu_vm_bo. When iterating over the GEM objectâ€™s list of gpu_vm_bos and
over the gpu_vm_boâ€™s list of gpu_vmas, the ”…””}”(hj  hžhhŸNh NubjJ  )”}”(hŒ``gem_object->gpuva_lock``”h]”hŒgem_object->gpuva_lock”…””}”(hj$  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jI  hj  ubhX   must
not be dropped, otherwise, gpu_vmas attached to a gpu_vm_bo may
disappear without notice since those are not reference-counted. A
driver may implement its own scheme to allow this at the expense of
additional complexity, but this is outside the scope of this document.”…””}”(hj  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1hÉhŸh³h K†hj¹  hžhubhÊ)”}”(hX°  In the DRM GPUVM implementation, each gpu_vm_bo and each gpu_vma
holds a reference count on the gpu_vm itself. Due to this, and to avoid circular
reference counting, cleanup of the gpu_vm's gpu_vmas must not be done from the
gpu_vm's destructor. Drivers typically implements a gpu_vm close
function for this cleanup. The gpu_vm close function will abort gpu
execution using this VM, unmap all gpu_vmas and release page-table memory.”h]”hX´  In the DRM GPUVM implementation, each gpu_vm_bo and each gpu_vma
holds a reference count on the gpu_vm itself. Due to this, and to avoid circular
reference counting, cleanup of the gpu_vmâ€™s gpu_vmas must not be done from the
gpu_vmâ€™s destructor. Drivers typically implements a gpu_vm close
function for this cleanup. The gpu_vm close function will abort gpu
execution using this VM, unmap all gpu_vmas and release page-table memory.”…””}”(hj<  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÉhŸh³h Khj¹  hžhubeh}”(h]”(Œ2protection-and-lifetime-of-gpu-vm-bos-and-gpu-vmas”j°  eh ]”h"]”(Œ2protection and lifetime of gpu_vm_bos and gpu_vmas”Œgpu_vma lifetime”eh$]”h&]”uh1h´hh¶hžhhŸh³h KvŒexpect_referenced_by_name”}”jP  j¥  sŒexpect_referenced_by_id”}”j°  j¥  subhµ)”}”(hhh]”(hº)”}”(hŒ*Revalidation and eviction of local objects”h]”hŒ*Revalidation and eviction of local objects”…””}”(hjZ  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h¹hjW  hžhhŸh³h K—ubhÊ)”}”(hŒÃNote that in all the code examples given below we use simplified
pseudo-code. In particular, the dma_resv deadlock avoidance algorithm
as well as reserving memory for dma_resv fences is left out.”h]”hŒÃNote that in all the code examples given below we use simplified
pseudo-code. In particular, the dma_resv deadlock avoidance algorithm
as well as reserving memory for dma_resv fences is left out.”…””}”(hjh  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÉhŸh³h K™hjW  hžhubhµ)”}”(hhh]”(hº)”}”(hŒRevalidation”h]”hŒRevalidation”…””}”(hjy  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h¹hjv  hžhhŸh³h KžubhÊ)”}”(hX  With VM_BIND, all local objects need to be resident when the gpu is
executing using the gpu_vm, and the objects need to have valid
gpu_vmas set up pointing to them. Typically, each gpu command buffer
submission is therefore preceded with a re-validation section:”h]”hX  With VM_BIND, all local objects need to be resident when the gpu is
executing using the gpu_vm, and the objects need to have valid
gpu_vmas set up pointing to them. Typically, each gpu command buffer
submission is therefore preceded with a re-validation section:”…””}”(hj‡  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÉhŸh³h KŸhjv  hžhubhŒliteral_block”“”)”}”(hX°  dma_resv_lock(gpu_vm->resv);

// Validation section starts here.
for_each_gpu_vm_bo_on_evict_list(&gpu_vm->evict_list, &gpu_vm_bo) {
        validate_gem_bo(&gpu_vm_bo->gem_bo);

        // The following list iteration needs the Gem object's
        // dma_resv to be held (it protects the gpu_vm_bo's list of
        // gpu_vmas, but since local gem objects share the gpu_vm's
        // dma_resv, it is already held at this point.
        for_each_gpu_vma_of_gpu_vm_bo(&gpu_vm_bo, &gpu_vma)
               move_gpu_vma_to_rebind_list(&gpu_vma, &gpu_vm->rebind_list);
}

for_each_gpu_vma_on_rebind_list(&gpu vm->rebind_list, &gpu_vma) {
        rebind_gpu_vma(&gpu_vma);
        remove_gpu_vma_from_rebind_list(&gpu_vma);
}
// Validation section ends here, and job submission starts.

add_dependencies(&gpu_job, &gpu_vm->resv);
job_dma_fence = gpu_submit(&gpu_job));

add_dma_fence(job_dma_fence, &gpu_vm->resv);
dma_resv_unlock(gpu_vm->resv);”h]”hX°  dma_resv_lock(gpu_vm->resv);

// Validation section starts here.
for_each_gpu_vm_bo_on_evict_list(&gpu_vm->evict_list, &gpu_vm_bo) {
        validate_gem_bo(&gpu_vm_bo->gem_bo);

        // The following list iteration needs the Gem object's
        // dma_resv to be held (it protects the gpu_vm_bo's list of
        // gpu_vmas, but since local gem objects share the gpu_vm's
        // dma_resv, it is already held at this point.
        for_each_gpu_vma_of_gpu_vm_bo(&gpu_vm_bo, &gpu_vma)
               move_gpu_vma_to_rebind_list(&gpu_vma, &gpu_vm->rebind_list);
}

for_each_gpu_vma_on_rebind_list(&gpu vm->rebind_list, &gpu_vma) {
        rebind_gpu_vma(&gpu_vma);
        remove_gpu_vma_from_rebind_list(&gpu_vma);
}
// Validation section ends here, and job submission starts.

add_dependencies(&gpu_job, &gpu_vm->resv);
job_dma_fence = gpu_submit(&gpu_job));

add_dma_fence(job_dma_fence, &gpu_vm->resv);
dma_resv_unlock(gpu_vm->resv);”…””}”hj—  sbah}”(h]”h ]”h"]”h$]”h&]”h±h²Œforce”‰Œlanguage”ŒC”Œhighlight_args”}”uh1j•  hŸh³h K¤hjv  hžhubhÊ)”}”(hŒšThe reason for having a separate gpu_vm rebind list is that there
might be userptr gpu_vmas that are not mapping a buffer object that
also need rebinding.”h]”hŒšThe reason for having a separate gpu_vm rebind list is that there
might be userptr gpu_vmas that are not mapping a buffer object that
also need rebinding.”…””}”(hjª  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÉhŸh³h KÀhjv  hžhubeh}”(h]”Œrevalidation”ah ]”h"]”Œrevalidation”ah$]”h&]”uh1h´hjW  hžhhŸh³h Kžubhµ)”}”(hhh]”(hº)”}”(hŒEviction”h]”hŒEviction”…””}”(hjÃ  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h¹hjÀ  hžhhŸh³h KÅubhÊ)”}”(hŒOEviction of one of these local objects will then look similar to the
following:”h]”hŒOEviction of one of these local objects will then look similar to the
following:”…””}”(hjÑ  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÉhŸh³h KÇhjÀ  hžhubj–  )”}”(hX[  obj = get_object_from_lru();

dma_resv_lock(obj->resv);
for_each_gpu_vm_bo_of_obj(obj, &gpu_vm_bo);
        add_gpu_vm_bo_to_evict_list(&gpu_vm_bo, &gpu_vm->evict_list);

add_dependencies(&eviction_job, &obj->resv);
job_dma_fence = gpu_submit(&eviction_job);
add_dma_fence(&obj->resv, job_dma_fence);

dma_resv_unlock(&obj->resv);
put_object(obj);”h]”hX[  obj = get_object_from_lru();

dma_resv_lock(obj->resv);
for_each_gpu_vm_bo_of_obj(obj, &gpu_vm_bo);
        add_gpu_vm_bo_to_evict_list(&gpu_vm_bo, &gpu_vm->evict_list);

add_dependencies(&eviction_job, &obj->resv);
job_dma_fence = gpu_submit(&eviction_job);
add_dma_fence(&obj->resv, job_dma_fence);

dma_resv_unlock(&obj->resv);
put_object(obj);”…””}”hjß  sbah}”(h]”h ]”h"]”h$]”h&]”h±h²j¥  ‰j¦  j§  j¨  }”uh1j•  hŸh³h KÊhjÀ  hžhubhÊ)”}”(hX™  Note that since the object is local to the gpu_vm, it will share the gpu_vm's
dma_resv lock such that ``obj->resv == gpu_vm->resv``.
The gpu_vm_bos marked for eviction are put on the gpu_vm's evict list,
which is protected by ``gpu_vm->resv``. During eviction all local
objects have their dma_resv locked and, due to the above equality, also
the gpu_vm's dma_resv protecting the gpu_vm's evict list is locked.”h]”(hŒhNote that since the object is local to the gpu_vm, it will share the gpu_vmâ€™s
dma_resv lock such that ”…””}”(hjî  hžhhŸNh NubjJ  )”}”(hŒ``obj->resv == gpu_vm->resv``”h]”hŒobj->resv == gpu_vm->resv”…””}”(hjö  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jI  hjî  ubhŒa.
The gpu_vm_bos marked for eviction are put on the gpu_vmâ€™s evict list,
which is protected by ”…””}”(hjî  hžhhŸNh NubjJ  )”}”(hŒ``gpu_vm->resv``”h]”hŒgpu_vm->resv”…””}”(hj  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jI  hjî  ubhŒ«. During eviction all local
objects have their dma_resv locked and, due to the above equality, also
the gpu_vmâ€™s dma_resv protecting the gpu_vmâ€™s evict list is locked.”…””}”(hjî  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1hÉhŸh³h KÙhjÀ  hžhubhÊ)”}”(hX  With VM_BIND, gpu_vmas don't need to be unbound before eviction,
since the driver must ensure that the eviction blit or copy will wait
for GPU idle or depend on all previous GPU activity. Furthermore, any
subsequent attempt by the GPU to access freed memory through the
gpu_vma will be preceded by a new exec function, with a revalidation
section which will make sure all gpu_vmas are rebound. The eviction
code holding the object's dma_resv while revalidating will ensure a
new exec function may not race with the eviction.”h]”hX  With VM_BIND, gpu_vmas donâ€™t need to be unbound before eviction,
since the driver must ensure that the eviction blit or copy will wait
for GPU idle or depend on all previous GPU activity. Furthermore, any
subsequent attempt by the GPU to access freed memory through the
gpu_vma will be preceded by a new exec function, with a revalidation
section which will make sure all gpu_vmas are rebound. The eviction
code holding the objectâ€™s dma_resv while revalidating will ensure a
new exec function may not race with the eviction.”…””}”(hj   hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÉhŸh³h KàhjÀ  hžhubhÊ)”}”(hŒðA driver can be implemented in such a way that, on each exec function,
only a subset of vmas are selected for rebind.  In this case, all vmas that are
*not* selected for rebind must be unbound before the exec
function workload is submitted.”h]”(hŒ—A driver can be implemented in such a way that, on each exec function,
only a subset of vmas are selected for rebind.  In this case, all vmas that are
”…””}”(hj.  hžhhŸNh NubhŒemphasis”“”)”}”(hŒ*not*”h]”hŒnot”…””}”(hj8  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1j6  hj.  ubhŒT selected for rebind must be unbound before the exec
function workload is submitted.”…””}”(hj.  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1hÉhŸh³h KéhjÀ  hžhubeh}”(h]”Œeviction”ah ]”h"]”Œeviction”ah$]”h&]”uh1h´hjW  hžhhŸh³h KÅubeh}”(h]”Œ*revalidation-and-eviction-of-local-objects”ah ]”h"]”Œ*revalidation and eviction of local objects”ah$]”h&]”uh1h´hh¶hžhhŸh³h K—ubhµ)”}”(hhh]”(hº)”}”(hŒ$Locking with external buffer objects”h]”hŒ$Locking with external buffer objects”…””}”(hjc  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h¹hj`  hžhhŸh³h KïubhÊ)”}”(hX‰  Since external buffer objects may be shared by multiple gpu_vm's they
can't share their reservation object with a single gpu_vm. Instead
they need to have a reservation object of their own. The external
objects bound to a gpu_vm using one or many gpu_vmas are therefore put on a
per-gpu_vm list which is protected by the gpu_vm's dma_resv lock or
one of the :ref:`gpu_vm list spinlocks <Spinlock iteration>`. Once
the gpu_vm's reservation object is locked, it is safe to traverse the
external object list and lock the dma_resvs of all external
objects. However, if instead a list spinlock is used, a more elaborate
iteration scheme needs to be used.”h]”(hXl  Since external buffer objects may be shared by multiple gpu_vmâ€™s they
canâ€™t share their reservation object with a single gpu_vm. Instead
they need to have a reservation object of their own. The external
objects bound to a gpu_vm using one or many gpu_vmas are therefore put on a
per-gpu_vm list which is protected by the gpu_vmâ€™s dma_resv lock or
one of the ”…””}”(hjq  hžhhŸNh Nubh)”}”(hŒ1:ref:`gpu_vm list spinlocks <Spinlock iteration>`”h]”h÷)”}”(hj{  h]”hŒgpu_vm list spinlocks”…””}”(hj}  hžhhŸNh Nubah}”(h]”h ]”(j  Œstd”Œstd-ref”eh"]”h$]”h&]”uh1höhjy  ubah}”(h]”h ]”h"]”h$]”h&]”Œrefdoc”j  Œ	refdomain”j‡  Œreftype”Œref”Œrefexplicit”ˆŒrefwarn”ˆj  Œspinlock iteration”uh1hhŸh³h Kñhjq  ubhŒô. Once
the gpu_vmâ€™s reservation object is locked, it is safe to traverse the
external object list and lock the dma_resvs of all external
objects. However, if instead a list spinlock is used, a more elaborate
iteration scheme needs to be used.”…””}”(hjq  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1hÉhŸh³h Kñhj`  hžhubhÊ)”}”(hXÇ  At eviction time, the gpu_vm_bos of *all* the gpu_vms an external
object is bound to need to be put on their gpu_vm's evict list.
However, when evicting an external object, the dma_resvs of the
gpu_vms the object is bound to are typically not held. Only
the object's private dma_resv can be guaranteed to be held. If there
is a ww_acquire context at hand at eviction time we could grab those
dma_resvs but that could cause expensive ww_mutex rollbacks. A simple
option is to just mark the gpu_vm_bos of the evicted gem object with
an ``evicted`` bool that is inspected before the next time the
corresponding gpu_vm evicted list needs to be traversed. For example, when
traversing the list of external objects and locking them. At that time,
both the gpu_vm's dma_resv and the object's dma_resv is held, and the
gpu_vm_bo marked evicted, can then be added to the gpu_vm's list of
evicted gpu_vm_bos. The ``evicted`` bool is formally protected by the
object's dma_resv.”h]”(hŒ$At eviction time, the gpu_vm_bos of ”…””}”(hj£  hžhhŸNh Nubj7  )”}”(hŒ*all*”h]”hŒall”…””}”(hj«  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1j6  hj£  ubhXñ   the gpu_vms an external
object is bound to need to be put on their gpu_vmâ€™s evict list.
However, when evicting an external object, the dma_resvs of the
gpu_vms the object is bound to are typically not held. Only
the objectâ€™s private dma_resv can be guaranteed to be held. If there
is a ww_acquire context at hand at eviction time we could grab those
dma_resvs but that could cause expensive ww_mutex rollbacks. A simple
option is to just mark the gpu_vm_bos of the evicted gem object with
an ”…””}”(hj£  hžhhŸNh NubjJ  )”}”(hŒ``evicted``”h]”hŒevicted”…””}”(hj½  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jI  hj£  ubhXl   bool that is inspected before the next time the
corresponding gpu_vm evicted list needs to be traversed. For example, when
traversing the list of external objects and locking them. At that time,
both the gpu_vmâ€™s dma_resv and the objectâ€™s dma_resv is held, and the
gpu_vm_bo marked evicted, can then be added to the gpu_vmâ€™s list of
evicted gpu_vm_bos. The ”…””}”(hj£  hžhhŸNh NubjJ  )”}”(hŒ``evicted``”h]”hŒevicted”…””}”(hjÏ  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jI  hj£  ubhŒ7 bool is formally protected by the
objectâ€™s dma_resv.”…””}”(hj£  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1hÉhŸh³h Kühj`  hžhubhÊ)”}”(hŒThe exec function becomes”h]”hŒThe exec function becomes”…””}”(hjç  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÉhŸh³h Mhj`  hžhubj–  )”}”(hXÕ  dma_resv_lock(gpu_vm->resv);

// External object list is protected by the gpu_vm->resv lock.
for_each_gpu_vm_bo_on_extobj_list(gpu_vm, &gpu_vm_bo) {
        dma_resv_lock(gpu_vm_bo.gem_obj->resv);
        if (gpu_vm_bo_marked_evicted(&gpu_vm_bo))
                add_gpu_vm_bo_to_evict_list(&gpu_vm_bo, &gpu_vm->evict_list);
}

for_each_gpu_vm_bo_on_evict_list(&gpu_vm->evict_list, &gpu_vm_bo) {
        validate_gem_bo(&gpu_vm_bo->gem_bo);

        for_each_gpu_vma_of_gpu_vm_bo(&gpu_vm_bo, &gpu_vma)
               move_gpu_vma_to_rebind_list(&gpu_vma, &gpu_vm->rebind_list);
}

for_each_gpu_vma_on_rebind_list(&gpu vm->rebind_list, &gpu_vma) {
        rebind_gpu_vma(&gpu_vma);
        remove_gpu_vma_from_rebind_list(&gpu_vma);
}

add_dependencies(&gpu_job, &gpu_vm->resv);
job_dma_fence = gpu_submit(&gpu_job));

add_dma_fence(job_dma_fence, &gpu_vm->resv);
for_each_external_obj(gpu_vm, &obj)
       add_dma_fence(job_dma_fence, &obj->resv);
dma_resv_unlock_all_resv_locks();”h]”hXÕ  dma_resv_lock(gpu_vm->resv);

// External object list is protected by the gpu_vm->resv lock.
for_each_gpu_vm_bo_on_extobj_list(gpu_vm, &gpu_vm_bo) {
        dma_resv_lock(gpu_vm_bo.gem_obj->resv);
        if (gpu_vm_bo_marked_evicted(&gpu_vm_bo))
                add_gpu_vm_bo_to_evict_list(&gpu_vm_bo, &gpu_vm->evict_list);
}

for_each_gpu_vm_bo_on_evict_list(&gpu_vm->evict_list, &gpu_vm_bo) {
        validate_gem_bo(&gpu_vm_bo->gem_bo);

        for_each_gpu_vma_of_gpu_vm_bo(&gpu_vm_bo, &gpu_vma)
               move_gpu_vma_to_rebind_list(&gpu_vma, &gpu_vm->rebind_list);
}

for_each_gpu_vma_on_rebind_list(&gpu vm->rebind_list, &gpu_vma) {
        rebind_gpu_vma(&gpu_vma);
        remove_gpu_vma_from_rebind_list(&gpu_vma);
}

add_dependencies(&gpu_job, &gpu_vm->resv);
job_dma_fence = gpu_submit(&gpu_job));

add_dma_fence(job_dma_fence, &gpu_vm->resv);
for_each_external_obj(gpu_vm, &obj)
       add_dma_fence(job_dma_fence, &obj->resv);
dma_resv_unlock_all_resv_locks();”…””}”hjõ  sbah}”(h]”h ]”h"]”h$]”h&]”h±h²j¥  ‰j¦  j§  j¨  }”uh1j•  hŸh³h Mhj`  hžhubhÊ)”}”(hŒCAnd the corresponding shared-object aware eviction would look like:”h]”hŒCAnd the corresponding shared-object aware eviction would look like:”…””}”(hj  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÉhŸh³h M-hj`  hžhubj–  )”}”(hXÂ  obj = get_object_from_lru();

dma_resv_lock(obj->resv);
for_each_gpu_vm_bo_of_obj(obj, &gpu_vm_bo)
        if (object_is_vm_local(obj))
             add_gpu_vm_bo_to_evict_list(&gpu_vm_bo, &gpu_vm->evict_list);
        else
             mark_gpu_vm_bo_evicted(&gpu_vm_bo);

add_dependencies(&eviction_job, &obj->resv);
job_dma_fence = gpu_submit(&eviction_job);
add_dma_fence(&obj->resv, job_dma_fence);

dma_resv_unlock(&obj->resv);
put_object(obj);”h]”hXÂ  obj = get_object_from_lru();

dma_resv_lock(obj->resv);
for_each_gpu_vm_bo_of_obj(obj, &gpu_vm_bo)
        if (object_is_vm_local(obj))
             add_gpu_vm_bo_to_evict_list(&gpu_vm_bo, &gpu_vm->evict_list);
        else
             mark_gpu_vm_bo_evicted(&gpu_vm_bo);

add_dependencies(&eviction_job, &obj->resv);
job_dma_fence = gpu_submit(&eviction_job);
add_dma_fence(&obj->resv, job_dma_fence);

dma_resv_unlock(&obj->resv);
put_object(obj);”…””}”hj  sbah}”(h]”h ]”h"]”h$]”h&]”h±h²j¥  ‰j¦  j§  j¨  }”uh1j•  hŸh³h M/hj`  hžhubj¤  )”}”(hŒ.. _Spinlock iteration:”h]”h}”(h]”h ]”h"]”h$]”h&]”j¯  Œspinlock-iteration”uh1j£  h MAhj`  hžhhŸh³ubeh}”(h]”Œ$locking-with-external-buffer-objects”ah ]”h"]”Œ$locking with external buffer objects”ah$]”h&]”uh1h´hh¶hžhhŸh³h Kïubhµ)”}”(hhh]”(hº)”}”(hŒ;Accessing the gpu_vm's lists without the dma_resv lock held”h]”hŒ=Accessing the gpu_vmâ€™s lists without the dma_resv lock held”…””}”(hj7  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h¹hj4  hžhhŸh³h MDubhÊ)”}”(hXq  Some drivers will hold the gpu_vm's dma_resv lock when accessing the
gpu_vm's evict list and external objects lists. However, there are
drivers that need to access these lists without the dma_resv lock
held, for example due to asynchronous state updates from within the
dma_fence signalling critical path. In such cases, a spinlock can be
used to protect manipulation of the lists. However, since higher level
sleeping locks need to be taken for each list item while iterating
over the lists, the items already iterated over need to be
temporarily moved to a private list and the spinlock released
while processing each item:”h]”hXu  Some drivers will hold the gpu_vmâ€™s dma_resv lock when accessing the
gpu_vmâ€™s evict list and external objects lists. However, there are
drivers that need to access these lists without the dma_resv lock
held, for example due to asynchronous state updates from within the
dma_fence signalling critical path. In such cases, a spinlock can be
used to protect manipulation of the lists. However, since higher level
sleeping locks need to be taken for each list item while iterating
over the lists, the items already iterated over need to be
temporarily moved to a private list and the spinlock released
while processing each item:”…””}”(hjE  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÉhŸh³h MFhj4  hžhubh¢)”}”(hX@  code block:: C

struct list_head still_in_list;

INIT_LIST_HEAD(&still_in_list);

spin_lock(&gpu_vm->list_lock);
do {
        struct list_head *entry = list_first_entry_or_null(&gpu_vm->list, head);

        if (!entry)
                break;

        list_move_tail(&entry->head, &still_in_list);
        list_entry_get_unless_zero(entry);
        spin_unlock(&gpu_vm->list_lock);

        process(entry);

        spin_lock(&gpu_vm->list_lock);
        list_entry_put(entry);
} while (true);

list_splice_tail(&still_in_list, &gpu_vm->list);
spin_unlock(&gpu_vm->list_lock);”h]”hX@  code block:: C

struct list_head still_in_list;

INIT_LIST_HEAD(&still_in_list);

spin_lock(&gpu_vm->list_lock);
do {
        struct list_head *entry = list_first_entry_or_null(&gpu_vm->list, head);

        if (!entry)
                break;

        list_move_tail(&entry->head, &still_in_list);
        list_entry_get_unless_zero(entry);
        spin_unlock(&gpu_vm->list_lock);

        process(entry);

        spin_lock(&gpu_vm->list_lock);
        list_entry_put(entry);
} while (true);

list_splice_tail(&still_in_list, &gpu_vm->list);
spin_unlock(&gpu_vm->list_lock);”…””}”hjS  sbah}”(h]”h ]”h"]”h$]”h&]”h±h²uh1h¡hj4  hžhhŸh³h MjubhÊ)”}”(hXÙ  Due to the additional locking and atomic operations, drivers that *can*
avoid accessing the gpu_vm's list outside of the dma_resv lock
might want to avoid also this iteration scheme. Particularly, if the
driver anticipates a large number of list items. For lists where the
anticipated number of list items is small, where list iteration doesn't
happen very often or if there is a significant additional cost
associated with each iteration, the atomic operation overhead
associated with this type of iteration is, most likely, negligible. Note that
if this scheme is used, it is necessary to make sure this list
iteration is protected by an outer level lock or semaphore, since list
items are temporarily pulled off the list while iterating, and it is
also worth mentioning that the local list ``still_in_list`` should
also be considered protected by the ``gpu_vm->list_lock``, and it is
thus possible that items can be removed also from the local list
concurrently with list iteration.”h]”(hŒBDue to the additional locking and atomic operations, drivers that ”…””}”(hja  hžhhŸNh Nubj7  )”}”(hŒ*can*”h]”hŒcan”…””}”(hji  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1j6  hja  ubhXÖ  
avoid accessing the gpu_vmâ€™s list outside of the dma_resv lock
might want to avoid also this iteration scheme. Particularly, if the
driver anticipates a large number of list items. For lists where the
anticipated number of list items is small, where list iteration doesnâ€™t
happen very often or if there is a significant additional cost
associated with each iteration, the atomic operation overhead
associated with this type of iteration is, most likely, negligible. Note that
if this scheme is used, it is necessary to make sure this list
iteration is protected by an outer level lock or semaphore, since list
items are temporarily pulled off the list while iterating, and it is
also worth mentioning that the local list ”…””}”(hja  hžhhŸNh NubjJ  )”}”(hŒ``still_in_list``”h]”hŒstill_in_list”…””}”(hj{  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jI  hja  ubhŒ, should
also be considered protected by the ”…””}”(hja  hžhhŸNh NubjJ  )”}”(hŒ``gpu_vm->list_lock``”h]”hŒgpu_vm->list_lock”…””}”(hj  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jI  hja  ubhŒn, and it is
thus possible that items can be removed also from the local list
concurrently with list iteration.”…””}”(hja  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1hÉhŸh³h Mkhj4  hžhubhÊ)”}”(hŒ†Please refer to the :ref:`DRM GPUVM locking section
<drm_gpuvm_locking>` and its internal
:c:func:`get_next_vm_bo_from_list` function.”h]”(hŒPlease refer to the ”…””}”(hj¥  hžhhŸNh Nubh)”}”(hŒ4:ref:`DRM GPUVM locking section
<drm_gpuvm_locking>`”h]”h÷)”}”(hj¯  h]”hŒDRM GPUVM locking section”…””}”(hj±  hžhhŸNh Nubah}”(h]”h ]”(j  Œstd”Œstd-ref”eh"]”h$]”h&]”uh1höhj­  ubah}”(h]”h ]”h"]”h$]”h&]”Œrefdoc”j  Œ	refdomain”j»  Œreftype”Œref”Œrefexplicit”ˆŒrefwarn”ˆj  Œdrm_gpuvm_locking”uh1hhŸh³h M{hj¥  ubhŒ and its internal
”…””}”(hj¥  hžhhŸNh Nubh)”}”(hŒ":c:func:`get_next_vm_bo_from_list`”h]”jJ  )”}”(hjÓ  h]”hŒget_next_vm_bo_from_list()”…””}”(hjÕ  hžhhŸNh Nubah}”(h]”h ]”(j  j   Œc-func”eh"]”h$]”h&]”uh1jI  hjÑ  ubah}”(h]”h ]”h"]”h$]”h&]”Œrefdoc”j  Œ	refdomain”j   Œreftype”Œfunc”Œrefexplicit”‰Œrefwarn”‰j  Œget_next_vm_bo_from_list”uh1hhŸh³h M{hj¥  ubhŒ
 function.”…””}”(hj¥  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1hÉhŸh³h M{hj4  hžhubeh}”(h]”(Œ;accessing-the-gpu-vm-s-lists-without-the-dma-resv-lock-held”j+  eh ]”h"]”(Œ;accessing the gpu_vm's lists without the dma_resv lock held”Œspinlock iteration”eh$]”h&]”uh1h´hh¶hžhhŸh³h MDjS  }”j   j!  sjU  }”j+  j!  subhµ)”}”(hhh]”(hº)”}”(hŒuserptr gpu_vmas”h]”hŒuserptr gpu_vmas”…””}”(hj  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h¹hj  hžhhŸh³h MubhÊ)”}”(hX4  A userptr gpu_vma is a gpu_vma that, instead of mapping a buffer object to a
GPU virtual address range, directly maps a CPU mm range of anonymous-
or file page-cache pages.
A very simple approach would be to just pin the pages using
pin_user_pages() at bind time and unpin them at unbind time, but this
creates a Denial-Of-Service vector since a single user-space process
would be able to pin down all of system memory, which is not
desirable. (For special use-cases and assuming proper accounting pinning might
still be a desirable feature, though). What we need to do in the
general case is to obtain a reference to the desired pages, make sure
we are notified using a MMU notifier just before the CPU mm unmaps the
pages, dirty them if they are not mapped read-only to the GPU, and
then drop the reference.
When we are notified by the MMU notifier that CPU mm is about to drop the
pages, we need to stop GPU access to the pages by waiting for VM idle
in the MMU notifier and make sure that before the next time the GPU
tries to access whatever is now present in the CPU mm range, we unmap
the old pages from the GPU page tables and repeat the process of
obtaining new page references. (See the :ref:`notifier example
<Invalidation example>` below). Note that when the core mm decides to
laundry pages, we get such an unmap MMU notification and can mark the
pages dirty again before the next GPU access. We also get similar MMU
notifications for NUMA accounting which the GPU driver doesn't really
need to care about, but so far it has proven difficult to exclude
certain notifications.”h]”(hX­  A userptr gpu_vma is a gpu_vma that, instead of mapping a buffer object to a
GPU virtual address range, directly maps a CPU mm range of anonymous-
or file page-cache pages.
A very simple approach would be to just pin the pages using
pin_user_pages() at bind time and unpin them at unbind time, but this
creates a Denial-Of-Service vector since a single user-space process
would be able to pin down all of system memory, which is not
desirable. (For special use-cases and assuming proper accounting pinning might
still be a desirable feature, though). What we need to do in the
general case is to obtain a reference to the desired pages, make sure
we are notified using a MMU notifier just before the CPU mm unmaps the
pages, dirty them if they are not mapped read-only to the GPU, and
then drop the reference.
When we are notified by the MMU notifier that CPU mm is about to drop the
pages, we need to stop GPU access to the pages by waiting for VM idle
in the MMU notifier and make sure that before the next time the GPU
tries to access whatever is now present in the CPU mm range, we unmap
the old pages from the GPU page tables and repeat the process of
obtaining new page references. (See the ”…””}”(hj  hžhhŸNh Nubh)”}”(hŒ.:ref:`notifier example
<Invalidation example>`”h]”h÷)”}”(hj   h]”hŒnotifier example”…””}”(hj"  hžhhŸNh Nubah}”(h]”h ]”(j  Œstd”Œstd-ref”eh"]”h$]”h&]”uh1höhj  ubah}”(h]”h ]”h"]”h$]”h&]”Œrefdoc”j  Œ	refdomain”j,  Œreftype”Œref”Œrefexplicit”ˆŒrefwarn”ˆj  Œinvalidation example”uh1hhŸh³h Mƒhj  ubhX[   below). Note that when the core mm decides to
laundry pages, we get such an unmap MMU notification and can mark the
pages dirty again before the next GPU access. We also get similar MMU
notifications for NUMA accounting which the GPU driver doesnâ€™t really
need to care about, but so far it has proven difficult to exclude
certain notifications.”…””}”(hj  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1hÉhŸh³h Mƒhj  hžhubhÊ)”}”(hŒ“Using a MMU notifier for device DMA (and other methods) is described in
:ref:`the pin_user_pages() documentation <mmu-notifier-registration-case>`.”h]”(hŒHUsing a MMU notifier for device DMA (and other methods) is described in
”…””}”(hjH  hžhhŸNh Nubh)”}”(hŒJ:ref:`the pin_user_pages() documentation <mmu-notifier-registration-case>`”h]”h÷)”}”(hjR  h]”hŒ"the pin_user_pages() documentation”…””}”(hjT  hžhhŸNh Nubah}”(h]”h ]”(j  Œstd”Œstd-ref”eh"]”h$]”h&]”uh1höhjP  ubah}”(h]”h ]”h"]”h$]”h&]”Œrefdoc”j  Œ	refdomain”j^  Œreftype”Œref”Œrefexplicit”ˆŒrefwarn”ˆj  Œmmu-notifier-registration-case”uh1hhŸh³h MhjH  ubhŒ.”…””}”(hjH  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1hÉhŸh³h Mhj  hžhubhÊ)”}”(hXŠ  Now, the method of obtaining struct page references using
get_user_pages() unfortunately can't be used under a dma_resv lock
since that would violate the locking order of the dma_resv lock vs the
mmap_lock that is grabbed when resolving a CPU pagefault. This means
the gpu_vm's list of userptr gpu_vmas needs to be protected by an
outer lock, which in our example below is the ``gpu_vm->lock``.”h]”(hX}  Now, the method of obtaining struct page references using
get_user_pages() unfortunately canâ€™t be used under a dma_resv lock
since that would violate the locking order of the dma_resv lock vs the
mmap_lock that is grabbed when resolving a CPU pagefault. This means
the gpu_vmâ€™s list of userptr gpu_vmas needs to be protected by an
outer lock, which in our example below is the ”…””}”(hjz  hžhhŸNh NubjJ  )”}”(hŒ``gpu_vm->lock``”h]”hŒgpu_vm->lock”…””}”(hj‚  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jI  hjz  ubhŒ.”…””}”(hjz  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1hÉhŸh³h M hj  hžhubhÊ)”}”(hŒLThe MMU interval seqlock for a userptr gpu_vma is used in the following
way:”h]”hŒLThe MMU interval seqlock for a userptr gpu_vma is used in the following
way:”…””}”(hjš  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÉhŸh³h M§hj  hžhubj–  )”}”(hX…  // Exclusive locking mode here is strictly needed only if there are
// invalidated userptr gpu_vmas present, to avoid concurrent userptr
// revalidations of the same userptr gpu_vma.
down_write(&gpu_vm->lock);
retry:

// Note: mmu_interval_read_begin() blocks until there is no
// invalidation notifier running anymore.
seq = mmu_interval_read_begin(&gpu_vma->userptr_interval);
if (seq != gpu_vma->saved_seq) {
        obtain_new_page_pointers(&gpu_vma);
        dma_resv_lock(&gpu_vm->resv);
        add_gpu_vma_to_revalidate_list(&gpu_vma, &gpu_vm);
        dma_resv_unlock(&gpu_vm->resv);
        gpu_vma->saved_seq = seq;
}

// The usual revalidation goes here.

// Final userptr sequence validation may not happen before the
// submission dma_fence is added to the gpu_vm's resv, from the POW
// of the MMU invalidation notifier. Hence the
// userptr_notifier_lock that will make them appear atomic.

add_dependencies(&gpu_job, &gpu_vm->resv);
down_read(&gpu_vm->userptr_notifier_lock);
if (mmu_interval_read_retry(&gpu_vma->userptr_interval, gpu_vma->saved_seq)) {
       up_read(&gpu_vm->userptr_notifier_lock);
       goto retry;
}

job_dma_fence = gpu_submit(&gpu_job));

add_dma_fence(job_dma_fence, &gpu_vm->resv);

for_each_external_obj(gpu_vm, &obj)
       add_dma_fence(job_dma_fence, &obj->resv);

dma_resv_unlock_all_resv_locks();
up_read(&gpu_vm->userptr_notifier_lock);
up_write(&gpu_vm->lock);”h]”hX…  // Exclusive locking mode here is strictly needed only if there are
// invalidated userptr gpu_vmas present, to avoid concurrent userptr
// revalidations of the same userptr gpu_vma.
down_write(&gpu_vm->lock);
retry:

// Note: mmu_interval_read_begin() blocks until there is no
// invalidation notifier running anymore.
seq = mmu_interval_read_begin(&gpu_vma->userptr_interval);
if (seq != gpu_vma->saved_seq) {
        obtain_new_page_pointers(&gpu_vma);
        dma_resv_lock(&gpu_vm->resv);
        add_gpu_vma_to_revalidate_list(&gpu_vma, &gpu_vm);
        dma_resv_unlock(&gpu_vm->resv);
        gpu_vma->saved_seq = seq;
}

// The usual revalidation goes here.

// Final userptr sequence validation may not happen before the
// submission dma_fence is added to the gpu_vm's resv, from the POW
// of the MMU invalidation notifier. Hence the
// userptr_notifier_lock that will make them appear atomic.

add_dependencies(&gpu_job, &gpu_vm->resv);
down_read(&gpu_vm->userptr_notifier_lock);
if (mmu_interval_read_retry(&gpu_vma->userptr_interval, gpu_vma->saved_seq)) {
       up_read(&gpu_vm->userptr_notifier_lock);
       goto retry;
}

job_dma_fence = gpu_submit(&gpu_job));

add_dma_fence(job_dma_fence, &gpu_vm->resv);

for_each_external_obj(gpu_vm, &obj)
       add_dma_fence(job_dma_fence, &obj->resv);

dma_resv_unlock_all_resv_locks();
up_read(&gpu_vm->userptr_notifier_lock);
up_write(&gpu_vm->lock);”…””}”hj¨  sbah}”(h]”h ]”h"]”h$]”h&]”h±h²j¥  ‰j¦  j§  j¨  }”uh1j•  hŸh³h Mªhj  hžhubhÊ)”}”(hXE  The code between ``mmu_interval_read_begin()`` and the
``mmu_interval_read_retry()`` marks the read side critical section of
what we call the ``userptr_seqlock``. In reality, the gpu_vm's userptr
gpu_vma list is looped through, and the check is done for *all* of its
userptr gpu_vmas, although we only show a single one here.”h]”(hŒThe code between ”…””}”(hj·  hžhhŸNh NubjJ  )”}”(hŒ``mmu_interval_read_begin()``”h]”hŒmmu_interval_read_begin()”…””}”(hj¿  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jI  hj·  ubhŒ	 and the
”…””}”(hj·  hžhhŸNh NubjJ  )”}”(hŒ``mmu_interval_read_retry()``”h]”hŒmmu_interval_read_retry()”…””}”(hjÑ  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jI  hj·  ubhŒ: marks the read side critical section of
what we call the ”…””}”(hj·  hžhhŸNh NubjJ  )”}”(hŒ``userptr_seqlock``”h]”hŒuserptr_seqlock”…””}”(hjã  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jI  hj·  ubhŒ_. In reality, the gpu_vmâ€™s userptr
gpu_vma list is looped through, and the check is done for ”…””}”(hj·  hžhhŸNh Nubj7  )”}”(hŒ*all*”h]”hŒall”…””}”(hjõ  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1j6  hj·  ubhŒB of its
userptr gpu_vmas, although we only show a single one here.”…””}”(hj·  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1hÉhŸh³h MÖhj  hžhubhÊ)”}”(hŒÆThe userptr gpu_vma MMU invalidation notifier might be called from
reclaim context and, again, to avoid locking order violations, we can't
take any dma_resv lock nor the gpu_vm->lock from within it.”h]”hŒÈThe userptr gpu_vma MMU invalidation notifier might be called from
reclaim context and, again, to avoid locking order violations, we canâ€™t
take any dma_resv lock nor the gpu_vm->lock from within it.”…””}”(hj	  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÉhŸh³h MÜhj  hžhubj¤  )”}”(hŒ.. _Invalidation example:”h]”h}”(h]”h ]”h"]”h$]”h&]”j¯  Œinvalidation-example”uh1j£  h Màhj  hžhhŸh³ubj–  )”}”(hXh  bool gpu_vma_userptr_invalidate(userptr_interval, cur_seq)
{
        // Make sure the exec function either sees the new sequence
        // and backs off or we wait for the dma-fence:

        down_write(&gpu_vm->userptr_notifier_lock);
        mmu_interval_set_seq(userptr_interval, cur_seq);
        up_write(&gpu_vm->userptr_notifier_lock);

        // At this point, the exec function can't succeed in
        // submitting a new job, because cur_seq is an invalid
        // sequence number and will always cause a retry. When all
        // invalidation callbacks, the mmu notifier core will flip
        // the sequence number to a valid one. However we need to
        // stop gpu access to the old pages here.

        dma_resv_wait_timeout(&gpu_vm->resv, DMA_RESV_USAGE_BOOKKEEP,
                              false, MAX_SCHEDULE_TIMEOUT);
        return true;
}”h]”hXh  bool gpu_vma_userptr_invalidate(userptr_interval, cur_seq)
{
        // Make sure the exec function either sees the new sequence
        // and backs off or we wait for the dma-fence:

        down_write(&gpu_vm->userptr_notifier_lock);
        mmu_interval_set_seq(userptr_interval, cur_seq);
        up_write(&gpu_vm->userptr_notifier_lock);

        // At this point, the exec function can't succeed in
        // submitting a new job, because cur_seq is an invalid
        // sequence number and will always cause a retry. When all
        // invalidation callbacks, the mmu notifier core will flip
        // the sequence number to a valid one. However we need to
        // stop gpu access to the old pages here.

        dma_resv_wait_timeout(&gpu_vm->resv, DMA_RESV_USAGE_BOOKKEEP,
                              false, MAX_SCHEDULE_TIMEOUT);
        return true;
}”…””•ã<      }”hj&	  sbah}”(h]”j%	  ah ]”h"]”Œinvalidation example”ah$]”h&]”h±h²j¥  ‰j¦  j§  j¨  }”uh1j•  hŸh³h Máhj  hžhjS  }”j2	  j	  sjU  }”j%	  j	  subhÊ)”}”(hŒ¼When this invalidation notifier returns, the GPU can no longer be
accessing the old pages of the userptr gpu_vma and needs to redo the
page-binding before a new GPU submission can succeed.”h]”hŒ¼When this invalidation notifier returns, the GPU can no longer be
accessing the old pages of the userptr gpu_vma and needs to redo the
page-binding before a new GPU submission can succeed.”…””}”(hj8	  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÉhŸh³h Møhj  hžhubhµ)”}”(hhh]”(hº)”}”(hŒ1Efficient userptr gpu_vma exec_function iteration”h]”hŒ1Efficient userptr gpu_vma exec_function iteration”…””}”(hjI	  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h¹hjF	  hžhhŸh³h MýubhÊ)”}”(hXN  If the gpu_vm's list of userptr gpu_vmas becomes large, it's
inefficient to iterate through the complete lists of userptrs on each
exec function to check whether each userptr gpu_vma's saved
sequence number is stale. A solution to this is to put all
*invalidated* userptr gpu_vmas on a separate gpu_vm list and
only check the gpu_vmas present on this list on each exec
function. This list will then lend itself very-well to the spinlock
locking scheme that is
:ref:`described in the spinlock iteration section <Spinlock iteration>`, since
in the mmu notifier, where we add the invalidated gpu_vmas to the
list, it's not possible to take any outer locks like the
``gpu_vm->lock`` or the ``gpu_vm->resv`` lock. Note that the
``gpu_vm->lock`` still needs to be taken while iterating to ensure the list is
complete, as also mentioned in that section.”h]”(hX   If the gpu_vmâ€™s list of userptr gpu_vmas becomes large, itâ€™s
inefficient to iterate through the complete lists of userptrs on each
exec function to check whether each userptr gpu_vmaâ€™s saved
sequence number is stale. A solution to this is to put all
”…””}”(hjW	  hžhhŸNh Nubj7  )”}”(hŒ*invalidated*”h]”hŒinvalidated”…””}”(hj_	  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1j6  hjW	  ubhŒÅ userptr gpu_vmas on a separate gpu_vm list and
only check the gpu_vmas present on this list on each exec
function. This list will then lend itself very-well to the spinlock
locking scheme that is
”…””}”(hjW	  hžhhŸNh Nubh)”}”(hŒG:ref:`described in the spinlock iteration section <Spinlock iteration>`”h]”h÷)”}”(hjs	  h]”hŒ+described in the spinlock iteration section”…””}”(hju	  hžhhŸNh Nubah}”(h]”h ]”(j  Œstd”Œstd-ref”eh"]”h$]”h&]”uh1höhjq	  ubah}”(h]”h ]”h"]”h$]”h&]”Œrefdoc”j  Œ	refdomain”j	  Œreftype”Œref”Œrefexplicit”ˆŒrefwarn”ˆj  Œspinlock iteration”uh1hhŸh³h MÿhjW	  ubhŒ…, since
in the mmu notifier, where we add the invalidated gpu_vmas to the
list, itâ€™s not possible to take any outer locks like the
”…””}”(hjW	  hžhhŸNh NubjJ  )”}”(hŒ``gpu_vm->lock``”h]”hŒgpu_vm->lock”…””}”(hj•	  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jI  hjW	  ubhŒ or the ”…””}”(hjW	  hžhhŸNh NubjJ  )”}”(hŒ``gpu_vm->resv``”h]”hŒgpu_vm->resv”…””}”(hj§	  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jI  hjW	  ubhŒ lock. Note that the
”…””}”(hjW	  hžhhŸNh NubjJ  )”}”(hŒ``gpu_vm->lock``”h]”hŒgpu_vm->lock”…””}”(hj¹	  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jI  hjW	  ubhŒk still needs to be taken while iterating to ensure the list is
complete, as also mentioned in that section.”…””}”(hjW	  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1hÉhŸh³h MÿhjF	  hžhubhÊ)”}”(hŒŠIf using an invalidated userptr list like this, the retry check in the
exec function trivially becomes a check for invalidated list empty.”h]”hŒŠIf using an invalidated userptr list like this, the retry check in the
exec function trivially becomes a check for invalidated list empty.”…””}”(hjÑ	  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÉhŸh³h MhjF	  hžhubeh}”(h]”Œ1efficient-userptr-gpu-vma-exec-function-iteration”ah ]”h"]”Œ1efficient userptr gpu_vma exec_function iteration”ah$]”h&]”uh1h´hj  hžhhŸh³h Mýubeh}”(h]”Œuserptr-gpu-vmas”ah ]”h"]”Œuserptr gpu_vmas”ah$]”h&]”uh1h´hh¶hžhhŸh³h Mubhµ)”}”(hhh]”(hº)”}”(hŒLocking at bind and unbind time”h]”hŒLocking at bind and unbind time”…””}”(hjò	  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h¹hjï	  hžhhŸh³h MubhÊ)”}”(hXˆ  At bind time, assuming a GEM object backed gpu_vma, each
gpu_vma needs to be associated with a gpu_vm_bo and that
gpu_vm_bo in turn needs to be added to the GEM object's
gpu_vm_bo list, and possibly to the gpu_vm's external object
list. This is referred to as *linking* the gpu_vma, and typically
requires that the ``gpu_vm->lock`` and the ``gem_object->gpuva_lock``
are held. When unlinking a gpu_vma the same locks should be held,
and that ensures that when iterating over ``gpu_vmas`, either under
the ``gpu_vm->resv`` or the GEM object's dma_resv, that the gpu_vmas
stay alive as long as the lock under which we iterate is not released. For
userptr gpu_vmas it's similarly required that during vma destroy, the
outer ``gpu_vm->lock`` is held, since otherwise when iterating over
the invalidated userptr list as described in the previous section,
there is nothing keeping those userptr gpu_vmas alive.”h]”(hX  At bind time, assuming a GEM object backed gpu_vma, each
gpu_vma needs to be associated with a gpu_vm_bo and that
gpu_vm_bo in turn needs to be added to the GEM objectâ€™s
gpu_vm_bo list, and possibly to the gpu_vmâ€™s external object
list. This is referred to as ”…””}”(hj 
  hžhhŸNh Nubj7  )”}”(hŒ	*linking*”h]”hŒlinking”…””}”(hj
  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1j6  hj 
  ubhŒ. the gpu_vma, and typically
requires that the ”…””}”(hj 
  hžhhŸNh NubjJ  )”}”(hŒ``gpu_vm->lock``”h]”hŒgpu_vm->lock”…””}”(hj
  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jI  hj 
  ubhŒ	 and the ”…””}”(hj 
  hžhhŸNh NubjJ  )”}”(hŒ``gem_object->gpuva_lock``”h]”hŒgem_object->gpuva_lock”…””}”(hj,
  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jI  hj 
  ubhŒm
are held. When unlinking a gpu_vma the same locks should be held,
and that ensures that when iterating over ”…””}”(hj 
  hžhhŸNh NubjJ  )”}”(hŒ.``gpu_vmas`, either under
the ``gpu_vm->resv``”h]”hŒ*gpu_vmas`, either under
the ``gpu_vm->resv”…””}”(hj>
  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jI  hj 
  ubhŒÌ or the GEM objectâ€™s dma_resv, that the gpu_vmas
stay alive as long as the lock under which we iterate is not released. For
userptr gpu_vmas itâ€™s similarly required that during vma destroy, the
outer ”…””}”(hj 
  hžhhŸNh NubjJ  )”}”(hŒ``gpu_vm->lock``”h]”hŒgpu_vm->lock”…””}”(hjP
  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jI  hj 
  ubhŒ§ is held, since otherwise when iterating over
the invalidated userptr list as described in the previous section,
there is nothing keeping those userptr gpu_vmas alive.”…””}”(hj 
  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1hÉhŸh³h Mhjï	  hžhubeh}”(h]”Œlocking-at-bind-and-unbind-time”ah ]”h"]”Œlocking at bind and unbind time”ah$]”h&]”uh1h´hh¶hžhhŸh³h Mubhµ)”}”(hhh]”(hº)”}”(hŒ5Locking for recoverable page-fault page-table updates”h]”hŒ5Locking for recoverable page-fault page-table updates”…””}”(hjs
  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h¹hjp
  hžhhŸh³h M$ubhÊ)”}”(hŒZThere are two important things we need to ensure with locking for
recoverable page-faults:”h]”hŒZThere are two important things we need to ensure with locking for
recoverable page-faults:”…””}”(hj
  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÉhŸh³h M&hjp
  hžhubj;  )”}”(hhh]”(j@  )”}”(hŒ—At the time we return pages back to the system / allocator for
reuse, there should be no remaining GPU mappings and any GPU TLB
must have been flushed.”h]”hÊ)”}”(hŒ—At the time we return pages back to the system / allocator for
reuse, there should be no remaining GPU mappings and any GPU TLB
must have been flushed.”h]”hŒ—At the time we return pages back to the system / allocator for
reuse, there should be no remaining GPU mappings and any GPU TLB
must have been flushed.”…””}”(hj–
  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÉhŸh³h M)hj’
  ubah}”(h]”h ]”h"]”h$]”h&]”uh1j?  hj
  hžhhŸh³h Nubj@  )”}”(hŒ6The unmapping and mapping of a gpu_vma must not race.
”h]”hÊ)”}”(hŒ5The unmapping and mapping of a gpu_vma must not race.”h]”hŒ5The unmapping and mapping of a gpu_vma must not race.”…””}”(hj®
  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÉhŸh³h M,hjª
  ubah}”(h]”h ]”h"]”h$]”h&]”uh1j?  hj
  hžhhŸh³h Nubeh}”(h]”h ]”h"]”h$]”h&]”j1  j2  uh1j:  hŸh³h M)hjp
  hžhubhÊ)”}”(hXU  Since the unmapping (or zapping) of GPU ptes is typically taking place
where it is hard or even impossible to take any outer level locks we
must either introduce a new lock that is held at both mapping and
unmapping time, or look at the locks we do hold at unmapping time and
make sure that they are held also at mapping time. For userptr
gpu_vmas, the ``userptr_seqlock`` is held in write mode in the mmu
invalidation notifier where zapping happens. Hence, if the
``userptr_seqlock`` as well as the ``gpu_vm->userptr_notifier_lock``
is held in read mode during mapping, it will not race with the
zapping. For GEM object backed gpu_vmas, zapping will take place under
the GEM object's dma_resv and ensuring that the dma_resv is held also
when populating the page-tables for any gpu_vma pointing to the GEM
object, will similarly ensure we are race-free.”h]”(hXa  Since the unmapping (or zapping) of GPU ptes is typically taking place
where it is hard or even impossible to take any outer level locks we
must either introduce a new lock that is held at both mapping and
unmapping time, or look at the locks we do hold at unmapping time and
make sure that they are held also at mapping time. For userptr
gpu_vmas, the ”…””}”(hjÈ
  hžhhŸNh NubjJ  )”}”(hŒ``userptr_seqlock``”h]”hŒuserptr_seqlock”…””}”(hjÐ
  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jI  hjÈ
  ubhŒ] is held in write mode in the mmu
invalidation notifier where zapping happens. Hence, if the
”…””}”(hjÈ
  hžhhŸNh NubjJ  )”}”(hŒ``userptr_seqlock``”h]”hŒuserptr_seqlock”…””}”(hjâ
  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jI  hjÈ
  ubhŒ as well as the ”…””}”(hjÈ
  hžhhŸNh NubjJ  )”}”(hŒ!``gpu_vm->userptr_notifier_lock``”h]”hŒgpu_vm->userptr_notifier_lock”…””}”(hjô
  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jI  hjÈ
  ubhXB  
is held in read mode during mapping, it will not race with the
zapping. For GEM object backed gpu_vmas, zapping will take place under
the GEM objectâ€™s dma_resv and ensuring that the dma_resv is held also
when populating the page-tables for any gpu_vma pointing to the GEM
object, will similarly ensure we are race-free.”…””}”(hjÈ
  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1hÉhŸh³h M.hjp
  hžhubhÊ)”}”(hŒàIf any part of the mapping is performed asynchronously
under a dma-fence with these locks released, the zapping will need to
wait for that dma-fence to signal under the relevant lock before
starting to modify the page-table.”h]”hŒàIf any part of the mapping is performed asynchronously
under a dma-fence with these locks released, the zapping will need to
wait for that dma-fence to signal under the relevant lock before
starting to modify the page-table.”…””}”(hj  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÉhŸh³h M<hjp
  hžhubhÊ)”}”(hX3  Since modifying the
page-table structure in a way that frees up page-table memory
might also require outer level locks, the zapping of GPU ptes
typically focuses only on zeroing page-table or page-directory entries
and flushing TLB, whereas freeing of page-table memory is deferred to
unbind or rebind time.”h]”hX3  Since modifying the
page-table structure in a way that frees up page-table memory
might also require outer level locks, the zapping of GPU ptes
typically focuses only on zeroing page-table or page-directory entries
and flushing TLB, whereas freeing of page-table memory is deferred to
unbind or rebind time.”…””}”(hj  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÉhŸh³h MAhjp
  hžhubeh}”(h]”Œ5locking-for-recoverable-page-fault-page-table-updates”ah ]”h"]”Œ5locking for recoverable page-fault page-table updates”ah$]”h&]”uh1h´hh¶hžhhŸh³h M$ubeh}”(h]”Œvm-bind-locking”ah ]”h"]”Œvm_bind locking”ah$]”h&]”uh1h´hhhžhhŸh³h Kubeh}”(h]”h ]”h"]”h$]”h&]”Œsource”h³uh1hŒcurrent_source”NŒcurrent_line”NŒsettings”Œdocutils.frontend”ŒValues”“”)”}”(h¹NŒ	generator”NŒ	datestamp”NŒsource_link”NŒ
source_url”NŒtoc_backlinks”Œentry”Œfootnote_backlinks”KŒsectnum_xform”KŒstrip_comments”NŒstrip_elements_with_classes”NŒstrip_classes”NŒreport_level”KŒ
halt_level”KŒexit_status_level”KŒdebug”NŒwarning_stream”NŒ	traceback”ˆŒinput_encoding”Œ	utf-8-sig”Œinput_encoding_error_handler”Œstrict”Œoutput_encoding”Œutf-8”Œoutput_encoding_error_handler”j[  Œerror_encoding”Œutf-8”Œerror_encoding_error_handler”Œbackslashreplace”Œlanguage_code”Œen”Œrecord_dependencies”NŒconfig”NŒ	id_prefix”hŒauto_id_prefix”Œid”Œdump_settings”NŒdump_internals”NŒdump_transforms”NŒdump_pseudo_xml”NŒexpose_internals”NŒstrict_visitor”NŒ_disable_config”NŒ_source”h³Œ_destination”NŒ_config_files”]”Œ7/var/lib/git/docbuild/linux/Documentation/docutils.conf”aŒfile_insertion_enabled”ˆŒraw_enabled”KŒline_length_limit”M'Œpep_references”NŒpep_base_url”Œhttps://peps.python.org/”Œpep_file_url_template”Œpep-%04d”Œrfc_references”NŒrfc_base_url”Œ&https://datatracker.ietf.org/doc/html/”Œ	tab_width”KŒtrim_footnote_reference_space”‰Œsyntax_highlight”Œlong”Œsmart_quotes”ˆŒsmartquotes_locales”]”Œcharacter_level_inline_markup”‰Œdoctitle_xform”‰Œdocinfo_xform”KŒsectsubtitle_xform”‰Œimage_loading”Œlink”Œembed_stylesheet”‰Œcloak_email_addresses”ˆŒsection_self_link”‰Œenv”NubŒreporter”NŒindirect_targets”]”Œsubstitution_defs”}”Œsubstitution_names”}”Œrefnames”}”Œrefids”}”(j°  ]”j¥  aj+  ]”j!  aj%	  ]”j	  auŒnameids”}”(j5  j2  j&  j#  j8  j5  j¶  j³  jP  j°  jO  jL  j]  jZ  j½  jº  jU  jR  j1  j.  j   j+  jÿ  jü  jì	  jé	  j2	  j%	  jä	  já	  jm
  jj
  j-  j*  uŒ	nametypes”}”(j5  ‰j&  ‰j8  ‰j¶  ‰jP  ˆjO  ‰j]  ‰j½  ‰jU  ‰j1  ‰j   ˆjÿ  ‰jì	  ‰j2	  ˆjä	  ‰jm
  ‰j-  ‰uh}”(j2  h¶j#  hÙj5  j)  j³  j;  j°  j¹  jL  j¹  jZ  jW  jº  jv  jR  jÀ  j.  j`  j+  j4  jü  j4  jé	  j  j%	  j&	  já	  jF	  jj
  jï	  j*  jp
  uŒfootnote_refs”}”Œcitation_refs”}”Œautofootnotes”]”Œautofootnote_refs”]”Œsymbol_footnotes”]”Œsymbol_footnote_refs”]”Œ	footnotes”]”Œ	citations”]”Œautofootnote_start”KŒsymbol_footnote_start”K Œ
id_counter”Œcollections”ŒCounter”“”}”…”R”Œparse_messages”]”Œtransform_messages”]”(hŒsystem_message”“”)”}”(hhh]”hÊ)”}”(hhh]”hŒ6Hyperlink target "gpu-vma-lifetime" is not referenced.”…””}”hjÇ  sbah}”(h]”h ]”h"]”h$]”h&]”uh1hÉhjÄ  ubah}”(h]”h ]”h"]”h$]”h&]”Œlevel”KŒtype”ŒINFO”Œsource”h³Œline”Ksuh1jÂ  ubjÃ  )”}”(hhh]”hÊ)”}”(hhh]”hŒ8Hyperlink target "spinlock-iteration" is not referenced.”…””}”hjâ  sbah}”(h]”h ]”h"]”h$]”h&]”uh1hÉhjß  ubah}”(h]”h ]”h"]”h$]”h&]”Œlevel”KŒtype”jÜ  Œsource”h³Œline”MAuh1jÂ  ubjÃ  )”}”(hhh]”hÊ)”}”(hhh]”hŒ:Hyperlink target "invalidation-example" is not referenced.”…””}”hjü  sbah}”(h]”h ]”h"]”h$]”h&]”uh1hÉhjù  ubah}”(h]”h ]”h"]”h$]”h&]”Œlevel”KŒtype”jÜ  Œsource”h³Œline”Màuh1jÂ  ubeŒtransformer”NŒinclude_log”]”Œ
decoration”Nhžhub.