.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)

===============
GPU SVM Section
===============

Agreed upon design principles
=============================

* migrate_to_ram path

  * Partial migration is supported (i.e., a subset of pages attempting to
    migrate can actually migrate, with only the faulting page guaranteed to
    migrate).
  * Driver handles mixed migrations via retry loops rather than locking.

* Eviction

  * Eviction is defined as migrating data from the GPU back to the CPU without
    a virtual address to free up GPU memory.
  * Only looking at physical memory data structures and locks as opposed to
    looking at virtual memory data structures and locks.
  * No looking at mm/vma structs or relying on those being locked.
  * The rationale for the above two points is that CPU virtual addresses can
    change at any moment, while the physical pages remain stable.
  * GPU page table invalidation, which requires a GPU virtual address, is
    handled via the notifier that has access to the GPU virtual address.

* GPU fault side

  * The mmap_read lock is only used around core MM functions which require
    this lock, and the driver should strive to take the mmap_read lock only in
    the GPU SVM layer.
  * Big retry loop to handle all races with the mmu notifier under the gpu
    pagetable locks/mmu notifier range lock/whatever we end up calling those.
  * Races (especially against concurrent eviction or migrate_to_ram) should
    not be handled on the fault side by trying to hold locks; rather, they
    should be handled using retry loops. One possible exception is holding a
    BO's dma-resv lock during the initial migration to VRAM, as this is a
    well-defined lock that can be taken underneath the mmap_read lock.
  * One possible issue with the above approach is if a driver has a strict
    migration policy requiring GPU access to occur in GPU memory. Concurrent
    CPU access could cause a livelock due to endless retries. While no current
    user (Xe) of GPU SVM has such a policy, it is likely to be added in the
    future. Ideally, this should be resolved on the core-MM side rather than
    through a driver-side lock.

* Physical memory to virtual backpointer

  * This does not work, as no pointers from physical memory to virtual memory
    should exist. mremap() is an example of the core MM updating the virtual
    address without notifying the driver of the address change; the driver
    only receives the invalidation notifier.
  * The physical memory backpointer (page->zone_device_data) should remain
    stable from allocation to page free. Safely updating this against a
    concurrent user would be very difficult unless the page is free (see the
    sketch following this list).

* GPU pagetable locking

  * Notifier lock only protects range tree, pages valid state for a range
    (rather than seqno due to wider notifiers), pagetable entries, and mmu
    notifier seqno tracking; it is not a global lock to protect against races.
  * All races handled with big retry as mentioned above.

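The following is a minimal sketch of the backpointer rule above: the only
pointer from physical memory back to driver state is page->zone_device_data,
set once when the device page is allocated and left untouched until the page
is freed. The driver_devmem_allocation structure and helper names are
hypothetical and exist only to illustrate the principle.

.. code-block:: c

    /*
     * Hedged sketch: the physical-memory backpointer is written exactly once
     * at allocation time and only read afterwards; it never points at
     * anything derived from a CPU virtual address.
     */
    struct driver_devmem_allocation {
            void *vram_block;       /* hypothetical handle to driver VRAM */
    };

    static void driver_devmem_page_init(struct page *page,
                                        struct driver_devmem_allocation *alloc)
    {
            /* Set once at allocation; never rewritten while the page is live. */
            page->zone_device_data = alloc;
    }

    static struct driver_devmem_allocation *
    driver_devmem_page_to_alloc(struct page *page)
    {
            /*
             * Safe to read without extra locking because the backpointer is
             * immutable from allocation until the page is freed.
             */
            return page->zone_device_data;
    }
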
Overview of baseline design
===========================

The GPU Shared Virtual Memory (GPU SVM) layer for the Direct Rendering Manager
(DRM) is a component of the DRM framework designed to manage shared virtual
memory between the CPU and GPU. It enables efficient data exchange and
processing for GPU-accelerated applications by allowing memory sharing and
synchronization between the CPU's and GPU's virtual address spaces.

Key GPU SVM Components:

* Notifiers:
  Used for tracking memory intervals and notifying the GPU of changes,
  notifiers are sized based on a GPU SVM initialization parameter, with a
  recommendation of 512M or larger. They maintain a Red-Black tree and a list
  of ranges that fall within the notifier interval. Notifiers are tracked
  within a GPU SVM Red-Black tree and list and are dynamically inserted or
  removed as ranges within the interval are created or destroyed.

* Ranges:
  Represent memory ranges mapped in a DRM device and managed by GPU SVM. They
  are sized based on an array of chunk sizes, which is a GPU SVM
  initialization parameter, and the CPU address space. Upon GPU fault, the
  largest aligned chunk that fits within the faulting CPU address space is
  chosen for the range size. Ranges are expected to be dynamically allocated
  on GPU fault and removed on an MMU notifier UNMAP event. As mentioned above,
  ranges are tracked in a notifier's Red-Black tree.

* Operations:
  Define the interface for driver-specific GPU SVM operations such as range
  allocation, notifier allocation, and invalidations.

* Device Memory Allocations:
  Embedded structure containing enough information for GPU SVM to migrate to /
  from device memory.

* Device Memory Operations:
  Define the interface for driver-specific device memory operations: release
  memory, populate pfns, and copy to / from device memory.

This layer provides interfaces for allocating, mapping, migrating, and
releasing memory ranges between the CPU and GPU. It handles all core memory
management interactions (DMA mapping, HMM, and migration) and provides
driver-specific virtual functions (vfuncs). This infrastructure is sufficient
to build the expected driver components for an SVM implementation as detailed
below.

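As an illustration of how the initialization parameters mentioned above
(notifier size and the array of chunk sizes) and the driver operations fit
together, below is a hedged sketch of GPU SVM setup. The exact
drm_gpusvm_init() parameter list is assumed here from the component
descriptions rather than quoted from the header, and my_device / my_vm and
their members are hypothetical; treat this as a sketch, not the authoritative
interface.

.. code-block:: c

    /* Assumed .invalidate signature; see the notifier callback example below. */
    static void driver_invalidation(struct drm_gpusvm *gpusvm,
                                    struct drm_gpusvm_notifier *notifier,
                                    const struct mmu_notifier_range *mmu_range);

    static const struct drm_gpusvm_ops driver_gpusvm_ops = {
            .invalidate = driver_invalidation,
    };

    /* Range chunk sizes: the largest aligned chunk that fits is chosen on fault. */
    static const unsigned long driver_chunk_sizes[] = { SZ_2M, SZ_64K, SZ_4K };

    int driver_svm_init(struct my_device *dev, struct my_vm *vm)
    {
            /*
             * Notifier granularity of 512M as recommended above; the
             * drm_gpusvm_init() arguments are an assumption for illustration.
             */
            return drm_gpusvm_init(&vm->gpusvm, "driver SVM", &dev->drm,
                                   current->mm, dev->devmem_owner, 0, vm->size,
                                   SZ_512M, &driver_gpusvm_ops,
                                   driver_chunk_sizes,
                                   ARRAY_SIZE(driver_chunk_sizes));
    }
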
Expected Driver Components:

* GPU page fault handler:
  Used to create ranges and notifiers based on the fault address, optionally
  migrate the range to device memory, and create GPU bindings.

* Garbage collector:
  Used to unmap and destroy GPU bindings for ranges. Ranges are expected to be
  added to the garbage collector upon an MMU_NOTIFY_UNMAP event in the
  notifier callback.

* Notifier callback:
  Used to invalidate and DMA unmap GPU bindings for ranges.

GPU SVM handles locking for core MM interactions, i.e., it locks/unlocks the
mmap lock as needed.

GPU SVM introduces a global notifier lock, which safeguards the notifier's
range RB tree and list, as well as the range's DMA mappings and sequence
number. GPU SVM manages all necessary locking and unlocking operations, except
for the recheck of a range's pages being valid (drm_gpusvm_range_pages_valid)
when the driver is committing GPU bindings. This lock corresponds to the
``driver->update`` lock mentioned in Documentation/mm/hmm.rst. Future
revisions may transition from a GPU SVM global lock to a per-notifier lock if
finer-grained locking is deemed necessary.

In addition to the locking mentioned above, the driver should implement a lock
to safeguard core GPU SVM function calls that modify state, such as
drm_gpusvm_range_find_or_insert and drm_gpusvm_range_remove. This lock is
denoted as 'driver_svm_lock' in code examples. Finer-grained driver-side
locking should also be possible for concurrent GPU fault processing within a
single GPU SVM. The 'driver_svm_lock' can be set via
drm_gpusvm_driver_set_lock() to add annotations to GPU SVM.

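A minimal sketch of this driver-side lock is shown below. The assumption that
drm_gpusvm_driver_set_lock() only registers the lock for GPU SVM's locking
annotations is taken from the paragraph above, and the driver_svm structure is
hypothetical; the examples later in this document call driver_svm_lock() /
driver_svm_unlock() without arguments for brevity.

.. code-block:: c

    struct driver_svm {
            struct drm_gpusvm gpusvm;
            struct mutex lock;      /* the 'driver_svm_lock' in the examples */
    };

    static void driver_svm_lock_init(struct driver_svm *svm)
    {
            mutex_init(&svm->lock);
            /* Assumed usage: annotate GPU SVM calls as requiring this lock. */
            drm_gpusvm_driver_set_lock(&svm->gpusvm, &svm->lock);
    }

    static void driver_svm_lock(struct driver_svm *svm)
    {
            mutex_lock(&svm->lock);
    }

    static void driver_svm_unlock(struct driver_svm *svm)
    {
            mutex_unlock(&svm->lock);
    }
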
The migration support is quite simple, allowing migration between RAM and
device memory at the range granularity. For example, GPU SVM currently does
not support mixing RAM and device memory pages within a range. This means that
upon GPU fault, the entire range can be migrated to device memory, and upon
CPU fault, the entire range is migrated to RAM. Mixed RAM and device memory
storage within a range could be added in the future if required.

The reasoning for only supporting range granularity is as follows: it
simplifies the implementation, and range sizes are driver-defined and should
be relatively small.

Partial unmapping of ranges (e.g., 1M out of 2M is unmapped by the CPU,
resulting in an MMU_NOTIFY_UNMAP event) presents several challenges, with the
main one being that a subset of the range still has CPU and GPU mappings. If
the backing store for the range is in device memory, a subset of the backing
store has references. One option would be to split the range and device memory
backing store, but the implementation for this would be quite complicated.
Given that partial unmappings are rare and driver-defined range sizes are
relatively small, GPU SVM does not support splitting of ranges.

With no support for range splitting, upon partial unmapping of a range, the
driver is expected to invalidate and destroy the entire range. If the range
has device memory as its backing, the driver is also expected to migrate any
remaining pages back to RAM.

This section provides three examples of how to build the expected driver
components: the GPU page fault handler, the garbage collector, and the
notifier callback.

The generic code provided does not include logic for complex migration
policies, optimized invalidations, fine-grained driver locking, or other
potentially required driver locking (e.g., DMA-resv locks).

1) GPU page fault handler

.. code-block:: c

    int driver_bind_range(struct drm_gpusvm *gpusvm,
                          struct drm_gpusvm_range *range)
    {
            int err = 0;

            driver_alloc_and_setup_memory_for_bind(gpusvm, range);

            drm_gpusvm_notifier_lock(gpusvm);
            if (drm_gpusvm_range_pages_valid(range))
                    driver_commit_bind(gpusvm, range);
            else
                    err = -EAGAIN;
            drm_gpusvm_notifier_unlock(gpusvm);

            return err;
    }

    int driver_gpu_fault(struct drm_gpusvm *gpusvm, unsigned long fault_addr,
                         unsigned long gpuva_start, unsigned long gpuva_end)
    {
            struct drm_gpusvm_ctx ctx = {};
            struct drm_gpusvm_range *range;
            struct drm_gpusvm_devmem *devmem;
            struct mm_struct *mm = gpusvm->mm;
            int err;

            driver_svm_lock();
    retry:
            // Always process UNMAPs first so view of GPU SVM ranges is current
            driver_garbage_collector(gpusvm);

            range = drm_gpusvm_range_find_or_insert(gpusvm, fault_addr,
                                                    gpuva_start, gpuva_end,
                                                    &ctx);
            if (IS_ERR(range)) {
                    err = PTR_ERR(range);
                    goto unlock;
            }

            if (driver_migration_policy(range)) {
                    mmap_read_lock(mm);
                    devmem = driver_alloc_devmem();
                    err = drm_gpusvm_migrate_to_devmem(gpusvm, range,
                                                       devmem, &ctx);
                    mmap_read_unlock(mm);
                    if (err)        // CPU mappings may have changed
                            goto retry;
            }

            err = drm_gpusvm_range_get_pages(gpusvm, range, &ctx);
            if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) {
                    // CPU mappings changed
                    if (err == -EOPNOTSUPP)
                            drm_gpusvm_range_evict(gpusvm, range);
                    goto retry;
            } else if (err) {
                    goto unlock;
            }

            err = driver_bind_range(gpusvm, range);
            if (err == -EAGAIN)     // CPU mappings changed
                    goto retry;

    unlock:
            driver_svm_unlock();
            return err;
    }

2) Garbage Collector

.. code-block:: c

    void __driver_garbage_collector(struct drm_gpusvm *gpusvm,
                                    struct drm_gpusvm_range *range)
    {
            assert_driver_svm_locked(gpusvm);

            // Partial unmap, migrate any remaining device memory pages back to RAM
            if (range->flags.partial_unmap)
                    drm_gpusvm_range_evict(gpusvm, range);

            driver_unbind_range(range);
            drm_gpusvm_range_remove(gpusvm, range);
    }

    void driver_garbage_collector(struct drm_gpusvm *gpusvm)
    {
            assert_driver_svm_locked(gpusvm);

            for_each_range_in_garbage_collector(gpusvm, range)
                    __driver_garbage_collector(gpusvm, range);
    }

3) Notifier callback

.. code-block:: c

    void driver_invalidation(struct drm_gpusvm *gpusvm,
                             struct drm_gpusvm_notifier *notifier,
                             const struct mmu_notifier_range *mmu_range)
    {
            struct drm_gpusvm_ctx ctx = { .in_notifier = true, };
            struct drm_gpusvm_range *range = NULL;

            driver_invalidate_device_pages(gpusvm, mmu_range->start,
                                           mmu_range->end);

            drm_gpusvm_for_each_range(range, notifier, mmu_range->start,
                                      mmu_range->end) {
                    drm_gpusvm_range_unmap_pages(gpusvm, range, &ctx);

                    if (mmu_range->event != MMU_NOTIFY_UNMAP)
                            continue;

                    drm_gpusvm_range_set_unmapped(range, mmu_range);
                    driver_garbage_collector_add(gpusvm, range);
            }
    }

Possible future design features
===============================

* Concurrent GPU faults

  * CPU faults are concurrent, so it makes sense to support concurrent GPU
    faults.
  * Should be possible with fine-grained locking in the driver GPU fault
    handler.
  * No expected GPU SVM changes required.

* Ranges with mixed system and device pages

  * Can be added if required to drm_gpusvm_get_pages fairly easily.

* Multi-GPU support

  * Work in progress and patches expected after initially landing on GPU SVM.
  * Ideally can be done with little to no changes to GPU SVM.

* Drop ranges in favor of radix tree

  * May be desirable for faster notifiers.

* Compound device pages

  * Nvidia, AMD, and Intel have all agreed that expensive core MM functions in
    the migrate device layer are a performance bottleneck; compound device
    pages should help increase performance by reducing the number of these
    expensive calls.

* Higher order dma mapping for migration

  * 4k dma mapping adversely affects migration performance on Intel hardware;
    higher order (2M) dma mapping should help here.

* Build common userptr implementation on top of GPU SVM
* Driver side madvise implementation and migration policies
* Pull in pending dma-mapping API changes from Leon / Nvidia when these land