============================
Transparent Hugepage Support
============================

This document describes design principles for Transparent Hugepage (THP)
support and its interaction with other parts of the memory management
system.

Design principles
=================

- "graceful fallback": mm components which don't have transparent hugepage
  knowledge fall back to breaking a huge pmd mapping into a table of ptes
  and, if necessary, splitting a transparent hugepage. Therefore these
  components can continue working on the regular pages or regular pte
  mappings.
- if a hugepage allocation fails because of memory fragmentation, regular
  pages should be gracefully allocated instead and mixed in the same vma
  without any failure or significant delay and without userland noticing

- if some task quits and more hugepages become available (either immediately
  in the buddy or through the VM), guest physical memory backed by regular
  pages should be relocated to hugepages automatically (with khugepaged)

- it doesn't require memory reservation and in turn it uses hugepages whenever
  possible (the only possible reservation here is kernelcore= to avoid
  unmovable pages fragmenting all the memory, but such a tweak is not specific
  to transparent hugepage support and it's a generic feature that applies to
  all dynamic high order allocations in the kernel)

get_user_pages and pin_user_pages
=================================
get_user_pages and pin_user_pages, if run on a hugepage, will return the head
or tail pages as usual (exactly as they would do on hugetlbfs). Most GUP users
will only care about the actual physical address of the page and its temporary
pinning, to be released after the I/O is complete, so they won't ever notice
the fact that the page is huge. But if any driver is going to inspect the page
structure of a tail page (like checking page->mapping or other bits that are
relevant for the head page and not the tail page), it should be updated to
check the head page instead. Taking a reference on any head/tail page would
prevent the page from being split by anyone.

.. note::
   these aren't new constraints to the GUP API, and they match the same
   constraints that apply to hugetlbfs too, so any driver capable of handling
   GUP on hugetlbfs will also work fine on transparent hugepage backed
   mappings.
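As an illustration of the rule above, a driver that pins user pages and then
needs to look at per-page state could operate on the head page explicitly.
This is only a sketch of the pattern; the function name, its arguments, and
the handling inside the loop are hypothetical and not taken from any real
driver::

    /* Hypothetical sketch: pin user memory and inspect the head page. */
    static int demo_pin_and_check(unsigned long uaddr, unsigned long nr_pages,
                                  struct page **pages)
    {
            long pinned, i;

            /* May return tail pages of a THP, exactly like on hugetlbfs. */
            pinned = pin_user_pages(uaddr, nr_pages, FOLL_WRITE, pages);
            if (pinned <= 0)
                    return pinned ? pinned : -EFAULT;

            for (i = 0; i < pinned; i++) {
                    /* Fields like ->mapping are meaningful on the head page only. */
                    struct page *head = compound_head(pages[i]);

                    if (!head->mapping) {
                            /* ... driver-specific handling ... */
                    }
            }

            /* The pins taken above also prevent the THP from being split. */
            unpin_user_pages(pages, pinned);
            return 0;
    }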
Graceful fallback
=================

Code walking pagetables but unaware of huge pmds can simply call
split_huge_pmd(vma, pmd, addr) where the pmd is the one returned by
pmd_offset. It's trivial to make the code transparent hugepage aware by just
grepping for "pmd_offset" and adding split_huge_pmd where missing after
pmd_offset returns the pmd. Thanks to the graceful fallback design, with a
one-liner change, you can avoid writing hundreds if not thousands of lines of
complex code to make your code hugepage aware.

If you're not walking pagetables but you run into a physical hugepage that you
can't handle natively in your code, you can split it by calling
split_huge_page(page). This is what the Linux VM does before it tries to swap
out the hugepage, for example. split_huge_page() can fail if the page is
pinned and you must handle this correctly.

Example to make mremap.c transparent hugepage aware with a one-liner change::

    diff --git a/mm/mremap.c b/mm/mremap.c
    --- a/mm/mremap.c
    +++ b/mm/mremap.c
    @@ -41,6 +41,7 @@ static pmd_t *get_old_pmd(struct mm_stru
                    return NULL;

            pmd = pmd_offset(pud, addr);
    +       split_huge_pmd(vma, pmd, addr);
            if (pmd_none_or_clear_bad(pmd))
                    return NULL;

Locking in hugepage aware code
==============================

We want as much code as possible hugepage aware, as calling split_huge_page()
or split_huge_pmd() has a cost.

To make pagetable walks huge pmd aware, all you need to do is to call
pmd_trans_huge() on the pmd returned by pmd_offset. You must hold the
mmap_lock in read (or write) mode to be sure a huge pmd cannot be created from
under you by khugepaged (khugepaged collapse_huge_page takes the mmap_lock in
write mode in addition to the anon_vma lock). If pmd_trans_huge returns false,
you just fall back to the old code paths. If instead pmd_trans_huge returns
true, you have to take the page table lock (pmd_lock()) and re-run
pmd_trans_huge.
Taking the page table lock will prevent the huge pmd from being converted into
a regular pmd from under you (split_huge_pmd can run in parallel to the
pagetable walk). If the second pmd_trans_huge returns false, you should just
drop the page table lock and fall back to the old code as before. Otherwise,
you can proceed to process the huge pmd and the hugepage natively. Once
finished, you can drop the page table lock.
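The check-and-recheck protocol above can be summarized in a short sketch. The
walker function and its arguments are hypothetical; only pmd_offset(),
pmd_trans_huge() and pmd_lock() are the interfaces referred to by the text,
and the handling of the huge and regular cases is left to the caller::

    /* Hypothetical walker: caller holds mmap_lock in read (or write) mode. */
    static void demo_walk_pmd(struct vm_area_struct *vma, pud_t *pud,
                              unsigned long addr)
    {
            pmd_t *pmd = pmd_offset(pud, addr);
            spinlock_t *ptl;

            if (pmd_trans_huge(*pmd)) {
                    ptl = pmd_lock(vma->vm_mm, pmd);
                    /* Re-check: split_huge_pmd() may have run in parallel. */
                    if (pmd_trans_huge(*pmd)) {
                            /* ... process the huge pmd / hugepage natively ... */
                            spin_unlock(ptl);
                            return;
                    }
                    spin_unlock(ptl);
            }

            /* Fall back to the regular pte-based code paths. */
    }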
Refcounts and transparent huge pages
====================================

Refcounting on THP is mostly consistent with refcounting on other compound
pages:

- get_page()/put_page() and GUP operate on the folio->_refcount.

- ->_refcount in tail pages is always zero: get_page_unless_zero() never
  succeeds on tail pages.

- map/unmap of a PMD entry for the whole THP increment/decrement
  folio->_entire_mapcount and folio->_large_mapcount.

  We also maintain the two slots for tracking MM owners (MM ID and
  corresponding mapcount), and the current status ("maybe mapped shared" vs.
  "mapped exclusively").

  With CONFIG_PAGE_MAPCOUNT, we also increment/decrement
  folio->_nr_pages_mapped by ENTIRELY_MAPPED when _entire_mapcount goes
  from -1 to 0 or 0 to -1.

- map/unmap of individual pages with PTE entry increment/decrement
  folio->_large_mapcount.

  We also maintain the two slots for tracking MM owners (MM ID and
  corresponding mapcount), and the current status ("maybe mapped shared" vs.
  "mapped exclusively").

  With CONFIG_PAGE_MAPCOUNT, we also increment/decrement page->_mapcount and
  increment/decrement folio->_nr_pages_mapped when page->_mapcount goes from
  -1 to 0 or 0 to -1 as this counts the number of pages mapped by PTE.
split_huge_page internally has to distribute the refcounts in the head page to
the tail pages before clearing all PG_head/tail bits from the page structures.
It can be done easily for refcounts taken by page table entries, but we don't
have enough information on how to distribute any additional pins (i.e. from
get_user_pages). split_huge_page() fails any requests to split pinned huge
pages: it expects the page count to be equal to the sum of the mapcounts of
all sub-pages plus one (the split_huge_page caller must have a reference to
the head page).

split_huge_page uses migration entries to stabilize page->_refcount and
page->_mapcount of anonymous pages. File pages just get unmapped.

We are safe against physical memory scanners too: the only legitimate way a
scanner can get a reference to a page is get_page_unless_zero().

All tail pages have zero ->_refcount until atomic_add(). This prevents the
scanner from getting a reference to a tail page up to that point. After the
atomic_add() we don't care about the ->_refcount value. We already know how
many references should be uncharged from the head page.

For the head page get_page_unless_zero() will succeed and we don't mind. It's
clear where references should go after the split: they will stay on the head
page.

Note that split_huge_pmd() doesn't have any limitations on refcounting: a pmd
can be split at any point and never fails.
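Because split_huge_page() can fail (for example when extra pins are present),
callers must be prepared to fall back. The sketch below only illustrates that
calling convention; the surrounding function is hypothetical, and it assumes
the caller already holds a reference to the page and takes the page lock
before attempting the split::

    /* Hypothetical: try to split a THP we hold a reference to. */
    static int demo_try_split(struct page *page)
    {
            int ret;

            lock_page(page);
            /* Returns non-zero if the page could not be split (e.g. extra pins). */
            ret = split_huge_page(page);
            unlock_page(page);

            if (ret)
                    return ret;     /* caller keeps handling the huge page */

            /* From here on, 'page' is a regular order-0 page. */
            return 0;
    }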
Partial unmap and deferred_split_folio() (anon THP only)
=========================================================

Unmapping part of a THP (with munmap() or some other way) is not going to free
memory immediately. Instead, we detect that a subpage of the THP is not in use
in folio_remove_rmap_*() and queue the THP for splitting if memory pressure
comes. Splitting will free up the unused subpages.

Splitting the page right away is not an option due to the locking context in
the place where we can detect partial unmap. It also might be
counterproductive, since in many cases partial unmap happens during exit(2) if
a THP crosses a VMA boundary.

The function deferred_split_folio() is used to queue a folio for splitting.
The splitting itself will happen when we get memory pressure via the shrinker
interface.

With CONFIG_PAGE_MAPCOUNT, we reliably detect partial mappings based on
folio->_nr_pages_mapped.

With CONFIG_NO_PAGE_MAPCOUNT, we detect partial mappings based on the average
per-page mapcount in a THP: if the average is < 1, an anon THP is certainly
partially mapped. As long as only a single process maps a THP, this detection
is reliable.
With long-running child processes, there can be scenarios where partial
mappings can currently not be detected, and these might need asynchronous
detection during memory reclaim in the future.
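As a rough illustration of the average-mapcount heuristic, a check along the
following lines flags an anon folio as partially mapped. This is only a sketch
of the idea, not the in-kernel detection logic, and folio_large_mapcount() and
folio_nr_pages() are used here merely as plausible accessors for the counters
mentioned above::

    /*
     * Sketch of the heuristic only: if the total mapcount is lower than the
     * number of pages, the average per-page mapcount is < 1, so some
     * subpages must be unmapped.
     */
    static bool demo_anon_folio_partially_mapped(struct folio *folio)
    {
            return folio_large_mapcount(folio) < folio_nr_pages(folio);
    }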