€•&ˆ      Œsphinx.addnodes”Œdocument”“”)”}”(Œ	rawsource”Œ ”Œchildren”]”(Œtranslations”ŒLanguagesNode”“”)”}”(hhh]”(h Œpending_xref”“”)”}”(hhh]”Œdocutils.nodes”ŒText”“”ŒChinese (Simplified)”…””}”Œparent”hsbaŒ
attributes”}”(Œids”]”Œclasses”]”Œnames”]”Œdupnames”]”Œbackrefs”]”Œ	refdomain”Œstd”Œreftype”Œdoc”Œ	reftarget”Œ+/translations/zh_CN/admin-guide/mm/concepts”Œmodname”NŒ	classname”NŒrefexplicit”ˆuŒtagname”hhhubh)”}”(hhh]”hŒChinese (Traditional)”…””}”hh2sbah}”(h]”h ]”h"]”h$]”h&]”Œ	refdomain”h)Œreftype”h+Œ	reftarget”Œ+/translations/zh_TW/admin-guide/mm/concepts”Œmodname”NŒ	classname”NŒrefexplicit”ˆuh1hhhubh)”}”(hhh]”hŒItalian”…””}”hhFsbah}”(h]”h ]”h"]”h$]”h&]”Œ	refdomain”h)Œreftype”h+Œ	reftarget”Œ+/translations/it_IT/admin-guide/mm/concepts”Œmodname”NŒ	classname”NŒrefexplicit”ˆuh1hhhubh)”}”(hhh]”hŒJapanese”…””}”hhZsbah}”(h]”h ]”h"]”h$]”h&]”Œ	refdomain”h)Œreftype”h+Œ	reftarget”Œ+/translations/ja_JP/admin-guide/mm/concepts”Œmodname”NŒ	classname”NŒrefexplicit”ˆuh1hhhubh)”}”(hhh]”hŒKorean”…””}”hhnsbah}”(h]”h ]”h"]”h$]”h&]”Œ	refdomain”h)Œreftype”h+Œ	reftarget”Œ+/translations/ko_KR/admin-guide/mm/concepts”Œmodname”NŒ	classname”NŒrefexplicit”ˆuh1hhhubh)”}”(hhh]”hŒSpanish”…””}”hh‚sbah}”(h]”h ]”h"]”h$]”h&]”Œ	refdomain”h)Œreftype”h+Œ	reftarget”Œ+/translations/sp_SP/admin-guide/mm/concepts”Œmodname”NŒ	classname”NŒrefexplicit”ˆuh1hhhubeh}”(h]”h ]”h"]”h$]”h&]”Œcurrent_language”ŒEnglish”uh1h
hhŒ	_document”hŒsource”NŒline”NubhŒsection”“”)”}”(hhh]”(hŒtitle”“”)”}”(hŒConcepts overview”h]”hŒConcepts overview”…””}”(hh¨hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h¦hh£hžhhŸŒE/var/lib/git/docbuild/linux/Documentation/admin-guide/mm/concepts.rst”h KubhŒ	paragraph”“”)”}”(hX  The memory management in Linux is a complex system that evolved over the
years and included more and more functionality to support a variety of
systems from MMU-less microcontrollers to supercomputers. The memory
management for systems without an MMU is called ``nommu`` and it
definitely deserves a dedicated document, which hopefully will be
eventually written. Yet, although some of the concepts are the same,
here we assume that an MMU is available and a CPU can translate a virtual
address to a physical address.”h]”(hX  The memory management in Linux is a complex system that evolved over the
years and included more and more functionality to support a variety of
systems from MMU-less microcontrollers to supercomputers. The memory
management for systems without an MMU is called ”…””}”(hh¹hžhhŸNh NubhŒliteral”“”)”}”(hŒ	``nommu``”h]”hŒnommu”…””}”(hhÃhžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÁhh¹ubhŒ÷ and it
definitely deserves a dedicated document, which hopefully will be
eventually written. Yet, although some of the concepts are the same,
here we assume that an MMU is available and a CPU can translate a virtual
address to a physical address.”…””}”(hh¹hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1h·hŸh¶h Khh£hžhubhŒtopic”“”)”}”(hhh]”hŒbullet_list”“”)”}”(hhh]”(hŒ	list_item”“”)”}”(hhh]”h¸)”}”(hhh]”hŒ	reference”“”)”}”(hhh]”hŒVirtual Memory Primer”…””}”(hhïhžhhŸNh Nubah}”(h]”Œid1”ah ]”h"]”h$]”h&]”Œrefid”Œvirtual-memory-primer”uh1híhhêubah}”(h]”h ]”h"]”h$]”h&]”uh1h·hhçubah}”(h]”h ]”h"]”h$]”h&]”uh1håhhâubhæ)”}”(hhh]”h¸)”}”(hhh]”hî)”}”(hhh]”hŒ
Huge Pages”…””}”(hj  hžhhŸNh Nubah}”(h]”Œid2”ah ]”h"]”h$]”h&]”Œrefid”Œ
huge-pages”uh1híhj  ubah}”(h]”h ]”h"]”h$]”h&]”uh1h·hj  ubah}”(h]”h ]”h"]”h$]”h&]”uh1håhhâubhæ)”}”(hhh]”h¸)”}”(hhh]”hî)”}”(hhh]”hŒZones”…””}”(hj3  hžhhŸNh Nubah}”(h]”Œid3”ah ]”h"]”h$]”h&]”Œrefid”Œzones”uh1híhj0  ubah}”(h]”h ]”h"]”h$]”h&]”uh1h·hj-  ubah}”(h]”h ]”h"]”h$]”h&]”uh1håhhâubhæ)”}”(hhh]”h¸)”}”(hhh]”hî)”}”(hhh]”hŒNodes”…””}”(hjU  hžhhŸNh Nubah}”(h]”Œid4”ah ]”h"]”h$]”h&]”Œrefid”Œnodes”uh1híhjR  ubah}”(h]”h ]”h"]”h$]”h&]”uh1h·hjO  ubah}”(h]”h ]”h"]”h$]”h&]”uh1håhhâubhæ)”}”(hhh]”h¸)”}”(hhh]”hî)”}”(hhh]”hŒ
Page cache”…””}”(hjw  hžhhŸNh Nubah}”(h]”Œid5”ah ]”h"]”h$]”h&]”Œrefid”Œ
page-cache”uh1híhjt  ubah}”(h]”h ]”h"]”h$]”h&]”uh1h·hjq  ubah}”(h]”h ]”h"]”h$]”h&]”uh1håhhâubhæ)”}”(hhh]”h¸)”}”(hhh]”hî)”}”(hhh]”hŒAnonymous Memory”…””}”(hj™  hžhhŸNh Nubah}”(h]”Œid6”ah ]”h"]”h$]”h&]”Œrefid”Œanonymous-memory”uh1híhj–  ubah}”(h]”h ]”h"]”h$]”h&]”uh1h·hj“  ubah}”(h]”h ]”h"]”h$]”h&]”uh1håhhâubhæ)”}”(hhh]”h¸)”}”(hhh]”hî)”}”(hhh]”hŒReclaim”…””}”(hj»  hžhhŸNh Nubah}”(h]”Œid7”ah ]”h"]”h$]”h&]”Œrefid”Œreclaim”uh1híhj¸  ubah}”(h]”h ]”h"]”h$]”h&]”uh1h·hjµ  ubah}”(h]”h ]”h"]”h$]”h&]”uh1håhhâubhæ)”}”(hhh]”h¸)”}”(hhh]”hî)”}”(hhh]”hŒ
Compaction”…””}”(hjÝ  hžhhŸNh Nubah}”(h]”Œid8”ah ]”h"]”h$]”h&]”Œrefid”Œ
compaction”uh1híhjÚ  ubah}”(h]”h ]”h"]”h$]”h&]”uh1h·hj×  ubah}”(h]”h ]”h"]”h$]”h&]”uh1håhhâubhæ)”}”(hhh]”h¸)”}”(hhh]”hî)”}”(hhh]”hŒ
OOM killer”…””}”(hjÿ  hžhhŸNh Nubah}”(h]”Œid9”ah ]”h"]”h$]”h&]”Œrefid”Œ
oom-killer”uh1híhjü  ubah}”(h]”h ]”h"]”h$]”h&]”uh1h·hjù  ubah}”(h]”h ]”h"]”h$]”h&]”uh1håhhâubeh}”(h]”h ]”h"]”h$]”h&]”uh1hàhhÝhžhhŸNh Nubah}”(h]”Œcontents”ah ]”(Œcontents”Œlocal”eh"]”Œcontents”ah$]”h&]”uh1hÛhŸh¶h Khh£hžhubh¢)”}”(hhh]”(h§)”}”(hŒVirtual Memory Primer”h]”hŒVirtual Memory Primer”…””}”(hj.  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”Œrefid”høuh1h¦hj+  hžhhŸh¶h Kubh¸)”}”(hXÃ  The physical memory in a computer system is a limited resource and
even for systems that support memory hotplug there is a hard limit on
the amount of memory that can be installed. The physical memory is not
necessarily contiguous; it might be accessible as a set of distinct
address ranges. Besides, different CPU architectures, and even
different implementations of the same architecture have different views
of how these address ranges are defined.”h]”hXÃ  The physical memory in a computer system is a limited resource and
even for systems that support memory hotplug there is a hard limit on
the amount of memory that can be installed. The physical memory is not
necessarily contiguous; it might be accessible as a set of distinct
address ranges. Besides, different CPU architectures, and even
different implementations of the same architecture have different views
of how these address ranges are defined.”…””}”(hj=  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h·hŸh¶h Khj+  hžhubh¸)”}”(hŒŠAll this makes dealing directly with physical memory quite complex and
to avoid this complexity a concept of virtual memory was developed.”h]”hŒŠAll this makes dealing directly with physical memory quite complex and
to avoid this complexity a concept of virtual memory was developed.”…””}”(hjK  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h·hŸh¶h Khj+  hžhubh¸)”}”(hX  The virtual memory abstracts the details of physical memory from the
application software, allows to keep only needed information in the
physical memory (demand paging) and provides a mechanism for the
protection and controlled sharing of data between processes.”h]”hX  The virtual memory abstracts the details of physical memory from the
application software, allows to keep only needed information in the
physical memory (demand paging) and provides a mechanism for the
protection and controlled sharing of data between processes.”…””}”(hjY  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h·hŸh¶h Khj+  hžhubh¸)”}”(hX'  With virtual memory, each and every memory access uses a virtual
address. When the CPU decodes an instruction that reads (or
writes) from (or to) the system memory, it translates the `virtual`
address encoded in that instruction to a `physical` address that the
memory controller can understand.”h]”(hŒ·With virtual memory, each and every memory access uses a virtual
address. When the CPU decodes an instruction that reads (or
writes) from (or to) the system memory, it translates the ”…””}”(hjg  hžhhŸNh NubhŒtitle_reference”“”)”}”(hŒ	`virtual`”h]”hŒvirtual”…””}”(hjq  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jo  hjg  ubhŒ*
address encoded in that instruction to a ”…””}”(hjg  hžhhŸNh Nubjp  )”}”(hŒ
`physical`”h]”hŒphysical”…””}”(hjƒ  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jo  hjg  ubhŒ3 address that the
memory controller can understand.”…””}”(hjg  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1h·hŸh¶h K#hj+  hžhubh¸)”}”(hX0  The physical system memory is divided into page frames, or pages. The
size of each page is architecture specific. Some architectures allow
selection of the page size from several supported values; this
selection is performed at the kernel build time by setting an
appropriate kernel configuration option.”h]”hX0  The physical system memory is divided into page frames, or pages. The
size of each page is architecture specific. Some architectures allow
selection of the page size from several supported values; this
selection is performed at the kernel build time by setting an
appropriate kernel configuration option.”…””}”(hj›  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h·hŸh¶h K)hj+  hžhubh¸)”}”(hŒþEach physical memory page can be mapped as one or more virtual
pages. These mappings are described by page tables that allow
translation from a virtual address used by programs to the physical
memory address. The page tables are organized hierarchically.”h]”hŒþEach physical memory page can be mapped as one or more virtual
pages. These mappings are described by page tables that allow
translation from a virtual address used by programs to the physical
memory address. The page tables are organized hierarchically.”…””}”(hj©  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h·hŸh¶h K/hj+  hžhubh¸)”}”(hX¼  The tables at the lowest level of the hierarchy contain physical
addresses of actual pages used by the software. The tables at higher
levels contain physical addresses of the pages belonging to the lower
levels. The pointer to the top level page table resides in a
register. When the CPU performs the address translation, it uses this
register to access the top level page table. The high bits of the
virtual address are used to index an entry in the top level page
table. That entry is then used to access the next level in the
hierarchy with the next bits of the virtual address as the index to
that level page table. The lowest bits in the virtual address define
the offset inside the actual page.”h]”hX¼  The tables at the lowest level of the hierarchy contain physical
addresses of actual pages used by the software. The tables at higher
levels contain physical addresses of the pages belonging to the lower
levels. The pointer to the top level page table resides in a
register. When the CPU performs the address translation, it uses this
register to access the top level page table. The high bits of the
virtual address are used to index an entry in the top level page
table. That entry is then used to access the next level in the
hierarchy with the next bits of the virtual address as the index to
that level page table. The lowest bits in the virtual address define
the offset inside the actual page.”…””}”(hj·  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h·hŸh¶h K4hj+  hžhubeh}”(h]”hþah ]”h"]”Œvirtual memory primer”ah$]”h&]”uh1h¡hh£hžhhŸh¶h Kubh¢)”}”(hhh]”(h§)”}”(hŒ
Huge Pages”h]”hŒ
Huge Pages”…””}”(hjÏ  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”j<  j  uh1h¦hjÌ  hžhhŸh¶h KAubh¸)”}”(hX›  The address translation requires several memory accesses and memory
accesses are slow relatively to CPU speed. To avoid spending precious
processor cycles on the address translation, CPUs maintain a cache of
such translations called Translation Lookaside Buffer (or
TLB). Usually TLB is pretty scarce resource and applications with
large memory working set will experience performance hit because of
TLB misses.”h]”hX›  The address translation requires several memory accesses and memory
accesses are slow relatively to CPU speed. To avoid spending precious
processor cycles on the address translation, CPUs maintain a cache of
such translations called Translation Lookaside Buffer (or
TLB). Usually TLB is pretty scarce resource and applications with
large memory working set will experience performance hit because of
TLB misses.”…””}”(hjÝ  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h·hŸh¶h KChjÌ  hžhubh¸)”}”(hX”  Many modern CPU architectures allow mapping of the memory pages
directly by the higher levels in the page table. For instance, on x86,
it is possible to map 2M and even 1G pages using entries in the second
and the third level page tables. In Linux such pages are called
`huge`. Usage of huge pages significantly reduces pressure on TLB,
improves TLB hit-rate and thus improves overall system performance.”h]”(hX  Many modern CPU architectures allow mapping of the memory pages
directly by the higher levels in the page table. For instance, on x86,
it is possible to map 2M and even 1G pages using entries in the second
and the third level page tables. In Linux such pages are called
”…””}”(hjë  hžhhŸNh Nubjp  )”}”(hŒ`huge`”h]”hŒhuge”…””}”(hjó  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jo  hjë  ubhŒ€. Usage of huge pages significantly reduces pressure on TLB,
improves TLB hit-rate and thus improves overall system performance.”…””}”(hjë  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1h·hŸh¶h KKhjÌ  hžhubh¸)”}”(hX…  There are two mechanisms in Linux that enable mapping of the physical
memory with the huge pages. The first one is `HugeTLB filesystem`, or
hugetlbfs. It is a pseudo filesystem that uses RAM as its backing
store. For the files created in this filesystem the data resides in
the memory and mapped using huge pages. The hugetlbfs is described at
Documentation/admin-guide/mm/hugetlbpage.rst.”h]”(hŒsThere are two mechanisms in Linux that enable mapping of the physical
memory with the huge pages. The first one is ”…””}”(hj  hžhhŸNh Nubjp  )”}”(hŒ`HugeTLB filesystem`”h]”hŒHugeTLB filesystem”…””}”(hj  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jo  hj  ubhŒþ, or
hugetlbfs. It is a pseudo filesystem that uses RAM as its backing
store. For the files created in this filesystem the data resides in
the memory and mapped using huge pages. The hugetlbfs is described at
Documentation/admin-guide/mm/hugetlbpage.rst.”…””}”(hj  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1h·hŸh¶h KRhjÌ  hžhubh¸)”}”(hXŸ  Another, more recent, mechanism that enables use of the huge pages is
called `Transparent HugePages`, or THP. Unlike the hugetlbfs that
requires users and/or system administrators to configure what parts of
the system memory should and can be mapped by the huge pages, THP
manages such mappings transparently to the user and hence the
name. See Documentation/admin-guide/mm/transhuge.rst for more details
about THP.”h]”(hŒMAnother, more recent, mechanism that enables use of the huge pages is
called ”…””}”(hj+  hžhhŸNh Nubjp  )”}”(hŒ`Transparent HugePages`”h]”hŒTransparent HugePages”…””}”(hj3  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jo  hj+  ubhX;  , or THP. Unlike the hugetlbfs that
requires users and/or system administrators to configure what parts of
the system memory should and can be mapped by the huge pages, THP
manages such mappings transparently to the user and hence the
name. See Documentation/admin-guide/mm/transhuge.rst for more details
about THP.”…””}”(hj+  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1h·hŸh¶h KYhjÌ  hžhubeh}”(h]”j   ah ]”h"]”Œ
huge pages”ah$]”h&]”uh1h¡hh£hžhhŸh¶h KAubh¢)”}”(hhh]”(h§)”}”(hŒZones”h]”hŒZones”…””}”(hjU  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”j<  j<  uh1h¦hjR  hžhhŸh¶h Kbubh¸)”}”(hXy  Often hardware poses restrictions on how different physical memory
ranges can be accessed. In some cases, devices cannot perform DMA to
all the addressable memory. In other cases, the size of the physical
memory exceeds the maximal addressable size of virtual memory and
special actions are required to access portions of the memory. Linux
groups memory pages into `zones` according to their possible
usage. For example, ZONE_DMA will contain memory that can be used by
devices for DMA, ZONE_HIGHMEM will contain memory that is not
permanently mapped into kernel's address space and ZONE_NORMAL will
contain normally addressed pages.”h]”(hXm  Often hardware poses restrictions on how different physical memory
ranges can be accessed. In some cases, devices cannot perform DMA to
all the addressable memory. In other cases, the size of the physical
memory exceeds the maximal addressable size of virtual memory and
special actions are required to access portions of the memory. Linux
groups memory pages into ”…””}”(hjc  hžhhŸNh Nubjp  )”}”(hŒ`zones`”h]”hŒzones”…””}”(hjk  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jo  hjc  ubhX   according to their possible
usage. For example, ZONE_DMA will contain memory that can be used by
devices for DMA, ZONE_HIGHMEM will contain memory that is not
permanently mapped into kernelâ€™s address space and ZONE_NORMAL will
contain normally addressed pages.”…””}”(hjc  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1h·hŸh¶h KdhjR  hžhubh¸)”}”(hŒ¦The actual layout of the memory zones is hardware dependent as not all
architectures define all zones, and requirements for DMA are different
for different platforms.”h]”hŒ¦The actual layout of the memory zones is hardware dependent as not all
architectures define all zones, and requirements for DMA are different
for different platforms.”…””}”(hjƒ  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h·hŸh¶h KohjR  hžhubeh}”(h]”jB  ah ]”h"]”Œzones”ah$]”h&]”uh1h¡hh£hžhhŸh¶h Kbubh¢)”}”(hhh]”(h§)”}”(hŒNodes”h]”hŒNodes”…””}”(hj›  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”j<  j^  uh1h¦hj˜  hžhhŸh¶h Ktubh¸)”}”(hX$  Many multi-processor machines are NUMA - Non-Uniform Memory Access -
systems. In such systems the memory is arranged into banks that have
different access latency depending on the "distance" from the
processor. Each bank is referred to as a `node` and for each node Linux
constructs an independent memory management subsystem. A node has its
own set of zones, lists of free and used pages and various statistics
counters. You can find more details about NUMA in
Documentation/mm/numa.rst` and in
Documentation/admin-guide/mm/numa_memory_policy.rst.”h]”(hŒõMany multi-processor machines are NUMA - Non-Uniform Memory Access -
systems. In such systems the memory is arranged into banks that have
different access latency depending on the â€œdistanceâ€ from the
processor. Each bank is referred to as a ”…””}”(hj©  hžhhŸNh Nubjp  )”}”(hŒ`node`”h]”hŒnode”…””}”(hj±  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jo  hj©  ubhX-   and for each node Linux
constructs an independent memory management subsystem. A node has its
own set of zones, lists of free and used pages and various statistics
counters. You can find more details about NUMA in
Documentation/mm/numa.rst` and in
Documentation/admin-guide/mm/numa_memory_policy.rst.”…””}”(hj©  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1h·hŸh¶h Kvhj˜  hžhubeh}”(h]”jd  ah ]”h"]”Œnodes”ah$]”h&]”uh1h¡hh£hžhhŸh¶h Ktubh¢)”}”(hhh]”(h§)”}”(hŒ
Page cache”h]”hŒ
Page cache”…””}”(hjÓ  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”j<  j€  uh1h¦hjÐ  hžhhŸh¶h Kubh¸)”}”(hX  The physical memory is volatile and the common case for getting data
into the memory is to read it from files. Whenever a file is read, the
data is put into the `page cache` to avoid expensive disk access on
the subsequent reads. Similarly, when one writes to a file, the data
is placed in the page cache and eventually gets into the backing
storage device. The written pages are marked as `dirty` and when Linux
decides to reuse them for other purposes, it makes sure to synchronize
the file contents on the device with the updated data.”h]”(hŒ¡The physical memory is volatile and the common case for getting data
into the memory is to read it from files. Whenever a file is read, the
data is put into the ”…””}”(hjá  hžhhŸNh Nubjp  )”}”(hŒ`page cache`”h]”hŒ
page cache”…””}”(hjé  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jo  hjá  ubhŒÙ to avoid expensive disk access on
the subsequent reads. Similarly, when one writes to a file, the data
is placed in the page cache and eventually gets into the backing
storage device. The written pages are marked as ”…””}”(hjá  hžhhŸNh Nubjp  )”}”(hŒ`dirty`”h]”hŒdirty”…””}”(hjû  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jo  hjá  ubhŒ and when Linux
decides to reuse them for other purposes, it makes sure to synchronize
the file contents on the device with the updated data.”…””}”(hjá  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1h·hŸh¶h KƒhjÐ  hžhubeh}”(h]”j†  ah ]”h"]”Œ
page cache”ah$]”h&]”uh1h¡hh£hžhhŸh¶h Kubh¢)”}”(hhh]”(h§)”}”(hŒAnonymous Memory”h]”hŒAnonymous Memory”…””}”(hj  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”j<  j¢  uh1h¦hj  hžhhŸh¶h Kubh¸)”}”(hXŠ  The `anonymous memory` or `anonymous mappings` represent memory that
is not backed by a filesystem. Such mappings are implicitly created
for program's stack and heap or by explicit calls to mmap(2) system
call. Usually, the anonymous mappings only define virtual memory areas
that the program is allowed to access. The read accesses will result
in creation of a page table entry that references a special physical
page filled with zeroes. When the program performs a write, a regular
physical page will be allocated to hold the written data. The page
will be marked dirty and if the kernel decides to repurpose it,
the dirty page will be swapped out.”h]”(hŒThe ”…””}”(hj+  hžhhŸNh Nubjp  )”}”(hŒ`anonymous memory`”h]”hŒanonymous memory”…””}”(hj3  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jo  hj+  ubhŒ or ”…””}”(hj+  hžhhŸNh Nubjp  )”}”(hŒ`anonymous mappings`”h]”hŒanonymous mappings”…””}”(hjE  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jo  hj+  ubhX^   represent memory that
is not backed by a filesystem. Such mappings are implicitly created
for programâ€™s stack and heap or by explicit calls to mmap(2) system
call. Usually, the anonymous mappings only define virtual memory areas
that the program is allowed to access. The read accesses will result
in creation of a page table entry that references a special physical
page filled with zeroes. When the program performs a write, a regular
physical page will be allocated to hold the written data. The page
will be marked dirty and if the kernel decides to repurpose it,
the dirty page will be swapped out.”…””}”(hj+  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1h·hŸh¶h Khj  hžhubeh}”(h]”j¨  ah ]”h"]”Œanonymous memory”ah$]”h&]”uh1h¡hh£hžhhŸh¶h Kubh¢)”}”(hhh]”(h§)”}”(hŒReclaim”h]”hŒReclaim”…””}”(hjg  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”j<  jÄ  uh1h¦hjd  hžhhŸh¶h K›ubh¸)”}”(hŒÿThroughout the system lifetime, a physical page can be used for storing
different types of data. It can be kernel internal data structures,
DMA'able buffers for device drivers use, data read from a filesystem,
memory allocated by user space processes etc.”h]”hX  Throughout the system lifetime, a physical page can be used for storing
different types of data. It can be kernel internal data structures,
DMAâ€™able buffers for device drivers use, data read from a filesystem,
memory allocated by user space processes etc.”…””}”(hju  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h·hŸh¶h Khjd  hžhubh¸)”}”(hX…  Depending on the page usage it is treated differently by the Linux
memory management. The pages that can be freed at any time, either
because they cache the data available elsewhere, for instance, on a
hard disk, or because they can be swapped out, again, to the hard
disk, are called `reclaimable`. The most notable categories of the
reclaimable pages are page cache and anonymous memory.”h]”(hX  Depending on the page usage it is treated differently by the Linux
memory management. The pages that can be freed at any time, either
because they cache the data available elsewhere, for instance, on a
hard disk, or because they can be swapped out, again, to the hard
disk, are called ”…””}”(hjƒ  hžhhŸNh Nubjp  )”}”(hŒ`reclaimable`”h]”hŒreclaimable”…””}”(hj‹  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jo  hjƒ  ubhŒ[. The most notable categories of the
reclaimable pages are page cache and anonymous memory.”…””}”(hjƒ  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1h·hŸh¶h K¢hjd  hžhubh¸)”}”(hXæ  In most cases, the pages holding internal kernel data and used as DMA
buffers cannot be repurposed, and they remain pinned until freed by
their user. Such pages are called `unreclaimable`. However, in certain
circumstances, even pages occupied with kernel data structures can be
reclaimed. For instance, in-memory caches of filesystem metadata can
be re-read from the storage device and therefore it is possible to
discard them from the main memory when system is under memory
pressure.”h]”(hŒ¬In most cases, the pages holding internal kernel data and used as DMA
buffers cannot be repurposed, and they remain pinned until freed by
their user. Such pages are called ”…””}”(hj£  hžhhŸNh Nubjp  )”}”(hŒ`unreclaimable`”h]”hŒunreclaimable”…””}”(hj«  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jo  hj£  ubhX+  . However, in certain
circumstances, even pages occupied with kernel data structures can be
reclaimed. For instance, in-memory caches of filesystem metadata can
be re-read from the storage device and therefore it is possible to
discard them from the main memory when system is under memory
pressure.”…””}”(hj£  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1h·hŸh¶h K©hjd  hžhubh¸)”}”(hX²  The process of freeing the reclaimable physical memory pages and
repurposing them is called (surprise!) `reclaim`. Linux can reclaim
pages either asynchronously or synchronously, depending on the state
of the system. When the system is not loaded, most of the memory is free
and allocation requests will be satisfied immediately from the free
pages supply. As the load increases, the amount of the free pages goes
down and when it reaches a certain threshold (low watermark), an
allocation request will awaken the ``kswapd`` daemon. It will
asynchronously scan memory pages and either just free them if the data
they contain is available elsewhere, or evict to the backing storage
device (remember those dirty pages?). As memory usage increases even
more and reaches another threshold - min watermark - an allocation
will trigger `direct reclaim`. In this case allocation is stalled
until enough memory pages are reclaimed to satisfy the request.”h]”(hŒhThe process of freeing the reclaimable physical memory pages and
repurposing them is called (surprise!) ”…””}”(hjÃ  hžhhŸNh Nubjp  )”}”(hŒ	`reclaim`”h]”hŒreclaim”…””}”(hjË  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jo  hjÃ  ubhX‘  . Linux can reclaim
pages either asynchronously or synchronously, depending on the state
of the system. When the system is not loaded, most of the memory is free
and allocation requests will be satisfied immediately from the free
pages supply. As the load increases, the amount of the free pages goes
down and when it reaches a certain threshold (low watermark), an
allocation request will awaken the ”…””}”(hjÃ  hžhhŸNh NubhÂ)”}”(hŒ
``kswapd``”h]”hŒkswapd”…””}”(hjÝ  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÁhjÃ  ubhX2   daemon. It will
asynchronously scan memory pages and either just free them if the data
they contain is available elsewhere, or evict to the backing storage
device (remember those dirty pages?). As memory usage increases even
more and reaches another threshold - min watermark - an allocation
will trigger ”…””}”(hjÃ  hžhhŸNh Nubjp  )”}”(hŒ`direct reclaim`”h]”hŒdirect reclaim”…””}”(hjï  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jo  hjÃ  ubhŒd. In this case allocation is stalled
until enough memory pages are reclaimed to satisfy the request.”…””}”(hjÃ  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1h·hŸh¶h K²hjd  hžhubeh}”(h]”jÊ  ah ]”h"]”Œreclaim”ah$]”h&]”uh1h¡hh£hžhhŸh¶h K›ubh¢)”}”(hhh]”(h§)”}”(hŒ
Compaction”h]”hŒ
Compaction”…””}”(hj  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”j<  jæ  uh1h¦hj  hžhhŸh¶h KÂubh¸)”}”(hXß  As the system runs, tasks allocate and free the memory and it becomes
fragmented. Although with virtual memory it is possible to present
scattered physical pages as virtually contiguous range, sometimes it is
necessary to allocate large physically contiguous memory areas. Such
need may arise, for instance, when a device driver requires a large
buffer for DMA, or when THP allocates a huge page. Memory `compaction`
addresses the fragmentation issue. This mechanism moves occupied pages
from the lower part of a memory zone to free pages in the upper part
of the zone. When a compaction scan is finished free pages are grouped
together at the beginning of the zone and allocations of large
physically contiguous areas become possible.”h]”(hX”  As the system runs, tasks allocate and free the memory and it becomes
fragmented. Although with virtual memory it is possible to present
scattered physical pages as virtually contiguous range, sometimes it is
necessary to allocate large physically contiguous memory areas. Such
need may arise, for instance, when a device driver requires a large
buffer for DMA, or when THP allocates a huge page. Memory ”…””}”(hj  hžhhŸNh Nubjp  )”}”(hŒ`compaction`”h]”hŒ
compaction”…””}”(hj'  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jo  hj  ubhX?  
addresses the fragmentation issue. This mechanism moves occupied pages
from the lower part of a memory zone to free pages in the upper part
of the zone. When a compaction scan is finished free pages are grouped
together at the beginning of the zone and allocations of large
physically contiguous areas become possible.”…””}”(hj  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1h·hŸh¶h KÄhj  hžhubh¸)”}”(hŒLike reclaim, the compaction may happen asynchronously in the ``kcompactd``
daemon or synchronously as a result of a memory allocation request.”h]”(hŒ>Like reclaim, the compaction may happen asynchronously in the ”…””}”(hj?  hžhhŸNh NubhÂ)”}”(hŒ``kcompactd``”h]”hŒ	kcompactd”…””}”(hjG  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÁhj?  ubhŒD
daemon or synchronously as a result of a memory allocation request.”…””}”(hj?  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1h·hŸh¶h KÐhj  hžhubeh}”(h]”jì  ah ]”h"]”Œ
compaction”ah$]”h&]”uh1h¡hh£hžhhŸh¶h KÂubh¢)”}”(hhh]”(h§)”}”(hŒ
OOM killer”h]”hŒ
OOM killer”…””}”(hji  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”j<  j  uh1h¦hjf  hžhhŸh¶h KÔubh¸)”}”(hŒÕIt is possible that on a loaded machine memory will be exhausted and the
kernel will be unable to reclaim enough memory to continue to operate. In
order to save the rest of the system, it invokes the `OOM killer`.”h]”(hŒÈIt is possible that on a loaded machine memory will be exhausted and the
kernel will be unable to reclaim enough memory to continue to operate. In
order to save the rest of the system, it invokes the ”…””}”(hjw  hžhhŸNh Nubjp  )”}”(hŒ`OOM killer`”h]”hŒ
OOM killer”…””}”(hj  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jo  hjw  ubhŒ.”…””}”(hjw  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1h·hŸh¶h KÖhjf  hžhubh¸)”}”(hŒËThe `OOM killer` selects a task to sacrifice for the sake of the overall
system health. The selected task is killed in a hope that after it exits
enough memory will be freed to continue normal operation.”h]”(hŒThe ”…””}”(hj—  hžhhŸNh Nubjp  )”}”(hŒ`OOM killer`”h]”hŒ
OOM killer”…””}”(hjŸ  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jo  hj—  ubhŒ» selects a task to sacrifice for the sake of the overall
system health. The selected task is killed in a hope that after it exits
enough memory will be freed to continue normal operation.”…””}”(hj—  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1h·hŸh¶h KÚhjf  hžhubeh}”(h]”j  ah ]”h"]”Œ
oom killer”ah$]”h&]”uh1h¡hh£hžhhŸh¶h KÔubeh}”(h]”Œconcepts-overview”ah ]”h"]”Œconcepts overview”ah$]”h&]”uh1h¡hhhžhhŸh¶h Kubeh}”(h]”h ]”h"]”h$]”h&]”Œsource”h¶uh1hŒcurrent_source”NŒcurrent_line”NŒsettings”Œdocutils.frontend”ŒValues”“”)”}”(h¦NŒ	generator”NŒ	datestamp”NŒsource_link”NŒ
source_url”NŒtoc_backlinks”Œentry”Œfootnote_backlinks”KŒsectnum_xform”KŒstrip_comments”NŒstrip_elements_with_classes”NŒstrip_classes”NŒreport_level”KŒ
halt_level”KŒexit_status_level”KŒdebug”NŒwarning_stream”NŒ	traceback”ˆŒinput_encoding”Œ	utf-8-sig”Œinput_encoding_error_handler”Œstrict”Œoutput_encoding”Œutf-8”Œoutput_encoding_error_handler”jé  Œerror_encoding”Œutf-8”Œerror_encoding_error_handler”Œbackslashreplace”Œlanguage_code”Œen”Œrecord_dependencies”NŒconfig”NŒ	id_prefix”hŒauto_id_prefix”Œid”Œdump_settings”NŒdump_internals”NŒdump_transforms”NŒdump_pseudo_xml”NŒexpose_internals”NŒstrict_visitor”NŒ_disable_config”NŒ_source”h¶Œ_destination”NŒ_config_files”]”Œ7/var/lib/git/docbuild/linux/Documentation/docutils.conf”aŒfile_insertion_enabled”ˆŒraw_enabled”KŒline_length_limit”M'Œpep_references”NŒpep_base_url”Œhttps://peps.python.org/”Œpep_file_url_template”Œpep-%04d”Œrfc_references”NŒrfc_base_url”Œ&https://datatracker.ietf.org/doc/html/”Œ	tab_width”KŒtrim_footnote_reference_space”‰Œsyntax_highlight”Œlong”Œsmart_quotes”ˆŒsmartquotes_locales”]”Œcharacter_level_inline_markup”‰Œdoctitle_xform”‰Œdocinfo_xform”KŒsectsubtitle_xform”‰Œimage_loading”Œlink”Œembed_stylesheet”‰Œcloak_email_addresses”ˆŒsection_self_link”‰Œenv”NubŒreporter”NŒindirect_targets”]”Œsubstitution_defs”}”Œsubstitution_names”}”Œrefnames”}”Œrefids”}”Œnameids”}”(jÃ  jÀ  j(  j#  jÉ  hþjO  j   j•  jB  jÍ  jd  j  j†  ja  j¨  j  jÊ  jc  jì  j»  j  uŒ	nametypes”}”(jÃ  ‰j(  ‰jÉ  ‰jO  ‰j•  ‰jÍ  ‰j  ‰ja  ‰j  ‰jc  ‰j»  ‰uh}”(jÀ  h£j#  hÝhþj+  j   jÌ  jB  jR  jd  j˜  j†  jÐ  j¨  j  jÊ  jd  jì  j  j  jf  høhïj  j  j<  j3  j^  jU  j€  jw  j¢  j™  jÄ  j»  jæ  jÝ  j  jÿ  uŒfootnote_refs”}”Œcitation_refs”}”Œautofootnotes”]”Œautofootnote_refs”]”Œsymbol_footnotes”]”Œsymbol_footnote_refs”]”Œ	footnotes”]”Œ	citations”]”Œautofootnote_start”KŒsymbol_footnote_start”K Œ
id_counter”Œcollections”ŒCounter”“”}”j÷  K	s…”R”Œparse_messages”]”Œtransform_messages”]”Œtransformer”NŒinclude_log”]”Œ
decoration”Nhžhub.