€•ÊB      Œsphinx.addnodes”Œdocument”“”)”}”(Œ	rawsource”Œ ”Œchildren”]”(Œtranslations”ŒLanguagesNode”“”)”}”(hhh]”(h Œpending_xref”“”)”}”(hhh]”Œdocutils.nodes”ŒText”“”ŒChinese (Simplified)”…””}”Œparent”hsbaŒ
attributes”}”(Œids”]”Œclasses”]”Œnames”]”Œdupnames”]”Œbackrefs”]”Œ	refdomain”Œstd”Œreftype”Œdoc”Œ	reftarget”Œ /translations/zh_CN/arch/x86/tlb”Œmodname”NŒ	classname”NŒrefexplicit”ˆuŒtagname”hhhubh)”}”(hhh]”hŒChinese (Traditional)”…””}”hh2sbah}”(h]”h ]”h"]”h$]”h&]”Œ	refdomain”h)Œreftype”h+Œ	reftarget”Œ /translations/zh_TW/arch/x86/tlb”Œmodname”NŒ	classname”NŒrefexplicit”ˆuh1hhhubh)”}”(hhh]”hŒItalian”…””}”hhFsbah}”(h]”h ]”h"]”h$]”h&]”Œ	refdomain”h)Œreftype”h+Œ	reftarget”Œ /translations/it_IT/arch/x86/tlb”Œmodname”NŒ	classname”NŒrefexplicit”ˆuh1hhhubh)”}”(hhh]”hŒJapanese”…””}”hhZsbah}”(h]”h ]”h"]”h$]”h&]”Œ	refdomain”h)Œreftype”h+Œ	reftarget”Œ /translations/ja_JP/arch/x86/tlb”Œmodname”NŒ	classname”NŒrefexplicit”ˆuh1hhhubh)”}”(hhh]”hŒKorean”…””}”hhnsbah}”(h]”h ]”h"]”h$]”h&]”Œ	refdomain”h)Œreftype”h+Œ	reftarget”Œ /translations/ko_KR/arch/x86/tlb”Œmodname”NŒ	classname”NŒrefexplicit”ˆuh1hhhubh)”}”(hhh]”hŒPortuguese (Brazilian)”…””}”hh‚sbah}”(h]”h ]”h"]”h$]”h&]”Œ	refdomain”h)Œreftype”h+Œ	reftarget”Œ /translations/pt_BR/arch/x86/tlb”Œmodname”NŒ	classname”NŒrefexplicit”ˆuh1hhhubh)”}”(hhh]”hŒSpanish”…””}”hh–sbah}”(h]”h ]”h"]”h$]”h&]”Œ	refdomain”h)Œreftype”h+Œ	reftarget”Œ /translations/sp_SP/arch/x86/tlb”Œmodname”NŒ	classname”NŒrefexplicit”ˆuh1hhhubeh}”(h]”h ]”h"]”h$]”h&]”Œcurrent_language”ŒEnglish”uh1h
hhŒ	_document”hŒsource”NŒline”NubhŒcomment”“”)”}”(hŒ SPDX-License-Identifier: GPL-2.0”h]”hŒ SPDX-License-Identifier: GPL-2.0”…””}”hh·sbah}”(h]”h ]”h"]”h$]”h&]”Œ	xml:space”Œpreserve”uh1hµhhh²hh³Œ:/var/lib/git/docbuild/linux/Documentation/arch/x86/tlb.rst”h´KubhŒsection”“”)”}”(hhh]”(hŒtitle”“”)”}”(hŒThe TLB”h]”hŒThe TLB”…””}”(hhÏh²hh³Nh´Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÍhhÊh²hh³hÇh´KubhŒ	paragraph”“”)”}”(hŒ[When the kernel unmaps or modified the attributes of a range of
memory, it has two choices:”h]”hŒ[When the kernel unmaps or modified the attributes of a range of
memory, it has two choices:”…””}”(hhßh²hh³Nh´Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÝh³hÇh´KhhÊh²hubhŒblock_quote”“”)”}”(hXÛ  1. Flush the entire TLB with a two-instruction sequence.  This is
   a quick operation, but it causes collateral damage: TLB entries
   from areas other than the one we are trying to flush will be
   destroyed and must be refilled later, at some cost.
2. Use the invlpg instruction to invalidate a single page at a
   time.  This could potentially cost many more instructions, but
   it is a much more precise operation, causing no collateral
   damage to other TLB entries.
”h]”hŒenumerated_list”“”)”}”(hhh]”(hŒ	list_item”“”)”}”(hŒïFlush the entire TLB with a two-instruction sequence.  This is
a quick operation, but it causes collateral damage: TLB entries
from areas other than the one we are trying to flush will be
destroyed and must be refilled later, at some cost.”h]”hÞ)”}”(hŒïFlush the entire TLB with a two-instruction sequence.  This is
a quick operation, but it causes collateral damage: TLB entries
from areas other than the one we are trying to flush will be
destroyed and must be refilled later, at some cost.”h]”hŒïFlush the entire TLB with a two-instruction sequence.  This is
a quick operation, but it causes collateral damage: TLB entries
from areas other than the one we are trying to flush will be
destroyed and must be refilled later, at some cost.”…””}”(hhþh²hh³Nh´Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÝh³hÇh´K
hhúubah}”(h]”h ]”h"]”h$]”h&]”uh1høhhõubhù)”}”(hŒÓUse the invlpg instruction to invalidate a single page at a
time.  This could potentially cost many more instructions, but
it is a much more precise operation, causing no collateral
damage to other TLB entries.
”h]”hÞ)”}”(hŒÒUse the invlpg instruction to invalidate a single page at a
time.  This could potentially cost many more instructions, but
it is a much more precise operation, causing no collateral
damage to other TLB entries.”h]”hŒÒUse the invlpg instruction to invalidate a single page at a
time.  This could potentially cost many more instructions, but
it is a much more precise operation, causing no collateral
damage to other TLB entries.”…””}”(hj  h²hh³Nh´Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÝh³hÇh´Khj  ubah}”(h]”h ]”h"]”h$]”h&]”uh1høhhõubeh}”(h]”h ]”h"]”h$]”h&]”Œenumtype”Œarabic”Œprefix”hŒsuffix”Œ.”uh1hóhhïubah}”(h]”h ]”h"]”h$]”h&]”uh1híh³hÇh´K
hhÊh²hubhÞ)”}”(hŒ+Which method to do depends on a few things:”h]”hŒ+Which method to do depends on a few things:”…””}”(hj;  h²hh³Nh´Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÝh³hÇh´KhhÊh²hubhî)”}”(hX+  1. The size of the flush being performed.  A flush of the entire
   address space is obviously better performed by flushing the
   entire TLB than doing 2^48/PAGE_SIZE individual flushes.
2. The contents of the TLB.  If the TLB is empty, then there will
   be no collateral damage caused by doing the global flush, and
   all of the individual flush will have ended up being wasted
   work.
3. The size of the TLB.  The larger the TLB, the more collateral
   damage we do with a full flush.  So, the larger the TLB, the
   more attractive an individual flush looks.  Data and
   instructions have separate TLBs, as do different page sizes.
4. The microarchitecture.  The TLB has become a multi-level
   cache on modern CPUs, and the global flushes have become more
   expensive relative to single-page flushes.
”h]”hô)”}”(hhh]”(hù)”}”(hŒ²The size of the flush being performed.  A flush of the entire
address space is obviously better performed by flushing the
entire TLB than doing 2^48/PAGE_SIZE individual flushes.”h]”hÞ)”}”(hŒ²The size of the flush being performed.  A flush of the entire
address space is obviously better performed by flushing the
entire TLB than doing 2^48/PAGE_SIZE individual flushes.”h]”hŒ²The size of the flush being performed.  A flush of the entire
address space is obviously better performed by flushing the
entire TLB than doing 2^48/PAGE_SIZE individual flushes.”…””}”(hjT  h²hh³Nh´Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÝh³hÇh´KhjP  ubah}”(h]”h ]”h"]”h$]”h&]”uh1høhjM  ubhù)”}”(hŒ¾The contents of the TLB.  If the TLB is empty, then there will
be no collateral damage caused by doing the global flush, and
all of the individual flush will have ended up being wasted
work.”h]”hÞ)”}”(hŒ¾The contents of the TLB.  If the TLB is empty, then there will
be no collateral damage caused by doing the global flush, and
all of the individual flush will have ended up being wasted
work.”h]”hŒ¾The contents of the TLB.  If the TLB is empty, then there will
be no collateral damage caused by doing the global flush, and
all of the individual flush will have ended up being wasted
work.”…””}”(hjl  h²hh³Nh´Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÝh³hÇh´Khjh  ubah}”(h]”h ]”h"]”h$]”h&]”uh1høhjM  ubhù)”}”(hŒìThe size of the TLB.  The larger the TLB, the more collateral
damage we do with a full flush.  So, the larger the TLB, the
more attractive an individual flush looks.  Data and
instructions have separate TLBs, as do different page sizes.”h]”hÞ)”}”(hŒìThe size of the TLB.  The larger the TLB, the more collateral
damage we do with a full flush.  So, the larger the TLB, the
more attractive an individual flush looks.  Data and
instructions have separate TLBs, as do different page sizes.”h]”hŒìThe size of the TLB.  The larger the TLB, the more collateral
damage we do with a full flush.  So, the larger the TLB, the
more attractive an individual flush looks.  Data and
instructions have separate TLBs, as do different page sizes.”…””}”(hj„  h²hh³Nh´Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÝh³hÇh´Khj€  ubah}”(h]”h ]”h"]”h$]”h&]”uh1høhjM  ubhù)”}”(hŒ¢The microarchitecture.  The TLB has become a multi-level
cache on modern CPUs, and the global flushes have become more
expensive relative to single-page flushes.
”h]”hÞ)”}”(hŒ¡The microarchitecture.  The TLB has become a multi-level
cache on modern CPUs, and the global flushes have become more
expensive relative to single-page flushes.”h]”hŒ¡The microarchitecture.  The TLB has become a multi-level
cache on modern CPUs, and the global flushes have become more
expensive relative to single-page flushes.”…””}”(hjœ  h²hh³Nh´Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÝh³hÇh´K hj˜  ubah}”(h]”h ]”h"]”h$]”h&]”uh1høhjM  ubeh}”(h]”h ]”h"]”h$]”h&]”j0  j1  j2  hj3  j4  uh1hóhjI  ubah}”(h]”h ]”h"]”h$]”h&]”uh1híh³hÇh´KhhÊh²hubhÞ)”}”(hŒ÷There is obviously no way the kernel can know all these things,
especially the contents of the TLB during a given flush.  The
sizes of the flush will vary greatly depending on the workload as
well.  There is essentially no "right" point to choose.”h]”hŒûThere is obviously no way the kernel can know all these things,
especially the contents of the TLB during a given flush.  The
sizes of the flush will vary greatly depending on the workload as
well.  There is essentially no â€œrightâ€ point to choose.”…””}”(hj¼  h²hh³Nh´Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÝh³hÇh´K$hhÊh²hubhÞ)”}”(hŒìYou may be doing too many individual invalidations if you see the
invlpg instruction (or instructions _near_ it) show up high in
profiles.  If you believe that individual invalidations being
called too often, you can lower the tunable::”h]”hŒëYou may be doing too many individual invalidations if you see the
invlpg instruction (or instructions _near_ it) show up high in
profiles.  If you believe that individual invalidations being
called too often, you can lower the tunable:”…””}”(hjÊ  h²hh³Nh´Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÝh³hÇh´K)hhÊh²hubhŒliteral_block”“”)”}”(hŒ3/sys/kernel/debug/x86/tlb_single_page_flush_ceiling”h]”hŒ3/sys/kernel/debug/x86/tlb_single_page_flush_ceiling”…””}”hjÚ  sbah}”(h]”h ]”h"]”h$]”h&]”hÅhÆuh1jØ  h³hÇh´K.hhÊh²hubhÞ)”}”(hŒæThis will cause us to do the global flush for more cases.
Lowering it to 0 will disable the use of the individual flushes.
Setting it to 1 is a very conservative setting and it should
never need to be 0 under normal circumstances.”h]”hŒæThis will cause us to do the global flush for more cases.
Lowering it to 0 will disable the use of the individual flushes.
Setting it to 1 is a very conservative setting and it should
never need to be 0 under normal circumstances.”…””}”(hjè  h²hh³Nh´Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÝh³hÇh´K0hhÊh²hubhÞ)”}”(hŒ¹Despite the fact that a single individual flush on x86 is
guaranteed to flush a full 2MB [1]_, hugetlbfs always uses the full
flushes.  THP is treated exactly the same as normal memory.”h]”(hŒYDespite the fact that a single individual flush on x86 is
guaranteed to flush a full 2MB ”…””}”(hjö  h²hh³Nh´NubhŒfootnote_reference”“”)”}”(hŒ[1]_”h]”hŒ1”…””}”(hj   h²hh³Nh´Nubah}”(h]”Œid1”ah ]”h"]”h$]”h&]”Œrefid”Œid2”Œdocname”Œarch/x86/tlb”uh1jþ  hjö  Œresolved”KubhŒ\, hugetlbfs always uses the full
flushes.  THP is treated exactly the same as normal memory.”…””}”(hjö  h²hh³Nh´Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1hÝh³hÇh´K5hhÊh²hubhÞ)”}”(hŒ±You might see invlpg inside of flush_tlb_mm_range() show up in
profiles, or you can use the trace_tlb_flush() tracepoints. to
determine how long the flush operations are taking.”h]”hŒ±You might see invlpg inside of flush_tlb_mm_range() show up in
profiles, or you can use the trace_tlb_flush() tracepoints. to
determine how long the flush operations are taking.”…””}”(hj  h²hh³Nh´Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÝh³hÇh´K9hhÊh²hubhÞ)”}”(hŒxEssentially, you are balancing the cycles you spend doing invlpg
with the cycles that you spend refilling the TLB later.”h]”hŒxEssentially, you are balancing the cycles you spend doing invlpg
with the cycles that you spend refilling the TLB later.”…””}”(hj,  h²hh³Nh´Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÝh³hÇh´K=hhÊh²hubhÞ)”}”(hŒhYou can measure how expensive TLB refills are by using
performance counters and 'perf stat', like this::”h]”hŒkYou can measure how expensive TLB refills are by using
performance counters and â€˜perf statâ€™, like this:”…””}”(hj:  h²hh³Nh´Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÝh³hÇh´K@hhÊh²hubjÙ  )”}”(hXŒ  perf stat -e
  cpu/event=0x8,umask=0x84,name=dtlb_load_misses_walk_duration/,
  cpu/event=0x8,umask=0x82,name=dtlb_load_misses_walk_completed/,
  cpu/event=0x49,umask=0x4,name=dtlb_store_misses_walk_duration/,
  cpu/event=0x49,umask=0x2,name=dtlb_store_misses_walk_completed/,
  cpu/event=0x85,umask=0x4,name=itlb_misses_walk_duration/,
  cpu/event=0x85,umask=0x2,name=itlb_misses_walk_completed/”h]”hXŒ  perf stat -e
  cpu/event=0x8,umask=0x84,name=dtlb_load_misses_walk_duration/,
  cpu/event=0x8,umask=0x82,name=dtlb_load_misses_walk_completed/,
  cpu/event=0x49,umask=0x4,name=dtlb_store_misses_walk_duration/,
  cpu/event=0x49,umask=0x2,name=dtlb_store_misses_walk_completed/,
  cpu/event=0x85,umask=0x4,name=itlb_misses_walk_duration/,
  cpu/event=0x85,umask=0x2,name=itlb_misses_walk_completed/”…””}”hjH  sbah}”(h]”h ]”h"]”h$]”h&]”hÅhÆuh1jØ  h³hÇh´KChhÊh²hubhÞ)”}”(hX  That works on an IvyBridge-era CPU (i5-3320M).  Different CPUs
may have differently-named counters, but they should at least
be there in some form.  You can use pmu-tools 'ocperf list'
(https://github.com/andikleen/pmu-tools) to find the right
counters for a given CPU.”h]”(hŒ¾That works on an IvyBridge-era CPU (i5-3320M).  Different CPUs
may have differently-named counters, but they should at least
be there in some form.  You can use pmu-tools â€˜ocperf listâ€™
(”…””}”(hjV  h²hh³Nh´NubhŒ	reference”“”)”}”(hŒ&https://github.com/andikleen/pmu-tools”h]”hŒ&https://github.com/andikleen/pmu-tools”…””}”(hj`  h²hh³Nh´Nubah}”(h]”h ]”h"]”h$]”h&]”Œrefuri”jb  uh1j^  hjV  ubhŒ-) to find the right
counters for a given CPU.”…””}”(hjV  h²hh³Nh´Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1hÝh³hÇh´KKhhÊh²hubhŒfootnote”“”)”}”(hŒœA footnote in Intel's SDM "4.10.4.2 Recommended Invalidation"
says: "One execution of INVLPG is sufficient even for a page
with size greater than 4 KBytes."”h]”(hŒlabel”“”)”}”(hŒ1”h]”hŒ1”…””}”(hj  h²hh³Nh´Nubah}”(h]”h ]”h"]”h$]”h&]”uh1j  hj{  ubhÞ)”}”(hŒœA footnote in Intel's SDM "4.10.4.2 Recommended Invalidation"
says: "One execution of INVLPG is sufficient even for a page
with size greater than 4 KBytes."”h]”hŒ¦A footnote in Intelâ€™s SDM â€œ4.10.4.2 Recommended Invalidationâ€
says: â€œOne execution of INVLPG is sufficient even for a page
with size greater than 4 KBytes.â€”…””}”(hj  h²hh³Nh´Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÝh³hÇh´KQhj{  ubeh}”(h]”j  ah ]”h"]”Œ1”ah$]”h&]”j
  aj  j  uh1jy  h³hÇh´KQhhÊh²hj  Kubeh}”(h]”Œthe-tlb”ah ]”h"]”Œthe tlb”ah$]”h&]”uh1hÈhhh²hh³hÇh´Kubeh}”(h]”h ]”h"]”h$]”h&]”Œsource”hÇuh1hŒcurrent_source”NŒcurrent_line”NŒsettings”Œdocutils.frontend”ŒValues”“”)”}”(hÍNŒ	generator”NŒ	datestamp”NŒsource_link”NŒ
source_url”NŒtoc_backlinks”Œentry”Œfootnote_backlinks”KŒsectnum_xform”KŒstrip_comments”NŒstrip_elements_with_classes”NŒstrip_classes”NŒreport_level”KŒ
halt_level”KŒexit_status_level”KŒdebug”NŒwarning_stream”NŒ	traceback”ˆŒinput_encoding”Œ	utf-8-sig”Œinput_encoding_error_handler”Œstrict”Œoutput_encoding”Œutf-8”Œoutput_encoding_error_handler”jÏ  Œerror_encoding”Œutf-8”Œerror_encoding_error_handler”Œbackslashreplace”Œlanguage_code”Œen”Œrecord_dependencies”NŒconfig”NŒ	id_prefix”hŒauto_id_prefix”Œid”Œdump_settings”NŒdump_internals”NŒdump_transforms”NŒdump_pseudo_xml”NŒexpose_internals”NŒstrict_visitor”NŒ_disable_config”NŒ_source”hÇŒ_destination”NŒ_config_files”]”Œ7/var/lib/git/docbuild/linux/Documentation/docutils.conf”aŒfile_insertion_enabled”ˆŒraw_enabled”KŒline_length_limit”M'Œpep_references”NŒpep_base_url”Œhttps://peps.python.org/”Œpep_file_url_template”Œpep-%04d”Œrfc_references”NŒrfc_base_url”Œ&https://datatracker.ietf.org/doc/html/”Œ	tab_width”KŒtrim_footnote_reference_space”‰Œsyntax_highlight”Œlong”Œsmart_quotes”ˆŒsmartquotes_locales”]”Œcharacter_level_inline_markup”‰Œdoctitle_xform”‰Œdocinfo_xform”KŒsectsubtitle_xform”‰Œimage_loading”Œlink”Œembed_stylesheet”‰Œcloak_email_addresses”ˆŒsection_self_link”‰Œenv”NubŒreporter”NŒindirect_targets”]”Œsubstitution_defs”}”Œsubstitution_names”}”Œrefnames”}”Œ1”]”j   asŒrefids”}”Œnameids”}”(j©  j¦  j¡  j  uŒ	nametypes”}”(j©  ‰j¡  ˆuh}”(j¦  hÊj
  j   j  j{  uŒfootnote_refs”}”j  ]”j   asŒcitation_refs”}”Œautofootnotes”]”Œautofootnote_refs”]”Œsymbol_footnotes”]”Œsymbol_footnote_refs”]”Œ	footnotes”]”j{  aŒ	citations”]”Œautofootnote_start”KŒsymbol_footnote_start”K Œ
id_counter”Œcollections”ŒCounter”“”}”jÝ  Ks…”R”Œparse_messages”]”Œtransform_messages”]”Œtransformer”NŒinclude_log”]”Œ
decoration”Nh²hub.