€•†      Œsphinx.addnodes”Œdocument”“”)”}”(Œ	rawsource”Œ ”Œchildren”]”(Œtranslations”ŒLanguagesNode”“”)”}”(hhh]”(h Œpending_xref”“”)”}”(hhh]”Œdocutils.nodes”ŒText”“”ŒChinese (Simplified)”…””}”Œparent”hsbaŒ
attributes”}”(Œids”]”Œclasses”]”Œnames”]”Œdupnames”]”Œbackrefs”]”Œ	refdomain”Œstd”Œreftype”Œdoc”Œ	reftarget”Œ0/translations/zh_CN/kernel-hacking/false-sharing”Œmodname”NŒ	classname”NŒrefexplicit”ˆuŒtagname”hhhubh)”}”(hhh]”hŒChinese (Traditional)”…””}”hh2sbah}”(h]”h ]”h"]”h$]”h&]”Œ	refdomain”h)Œreftype”h+Œ	reftarget”Œ0/translations/zh_TW/kernel-hacking/false-sharing”Œmodname”NŒ	classname”NŒrefexplicit”ˆuh1hhhubh)”}”(hhh]”hŒItalian”…””}”hhFsbah}”(h]”h ]”h"]”h$]”h&]”Œ	refdomain”h)Œreftype”h+Œ	reftarget”Œ0/translations/it_IT/kernel-hacking/false-sharing”Œmodname”NŒ	classname”NŒrefexplicit”ˆuh1hhhubh)”}”(hhh]”hŒJapanese”…””}”hhZsbah}”(h]”h ]”h"]”h$]”h&]”Œ	refdomain”h)Œreftype”h+Œ	reftarget”Œ0/translations/ja_JP/kernel-hacking/false-sharing”Œmodname”NŒ	classname”NŒrefexplicit”ˆuh1hhhubh)”}”(hhh]”hŒKorean”…””}”hhnsbah}”(h]”h ]”h"]”h$]”h&]”Œ	refdomain”h)Œreftype”h+Œ	reftarget”Œ0/translations/ko_KR/kernel-hacking/false-sharing”Œmodname”NŒ	classname”NŒrefexplicit”ˆuh1hhhubh)”}”(hhh]”hŒSpanish”…””}”hh‚sbah}”(h]”h ]”h"]”h$]”h&]”Œ	refdomain”h)Œreftype”h+Œ	reftarget”Œ0/translations/sp_SP/kernel-hacking/false-sharing”Œmodname”NŒ	classname”NŒrefexplicit”ˆuh1hhhubeh}”(h]”h ]”h"]”h$]”h&]”Œcurrent_language”ŒEnglish”uh1h
hhŒ	_document”hŒsource”NŒline”NubhŒcomment”“”)”}”(hŒ SPDX-License-Identifier: GPL-2.0”h]”hŒ SPDX-License-Identifier: GPL-2.0”…””}”hh£sbah}”(h]”h ]”h"]”h$]”h&]”Œ	xml:space”Œpreserve”uh1h¡hhhžhhŸŒJ/var/lib/git/docbuild/linux/Documentation/kernel-hacking/false-sharing.rst”h KubhŒsection”“”)”}”(hhh]”(hŒtitle”“”)”}”(hŒFalse Sharing”h]”hŒFalse Sharing”…””}”(hh»hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h¹hh¶hžhhŸh³h Kubhµ)”}”(hhh]”(hº)”}”(hŒWhat is False Sharing”h]”hŒWhat is False Sharing”…””}”(hhÌhžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h¹hhÉhžhhŸh³h KubhŒ	paragraph”“”)”}”(hŒßFalse sharing is related with cache mechanism of maintaining the data
coherence of one cache line stored in multiple CPU's caches; then
academic definition for it is in [1]_. Consider a struct with a
refcount and a string::”h]”(hŒ«False sharing is related with cache mechanism of maintaining the data
coherence of one cache line stored in multiple CPUâ€™s caches; then
academic definition for it is in ”…””}”(hhÜhžhhŸNh NubhŒfootnote_reference”“”)”}”(hŒ[1]_”h]”hŒ1”…””}”(hhæhžhhŸNh Nubah}”(h]”Œid1”ah ]”h"]”h$]”h&]”Œrefid”Œid4”Œdocname”Œkernel-hacking/false-sharing”uh1hähhÜŒresolved”KubhŒ1. Consider a struct with a
refcount and a string:”…””}”(hhÜhžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1hÚhŸh³h K	hhÉhžhubhŒliteral_block”“”)”}”(hŒustruct foo {
        refcount_t refcount;
        ...
        char name[16];
} ____cacheline_internodealigned_in_smp;”h]”hŒustruct foo {
        refcount_t refcount;
        ...
        char name[16];
} ____cacheline_internodealigned_in_smp;”…””}”hj  sbah}”(h]”h ]”h"]”h$]”h&]”h±h²uh1j  hŸh³h KhhÉhžhubhÛ)”}”(hŒFMember 'refcount'(A) and 'name'(B) _share_ one cache line like below::”h]”hŒMMember â€˜refcountâ€™(A) and â€˜nameâ€™(B) _share_ one cache line like below:”…””}”(hj  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÚhŸh³h KhhÉhžhubj  )”}”(hXß                +-----------+                     +-----------+
              |   CPU 0   |                     |   CPU 1   |
              +-----------+                     +-----------+
             /                                        |
            /                                         |
           V                                          V
       +----------------------+             +----------------------+
       | A      B             | Cache 0     | A       B            | Cache 1
       +----------------------+             +----------------------+
                           |                  |
---------------------------+------------------+-----------------------------
                           |                  |
                         +----------------------+
                         |                      |
                         +----------------------+
            Main Memory  | A       B            |
                         +----------------------+”h]”hXß                +-----------+                     +-----------+
              |   CPU 0   |                     |   CPU 1   |
              +-----------+                     +-----------+
             /                                        |
            /                                         |
           V                                          V
       +----------------------+             +----------------------+
       | A      B             | Cache 0     | A       B            | Cache 1
       +----------------------+             +----------------------+
                           |                  |
---------------------------+------------------+-----------------------------
                           |                  |
                         +----------------------+
                         |                      |
                         +----------------------+
            Main Memory  | A       B            |
                         +----------------------+”…””}”hj"  sbah}”(h]”h ]”h"]”h$]”h&]”h±h²uh1j  hŸh³h KhhÉhžhubhÛ)”}”(hXx  'refcount' is modified frequently, but 'name' is set once at object
creation time and is never modified.  When many CPUs access 'foo' at
the same time, with 'refcount' being only bumped by one CPU frequently
and 'name' being read by other CPUs, all those reading CPUs have to
reload the whole cache line over and over due to the 'sharing', even
though 'name' is never changed.”h]”hX”  â€˜refcountâ€™ is modified frequently, but â€˜nameâ€™ is set once at object
creation time and is never modified.  When many CPUs access â€˜fooâ€™ at
the same time, with â€˜refcountâ€™ being only bumped by one CPU frequently
and â€˜nameâ€™ being read by other CPUs, all those reading CPUs have to
reload the whole cache line over and over due to the â€˜sharingâ€™, even
though â€˜nameâ€™ is never changed.”…””}”(hj0  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÚhŸh³h K(hhÉhžhubhÛ)”}”(hŒêThere are many real-world cases of performance regressions caused by
false sharing.  One of these is a rw_semaphore 'mmap_lock' inside
mm_struct struct, whose cache line layout change triggered a
regression and Linus analyzed in [2]_.”h]”(hŒéThere are many real-world cases of performance regressions caused by
false sharing.  One of these is a rw_semaphore â€˜mmap_lockâ€™ inside
mm_struct struct, whose cache line layout change triggered a
regression and Linus analyzed in ”…””}”(hj>  hžhhŸNh Nubhå)”}”(hŒ[2]_”h]”hŒ2”…””}”(hjF  hžhhŸNh Nubah}”(h]”Œid2”ah ]”h"]”h$]”h&]”hõŒid5”h÷høuh1hähj>  hùKubhŒ.”…””}”(hj>  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1hÚhŸh³h K/hhÉhžhubhÛ)”}”(hŒ6There are two key factors for a harmful false sharing:”h]”hŒ6There are two key factors for a harmful false sharing:”…””}”(hj`  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÚhŸh³h K4hhÉhžhubhŒbullet_list”“”)”}”(hhh]”(hŒ	list_item”“”)”}”(hŒ-A global datum accessed (shared) by many CPUs”h]”hÛ)”}”(hjw  h]”hŒ-A global datum accessed (shared) by many CPUs”…””}”(hjy  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÚhŸh³h K6hju  ubah}”(h]”h ]”h"]”h$]”h&]”uh1js  hjp  hžhhŸh³h Nubjt  )”}”(hŒpIn the concurrent accesses to the data, there is at least one write
operation: write/write or write/read cases.
”h]”hÛ)”}”(hŒoIn the concurrent accesses to the data, there is at least one write
operation: write/write or write/read cases.”h]”hŒoIn the concurrent accesses to the data, there is at least one write
operation: write/write or write/read cases.”…””}”(hj  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÚhŸh³h K7hjŒ  ubah}”(h]”h ]”h"]”h$]”h&]”uh1js  hjp  hžhhŸh³h Nubeh}”(h]”h ]”h"]”h$]”h&]”Œbullet”Œ*”uh1jn  hŸh³h K6hhÉhžhubhÛ)”}”(hŒtThe sharing could be from totally unrelated kernel components, or
different code paths of the same kernel component.”h]”hŒtThe sharing could be from totally unrelated kernel components, or
different code paths of the same kernel component.”…””}”(hj¬  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÚhŸh³h K:hhÉhžhubeh}”(h]”Œwhat-is-false-sharing”ah ]”h"]”Œwhat is false sharing”ah$]”h&]”uh1h´hh¶hžhhŸh³h Kubhµ)”}”(hhh]”(hº)”}”(hŒFalse Sharing Pitfalls”h]”hŒFalse Sharing Pitfalls”…””}”(hjÅ  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h¹hjÂ  hžhhŸh³h K?ubhÛ)”}”(hX˜  Back in time when one platform had only one or a few CPUs, hot data
members could be purposely put in the same cache line to make them
cache hot and save cacheline/TLB, like a lock and the data protected
by it.  But for recent large system with hundreds of CPUs, this may
not work when the lock is heavily contended, as the lock owner CPU
could write to the data, while other CPUs are busy spinning the lock.”h]”hX˜  Back in time when one platform had only one or a few CPUs, hot data
members could be purposely put in the same cache line to make them
cache hot and save cacheline/TLB, like a lock and the data protected
by it.  But for recent large system with hundreds of CPUs, this may
not work when the lock is heavily contended, as the lock owner CPU
could write to the data, while other CPUs are busy spinning the lock.”…””}”(hjÓ  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÚhŸh³h K@hjÂ  hžhubhÛ)”}”(hŒYLooking at past cases, there are several frequently occurring patterns
for false sharing:”h]”hŒYLooking at past cases, there are several frequently occurring patterns
for false sharing:”…””}”(hjá  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÚhŸh³h KGhjÂ  hžhubjo  )”}”(hhh]”(jt  )”}”(hŒ]lock (spinlock/mutex/semaphore) and data protected by it are
purposely put in one cache line.”h]”hÛ)”}”(hŒ]lock (spinlock/mutex/semaphore) and data protected by it are
purposely put in one cache line.”h]”hŒ]lock (spinlock/mutex/semaphore) and data protected by it are
purposely put in one cache line.”…””}”(hjö  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÚhŸh³h KJhjò  ubah}”(h]”h ]”h"]”h$]”h&]”uh1js  hjï  hžhhŸh³h Nubjt  )”}”(hŒ¿global data being put together in one cache line. Some kernel
subsystems have many global parameters of small size (4 bytes),
which can easily be grouped together and put into one cache line.”h]”hÛ)”}”(hŒ¿global data being put together in one cache line. Some kernel
subsystems have many global parameters of small size (4 bytes),
which can easily be grouped together and put into one cache line.”h]”hŒ¿global data being put together in one cache line. Some kernel
subsystems have many global parameters of small size (4 bytes),
which can easily be grouped together and put into one cache line.”…””}”(hj  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÚhŸh³h KLhj
  ubah}”(h]”h ]”h"]”h$]”h&]”uh1js  hjï  hžhhŸh³h Nubjt  )”}”(hŒ™data members of a big data structure randomly sitting together
without being noticed (cache line is usually 64 bytes or more),
like 'mem_cgroup' struct.
”h]”hÛ)”}”(hŒ˜data members of a big data structure randomly sitting together
without being noticed (cache line is usually 64 bytes or more),
like 'mem_cgroup' struct.”h]”hŒœdata members of a big data structure randomly sitting together
without being noticed (cache line is usually 64 bytes or more),
like â€˜mem_cgroupâ€™ struct.”…””}”(hj&  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÚhŸh³h KOhj"  ubah}”(h]”h ]”h"]”h$]”h&]”uh1js  hjï  hžhhŸh³h Nubeh}”(h]”h ]”h"]”h$]”h&]”jª  j«  uh1jn  hŸh³h KJhjÂ  hžhubhÛ)”}”(hŒ<Following 'mitigation' section provides real-world examples.”h]”hŒ@Following â€˜mitigationâ€™ section provides real-world examples.”…””}”(hj@  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÚhŸh³h KShjÂ  hžhubhÛ)”}”(hŒáFalse sharing could easily happen unless they are intentionally
checked, and it is valuable to run specific tools for performance
critical workloads to detect false sharing affecting performance case
and optimize accordingly.”h]”hŒáFalse sharing could easily happen unless they are intentionally
checked, and it is valuable to run specific tools for performance
critical workloads to detect false sharing affecting performance case
and optimize accordingly.”…””}”(hjN  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÚhŸh³h KUhjÂ  hžhubeh}”(h]”Œfalse-sharing-pitfalls”ah ]”h"]”Œfalse sharing pitfalls”ah$]”h&]”uh1h´hh¶hžhhŸh³h K?ubhµ)”}”(hhh]”(hº)”}”(hŒ'How to detect and analyze False Sharing”h]”hŒ'How to detect and analyze False Sharing”…””}”(hjg  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h¹hjd  hžhhŸh³h K\ubhÛ)”}”(hXG  perf record/report/stat are widely used for performance tuning, and
once hotspots are detected, tools like 'perf-c2c' and 'pahole' can
be further used to detect and pinpoint the possible false sharing
data structures.  'addr2line' is also good at decoding instruction
pointer when there are multiple layers of inline functions.”h]”hXS  perf record/report/stat are widely used for performance tuning, and
once hotspots are detected, tools like â€˜perf-c2câ€™ and â€˜paholeâ€™ can
be further used to detect and pinpoint the possible false sharing
data structures.  â€˜addr2lineâ€™ is also good at decoding instruction
pointer when there are multiple layers of inline functions.”…””}”(hju  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÚhŸh³h K]hjd  hžhubhÛ)”}”(hŒ»perf-c2c can capture the cache lines with most false sharing hits,
decoded functions (line number of file) accessing that cache line,
and in-line offset of the data. Simple commands are::”h]”hŒºperf-c2c can capture the cache lines with most false sharing hits,
decoded functions (line number of file) accessing that cache line,
and in-line offset of the data. Simple commands are:”…””}”(hjƒ  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÚhŸh³h Kchjd  hžhubj  )”}”(hŒL$ perf c2c record -ag sleep 3
$ perf c2c report --call-graph none -k vmlinux”h]”hŒL$ perf c2c record -ag sleep 3
$ perf c2c report --call-graph none -k vmlinux”…””}”hj‘  sbah}”(h]”h ]”h"]”h$]”h&]”h±h²uh1j  hŸh³h Kghjd  hžhubhÛ)”}”(hŒ`When running above during testing will-it-scale's tlb_flush1 case,
perf reports something like::”h]”hŒaWhen running above during testing will-it-scaleâ€™s tlb_flush1 case,
perf reports something like:”…””}”(hjŸ  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÚhŸh³h Kjhjd  hžhubj  )”}”(hXÃ  Total records                     :    1658231
Locked Load/Store Operations      :      89439
Load Operations                   :     623219
Load Local HITM                   :      92117
Load Remote HITM                  :        139

#----------------------------------------------------------------------
    4        0     2374        0        0        0  0xff1100088366d880
#----------------------------------------------------------------------
  0.00%   42.29%    0.00%    0.00%    0.00%    0x8     1       1  0xffffffff81373b7b         0       231       129     5312        64  [k] __mod_lruvec_page_state    [kernel.vmlinux]  memcontrol.h:752   1
  0.00%   13.10%    0.00%    0.00%    0.00%    0x8     1       1  0xffffffff81374718         0       226        97     3551        64  [k] folio_lruvec_lock_irqsave  [kernel.vmlinux]  memcontrol.h:752   1
  0.00%   11.20%    0.00%    0.00%    0.00%    0x8     1       1  0xffffffff812c29bf         0       170       136      555        64  [k] lru_add_fn                 [kernel.vmlinux]  mm_inline.h:41     1
  0.00%    7.62%    0.00%    0.00%    0.00%    0x8     1       1  0xffffffff812c3ec5         0       175       108      632        64  [k] release_pages              [kernel.vmlinux]  mm_inline.h:41     1
  0.00%   23.29%    0.00%    0.00%    0.00%   0x10     1       1  0xffffffff81372d0a         0       234       279     1051        64  [k] __mod_memcg_lruvec_state   [kernel.vmlinux]  memcontrol.c:736   1”h]”hXÃ  Total records                     :    1658231
Locked Load/Store Operations      :      89439
Load Operations                   :     623219
Load Local HITM                   :      92117
Load Remote HITM                  :        139

#----------------------------------------------------------------------
    4        0     2374        0        0        0  0xff1100088366d880
#----------------------------------------------------------------------
  0.00%   42.29%    0.00%    0.00%    0.00%    0x8     1       1  0xffffffff81373b7b         0       231       129     5312        64  [k] __mod_lruvec_page_state    [kernel.vmlinux]  memcontrol.h:752   1
  0.00%   13.10%    0.00%    0.00%    0.00%    0x8     1       1  0xffffffff81374718         0       226        97     3551        64  [k] folio_lruvec_lock_irqsave  [kernel.vmlinux]  memcontrol.h:752   1
  0.00%   11.20%    0.00%    0.00%    0.00%    0x8     1       1  0xffffffff812c29bf         0       170       136      555        64  [k] lru_add_fn                 [kernel.vmlinux]  mm_inline.h:41     1
  0.00%    7.62%    0.00%    0.00%    0.00%    0x8     1       1  0xffffffff812c3ec5         0       175       108      632        64  [k] release_pages              [kernel.vmlinux]  mm_inline.h:41     1
  0.00%   23.29%    0.00%    0.00%    0.00%   0x10     1       1  0xffffffff81372d0a         0       234       279     1051        64  [k] __mod_memcg_lruvec_state   [kernel.vmlinux]  memcontrol.c:736   1”…””}”hj­  sbah}”(h]”h ]”h"]”h$]”h&]”h±h²uh1j  hŸh³h Kmhjd  hžhubhÛ)”}”(hŒ)A nice introduction for perf-c2c is [3]_.”h]”(hŒ$A nice introduction for perf-c2c is ”…””}”(hj»  hžhhŸNh Nubhå)”}”(hŒ[3]_”h]”hŒ3”…””}”(hjÃ  hžhhŸNh Nubah}”(h]”Œid3”ah ]”h"]”h$]”h&]”hõŒid6”h÷høuh1hähj»  hùKubhŒ.”…””}”(hj»  hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”uh1hÚhŸh³h K|hjd  hžhubhÛ)”}”(hŒ÷'pahole' decodes data structure layouts delimited in cache line
granularity.  Users can match the offset in perf-c2c output with
pahole's decoding to locate the exact data members.  For global
data, users can search the data address in System.map.”h]”hŒýâ€˜paholeâ€™ decodes data structure layouts delimited in cache line
granularity.  Users can match the offset in perf-c2c output with
paholeâ€™s decoding to locate the exact data members.  For global
data, users can search the data address in System.map.”…””}”(hjÝ  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÚhŸh³h K~hjd  hžhubeh}”(h]”Œ'how-to-detect-and-analyze-false-sharing”ah ]”h"]”Œ'how to detect and analyze false sharing”ah$]”h&]”uh1h´hh¶hžhhŸh³h K\ubhµ)”}”(hhh]”(hº)”}”(hŒPossible Mitigations”h]”hŒPossible Mitigations”…””}”(hjö  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h¹hjó  hžhhŸh³h K…ubhÛ)”}”(hX  False sharing does not always need to be mitigated.  False sharing
mitigations should balance performance gains with complexity and
space consumption.  Sometimes, lower performance is OK, and it's
unnecessary to hyper-optimize every rarely used data structure or
a cold data path.”h]”hX  False sharing does not always need to be mitigated.  False sharing
mitigations should balance performance gains with complexity and
space consumption.  Sometimes, lower performance is OK, and itâ€™s
unnecessary to hyper-optimize every rarely used data structure or
a cold data path.”…””}”(hj  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÚhŸh³h K†hjó  hžhubhÛ)”}”(hX"  False sharing hurting performance cases are seen more frequently with
core count increasing.  Because of these detrimental effects, many
patches have been proposed across variety of subsystems (like
networking and memory management) and merged.  Some common mitigations
(with examples) are:”h]”hX"  False sharing hurting performance cases are seen more frequently with
core count increasing.  Because of these detrimental effects, many
patches have been proposed across variety of subsystems (like
networking and memory management) and merged.  Some common mitigations
(with examples) are:”…””}”(hj  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÚhŸh³h KŒhjó  hžhubjo  )”}”(hhh]”(jt  )”}”(hŒýSeparate hot global data in its own dedicated cache line, even if it
is just a 'short' type. The downside is more consumption of memory,
cache line and TLB entries.

- Commit 91b6d3256356 ("net: cache align tcp_memory_allocated, tcp_sockets_allocated")
”h]”(hÛ)”}”(hŒ¤Separate hot global data in its own dedicated cache line, even if it
is just a 'short' type. The downside is more consumption of memory,
cache line and TLB entries.”h]”hŒ¨Separate hot global data in its own dedicated cache line, even if it
is just a â€˜shortâ€™ type. The downside is more consumption of memory,
cache line and TLB entries.”…””}”(hj'  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÚhŸh³h K’hj#  ubjo  )”}”(hhh]”jt  )”}”(hŒUCommit 91b6d3256356 ("net: cache align tcp_memory_allocated, tcp_sockets_allocated")
”h]”hÛ)”}”(hŒTCommit 91b6d3256356 ("net: cache align tcp_memory_allocated, tcp_sockets_allocated")”h]”hŒXCommit 91b6d3256356 (â€œnet: cache align tcp_memory_allocated, tcp_sockets_allocatedâ€)”…””}”(hj<  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÚhŸh³h K–hj8  ubah}”(h]”h ]”h"]”h$]”h&]”uh1js  hj5  ubah}”(h]”h ]”h"]”h$]”h&]”jª  Œ-”uh1jn  hŸh³h K–hj#  ubeh}”(h]”h ]”h"]”h$]”h&]”uh1js  hj   hžhhŸNh Nubjt  )”}”(hŒùReorganize the data structure, separate the interfering members to
different cache lines.  One downside is it may introduce new false
sharing of other members.

- Commit 802f1d522d5f ("mm: page_counter: re-layout structure to reduce false sharing")
”h]”(hÛ)”}”(hŒŸReorganize the data structure, separate the interfering members to
different cache lines.  One downside is it may introduce new false
sharing of other members.”h]”hŒŸReorganize the data structure, separate the interfering members to
different cache lines.  One downside is it may introduce new false
sharing of other members.”…””}”(hja  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÚhŸh³h K˜hj]  ubjo  )”}”(hhh]”jt  )”}”(hŒVCommit 802f1d522d5f ("mm: page_counter: re-layout structure to reduce false sharing")
”h]”hÛ)”}”(hŒUCommit 802f1d522d5f ("mm: page_counter: re-layout structure to reduce false sharing")”h]”hŒYCommit 802f1d522d5f (â€œmm: page_counter: re-layout structure to reduce false sharingâ€)”…””}”(hjv  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÚhŸh³h Kœhjr  ubah}”(h]”h ]”h"]”h$]”h&]”uh1js  hjo  ubah}”(h]”h ]”h"]”h$]”h&]”jª  jV  uh1jn  hŸh³h Kœhj]  ubeh}”(h]”h ]”h"]”h$]”h&]”uh1js  hj   hžhhŸNh Nubjt  )”}”(hX  Replace 'write' with 'read' when possible, especially in loops.
Like for some global variable, use compare(read)-then-write instead
of unconditional write. For example, use::

      if (!test_bit(XXX))
              set_bit(XXX);

instead of directly "set_bit(XXX);", similarly for atomic_t data::

      if (atomic_read(XXX) == AAA)
              atomic_set(XXX, BBB);

- Commit 7b1002f7cfe5 ("bcache: fixup bcache_dev_sectors_dirty_add() multithreaded CPU false sharing")
- Commit 292648ac5cf1 ("mm: gup: allow FOLL_PIN to scale in SMP")
”h]”(hÛ)”}”(hŒ®Replace 'write' with 'read' when possible, especially in loops.
Like for some global variable, use compare(read)-then-write instead
of unconditional write. For example, use::”h]”hŒµReplace â€˜writeâ€™ with â€˜readâ€™ when possible, especially in loops.
Like for some global variable, use compare(read)-then-write instead
of unconditional write. For example, use:”…””}”(hjš  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÚhŸh³h Kžhj–  ubj  )”}”(hŒ)if (!test_bit(XXX))
        set_bit(XXX);”h]”hŒ)if (!test_bit(XXX))
        set_bit(XXX);”…””}”hj¨  sbah}”(h]”h ]”h"]”h$]”h&]”h±h²uh1j  hŸh³h K¢hj–  ubhÛ)”}”(hŒBinstead of directly "set_bit(XXX);", similarly for atomic_t data::”h]”hŒEinstead of directly â€œset_bit(XXX);â€, similarly for atomic_t data:”…””}”(hj¶  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÚhŸh³h K¥hj–  ubj  )”}”(hŒ:if (atomic_read(XXX) == AAA)
        atomic_set(XXX, BBB);”h]”hŒ:if (atomic_read(XXX) == AAA)
        atomic_set(XXX, BBB);”…””}”hjÄ  sbah}”(h]”h ]”h"]”h$]”h&]”h±h²uh1j  hŸh³h K§hj–  ubjo  )”}”(hhh]”(jt  )”}”(hŒdCommit 7b1002f7cfe5 ("bcache: fixup bcache_dev_sectors_dirty_add() multithreaded CPU false sharing")”h]”hÛ)”}”(hj×  h]”hŒhCommit 7b1002f7cfe5 (â€œbcache: fixup bcache_dev_sectors_dirty_add() multithreaded CPU false sharingâ€)”…””}”(hjÙ  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÚhŸh³h KªhjÕ  ubah}”(h]”h ]”h"]”h$]”h&]”uh1js  hjÒ  ubjt  )”}”(hŒ@Commit 292648ac5cf1 ("mm: gup: allow FOLL_PIN to scale in SMP")
”h]”hÛ)”}”(hŒ?Commit 292648ac5cf1 ("mm: gup: allow FOLL_PIN to scale in SMP")”h]”hŒCCommit 292648ac5cf1 (â€œmm: gup: allow FOLL_PIN to scale in SMPâ€)”…””}”(hjð  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÚhŸh³h K«hjì  ubah}”(h]”h ]”h"]”h$]”h&]”uh1js  hjÒ  ubeh}”(h]”h ]”h"]”h$]”h&]”jª  jV  uh1jn  hŸh³h Kªhj–  ubeh}”(h]”h ]”h"]”h$]”h&]”uh1js  hj   hžhhŸNh Nubjt  )”}”(hX  Turn hot global data to 'per-cpu data + global data' when possible,
or reasonably increase the threshold for syncing per-cpu data to
global data, to reduce or postpone the 'write' to that global data.

- Commit 520f897a3554 ("ext4: use percpu_counters for extent_status cache hits/misses")
- Commit 56f3547bfa4d ("mm: adjust vm_committed_as_batch according to vm overcommit policy")
”h]”(hÛ)”}”(hŒÈTurn hot global data to 'per-cpu data + global data' when possible,
or reasonably increase the threshold for syncing per-cpu data to
global data, to reduce or postpone the 'write' to that global data.”h]”hŒÐTurn hot global data to â€˜per-cpu data + global dataâ€™ when possible,
or reasonably increase the threshold for syncing per-cpu data to
global data, to reduce or postpone the â€˜writeâ€™ to that global data.”…””}”(hj  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÚhŸh³h K­hj  ubjo  )”}”(hhh]”(jt  )”}”(hŒUCommit 520f897a3554 ("ext4: use percpu_counters for extent_status cache hits/misses")”h]”hÛ)”}”(hj'  h]”hŒYCommit 520f897a3554 (â€œext4: use percpu_counters for extent_status cache hits/missesâ€)”…””}”(hj)  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÚhŸh³h K±hj%  ubah}”(h]”h ]”h"]”h$]”h&]”uh1js  hj"  ubjt  )”}”(hŒ[Commit 56f3547bfa4d ("mm: adjust vm_committed_as_batch according to vm overcommit policy")
”h]”hÛ)”}”(hŒZCommit 56f3547bfa4d ("mm: adjust vm_committed_as_batch according to vm overcommit policy")”h]”hŒ^Commit 56f3547bfa4d (â€œmm: adjust vm_committed_as_batch according to vm overcommit policyâ€)”…””}”(hj@  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÚhŸh³h K²hj<  ubah}”(h]”h ]”h"]”h$]”h&]”uh1js  hj"  ubeh}”(h]”h ]”h"]”h$]”h&]”jª  jV  uh1jn  hŸh³h K±hj  ubeh}”(h]”h ]”h"]”h$]”h&]”uh1js  hj   hžhhŸNh Nubeh}”(h]”h ]”h"]”h$]”h&]”jª  j«  uh1jn  hŸh³h K’hjó  hžhubhÛ)”}”(hŒSurely, all mitigations should be carefully verified to not cause side
effects.  To avoid introducing false sharing when coding, it's better
to:”h]”hŒ’Surely, all mitigations should be carefully verified to not cause side
effects.  To avoid introducing false sharing when coding, itâ€™s better
to:”…””}”(hjf  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÚhŸh³h K´hjó  hžhubjo  )”}”(hhh]”(jt  )”}”(hŒ!Be aware of cache line boundaries”h]”hÛ)”}”(hjy  h]”hŒ!Be aware of cache line boundaries”…””}”(hj{  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÚhŸh³h K¸hjw  ubah}”(h]”h ]”h"]”h$]”h&]”uh1js  hjt  hžhhŸh³h Nubjt  )”}”(hŒ&Group mostly read-only fields together”h]”hÛ)”}”(hj  h]”hŒ&Group mostly read-only fields together”…””}”(hj’  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÚhŸh³h K¹hjŽ  ubah}”(h]”h ]”h"]”h$]”h&]”uh1js  hjt  hžhhŸh³h Nubjt  )”}”(hŒ7Group things that are written at the same time together”h]”hÛ)”}”(hj§  h]”hŒ7Group things that are written at the same time together”…””}”(hj©  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÚhŸh³h Kºhj¥  ubah}”(h]”h ]”h"]”h$]”h&]”uh1js  hjt  hžhhŸh³h Nubjt  )”}”(hŒQSeparate frequently read and frequently written fields on
different cache lines.
”h]”hÛ)”}”(hŒPSeparate frequently read and frequently written fields on
different cache lines.”h]”hŒPSeparate frequently read and frequently written fields on
different cache lines.”…””}”(hjÀ  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÚhŸh³h K»hj¼  ubah}”(h]”h ]”h"]”h$]”h&]”uh1js  hjt  hžhhŸh³h Nubeh}”(h]”h ]”h"]”h$]”h&]”jª  j«  uh1jn  hŸh³h K¸hjó  hžhubhÛ)”}”(hŒAand better add a comment stating the false sharing consideration.”h]”hŒAand better add a comment stating the false sharing consideration.”…””}”(hjÚ  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÚhŸh³h K¾hjó  hžhubhÛ)”}”(hŒ®One note is, sometimes even after a severe false sharing is detected
and solved, the performance may still have no obvious improvement as
the hotspot switches to a new place.”h]”hŒ®One note is, sometimes even after a severe false sharing is detected
and solved, the performance may still have no obvious improvement as
the hotspot switches to a new place.”…””}”(hjè  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÚhŸh³h KÀhjó  hžhubeh}”(h]”Œpossible-mitigations”ah ]”h"]”Œpossible mitigations”ah$]”h&]”uh1h´hh¶hžhhŸh³h K…ubhµ)”}”(hhh]”(hº)”}”(hŒMiscellaneous”h]”hŒMiscellaneous”…””}”(hj  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h¹hjþ  hžhhŸh³h KÆubhÛ)”}”(hŒ§One open issue is that the kernel has an optional data structure
randomization mechanism, which also randomizes the situation of cache
line sharing among data members.”h]”hŒ§One open issue is that the kernel has an optional data structure
randomization mechanism, which also randomizes the situation of cache
line sharing among data members.”…””}”(hj  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1hÚhŸh³h KÇhjþ  hžhubhŒfootnote”“”)”}”(hŒ+https://en.wikipedia.org/wiki/False_sharing”h]”(hŒlabel”“”)”}”(hŒ1”h]”hŒ1”…””}”(hj%  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1j#  hj  ubhÛ)”}”(hj!  h]”hŒ	reference”“”)”}”(hj!  h]”hŒ+https://en.wikipedia.org/wiki/False_sharing”…””}”(hj8  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”Œrefuri”j!  uh1j6  hj3  ubah}”(h]”h ]”h"]”h$]”h&]”uh1hÚhŸh³h KÌhj  ubeh}”(h]”höah ]”h"]”Œ1”ah$]”h&]”hðah÷høuh1j  hŸh³h KÌhjþ  hžhhùKubj  )”}”(hŒ`https://lore.kernel.org/lkml/CAHk-=whoqV=cX5VC80mmR9rr+Z+yQ6fiQZm36Fb-izsanHg23w@mail.gmail.com/”h]”(j$  )”}”(hŒ2”h]”hŒ2”…””}”(hjW  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1j#  hjS  ubhÛ)”}”(hjU  h]”j7  )”}”(hjU  h]”hŒ`https://lore.kernel.org/lkml/CAHk-=whoqV=cX5VC80mmR9rr+Z+yQ6fiQZm36Fb-izsanHg23w@mail.gmail.com/”…””}”(hjh  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”Œrefuri”jU  uh1j6  hje  ubah}”(h]”h ]”h"]”h$]”h&]”uh1hÚhŸh³h KÍhjS  ubeh}”(h]”jU  ah ]”h"]”Œ2”ah$]”h&]”jP  ah÷høuh1j  hŸh³h KÍhjþ  hžhhùKubj  )”}”(hŒ4https://joemario.github.io/blog/2016/09/01/c2c-blog/”h]”(j$  )”}”(hŒ3”h]”hŒ3”…””}”(hj‡  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1j#  hjƒ  ubhÛ)”}”(hj…  h]”j7  )”}”(hj…  h]”hŒ4https://joemario.github.io/blog/2016/09/01/c2c-blog/”…””}”(hj˜  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”Œrefuri”j…  uh1j6  hj•  ubah}”(h]”h ]”h"]”h$]”h&]”uh1hÚhŸh³h KÎhjƒ  ubeh}”(h]”jÒ  ah ]”h"]”Œ3”ah$]”h&]”jÍ  ah÷høuh1j  hŸh³h KÎhjþ  hžhhùKubeh}”(h]”Œmiscellaneous”ah ]”h"]”Œmiscellaneous”ah$]”h&]”uh1h´hh¶hžhhŸh³h KÆubeh}”(h]”Œfalse-sharing”ah ]”h"]”Œfalse sharing”ah$]”h&]”uh1h´hhhžhhŸh³h Kubeh}”(h]”h ]”h"]”h$]”h&]”Œsource”h³uh1hŒcurrent_source”NŒcurrent_line”NŒsettings”Œdocutils.frontend”ŒValues”“”)”}”(h¹NŒ	generator”NŒ	datestamp”NŒsource_link”NŒ
source_url”NŒtoc_backlinks”Œentry”Œfootnote_backlinks”KŒsectnum_xform”KŒstrip_comments”NŒstrip_elements_with_classes”NŒstrip_classes”NŒreport_level”KŒ
halt_level”KŒexit_status_level”KŒdebug”NŒwarning_stream”NŒ	traceback”ˆŒinput_encoding”Œ	utf-8-sig”Œinput_encoding_error_handler”Œstrict”Œoutput_encoding”Œutf-8”Œoutput_encoding_error_handler”jæ  Œerror_encoding”Œutf-8”Œerror_encoding_error_handler”Œbackslashreplace”Œlanguage_code”Œen”Œrecord_dependencies”NŒconfig”NŒ	id_prefix”hŒauto_id_prefix”Œid”Œdump_settings”NŒdump_internals”NŒdump_transforms”NŒdump_pseudo_xml”NŒexpose_internals”NŒstrict_visitor”NŒ_disable_config”NŒ_source”h³Œ_destination”NŒ_config_files”]”Œ7/var/lib/git/docbuild/linux/Documentation/docutils.conf”aŒfile_insertion_enabled”ˆŒraw_enabled”KŒline_length_limit”M'Œpep_references”NŒpep_base_url”Œhttps://peps.python.org/”Œpep_file_url_template”Œpep-%04d”Œrfc_references”NŒrfc_base_url”Œ&https://datatracker.ietf.org/doc/html/”Œ	tab_width”KŒtrim_footnote_reference_space”‰Œsyntax_highlight”Œlong”Œsmart_quotes”ˆŒsmartquotes_locales”]”Œcharacter_level_inline_markup”‰Œdoctitle_xform”‰Œdocinfo_xform”KŒsectsubtitle_xform”‰Œimage_loading”Œlink”Œembed_stylesheet”‰Œcloak_email_addresses”ˆŒsection_self_link”‰Œenv”NubŒreporter”NŒindirect_targets”]”Œsubstitution_defs”}”Œsubstitution_names”}”Œrefnames”}”(Œ1”]”hæaŒ2”]”jF  aŒ3”]”jÃ  auŒrefids”}”Œnameids”}”(jÀ  j½  j¿  j¼  ja  j^  jð  jí  jû  jø  j¸  jµ  jP  höj€  jU  j°  jÒ  uŒ	nametypes”}”(jÀ  ‰j¿  ‰ja  ‰jð  ‰jû  ‰j¸  ‰jP  ˆj€  ˆj°  ˆuh}”(j½  h¶j¼  hÉhðhæjP  jF  j^  jÂ  jí  jd  jÍ  jÃ  jø  jó  jµ  jþ  höj  jU  jS  jÒ  jƒ  uŒfootnote_refs”}”(j&  ]”hæaj(  ]”jF  aj*  ]”jÃ  auŒcitation_refs”}”Œautofootnotes”]”Œautofootnote_refs”]”Œsymbol_footnotes”]”Œsymbol_footnote_refs”]”Œ	footnotes”]”(j  jS  jƒ  eŒ	citations”]”Œautofootnote_start”KŒsymbol_footnote_start”K Œ
id_counter”Œcollections”ŒCounter”“”}”jô  Ks…”R”Œparse_messages”]”Œtransform_messages”]”Œtransformer”NŒinclude_log”]”Œ
decoration”Nhžhub.