€•6w      Œsphinx.addnodes”Œdocument”“”)”}”(Œ	rawsource”Œ ”Œchildren”]”(Œtranslations”ŒLanguagesNode”“”)”}”(hhh]”(h Œpending_xref”“”)”}”(hhh]”Œdocutils.nodes”ŒText”“”ŒChinese (Simplified)”…””}”Œparent”hsbaŒ
attributes”}”(Œids”]”Œclasses”]”Œnames”]”Œdupnames”]”Œbackrefs”]”Œ	refdomain”Œstd”Œreftype”Œdoc”Œ	reftarget”Œ#/translations/zh_CN/arch/arm/vlocks”Œmodname”NŒ	classname”NŒrefexplicit”ˆuŒtagname”hhhubh)”}”(hhh]”hŒChinese (Traditional)”…””}”hh2sbah}”(h]”h ]”h"]”h$]”h&]”Œ	refdomain”h)Œreftype”h+Œ	reftarget”Œ#/translations/zh_TW/arch/arm/vlocks”Œmodname”NŒ	classname”NŒrefexplicit”ˆuh1hhhubh)”}”(hhh]”hŒItalian”…””}”hhFsbah}”(h]”h ]”h"]”h$]”h&]”Œ	refdomain”h)Œreftype”h+Œ	reftarget”Œ#/translations/it_IT/arch/arm/vlocks”Œmodname”NŒ	classname”NŒrefexplicit”ˆuh1hhhubh)”}”(hhh]”hŒJapanese”…””}”hhZsbah}”(h]”h ]”h"]”h$]”h&]”Œ	refdomain”h)Œreftype”h+Œ	reftarget”Œ#/translations/ja_JP/arch/arm/vlocks”Œmodname”NŒ	classname”NŒrefexplicit”ˆuh1hhhubh)”}”(hhh]”hŒKorean”…””}”hhnsbah}”(h]”h ]”h"]”h$]”h&]”Œ	refdomain”h)Œreftype”h+Œ	reftarget”Œ#/translations/ko_KR/arch/arm/vlocks”Œmodname”NŒ	classname”NŒrefexplicit”ˆuh1hhhubh)”}”(hhh]”hŒSpanish”…””}”hh‚sbah}”(h]”h ]”h"]”h$]”h&]”Œ	refdomain”h)Œreftype”h+Œ	reftarget”Œ#/translations/sp_SP/arch/arm/vlocks”Œmodname”NŒ	classname”NŒrefexplicit”ˆuh1hhhubeh}”(h]”h ]”h"]”h$]”h&]”Œcurrent_language”ŒEnglish”uh1h
hhŒ	_document”hŒsource”NŒline”NubhŒsection”“”)”}”(hhh]”(hŒtitle”“”)”}”(hŒ&vlocks for Bare-Metal Mutual Exclusion”h]”hŒ&vlocks for Bare-Metal Mutual Exclusion”…””}”(hh¨hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h¦hh£hžhhŸŒ=/var/lib/git/docbuild/linux/Documentation/arch/arm/vlocks.rst”h KubhŒ	paragraph”“”)”}”(hŒVoting Locks, or "vlocks" provide a simple low-level mutual exclusion
mechanism, with reasonable but minimal requirements on the memory
system.”h]”hŒ“Voting Locks, or â€œvlocksâ€ provide a simple low-level mutual exclusion
mechanism, with reasonable but minimal requirements on the memory
system.”…””}”(hh¹hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h·hŸh¶h Khh£hžhubh¸)”}”(hŒÞThese are intended to be used to coordinate critical activity among CPUs
which are otherwise non-coherent, in situations where the hardware
provides no other mechanism to support this and ordinary spinlocks
cannot be used.”h]”hŒÞThese are intended to be used to coordinate critical activity among CPUs
which are otherwise non-coherent, in situations where the hardware
provides no other mechanism to support this and ordinary spinlocks
cannot be used.”…””}”(hhÇhžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h·hŸh¶h K	hh£hžhubh¸)”}”(hX2  vlocks make use of the atomicity provided by the memory system for
writes to a single memory location.  To arbitrate, every CPU "votes for
itself", by storing a unique number to a common memory location.  The
final value seen in that memory location when all the votes have been
cast identifies the winner.”h]”hX6  vlocks make use of the atomicity provided by the memory system for
writes to a single memory location.  To arbitrate, every CPU â€œvotes for
itselfâ€, by storing a unique number to a common memory location.  The
final value seen in that memory location when all the votes have been
cast identifies the winner.”…””}”(hhÕhžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h·hŸh¶h Khh£hžhubh¸)”}”(hŒßIn order to make sure that the election produces an unambiguous result
in finite time, a CPU will only enter the election in the first place if
no winner has been chosen and the election does not appear to have
started yet.”h]”hŒßIn order to make sure that the election produces an unambiguous result
in finite time, a CPU will only enter the election in the first place if
no winner has been chosen and the election does not appear to have
started yet.”…””}”(hhãhžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h·hŸh¶h Khh£hžhubh¢)”}”(hhh]”(h§)”}”(hŒ	Algorithm”h]”hŒ	Algorithm”…””}”(hhôhžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h¦hhñhžhhŸh¶h Kubh¸)”}”(hŒJThe easiest way to explain the vlocks algorithm is with some pseudo-code::”h]”hŒIThe easiest way to explain the vlocks algorithm is with some pseudo-code:”…””}”(hj  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h·hŸh¶h KhhñhžhubhŒliteral_block”“”)”}”(hXU  int currently_voting[NR_CPUS] = { 0, };
int last_vote = -1; /* no votes yet */

bool vlock_trylock(int this_cpu)
{
        /* signal our desire to vote */
        currently_voting[this_cpu] = 1;
        if (last_vote != -1) {
                /* someone already volunteered himself */
                currently_voting[this_cpu] = 0;
                return false; /* not ourself */
        }

        /* let's suggest ourself */
        last_vote = this_cpu;
        currently_voting[this_cpu] = 0;

        /* then wait until everyone else is done voting */
        for_each_cpu(i) {
                while (currently_voting[i] != 0)
                        /* wait */;
        }

        /* result */
        if (last_vote == this_cpu)
                return true; /* we won */
        return false;
}

bool vlock_unlock(void)
{
        last_vote = -1;
}”h]”hXU  int currently_voting[NR_CPUS] = { 0, };
int last_vote = -1; /* no votes yet */

bool vlock_trylock(int this_cpu)
{
        /* signal our desire to vote */
        currently_voting[this_cpu] = 1;
        if (last_vote != -1) {
                /* someone already volunteered himself */
                currently_voting[this_cpu] = 0;
                return false; /* not ourself */
        }

        /* let's suggest ourself */
        last_vote = this_cpu;
        currently_voting[this_cpu] = 0;

        /* then wait until everyone else is done voting */
        for_each_cpu(i) {
                while (currently_voting[i] != 0)
                        /* wait */;
        }

        /* result */
        if (last_vote == this_cpu)
                return true; /* we won */
        return false;
}

bool vlock_unlock(void)
{
        last_vote = -1;
}”…””}”hj  sbah}”(h]”h ]”h"]”h$]”h&]”Œ	xml:space”Œpreserve”uh1j  hŸh¶h K!hhñhžhubh¸)”}”(hŒ¿The currently_voting[] array provides a way for the CPUs to determine
whether an election is in progress, and plays a role analogous to the
"entering" array in Lamport's bakery algorithm [1].”h]”hŒÅThe currently_voting[] array provides a way for the CPUs to determine
whether an election is in progress, and plays a role analogous to the
â€œenteringâ€ array in Lamportâ€™s bakery algorithm [1].”…””}”(hj"  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h·hŸh¶h KDhhñhžhubh¸)”}”(hŒÚHowever, once the election has started, the underlying memory system
atomicity is used to pick the winner.  This avoids the need for a static
priority rule to act as a tie-breaker, or any counters which could
overflow.”h]”hŒÚHowever, once the election has started, the underlying memory system
atomicity is used to pick the winner.  This avoids the need for a static
priority rule to act as a tie-breaker, or any counters which could
overflow.”…””}”(hj0  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h·hŸh¶h KHhhñhžhubh¸)”}”(hŒ©As long as the last_vote variable is globally visible to all CPUs, it
will contain only one value that won't change once every CPU has cleared
its currently_voting flag.”h]”hŒ«As long as the last_vote variable is globally visible to all CPUs, it
will contain only one value that wonâ€™t change once every CPU has cleared
its currently_voting flag.”…””}”(hj>  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h·hŸh¶h KMhhñhžhubeh}”(h]”Œ	algorithm”ah ]”h"]”Œ	algorithm”ah$]”h&]”uh1h¡hh£hžhhŸh¶h Kubh¢)”}”(hhh]”(h§)”}”(hŒFeatures and limitations”h]”hŒFeatures and limitations”…””}”(hjW  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h¦hjT  hžhhŸh¶h KSubhŒblock_quote”“”)”}”(hX	  * vlocks are not intended to be fair.  In the contended case, it is the
  _last_ CPU which attempts to get the lock which will be most likely
  to win.

  vlocks are therefore best suited to situations where it is necessary
  to pick a unique winner, but it does not matter which CPU actually
  wins.

* Like other similar mechanisms, vlocks will not scale well to a large
  number of CPUs.

  vlocks can be cascaded in a voting hierarchy to permit better scaling
  if necessary, as in the following hypothetical example for 4096 CPUs::

       /* first level: local election */
       my_town = towns[(this_cpu >> 4) & 0xf];
       I_won = vlock_trylock(my_town, this_cpu & 0xf);
       if (I_won) {
               /* we won the town election, let's go for the state */
               my_state = states[(this_cpu >> 8) & 0xf];
               I_won = vlock_lock(my_state, this_cpu & 0xf));
               if (I_won) {
                       /* and so on */
                       I_won = vlock_lock(the_whole_country, this_cpu & 0xf];
                       if (I_won) {
                               /* ... */
                       }
                       vlock_unlock(the_whole_country);
               }
               vlock_unlock(my_state);
       }
       vlock_unlock(my_town);

”h]”hŒbullet_list”“”)”}”(hhh]”(hŒ	list_item”“”)”}”(hX!  vlocks are not intended to be fair.  In the contended case, it is the
_last_ CPU which attempts to get the lock which will be most likely
to win.

vlocks are therefore best suited to situations where it is necessary
to pick a unique winner, but it does not matter which CPU actually
wins.
”h]”(h¸)”}”(hŒ‘vlocks are not intended to be fair.  In the contended case, it is the
_last_ CPU which attempts to get the lock which will be most likely
to win.”h]”hŒ‘vlocks are not intended to be fair.  In the contended case, it is the
_last_ CPU which attempts to get the lock which will be most likely
to win.”…””}”(hjv  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h·hŸh¶h KUhjr  ubh¸)”}”(hŒvlocks are therefore best suited to situations where it is necessary
to pick a unique winner, but it does not matter which CPU actually
wins.”h]”hŒvlocks are therefore best suited to situations where it is necessary
to pick a unique winner, but it does not matter which CPU actually
wins.”…””}”(hj„  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h·hŸh¶h KYhjr  ubeh}”(h]”h ]”h"]”h$]”h&]”uh1jp  hjm  ubjq  )”}”(hX¯  Like other similar mechanisms, vlocks will not scale well to a large
number of CPUs.

vlocks can be cascaded in a voting hierarchy to permit better scaling
if necessary, as in the following hypothetical example for 4096 CPUs::

     /* first level: local election */
     my_town = towns[(this_cpu >> 4) & 0xf];
     I_won = vlock_trylock(my_town, this_cpu & 0xf);
     if (I_won) {
             /* we won the town election, let's go for the state */
             my_state = states[(this_cpu >> 8) & 0xf];
             I_won = vlock_lock(my_state, this_cpu & 0xf));
             if (I_won) {
                     /* and so on */
                     I_won = vlock_lock(the_whole_country, this_cpu & 0xf];
                     if (I_won) {
                             /* ... */
                     }
                     vlock_unlock(the_whole_country);
             }
             vlock_unlock(my_state);
     }
     vlock_unlock(my_town);

”h]”(h¸)”}”(hŒTLike other similar mechanisms, vlocks will not scale well to a large
number of CPUs.”h]”hŒTLike other similar mechanisms, vlocks will not scale well to a large
number of CPUs.”…””}”(hjœ  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h·hŸh¶h K]hj˜  ubh¸)”}”(hŒŒvlocks can be cascaded in a voting hierarchy to permit better scaling
if necessary, as in the following hypothetical example for 4096 CPUs::”h]”hŒ‹vlocks can be cascaded in a voting hierarchy to permit better scaling
if necessary, as in the following hypothetical example for 4096 CPUs:”…””}”(hjª  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h·hŸh¶h K`hj˜  ubj  )”}”(hXo  /* first level: local election */
my_town = towns[(this_cpu >> 4) & 0xf];
I_won = vlock_trylock(my_town, this_cpu & 0xf);
if (I_won) {
        /* we won the town election, let's go for the state */
        my_state = states[(this_cpu >> 8) & 0xf];
        I_won = vlock_lock(my_state, this_cpu & 0xf));
        if (I_won) {
                /* and so on */
                I_won = vlock_lock(the_whole_country, this_cpu & 0xf];
                if (I_won) {
                        /* ... */
                }
                vlock_unlock(the_whole_country);
        }
        vlock_unlock(my_state);
}
vlock_unlock(my_town);”h]”hXo  /* first level: local election */
my_town = towns[(this_cpu >> 4) & 0xf];
I_won = vlock_trylock(my_town, this_cpu & 0xf);
if (I_won) {
        /* we won the town election, let's go for the state */
        my_state = states[(this_cpu >> 8) & 0xf];
        I_won = vlock_lock(my_state, this_cpu & 0xf));
        if (I_won) {
                /* and so on */
                I_won = vlock_lock(the_whole_country, this_cpu & 0xf];
                if (I_won) {
                        /* ... */
                }
                vlock_unlock(the_whole_country);
        }
        vlock_unlock(my_state);
}
vlock_unlock(my_town);”…””}”hj¸  sbah}”(h]”h ]”h"]”h$]”h&]”j   j!  uh1j  hŸh¶h Kchj˜  ubeh}”(h]”h ]”h"]”h$]”h&]”uh1jp  hjm  ubeh}”(h]”h ]”h"]”h$]”h&]”Œbullet”Œ*”uh1jk  hŸh¶h KUhjg  ubah}”(h]”h ]”h"]”h$]”h&]”uh1je  hŸh¶h KUhjT  hžhubeh}”(h]”Œfeatures-and-limitations”ah ]”h"]”Œfeatures and limitations”ah$]”h&]”uh1h¡hh£hžhhŸh¶h KSubh¢)”}”(hhh]”(h§)”}”(hŒARM implementation”h]”hŒARM implementation”…””}”(hjå  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h¦hjâ  hžhhŸh¶h Kxubh¸)”}”(hŒZThe current ARM implementation [2] contains some optimisations beyond
the basic algorithm:”h]”hŒZThe current ARM implementation [2] contains some optimisations beyond
the basic algorithm:”…””}”(hjó  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h·hŸh¶h Kzhjâ  hžhubjf  )”}”(hXÉ	  * By packing the members of the currently_voting array close together,
  we can read the whole array in one transaction (providing the number
  of CPUs potentially contending the lock is small enough).  This
  reduces the number of round-trips required to external memory.

  In the ARM implementation, this means that we can use a single load
  and comparison::

       LDR     Rt, [Rn]
       CMP     Rt, #0

  ...in place of code equivalent to::

       LDRB    Rt, [Rn]
       CMP     Rt, #0
       LDRBEQ  Rt, [Rn, #1]
       CMPEQ   Rt, #0
       LDRBEQ  Rt, [Rn, #2]
       CMPEQ   Rt, #0
       LDRBEQ  Rt, [Rn, #3]
       CMPEQ   Rt, #0

  This cuts down on the fast-path latency, as well as potentially
  reducing bus contention in contended cases.

  The optimisation relies on the fact that the ARM memory system
  guarantees coherency between overlapping memory accesses of
  different sizes, similarly to many other architectures.  Note that
  we do not care which element of currently_voting appears in which
  bits of Rt, so there is no need to worry about endianness in this
  optimisation.

  If there are too many CPUs to read the currently_voting array in
  one transaction then multiple transactions are still required.  The
  implementation uses a simple loop of word-sized loads for this
  case.  The number of transactions is still fewer than would be
  required if bytes were loaded individually.


  In principle, we could aggregate further by using LDRD or LDM, but
  to keep the code simple this was not attempted in the initial
  implementation.


* vlocks are currently only used to coordinate between CPUs which are
  unable to enable their caches yet.  This means that the
  implementation removes many of the barriers which would be required
  when executing the algorithm in cached memory.

  packing of the currently_voting array does not work with cached
  memory unless all CPUs contending the lock are cache-coherent, due
  to cache writebacks from one CPU clobbering values written by other
  CPUs.  (Though if all the CPUs are cache-coherent, you should be
  probably be using proper spinlocks instead anyway).


* The "no votes yet" value used for the last_vote variable is 0 (not
  -1 as in the pseudocode).  This allows statically-allocated vlocks
  to be implicitly initialised to an unlocked state simply by putting
  them in .bss.

  An offset is added to each CPU's ID for the purpose of setting this
  variable, so that no CPU uses the value 0 for its ID.

”h]”jl  )”}”(hhh]”(jq  )”}”(hXæ  By packing the members of the currently_voting array close together,
we can read the whole array in one transaction (providing the number
of CPUs potentially contending the lock is small enough).  This
reduces the number of round-trips required to external memory.

In the ARM implementation, this means that we can use a single load
and comparison::

     LDR     Rt, [Rn]
     CMP     Rt, #0

...in place of code equivalent to::

     LDRB    Rt, [Rn]
     CMP     Rt, #0
     LDRBEQ  Rt, [Rn, #1]
     CMPEQ   Rt, #0
     LDRBEQ  Rt, [Rn, #2]
     CMPEQ   Rt, #0
     LDRBEQ  Rt, [Rn, #3]
     CMPEQ   Rt, #0

This cuts down on the fast-path latency, as well as potentially
reducing bus contention in contended cases.

The optimisation relies on the fact that the ARM memory system
guarantees coherency between overlapping memory accesses of
different sizes, similarly to many other architectures.  Note that
we do not care which element of currently_voting appears in which
bits of Rt, so there is no need to worry about endianness in this
optimisation.

If there are too many CPUs to read the currently_voting array in
one transaction then multiple transactions are still required.  The
implementation uses a simple loop of word-sized loads for this
case.  The number of transactions is still fewer than would be
required if bytes were loaded individually.


In principle, we could aggregate further by using LDRD or LDM, but
to keep the code simple this was not attempted in the initial
implementation.

”h]”(h¸)”}”(hX  By packing the members of the currently_voting array close together,
we can read the whole array in one transaction (providing the number
of CPUs potentially contending the lock is small enough).  This
reduces the number of round-trips required to external memory.”h]”hX  By packing the members of the currently_voting array close together,
we can read the whole array in one transaction (providing the number
of CPUs potentially contending the lock is small enough).  This
reduces the number of round-trips required to external memory.”…””}”(hj  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h·hŸh¶h K}hj  ubh¸)”}”(hŒTIn the ARM implementation, this means that we can use a single load
and comparison::”h]”hŒSIn the ARM implementation, this means that we can use a single load
and comparison:”…””}”(hj  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h·hŸh¶h K‚hj  ubj  )”}”(hŒLDR     Rt, [Rn]
CMP     Rt, #0”h]”hŒLDR     Rt, [Rn]
CMP     Rt, #0”…””}”hj(  sbah}”(h]”h ]”h"]”h$]”h&]”j   j!  uh1j  hŸh¶h K…hj  ubh¸)”}”(hŒ#...in place of code equivalent to::”h]”hŒ"...in place of code equivalent to:”…””}”(hj6  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h·hŸh¶h Kˆhj  ubj  )”}”(hŒ‹LDRB    Rt, [Rn]
CMP     Rt, #0
LDRBEQ  Rt, [Rn, #1]
CMPEQ   Rt, #0
LDRBEQ  Rt, [Rn, #2]
CMPEQ   Rt, #0
LDRBEQ  Rt, [Rn, #3]
CMPEQ   Rt, #0”h]”hŒ‹LDRB    Rt, [Rn]
CMP     Rt, #0
LDRBEQ  Rt, [Rn, #1]
CMPEQ   Rt, #0
LDRBEQ  Rt, [Rn, #2]
CMPEQ   Rt, #0
LDRBEQ  Rt, [Rn, #3]
CMPEQ   Rt, #0”…””}”hjD  sbah}”(h]”h ]”h"]”h$]”h&]”j   j!  uh1j  hŸh¶h KŠhj  ubh¸)”}”(hŒkThis cuts down on the fast-path latency, as well as potentially
reducing bus contention in contended cases.”h]”hŒkThis cuts down on the fast-path latency, as well as potentially
reducing bus contention in contended cases.”…””}”(hjR  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h·hŸh¶h K“hj  ubh¸)”}”(hXO  The optimisation relies on the fact that the ARM memory system
guarantees coherency between overlapping memory accesses of
different sizes, similarly to many other architectures.  Note that
we do not care which element of currently_voting appears in which
bits of Rt, so there is no need to worry about endianness in this
optimisation.”h]”hXO  The optimisation relies on the fact that the ARM memory system
guarantees coherency between overlapping memory accesses of
different sizes, similarly to many other architectures.  Note that
we do not care which element of currently_voting appears in which
bits of Rt, so there is no need to worry about endianness in this
optimisation.”…””}”(hj`  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h·hŸh¶h K–hj  ubh¸)”}”(hX.  If there are too many CPUs to read the currently_voting array in
one transaction then multiple transactions are still required.  The
implementation uses a simple loop of word-sized loads for this
case.  The number of transactions is still fewer than would be
required if bytes were loaded individually.”h]”hX.  If there are too many CPUs to read the currently_voting array in
one transaction then multiple transactions are still required.  The
implementation uses a simple loop of word-sized loads for this
case.  The number of transactions is still fewer than would be
required if bytes were loaded individually.”…””}”(hjn  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h·hŸh¶h Khj  ubh¸)”}”(hŒIn principle, we could aggregate further by using LDRD or LDM, but
to keep the code simple this was not attempted in the initial
implementation.”h]”hŒIn principle, we could aggregate further by using LDRD or LDM, but
to keep the code simple this was not attempted in the initial
implementation.”…””}”(hj|  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h·hŸh¶h K¤hj  ubeh}”(h]”h ]”h"]”h$]”h&]”uh1jp  hj  ubjq  )”}”(hX-  vlocks are currently only used to coordinate between CPUs which are
unable to enable their caches yet.  This means that the
implementation removes many of the barriers which would be required
when executing the algorithm in cached memory.

packing of the currently_voting array does not work with cached
memory unless all CPUs contending the lock are cache-coherent, due
to cache writebacks from one CPU clobbering values written by other
CPUs.  (Though if all the CPUs are cache-coherent, you should be
probably be using proper spinlocks instead anyway).

”h]”(h¸)”}”(hŒîvlocks are currently only used to coordinate between CPUs which are
unable to enable their caches yet.  This means that the
implementation removes many of the barriers which would be required
when executing the algorithm in cached memory.”h]”hŒîvlocks are currently only used to coordinate between CPUs which are
unable to enable their caches yet.  This means that the
implementation removes many of the barriers which would be required
when executing the algorithm in cached memory.”…””}”(hj”  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h·hŸh¶h K©hj  ubh¸)”}”(hX;  packing of the currently_voting array does not work with cached
memory unless all CPUs contending the lock are cache-coherent, due
to cache writebacks from one CPU clobbering values written by other
CPUs.  (Though if all the CPUs are cache-coherent, you should be
probably be using proper spinlocks instead anyway).”h]”hX;  packing of the currently_voting array does not work with cached
memory unless all CPUs contending the lock are cache-coherent, due
to cache writebacks from one CPU clobbering values written by other
CPUs.  (Though if all the CPUs are cache-coherent, you should be
probably be using proper spinlocks instead anyway).”…””}”(hj¢  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h·hŸh¶h K®hj  ubeh}”(h]”h ]”h"]”h$]”h&]”uh1jp  hj  ubjq  )”}”(hXT  The "no votes yet" value used for the last_vote variable is 0 (not
-1 as in the pseudocode).  This allows statically-allocated vlocks
to be implicitly initialised to an unlocked state simply by putting
them in .bss.

An offset is added to each CPU's ID for the purpose of setting this
variable, so that no CPU uses the value 0 for its ID.

”h]”(h¸)”}”(hŒ×The "no votes yet" value used for the last_vote variable is 0 (not
-1 as in the pseudocode).  This allows statically-allocated vlocks
to be implicitly initialised to an unlocked state simply by putting
them in .bss.”h]”hŒÛThe â€œno votes yetâ€ value used for the last_vote variable is 0 (not
-1 as in the pseudocode).  This allows statically-allocated vlocks
to be implicitly initialised to an unlocked state simply by putting
them in .bss.”…””}”(hjº  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h·hŸh¶h Kµhj¶  ubh¸)”}”(hŒyAn offset is added to each CPU's ID for the purpose of setting this
variable, so that no CPU uses the value 0 for its ID.”h]”hŒ{An offset is added to each CPUâ€™s ID for the purpose of setting this
variable, so that no CPU uses the value 0 for its ID.”…””}”(hjÈ  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h·hŸh¶h Kºhj¶  ubeh}”(h]”h ]”h"]”h$]”h&]”uh1jp  hj  ubeh}”(h]”h ]”h"]”h$]”h&]”jÒ  jÓ  uh1jk  hŸh¶h K}hj  ubah}”(h]”h ]”h"]”h$]”h&]”uh1je  hŸh¶h K}hjâ  hžhubeh}”(h]”Œarm-implementation”ah ]”h"]”Œarm implementation”ah$]”h&]”uh1h¡hh£hžhhŸh¶h Kxubh¢)”}”(hhh]”(h§)”}”(hŒColophon”h]”hŒColophon”…””}”(hjó  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h¦hjð  hžhhŸh¶h K¿ubh¸)”}”(hX.  Originally created and documented by Dave Martin for Linaro Limited, for
use in ARM-based big.LITTLE platforms, with review and input gratefully
received from Nicolas Pitre and Achin Gupta.  Thanks to Nicolas for
grabbing most of this text out of the relevant mail thread and writing
up the pseudocode.”h]”hX.  Originally created and documented by Dave Martin for Linaro Limited, for
use in ARM-based big.LITTLE platforms, with review and input gratefully
received from Nicolas Pitre and Achin Gupta.  Thanks to Nicolas for
grabbing most of this text out of the relevant mail thread and writing
up the pseudocode.”…””}”(hj  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h·hŸh¶h KÁhjð  hžhubh¸)”}”(hŒCopyright (C) 2012-2013  Linaro Limited
Distributed under the terms of Version 2 of the GNU General Public
License, as defined in linux/COPYING.”h]”hŒCopyright (C) 2012-2013  Linaro Limited
Distributed under the terms of Version 2 of the GNU General Public
License, as defined in linux/COPYING.”…””}”(hj  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h·hŸh¶h KÇhjð  hžhubeh}”(h]”Œcolophon”ah ]”h"]”Œcolophon”ah$]”h&]”uh1h¡hh£hžhhŸh¶h K¿ubh¢)”}”(hhh]”(h§)”}”(hŒ
References”h]”hŒ
References”…””}”(hj(  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h¦hj%  hžhhŸh¶h KÍubhŒdefinition_list”“”)”}”(hhh]”hŒdefinition_list_item”“”)”}”(hŒÃ[1] Lamport, L. "A New Solution of Dijkstra's Concurrent Programming
Problem", Communications of the ACM 17, 8 (August 1974), 453-455.

https://en.wikipedia.org/wiki/Lamport%27s_bakery_algorithm
”h]”(hŒterm”“”)”}”(hŒD[1] Lamport, L. "A New Solution of Dijkstra's Concurrent Programming”h]”hŒH[1] Lamport, L. â€œA New Solution of Dijkstraâ€™s Concurrent Programming”…””}”(hjC  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1jA  hŸh¶h KÒhj=  ubhŒ
definition”“”)”}”(hhh]”(h¸)”}”(hŒAProblem", Communications of the ACM 17, 8 (August 1974), 453-455.”h]”hŒCProblemâ€, Communications of the ACM 17, 8 (August 1974), 453-455.”…””}”(hjV  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h·hŸh¶h KÐhjS  ubh¸)”}”(hŒ:https://en.wikipedia.org/wiki/Lamport%27s_bakery_algorithm”h]”hŒ	reference”“”)”}”(hjf  h]”hŒ:https://en.wikipedia.org/wiki/Lamport%27s_bakery_algorithm”…””}”(hjj  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”Œrefuri”jf  uh1jh  hjd  ubah}”(h]”h ]”h"]”h$]”h&]”uh1h·hŸh¶h KÒhjS  ubeh}”(h]”h ]”h"]”h$]”h&]”uh1jQ  hj=  ubeh}”(h]”h ]”h"]”h$]”h&]”uh1j;  hŸh¶h KÒhj8  ubah}”(h]”h ]”h"]”h$]”h&]”uh1j6  hj%  hžhhŸh¶h Nubh¸)”}”(hŒ2[2] linux/arch/arm/common/vlock.S, www.kernel.org.”h]”hŒ2[2] linux/arch/arm/common/vlock.S, www.kernel.org.”…””}”(hj  hžhhŸNh Nubah}”(h]”h ]”h"]”h$]”h&]”uh1h·hŸh¶h KÔhj%  hžhubeh}”(h]”Œ
references”ah ]”h"]”Œ
references”ah$]”h&]”uh1h¡hh£hžhhŸh¶h KÍubeh}”(h]”Œ&vlocks-for-bare-metal-mutual-exclusion”ah ]”h"]”Œ&vlocks for bare-metal mutual exclusion”ah$]”h&]”uh1h¡hhhžhhŸh¶h Kubeh}”(h]”h ]”h"]”h$]”h&]”Œsource”h¶uh1hŒcurrent_source”NŒcurrent_line”NŒsettings”Œdocutils.frontend”ŒValues”“”)”}”(h¦NŒ	generator”NŒ	datestamp”NŒsource_link”NŒ
source_url”NŒtoc_backlinks”Œentry”Œfootnote_backlinks”KŒsectnum_xform”KŒstrip_comments”NŒstrip_elements_with_classes”NŒstrip_classes”NŒreport_level”KŒ
halt_level”KŒexit_status_level”KŒdebug”NŒwarning_stream”NŒ	traceback”ˆŒinput_encoding”Œ	utf-8-sig”Œinput_encoding_error_handler”Œstrict”Œoutput_encoding”Œutf-8”Œoutput_encoding_error_handler”jÑ  Œerror_encoding”Œutf-8”Œerror_encoding_error_handler”Œbackslashreplace”Œlanguage_code”Œen”Œrecord_dependencies”NŒconfig”NŒ	id_prefix”hŒauto_id_prefix”Œid”Œdump_settings”NŒdump_internals”NŒdump_transforms”NŒdump_pseudo_xml”NŒexpose_internals”NŒstrict_visitor”NŒ_disable_config”NŒ_source”h¶Œ_destination”NŒ_config_files”]”Œ7/var/lib/git/docbuild/linux/Documentation/docutils.conf”aŒfile_insertion_enabled”ˆŒraw_enabled”KŒline_length_limit”M'Œpep_references”NŒpep_base_url”Œhttps://peps.python.org/”Œpep_file_url_template”Œpep-%04d”Œrfc_references”NŒrfc_base_url”Œ&https://datatracker.ietf.org/doc/html/”Œ	tab_width”KŒtrim_footnote_reference_space”‰Œsyntax_highlight”Œlong”Œsmart_quotes”ˆŒsmartquotes_locales”]”Œcharacter_level_inline_markup”‰Œdoctitle_xform”‰Œdocinfo_xform”KŒsectsubtitle_xform”‰Œimage_loading”Œlink”Œembed_stylesheet”‰Œcloak_email_addresses”ˆŒsection_self_link”‰Œenv”NubŒreporter”NŒindirect_targets”]”Œsubstitution_defs”}”Œsubstitution_names”}”Œrefnames”}”Œrefids”}”Œnameids”}”(j«  j¨  jQ  jN  jß  jÜ  jí  jê  j"  j  j£  j   uŒ	nametypes”}”(j«  ‰jQ  ‰jß  ‰jí  ‰j"  ‰j£  ‰uh}”(j¨  h£jN  hñjÜ  jT  jê  jâ  j  jð  j   j%  uŒfootnote_refs”}”Œcitation_refs”}”Œautofootnotes”]”Œautofootnote_refs”]”Œsymbol_footnotes”]”Œsymbol_footnote_refs”]”Œ	footnotes”]”Œ	citations”]”Œautofootnote_start”KŒsymbol_footnote_start”K Œ
id_counter”Œcollections”ŒCounter”“”}”…”R”Œparse_messages”]”Œtransform_messages”]”Œtransformer”NŒinclude_log”]”Œ
decoration”Nhžhub.