core/num/int_bits.rs
//! Implementations for `uN::gather_bits` and `uN::scatter_bits`
//!
//! For the purposes of this implementation, the operations can be thought
//! of as operating on the input bits as a list, starting from the least
//! significant bit. Gathering is like a `Vec::retain` that deletes bits
//! where the mask has a zero. Scattering is the inverse: it reinserts the
//! zeros that gathering would delete.
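//!
//! For example, with the illustrative mask `0b0110_1010` (an arbitrary
//! value chosen for these docs), gathering keeps the input bits at mask
//! positions 1, 3, 5, and 6 and packs them into the low bits, while
//! scattering spreads them back out:
//! ```text
//! mask:                    0b0110_1010
//! gather(0b1101_0110)  ->  0b0000_1001   (bits at positions 6, 5, 3, 1)
//! scatter(0b0000_1001) ->  0b0100_0010
//! ```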
//!
//! Key observation: Each bit that is gathered/scattered needs to be
//! shifted by the count of mask zeros below the corresponding mask bit.
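//! For example, with the mask `0b0110_1010` above, the data bit at mask
//! position 6 has three zeros below it (positions 0, 2, and 4), so
//! gathering shifts it right by 3, from position 6 down to position 3.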
//!
//! With that in mind, the general idea is to decompose the operation into
//! a sequence of stages in `0..log2(BITS)`, where each stage shifts some
//! of the bits by `n = 1 << stage`. The masks for each stage are computed
//! via prefix counts of zeros in the mask.
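//! For `u64`, for instance, this gives `64.ilog2() == 6` stages, with
//! shifts of 1, 2, 4, 8, 16, and 32.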
//!
//! # Gathering
//!
//! Consider the input as a sequence of runs of data (bitstrings A,B,C,...),
//! split by fixed-width groups of zeros ('.'), initially at width `n = 1`.
//! Indexing each run by the number of zero groups below it, each stage
//! shifts the odd-indexed runs of data right by `n`, effectively swapping
//! them with the group of zeros just below. For the next stage, `n` is
//! doubled, as all the zeros are now paired.
//! ```text
//! .A.B.C.D.E.F.G.H
//! ..AB..CD..EF..GH
//! ....ABCD....EFGH
//! ........ABCDEFGH
//! ```
//! What makes this nontrivial is that the lengths of the bitstrings are not
//! all the same. Using lowercase letters for individual bits, the above might
//! look like
//! ```text
//! .a.bbb.ccccc.dd.e..g.hh
//! ..abbb..cccccdd..e..ghh
//! ....abbbcccccdd....eghh
//! ........abbbcccccddeghh
//! ```
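//!
//! Concretely, gathering on `u8` with the illustrative mask
//! `sparse = 0b0110_1010` runs through the stages as
//! ```text
//! x & sparse   0100_0010
//! stage n = 1  0010_0001
//! stage n = 2  0000_1001
//! stage n = 4  0000_1001   (this stage's mask selects no data bits)
//! ```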
//!
//! # Scattering
//!
//! For `scatter_bits`, the stages run in reverse. We start with a single run
//! of data in the low bits. Each stage then splits each run of data in two by
//! shifting part of it left by `n`, which is halved at each stage.
//! ```text
//! ........ABCDEFGH
//! ....ABCD....EFGH
//! ..AB..CD..EF..GH
//! .A.B.C.D.E.F.G.H
//! ```
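//!
//! Continuing the `u8` example with `sparse = 0b0110_1010`, scattering
//! retraces the same steps in reverse:
//! ```text
//! x            0000_1001
//! stage n = 4  0000_1001
//! stage n = 2  0010_0001
//! stage n = 1  0100_0010
//! & sparse     0100_0010
//! ```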
//!
//! # Stage masks
//!
//! To facilitate the shifts at each stage, we compute a mask that covers both
//! the bitstrings to shift and the zeros they shift into.
//! ```text
//! .A.B.C.D.E.F.G.H
//!  ##  ##  ##  ##
//! ..AB..CD..EF..GH
//!   ####    ####
//! ....ABCD....EFGH
//!     ########
//! ........ABCDEFGH
//! ```
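//!
//! For the illustrative mask `sparse = 0b0110_1010` on `u8`, `prepare`
//! below starts from `zeros = !sparse = 0b1001_0101` and computes
//! ```text
//! masks[0] = 0b0111_0011   (n = 1)
//! masks[1] = 0b0011_1110   (n = 2)
//! masks[2] = 0b1111_0000   (n = 4)
//! ```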

macro_rules! uint_impl {
    ($U:ident) => {
        pub(super) mod $U {
            const STAGES: usize = $U::BITS.ilog2() as usize;
            #[inline]
            const fn prepare(sparse: $U) -> [$U; STAGES] {
                // We'll start with `zeros` as a mask of the bits to be removed,
                // and compute into `masks` the parts that shift at each stage.
                let mut zeros = !sparse;
                let mut masks = [0; STAGES];
                let mut stage = 0;
                while stage < STAGES {
                    let n = 1 << stage;
                    // Suppose `zeros` has bits set at ranges `{ a..a+n, b..b+n, ... }`.
                    // Then `parity` will be computed as `{ a.. } XOR { b.. } XOR ...`,
                    // which will be the ranges `{ a..b, c..d, e.. }`.
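                    // (For example, `zeros = 0b1001_0101` with `n = 1` on `u8`
                    // gives `parity = 0b0111_0011`.)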
                    let mut parity = zeros;
                    let mut len = n;
                    while len < $U::BITS {
                        parity ^= parity << len;
                        len <<= 1;
                    }
                    masks[stage] = parity;

                    // Toggle off the bits that are shifted into:
                    //   { a..a+n, b..b+n, ... } & !{ a..b, c..d, e.. }
                    //     == { b..b+n, d..d+n, ... }
                    zeros &= !parity;
                    // Expand the remaining ranges down to the bits that were
                    // shifted from: { b-n..b+n, d-n..d+n, ... }
                    zeros ^= zeros >> n;

                    stage += 1;
                }
                masks
            }

            #[inline(always)]
            pub(in super::super) const fn gather_impl(mut x: $U, sparse: $U) -> $U {
                let masks = prepare(sparse);
                x &= sparse;
                let mut stage = 0;
                while stage < STAGES {
                    let n = 1 << stage;
                    // Consider each pair of runs of data with their leading
                    // groups of `n` 0-bits. Suppose that the run that is
                    // shifted right has length `a`, and the other one has
                    // length `b`. Assume that only zeros are shifted in.
                    // ```text
                    // [0; n], [X; a], [0; n], [Y; b] // x
                    // [0; n], [X; a], [0; n], [0; b] // q
                    // [0; n], [0; a + n], [Y; b]     // x ^= q
                    // [0; n + n], [X; a], [0; b]     // q >> n
                    // [0; n], [0; n], [X; a], [Y; b] // x ^= q >> n
                    // ```
                    // Only zeros are shifted out, satisfying the assumption
                    // for the next group.

                    // In effect, the upper run of data is swapped with the
                    // group of `n` zeros below it.
                    let q = x & masks[stage];
                    x ^= q;
                    x ^= q >> n;

                    stage += 1;
                }
                x
            }
            #[inline(always)]
            pub(in super::super) const fn scatter_impl(mut x: $U, sparse: $U) -> $U {
                let masks = prepare(sparse);
                let mut stage = STAGES;
                while stage > 0 {
                    stage -= 1;
                    let n = 1 << stage;
                    // Consider each run of data with the `2 * n` arbitrary bits
                    // above it. Suppose that the run has length `a + b`, with
                    // `a` being the length of the part that needs to be
                    // shifted. Assume that only zeros are shifted in.
                    // ```text
                    // [_; n], [_; n], [X; a], [Y; b] // x
                    // [0; n], [_; n], [X; a], [0; b] // q
                    // [_; n], [0; n + a], [Y; b]     // x ^= q
                    // [_; n], [X; a], [0; b + n]     // q << n
                    // [_; n], [X; a], [0; n], [Y; b] // x ^= q << n
                    // ```
                    // Only zeros are shifted out, satisfying the assumption
                    // for the next group.

                    // In effect, `n` 0-bits are inserted somewhere in each run
                    // of data to spread it, and the two groups of `n` bits
                    // above are XOR'd together.
                    let q = x & masks[stage];
                    x ^= q;
                    x ^= q << n;
                }
                x & sparse
            }
        }
    };
}

uint_impl!(u8);
uint_impl!(u16);
uint_impl!(u32);
uint_impl!(u64);
uint_impl!(u128);
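
// Sketch of a sanity check for the worked `u8` example in the module docs.
// Whether an in-file `#[cfg(test)]` module fits here is an assumption; core's
// unit tests normally live in a separate tests crate.
#[cfg(test)]
mod tests {
    #[test]
    fn gather_scatter_u8_example() {
        // Gather packs the bits selected by the mask into the low bits.
        assert_eq!(super::u8::gather_impl(0b1101_0110, 0b0110_1010), 0b0000_1001);
        // Scatter spreads the low bits back out to the mask positions.
        assert_eq!(super::u8::scatter_impl(0b0000_1001, 0b0110_1010), 0b0100_0010);
        // Round trip: gathering a scattered value returns the original low bits.
        assert_eq!(super::u8::gather_impl(0b0100_0010, 0b0110_1010), 0b0000_1001);
    }
}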