// core/num/int_bits.rs

//! Implementations for `uN::gather_bits` and `uN::scatter_bits`
//!
//! For the purposes of this implementation, the operations can be thought
//! of as operating on the input bits as a list, starting from the least
//! significant bit. Gathering is like `Vec::retain` that deletes bits
//! where the mask has a zero. Scattering is like doing the inverse by
//! inserting the zeros that gathering would delete.
//!
//! Key observation: Each bit that is gathered/scattered needs to be
//! shifted by the count of zeros up to the corresponding mask bit.
//!
//! With that in mind, the general idea is to decompose the operation into
//! a sequence of stages in `0..log2(BITS)`, where each stage shifts some
//! of the bits by `n = 1 << stage`. The masks for each stage are computed
//! via prefix counts of zeros in the mask.
//!
//! # Gathering
//!
//! Consider the input as a sequence of runs of data (bitstrings A,B,C,...),
//! split by fixed-width groups of zeros ('.'), initially at width `n = 1`.
//! Counting the groups of zeros, each stage shifts the odd-indexed runs of
//! data right by `n`, effectively swapping them with the preceding zeros.
//! For the next stage, `n` is doubled as all the zeros are now paired.
//! ```text
//! .A.B.C.D.E.F.G.H
//! ..AB..CD..EF..GH
//! ....ABCD....EFGH
//! ........ABCDEFGH
//! ```
//! What makes this nontrivial is that the lengths of the bitstrings are not
//! the same. Using lowercase for individual bits, the above might look like
//! ```text
//! .a.bbb.ccccc.dd.e..g.hh
//! ..abbb..cccccdd..e..ghh
//! ....abbbcccccdd....eghh
//! ........abbbcccccddeghh
//! ```
//!
//! # Scattering
//!
//! For `scatter_bits`, the stages are reversed. We start with a single run of
//! data in the low bits. Each stage then splits each run of data in two by
//! shifting part of it left by `n`, which is halved each stage.
//! ```text
//! ........ABCDEFGH
//! ....ABCD....EFGH
//! ..AB..CD..EF..GH
//! .A.B.C.D.E.F.G.H
//! ```
//!
//! # Stage masks
//!
//! To facilitate the shifts at each stage, we compute a mask that covers both
//! the bitstrings to shift, and the zeros they shift into.
//! ```text
//! .A.B.C.D.E.F.G.H
//!  ##  ##  ##  ##
//! ..AB..CD..EF..GH
//!   ####    ####
//! ....ABCD....EFGH
//!     ########
//! ........ABCDEFGH
//! ```

macro_rules! uint_impl {
    ($U:ident) => {
        pub(super) mod $U {
            /// Number of butterfly stages: `log2` of the type's bit width.
            const STAGES: usize = $U::BITS.ilog2() as usize;

            /// Derives the per-stage movement masks from the `sparse` mask.
            ///
            /// `masks[s]` covers, for stage `s`, both the runs of data that
            /// move by `1 << s` and the zero groups they move into.
            #[inline]
            const fn stage_masks(sparse: $U) -> [$U; STAGES] {
                // `gaps` marks the bit positions to be squeezed out; it is
                // reshaped every stage so that its zero groups pair up.
                let mut gaps = !sparse;
                let mut masks = [0; STAGES];
                let mut s = 0;
                while s < STAGES {
                    let n = 1 << s;
                    // Treat `gaps` as width-`n` groups `{ a..a+n, b..b+n, ... }`.
                    // The stride-`n` prefix parity below evaluates to
                    // `{ a.. } XOR { b.. } XOR ...`, which is the set of
                    // alternating ranges `{ a..b, c..d, e.. }`.
                    let mut parity = gaps;
                    let mut span = n;
                    while span < $U::BITS {
                        parity ^= parity << span;
                        span <<= 1;
                    }
                    masks[s] = parity;

                    // Drop the groups that are shifted into:
                    // { a..a+n, b..b+n, ... } & !{ a..b, c..d, e.. }
                    // == { b..b+n, d..d+n, ... }
                    gaps &= !parity;
                    // Grow each surviving group downward over the bits that
                    // were shifted from: { b-n..b+n, d-n..d+n, ... }
                    gaps ^= gaps >> n;

                    s += 1;
                }
                masks
            }

            /// Compacts the bits of `x` selected by `sparse` into the least
            /// significant bits (the software counterpart of x86 `pext`).
            #[inline(always)]
            pub(in super::super) const fn gather_impl(mut x: $U, sparse: $U) -> $U {
                let masks = stage_masks(sparse);
                // Unselected bits never survive; clear them up front so only
                // zeros ever travel through the gaps.
                x &= sparse;
                let mut s = 0;
                while s < STAGES {
                    let n = 1 << s;
                    // Per pair of runs with their leading `n`-wide zero
                    // groups (assuming only zeros are shifted in):
                    // ```text
                    // [0; n], [X; a], [0; n], [Y; b] // x
                    // [0; n], [X; a], [0; n], [0; b] // moving
                    // [0; n], [0; a   +   n], [Y; b] // x ^ moving
                    // [0; n   +   n], [X; a], [0; b] // moving >> n
                    // [0; n], [0; n], [X; a], [Y; b] // result
                    // ```
                    // The run `X` swaps places with the `n` zeros below it,
                    // and only zeros fall off either end, so the assumption
                    // holds for the next group.
                    let moving = x & masks[s];
                    x = (x ^ moving) ^ (moving >> n);

                    s += 1;
                }
                x
            }

            /// Spreads the least significant bits of `x` out to the positions
            /// selected by `sparse` (the software counterpart of x86 `pdep`).
            #[inline(always)]
            pub(in super::super) const fn scatter_impl(mut x: $U, sparse: $U) -> $U {
                let masks = stage_masks(sparse);
                // Run the gather stages in reverse, splitting runs apart
                // instead of joining them.
                let mut s = STAGES;
                while s > 0 {
                    s -= 1;
                    let n = 1 << s;
                    // Per run of data with the `2 * n` arbitrary bits above
                    // it, where `X` is the part that must move (assuming only
                    // zeros are shifted in):
                    // ```text
                    // [_; n], [_; n], [X; a], [Y; b] // x
                    // [0; n], [_; n], [X; a], [0; b] // moving
                    // [_; n], [0; n   +   a], [Y; b] // x ^ moving
                    // [_; n], [X; a], [0; b   +   n] // moving << n
                    // [_; n], [X; a], [0; n], [Y; b] // result
                    // ```
                    // `n` zeros are inserted into the run to spread it, the
                    // two `n`-bit groups above are XOR'd together, and only
                    // zeros fall off either end.
                    let moving = x & masks[s];
                    x = (x ^ moving) ^ (moving << n);
                }
                // Discard the garbage accumulated above the selected bits.
                x & sparse
            }
        }
    };
}
166
// Instantiate one `mod uN` of gather/scatter kernels per unsigned type.
uint_impl!(u8);
uint_impl!(u16);
uint_impl!(u32);
uint_impl!(u64);
uint_impl!(u128);