Skip to content

Commit adfd24b

Browse files
committed
Auto merge of #153589 - Zalathar:bit-set, r=<try>
[EXPERIMENT] RawBitSet experiments
2 parents 8b86f48 + 974f59e commit adfd24b

File tree

2 files changed

+207
-137
lines changed

2 files changed

+207
-137
lines changed

compiler/rustc_index/src/bit_set.rs

Lines changed: 30 additions & 137 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,16 @@ use std::marker::PhantomData;
33
use std::mem;
44
use std::ops::{Bound, Range, RangeBounds};
55
use std::rc::Rc;
6-
use std::{fmt, iter, slice};
6+
use std::{fmt, iter};
77

88
use Chunk::*;
99
#[cfg(feature = "nightly")]
1010
use rustc_macros::{Decodable_NoContext, Encodable_NoContext};
1111

12+
use crate::bit_set::raw::RawBitIter;
1213
use crate::{Idx, IndexVec};
1314

15+
mod raw;
1416
#[cfg(test)]
1517
mod tests;
1618

@@ -183,48 +185,18 @@ impl<T: Idx> DenseBitSet<T> {
183185
self.words.iter().all(|a| *a == 0)
184186
}
185187

186-
/// Insert `elem`. Returns whether the set has changed.
188+
/// Inserts `value` into the set, and returns true if the set has changed
189+
/// (i.e. the set did not contain the value).
187190
#[inline]
188-
pub fn insert(&mut self, elem: T) -> bool {
189-
assert!(
190-
elem.index() < self.domain_size,
191-
"inserting element at index {} but domain size is {}",
192-
elem.index(),
193-
self.domain_size,
194-
);
195-
let (word_index, mask) = word_index_and_mask(elem);
196-
let word_ref = &mut self.words[word_index];
197-
let word = *word_ref;
198-
let new_word = word | mask;
199-
*word_ref = new_word;
200-
new_word != word
191+
pub fn insert(&mut self, value: T) -> bool {
192+
raw::insert(self.domain_size, &mut self.words, value.index())
201193
}
202194

203195
#[inline]
204-
pub fn insert_range(&mut self, elems: impl RangeBounds<T>) {
205-
let Some((start, end)) = inclusive_start_end(elems, self.domain_size) else {
206-
return;
207-
};
208-
209-
let (start_word_index, start_mask) = word_index_and_mask(start);
210-
let (end_word_index, end_mask) = word_index_and_mask(end);
211-
212-
// Set all words in between start and end (exclusively of both).
213-
for word_index in (start_word_index + 1)..end_word_index {
214-
self.words[word_index] = !0;
215-
}
216-
217-
if start_word_index != end_word_index {
218-
// Start and end are in different words, so we handle each in turn.
219-
//
220-
// We set all leading bits. This includes the start_mask bit.
221-
self.words[start_word_index] |= !(start_mask - 1);
222-
// And all trailing bits (i.e. from 0..=end) in the end word,
223-
// including the end.
224-
self.words[end_word_index] |= end_mask | (end_mask - 1);
225-
} else {
226-
self.words[start_word_index] |= end_mask | (end_mask - start_mask);
227-
}
196+
pub fn insert_range(&mut self, range: impl RangeBounds<T>) {
197+
let start = range.start_bound().map(|i| i.index());
198+
let end = range.end_bound().map(|i| i.index());
199+
raw::insert_range(self.domain_size, &mut self.words, (start, end));
228200
}
229201

230202
/// Sets all bits to true.
@@ -235,40 +207,17 @@ impl<T: Idx> DenseBitSet<T> {
235207

236208
/// Checks whether any bit in the given range is a 1.
237209
#[inline]
238-
pub fn contains_any(&self, elems: impl RangeBounds<T>) -> bool {
239-
let Some((start, end)) = inclusive_start_end(elems, self.domain_size) else {
240-
return false;
241-
};
242-
let (start_word_index, start_mask) = word_index_and_mask(start);
243-
let (end_word_index, end_mask) = word_index_and_mask(end);
244-
245-
if start_word_index == end_word_index {
246-
self.words[start_word_index] & (end_mask | (end_mask - start_mask)) != 0
247-
} else {
248-
if self.words[start_word_index] & !(start_mask - 1) != 0 {
249-
return true;
250-
}
251-
252-
let remaining = start_word_index + 1..end_word_index;
253-
if remaining.start <= remaining.end {
254-
self.words[remaining].iter().any(|&w| w != 0)
255-
|| self.words[end_word_index] & (end_mask | (end_mask - 1)) != 0
256-
} else {
257-
false
258-
}
259-
}
210+
pub fn contains_any(&self, range: impl RangeBounds<T>) -> bool {
211+
let start = range.start_bound().map(|i| i.index());
212+
let end = range.end_bound().map(|i| i.index());
213+
raw::contains_any(self.domain_size, &self.words, (start, end))
260214
}
261215

262-
/// Returns `true` if the set has changed.
216+
/// Removes `value` from the set, and returns true if the set has changed
217+
/// (i.e. the set did contain the value).
263218
#[inline]
264-
pub fn remove(&mut self, elem: T) -> bool {
265-
assert!(elem.index() < self.domain_size);
266-
let (word_index, mask) = word_index_and_mask(elem);
267-
let word_ref = &mut self.words[word_index];
268-
let word = *word_ref;
269-
let new_word = word & !mask;
270-
*word_ref = new_word;
271-
new_word != word
219+
pub fn remove(&mut self, value: T) -> bool {
220+
raw::remove(self.domain_size, &mut self.words, value.index())
272221
}
273222

274223
/// Iterates over the indices of set bits in a sorted order.
@@ -278,33 +227,9 @@ impl<T: Idx> DenseBitSet<T> {
278227
}
279228

280229
pub fn last_set_in(&self, range: impl RangeBounds<T>) -> Option<T> {
281-
let (start, end) = inclusive_start_end(range, self.domain_size)?;
282-
let (start_word_index, _) = word_index_and_mask(start);
283-
let (end_word_index, end_mask) = word_index_and_mask(end);
284-
285-
let end_word = self.words[end_word_index] & (end_mask | (end_mask - 1));
286-
if end_word != 0 {
287-
let pos = max_bit(end_word) + WORD_BITS * end_word_index;
288-
if start <= pos {
289-
return Some(T::new(pos));
290-
}
291-
}
292-
293-
// We exclude end_word_index from the range here, because we don't want
294-
// to limit ourselves to *just* the last word: the bits set it in may be
295-
// after `end`, so it may not work out.
296-
if let Some(offset) =
297-
self.words[start_word_index..end_word_index].iter().rposition(|&w| w != 0)
298-
{
299-
let word_idx = start_word_index + offset;
300-
let start_word = self.words[word_idx];
301-
let pos = max_bit(start_word) + WORD_BITS * word_idx;
302-
if start <= pos {
303-
return Some(T::new(pos));
304-
}
305-
}
306-
307-
None
230+
let start = range.start_bound().map(|i| i.index());
231+
let end = range.end_bound().map(|i| i.index());
232+
raw::last_set_in(self.domain_size, &self.words, (start, end)).map(T::new)
308233
}
309234

310235
bit_relations_inherent_impls! {}
@@ -410,54 +335,22 @@ impl<T: Idx> ToString for DenseBitSet<T> {
410335
}
411336

412337
pub struct BitIter<'a, T: Idx> {
413-
/// A copy of the current word, but with any already-visited bits cleared.
414-
/// (This lets us use `trailing_zeros()` to find the next set bit.) When it
415-
/// is reduced to 0, we move onto the next word.
416-
word: Word,
417-
418-
/// The offset (measured in bits) of the current word.
419-
offset: usize,
420-
421-
/// Underlying iterator over the words.
422-
iter: slice::Iter<'a, Word>,
423-
338+
raw: RawBitIter<'a>,
424339
marker: PhantomData<T>,
425340
}
426341

427342
impl<'a, T: Idx> BitIter<'a, T> {
428-
#[inline]
429-
fn new(words: &'a [Word]) -> BitIter<'a, T> {
430-
// We initialize `word` and `offset` to degenerate values. On the first
431-
// call to `next()` we will fall through to getting the first word from
432-
// `iter`, which sets `word` to the first word (if there is one) and
433-
// `offset` to 0. Doing it this way saves us from having to maintain
434-
// additional state about whether we have started.
435-
BitIter {
436-
word: 0,
437-
offset: usize::MAX - (WORD_BITS - 1),
438-
iter: words.iter(),
439-
marker: PhantomData,
440-
}
343+
#[inline(always)]
344+
fn new(words: &'a [Word]) -> Self {
345+
BitIter { raw: RawBitIter::new(words), marker: PhantomData }
441346
}
442347
}
443348

444349
impl<'a, T: Idx> Iterator for BitIter<'a, T> {
445350
type Item = T;
446-
fn next(&mut self) -> Option<T> {
447-
loop {
448-
if self.word != 0 {
449-
// Get the position of the next set bit in the current word,
450-
// then clear the bit.
451-
let bit_pos = self.word.trailing_zeros() as usize;
452-
self.word ^= 1 << bit_pos;
453-
return Some(T::new(bit_pos + self.offset));
454-
}
455351

456-
// Move onto the next word. `wrapping_add()` is needed to handle
457-
// the degenerate initial value given to `offset` in `new()`.
458-
self.word = *self.iter.next()?;
459-
self.offset = self.offset.wrapping_add(WORD_BITS);
460-
}
352+
fn next(&mut self) -> Option<Self::Item> {
353+
self.raw.next().map(T::new)
461354
}
462355
}
463356

@@ -740,7 +633,7 @@ impl<T: Idx> ChunkedBitSet<T> {
740633
Some(Ones) => ChunkIter::Ones(0..chunk_domain_size as usize),
741634
Some(Mixed { ones_count: _, words }) => {
742635
let num_words = num_words(chunk_domain_size as usize);
743-
ChunkIter::Mixed(BitIter::new(&words[0..num_words]))
636+
ChunkIter::Mixed(RawBitIter::new(&words[0..num_words]))
744637
}
745638
None => ChunkIter::Finished,
746639
}
@@ -1058,7 +951,7 @@ impl Chunk {
1058951
enum ChunkIter<'a> {
1059952
Zeros,
1060953
Ones(Range<usize>),
1061-
Mixed(BitIter<'a, usize>),
954+
Mixed(RawBitIter<'a>),
1062955
Finished,
1063956
}
1064957

0 commit comments

Comments
 (0)