@@ -177,6 +177,51 @@ impl DefaultResizePolicy {
177177// element.
178178//
179179// FIXME(Gankro, pczarn): review the proof and put it all in a separate README.md
180+ //
181+ // Adaptive early resizing
182+ // ----------------------
183+ // To protect against degenerate performance scenarios (including DOS attacks),
184+ // the implementation includes an adaptive behavior that can resize the map
185+ // early (before its capacity is exceeded) when suspiciously long probe or
186+ // forward shift sequences are encountered.
187+ //
188+ // With this algorithm in place it would be possible to turn a CPU attack into
189+ // a memory attack due to the aggressive resizing. To prevent that, the
190+ // adaptive behavior only triggers when the map occupancy has reached half the maximum occupancy.
191+ // This reduces the effectiveness of the algorithm but also makes it completely safe.
192+ //
193+ // The previous safety measure also prevents degenerate interactions with
194+ // really poor quality hash algorithms that can make normal inputs look like a
195+ // DOS attack.
196+ //
197+ const DISPLACEMENT_THRESHOLD : usize = 128 ; // probe displacement at or above which an insert flags `long_probes`
198+ const FORWARD_SHIFT_THRESHOLD : usize = 512 ; // `robin_hood` shift distance at or above which an insert flags `long_probes`
199+ //
200+ // The thresholds of 128 and 512 are chosen to minimize the chance of exceeding them.
201+ // In particular, we want that chance to be less than 10^-8 with a load of 90%.
202+ // For displacement, the smallest constant that fits our needs is 90,
203+ // so we round that up to 128. For the number of forward-shifted buckets,
204+ // we choose k=512. Keep in mind that the run length is a sum of the displacement and
205+ // the number of forward-shifted buckets, so its threshold is 128+512=640.
206+ // Even though the probability of having a run length of more than 640 buckets may be
207+ // higher than the probability we want, it should be low enough.
208+ //
209+ // At a load factor of α, the odds of finding the target bucket after exactly n
210+ // unsuccessful probes[1] are
211+ //
212+ // Pr_α{displacement = n} =
213+ // (1 - α) / α * ∑_{k≥1} e^(-kα) * (kα)^(k+n) / (k + n)! * (1 - kα / (k + n + 1))
214+ //
215+ // We use this formula to find the probability of triggering the adaptive behavior
216+ //
217+ // Pr_0.909{displacement > 128} = 1.601 * 10^-11
218+ //
219+ // FIXME: Extend with math for shift threshold in [2]
220+ //
221+ // 1. Alfredo Viola (2005). Distributional analysis of Robin Hood linear probing
222+ // hashing with buckets.
223+ // 2. http://www.cs.tau.ac.il/~zwick/Adv-Alg-2015/Linear-Probing.pdf
224+
180225
181226/// A hash map implementation which uses linear probing with Robin Hood bucket
182227/// stealing.
@@ -360,6 +405,8 @@ pub struct HashMap<K, V, S = RandomState> {
360405 table : RawTable < K , V > ,
361406
362407 resize_policy : DefaultResizePolicy ,
408+
409+ long_probes : bool ,
363410}
364411
365412/// Search for a pre-hashed key.
@@ -385,7 +432,7 @@ fn search_hashed<K, V, M, F>(table: M, hash: SafeHash, mut is_match: F) -> Inter
385432 // Found a hole!
386433 return InternalEntry :: Vacant {
387434 hash : hash,
388- elem : NoElem ( bucket) ,
435+ elem : NoElem ( bucket, displacement ) ,
389436 } ;
390437 }
391438 Full ( bucket) => bucket,
@@ -447,15 +494,15 @@ fn robin_hood<'a, K: 'a, V: 'a>(bucket: FullBucketMut<'a, K, V>,
447494 mut hash : SafeHash ,
448495 mut key : K ,
449496 mut val : V )
450- -> & ' a mut V {
451- let starting_index = bucket. index ( ) ;
497+ -> ( usize , & ' a mut V ) {
498+ let start_index = bucket. index ( ) ;
452499 let size = bucket. table ( ) . size ( ) ;
453500 // Save the *starting point*.
454501 let mut bucket = bucket. stash ( ) ;
455502 // There can be at most `size - dib` buckets to displace, because
456503 // in the worst case, there are `size` elements and we already are
457504 // `displacement` buckets away from the initial one.
458- let idx_end = starting_index + size - bucket. displacement ( ) ;
505+ let idx_end = start_index + size - bucket. displacement ( ) ;
459506
460507 loop {
461508 let ( old_hash, old_key, old_val) = bucket. replace ( hash, key, val) ;
@@ -472,14 +519,15 @@ fn robin_hood<'a, K: 'a, V: 'a>(bucket: FullBucketMut<'a, K, V>,
472519 Empty ( bucket) => {
473520 // Found a hole!
474521 let bucket = bucket. put ( hash, key, val) ;
522+ let end_index = bucket. index ( ) ;
475523 // Now that it's stolen, just read the value's pointer
476524 // right out of the table! Go back to the *starting point*.
477525 //
478526 // This use of `into_table` is misleading. It turns the
479527 // bucket, which is a FullBucket on top of a
480528 // FullBucketMut, into just one FullBucketMut. The "table"
481529 // refers to the inner FullBucketMut in this context.
482- return bucket. into_table ( ) . into_mut_refs ( ) . 1 ;
530+ return ( end_index - start_index , bucket. into_table ( ) . into_mut_refs ( ) . 1 ) ;
483531 }
484532 Full ( bucket) => bucket,
485533 } ;
@@ -617,6 +665,7 @@ impl<K, V, S> HashMap<K, V, S>
617665 hash_builder : hash_builder,
618666 resize_policy : DefaultResizePolicy :: new ( ) ,
619667 table : RawTable :: new ( 0 ) ,
668+ long_probes : false ,
620669 }
621670 }
622671
@@ -649,6 +698,7 @@ impl<K, V, S> HashMap<K, V, S>
649698 hash_builder : hash_builder,
650699 resize_policy : resize_policy,
651700 table : RawTable :: new ( raw_cap) ,
701+ long_probes : false ,
652702 }
653703 }
654704
@@ -706,6 +756,11 @@ impl<K, V, S> HashMap<K, V, S>
706756 let min_cap = self . len ( ) . checked_add ( additional) . expect ( "reserve overflow" ) ;
707757 let raw_cap = self . resize_policy . raw_capacity ( min_cap) ;
708758 self . resize ( raw_cap) ;
759+ } else if self . long_probes && remaining <= self . len ( ) {
760+ // Probe sequence is too long and table is half full,
761+ // resize early to reduce probing length.
762+ let new_capacity = self . table . capacity ( ) * 2 ;
763+ self . resize ( new_capacity) ;
709764 }
710765 }
711766
@@ -718,10 +773,11 @@ impl<K, V, S> HashMap<K, V, S>
718773 assert ! ( self . table. size( ) <= new_raw_cap) ;
719774 assert ! ( new_raw_cap. is_power_of_two( ) || new_raw_cap == 0 ) ;
720775
776+ self . long_probes = false ;
721777 let mut old_table = replace ( & mut self . table , RawTable :: new ( new_raw_cap) ) ;
722778 let old_size = old_table. size ( ) ;
723779
724- if old_table. capacity ( ) == 0 || old_table . size ( ) == 0 {
780+ if old_table. size ( ) == 0 {
725781 return ;
726782 }
727783
@@ -798,7 +854,8 @@ impl<K, V, S> HashMap<K, V, S>
798854 /// If the key already exists, the hashtable will be returned untouched
799855 /// and a reference to the existing element will be returned.
800856 fn insert_hashed_nocheck ( & mut self , hash : SafeHash , k : K , v : V ) -> Option < V > {
801- let entry = search_hashed ( & mut self . table , hash, |key| * key == k) . into_entry ( k) ;
857+ let entry = search_hashed ( & mut self . table , hash, |key| * key == k)
858+ . into_entry ( k, & mut self . long_probes ) ;
802859 match entry {
803860 Some ( Occupied ( mut elem) ) => Some ( elem. insert ( v) ) ,
804861 Some ( Vacant ( elem) ) => {
@@ -953,7 +1010,9 @@ impl<K, V, S> HashMap<K, V, S>
9531010 pub fn entry ( & mut self , key : K ) -> Entry < K , V > {
9541011 // Gotta resize now.
9551012 self . reserve ( 1 ) ;
956- self . search_mut ( & key) . into_entry ( key) . expect ( "unreachable" )
1013+ let hash = self . make_hash ( & key) ;
1014+ search_hashed ( & mut self . table , hash, |q| q. eq ( & key) )
1015+ . into_entry ( key, & mut self . long_probes ) . expect ( "unreachable" )
9571016 }
9581017
9591018 /// Returns the number of elements in the map.
@@ -1407,7 +1466,7 @@ impl<K, V, M> InternalEntry<K, V, M> {
14071466
14081467impl < ' a , K , V > InternalEntry < K , V , & ' a mut RawTable < K , V > > {
14091468 #[ inline]
1410- fn into_entry ( self , key : K ) -> Option < Entry < ' a , K , V > > {
1469+ fn into_entry ( self , key : K , long_probes : & ' a mut bool ) -> Option < Entry < ' a , K , V > > {
14111470 match self {
14121471 InternalEntry :: Occupied { elem } => {
14131472 Some ( Occupied ( OccupiedEntry {
@@ -1420,6 +1479,7 @@ impl<'a, K, V> InternalEntry<K, V, &'a mut RawTable<K, V>> {
14201479 hash : hash,
14211480 key : key,
14221481 elem : elem,
1482+ long_probes : long_probes,
14231483 } ) )
14241484 }
14251485 InternalEntry :: TableIsEmpty => None ,
@@ -1492,6 +1552,7 @@ pub struct VacantEntry<'a, K: 'a, V: 'a> {
14921552 hash : SafeHash ,
14931553 key : K ,
14941554 elem : VacantEntryState < K , V , & ' a mut RawTable < K , V > > ,
1555+ long_probes : & ' a mut bool ,
14951556}
14961557
14971558#[ stable( feature= "debug_hash_map" , since = "1.12.0" ) ]
@@ -1509,7 +1570,7 @@ enum VacantEntryState<K, V, M> {
15091570 /// and will kick the current one out on insertion.
15101571 NeqElem ( FullBucket < K , V , M > , usize ) ,
15111572 /// The index is genuinely vacant.
1512- NoElem ( EmptyBucket < K , V , M > ) ,
1573+ NoElem ( EmptyBucket < K , V , M > , usize ) ,
15131574}
15141575
15151576#[ stable( feature = "rust1" , since = "1.0.0" ) ]
@@ -2066,8 +2127,20 @@ impl<'a, K: 'a, V: 'a> VacantEntry<'a, K, V> {
20662127 #[ stable( feature = "rust1" , since = "1.0.0" ) ]
20672128 pub fn insert ( self , value : V ) -> & ' a mut V {
20682129 match self . elem {
2069- NeqElem ( bucket, disp) => robin_hood ( bucket, disp, self . hash , self . key , value) ,
2070- NoElem ( bucket) => bucket. put ( self . hash , self . key , value) . into_mut_refs ( ) . 1 ,
2130+ NeqElem ( bucket, disp) => {
2131+ let ( shift, v_ref) = robin_hood ( bucket, disp, self . hash , self . key , value) ;
2132+ if disp >= DISPLACEMENT_THRESHOLD || shift >= FORWARD_SHIFT_THRESHOLD {
2133+ * self . long_probes = true ;
2134+ }
2135+ v_ref
2136+ } ,
2137+ NoElem ( bucket, disp) => {
2138+ if disp >= DISPLACEMENT_THRESHOLD {
2139+ * self . long_probes = true ;
2140+ }
2141+ let bucket = bucket. put ( self . hash , self . key , value) ;
2142+ bucket. into_mut_refs ( ) . 1
2143+ } ,
20712144 }
20722145 }
20732146}
@@ -3192,4 +3265,24 @@ mod test_map {
31923265 assert_eq ! ( map[ & 4 ] , 40 ) ;
31933266 assert_eq ! ( map[ & 6 ] , 60 ) ;
31943267 }
3268+
3269+ #[ test]
3270+ fn test_adaptive ( ) { // passes once an insert changes capacity while the map was not full; panics otherwise
3271+ const TEST_LEN : usize = 5000 ;
3272+ // By cloning the still-empty map we get two maps with the same hasher seed.
3273+ let mut first = HashMap :: new ( ) ;
3274+ let mut second = first. clone ( ) ;
3275+ first. extend ( ( 0 ..TEST_LEN ) . map ( |i| ( i, i) ) ) ;
3276+ second. extend ( ( TEST_LEN ..TEST_LEN * 2 ) . map ( |i| ( i, i) ) ) ; // disjoint key range from `first`
3277+
3278+ for ( & k, & v) in & second { // insert into `first` in `second`'s iteration order (presumably yields long probe runs; verify)
3279+ let prev_cap = first. capacity ( ) ;
3280+ let expect_grow = first. len ( ) == prev_cap; // a regular resize is only expected when the map is full
3281+ first. insert ( k, v) ;
3282+ if !expect_grow && first. capacity ( ) != prev_cap { // capacity changed although the map was not full
3283+ return ;
3284+ }
3285+ }
3286+ panic ! ( "Adaptive early resize failed" ) ; // reached only if no early capacity change was observed
3287+ }
31953288}
0 commit comments