@@ -5,20 +5,24 @@ use std::collections::hash_map::DefaultHasher;
55use std:: hash:: { Hash , Hasher } ;
66
77#[ derive( Serialize , Debug , Deserialize , Clone , Readable , Writable ) ]
8- pub struct ProbabilisticSet {
8+ pub struct ProbSet {
99 // todo: consider making bits and array?
1010 bits : Vec < u8 > ,
1111 size_bits : usize ,
1212 seed : u64 , // Store the random seed
1313}
1414
15- impl ProbabilisticSet {
15+ impl ProbSet {
1616 pub fn new ( expected_items : usize , bits_per_item : usize ) -> Self {
17+ if expected_items == 0 || bits_per_item == 0 {
18+ panic ! ( "expected_items and bits_per_item must be greater than 0" ) ;
19+ }
20+
1721 let size_bits = expected_items * bits_per_item;
1822 let seed = rand:: thread_rng ( ) . r#gen ( ) ; // Generate random seed
1923
20- ProbabilisticSet {
21- bits : vec ! [ 0 ; ( size_bits + 7 ) / 8 ] ,
24+ ProbSet {
25+ bits : vec ! [ 0 ; size_bits. div_ceil ( 8 ) ] ,
2226 size_bits,
2327 seed,
2428 }
@@ -27,8 +31,8 @@ impl ProbabilisticSet {
2731 // Create with a specific seed (useful for testing or controlled randomness)
2832 pub fn with_seed ( expected_items : usize , bits_per_item : usize , seed : u64 ) -> Self {
2933 let size_bits: usize = expected_items * bits_per_item;
30- ProbabilisticSet {
31- bits : vec ! [ 0 ; ( size_bits + 7 ) / 8 ] ,
34+ ProbSet {
35+ bits : vec ! [ 0 ; size_bits. div_ceil ( 8 ) ] ,
3236 size_bits,
3337 seed,
3438 }
@@ -74,4 +78,126 @@ impl ProbabilisticSet {
7478}
7579
#[cfg(test)]
mod tests {
    use super::*;
    use uuid::Uuid;

    /// Reports the bit-position collision rate of 1000 random UUIDs for a
    /// couple of small configurations.
    ///
    /// The rate itself is informational (printed); the only hard check is
    /// that it is a valid fraction.
    #[test]
    fn test_uuid_collision_rates() {
        // Test different configurations with UUIDs
        for (expected_items, bits_per_item) in [(1000, 2), (1000, 4)] {
            let collision_rate = measure_uuid_collision_rate(expected_items, bits_per_item, 1000);
            println!(
                "Config: {} items, {} bits/item -> Collision rate: {:.2}%",
                expected_items,
                bits_per_item,
                collision_rate * 100.0
            );
            assert!(
                (0.0..=1.0).contains(&collision_rate),
                "collision rate out of range: {collision_rate}"
            );
        }
    }

    /// Inserts 500 random UUIDs, checks none of them is reported missing, and
    /// then bounds the false-positive rate over 10,000 fresh UUIDs.
    #[test]
    fn test_uuid_false_positive_rate() {
        let mut set = ProbSet::new(1000, 4);
        // A hash set keeps the "was this actually inserted?" check O(1) inside
        // the 10,000-iteration probe loop below.
        let mut inserted_uuids = std::collections::HashSet::new();

        for _ in 0..500 {
            let uuid_u128 = Uuid::new_v4().as_u128();
            set.insert(uuid_u128);
            inserted_uuids.insert(uuid_u128);
        }

        for &uuid in &inserted_uuids {
            assert!(set.contains(uuid), "False negative detected!");
        }

        // Test false positive rate with 10,000 random UUIDs
        let test_size = 10000;
        let mut tested = 0_usize;
        let mut false_positives = 0_usize;

        for _ in 0..test_size {
            let test_uuid = Uuid::new_v4().as_u128();

            // A genuinely inserted value is not a false positive; skip it and
            // keep it out of the denominator as well.
            if inserted_uuids.contains(&test_uuid) {
                continue;
            }

            tested += 1;
            if set.contains(test_uuid) {
                false_positives += 1;
            }
        }

        assert!(tested > 0, "no non-member UUIDs were tested");

        let false_positive_rate = false_positives as f64 / tested as f64;
        println!("False positive rate: {:.2}%", false_positive_rate * 100.0);

        assert!(
            false_positive_rate < 0.20,
            "False positive rate too high: {:.2}%",
            false_positive_rate * 100.0
        );
    }

    /// Reports how the collision rate changes as the number of hashed items
    /// goes from 25% to 150% of the configured capacity.
    ///
    /// The rate itself is informational (printed); the only hard check is
    /// that it is a valid fraction.
    #[test]
    fn test_capacity_vs_collision_rate() {
        let bits_per_item = 16;
        let max_capacity = 1000;

        // Test collision rates at different capacity utilizations
        for utilization in [0.25, 0.5, 0.75, 1.0, 1.25, 1.5] {
            let num_items = (max_capacity as f64 * utilization) as usize;
            let collision_rate =
                measure_uuid_collision_rate(max_capacity, bits_per_item, num_items);

            println!(
                "Utilization: {:.0}% -> Collision rate: {:.2}%",
                utilization * 100.0,
                collision_rate * 100.0
            );
            assert!(
                (0.0..=1.0).contains(&collision_rate),
                "collision rate out of range: {collision_rate}"
            );
        }
    }

    /// Two sets built with the same seed and fed the same items must answer
    /// every membership query identically.
    #[test]
    fn test_deterministic_behavior() {
        // Test that same seed produces same results
        let seed = 12345;
        let mut set1 = ProbSet::with_seed(1000, 16, seed);
        let mut set2 = ProbSet::with_seed(1000, 16, seed);

        let test_uuids: Vec<u128> = (0..100).map(|_| Uuid::new_v4().as_u128()).collect();

        for &uuid in &test_uuids {
            set1.insert(uuid);
            set2.insert(uuid);
        }

        for _ in 0..1000 {
            let test_uuid = Uuid::new_v4().as_u128();
            assert_eq!(
                set1.contains(test_uuid),
                set2.contains(test_uuid),
                "Sets with same seed should behave identically"
            );
        }
    }

    /// Helper function to measure collision rate.
    ///
    /// Hashes `num_test_items` random UUIDs with a freshly seeded set of
    /// `expected_items * bits_per_item` bits, reduces each hash modulo
    /// `size_bits`, and returns the fraction of items that landed on a
    /// position already taken by an earlier item. Returns `0.0` when
    /// `num_test_items` is zero instead of dividing by zero.
    fn measure_uuid_collision_rate(
        expected_items: usize,
        bits_per_item: usize,
        num_test_items: usize,
    ) -> f64 {
        let set = ProbSet::new(expected_items, bits_per_item);

        // No samples means no collisions; without this guard the division
        // below would be 0.0 / 0.0 and yield NaN.
        if num_test_items == 0 {
            return 0.0;
        }

        // Only the number of distinct positions matters, so a set suffices.
        let mut unique_positions = std::collections::HashSet::new();

        for _ in 0..num_test_items {
            let uuid = Uuid::new_v4().as_u128();
            unique_positions.insert(set.hash(uuid) % set.size_bits);
        }

        let collisions = num_test_items - unique_positions.len();
        collisions as f64 / num_test_items as f64
    }
}
0 commit comments