@@ -1317,106 +1317,209 @@ normal_s(Mean, Variance, State0) when 0 =< Variance ->
13171317 {Mean + (math :sqrt (Variance ) * X ), State }.
13181318
13191319
1320- -spec shuffle (list ()) -> list ().
1320+ -doc """
1321+ Shuffle a list.
1322+
1323+ Like `shuffle_s/2` but operates on the state stored in
1324+ the process dictionary. Returns the shuffled list.
1325+ """ .
1326+ -doc (#{group => <<" Plug-in framework API" >>,since => <<" OTP 29.0" >>}).
1327+ -spec shuffle (List :: list ()) -> ShuffledList :: list ().
13211328shuffle (List ) ->
13221329 {ShuffledList , State } = shuffle_s (List , seed_get ()),
13231330 _ = seed_put (State ),
13241331 ShuffledList .
13251332
1326- -spec shuffle_s (list (), state ()) -> {list (), state ()}.
1327- shuffle_s (List , State ) when is_list (List ) ->
1328- shuffle_r (List , State , []).
1333+ -doc """
1334+ Shuffle a list.
1335+
1336+ From the specified `State` shuffles the elements in argument `List` so that,
1337+ given that the [PRNG algorithm](#algorithms) in `State` is perfect,
1338+ every possible permutation of the elements in the returned `ShuffledList`
1339+ has the same probability.
1340+
1341+ In other words, the quality of the shuffling depends only on
1342+ the quality of the backend [random number generator](#algorithms)
1343+ and [seed](`seed_s/1`). If a cryptographically unpredictable
1344+ shuffling is needed, use for example `crypto:rand_seed_alg_s/1`
1345+ to initialize the random number generator.
1346+
1347+ Returns the shuffled list [`ShuffledList`](`t:list/0`)
1348+ and the [`NewState`](`t:state/0`).
1349+ """ .
1350+ -doc (#{group => <<" Plug-in framework API" >>,since => <<" OTP 29.0" >>}).
1351+ -spec shuffle_s (List , State ) ->
1352+ {ShuffledList :: list (), NewState :: state ()}
1353+ when
1354+ List :: list (),
1355+ State :: state ().
1356+ shuffle_s (List , {#{bits := _ , next := Next } = AlgHandler , R0 })
1357+ when is_list (List ) ->
1358+ WeakLowBits = maps :get (weak_low_bits , AlgHandler , 0 ),
1359+ [P0 |S0 ] = shuffle_init_bitstream (R0 , Next , WeakLowBits ),
1360+ {ShuffledList , _P1 , [R1 |_ ]= _S1 } = shuffle_r (List , [], P0 , S0 ),
1361+ {ShuffledList , {AlgHandler , R1 }};
1362+ shuffle_s (List , {#{max := _ , next := Next } = AlgHandler , R0 })
1363+ when is_list (List ) ->
1364+ % % Old spec - assume 2 weak low bits
1365+ WeakLowBits = 2 ,
1366+ [P0 |S0 ] = shuffle_init_bitstream (R0 , Next , WeakLowBits ),
1367+ {ShuffledList , _P1 , [R1 |_ ]= _S1 } = shuffle_r (List , [], P0 , S0 ),
1368+ {ShuffledList , {AlgHandler , R1 }}.
13291369
13301370% % Random-split-and-shuffle algorithm suggested by Richard A. O'Keefe
1331- % % on ErlangForums, as I interpreted it...
1332- % %
1333- % % Randomly split the list in two lists,
1334- % % recursively shuffle the two smaller lists,
1335- % % randomize the order between the lists according to their size.
1336- % %
1337- % % This is equivalent to assigning a random number to each
1338- % % element and sorting, but extending the numbers on demand
1339- % % while there still are duplicates.
1340-
1341- % % Leaf cases - random permutations for 0..4 elements
1342- shuffle_r ([], State , Acc ) ->
1343- {Acc , State };
1344- shuffle_r ([X ], State , Acc ) ->
1345- {[X | Acc ], State };
1346- shuffle_r ([X , Y ], State0 , Acc ) ->
1347- {V , State1 } = uniform_s (2 , State0 ),
1348- {case V of
1349- 1 -> [Y , X | Acc ];
1350- 2 -> [X , Y | Acc ]
1351- end , State1 };
1352- shuffle_r ([X , Y , Z ], State0 , Acc ) ->
1353- {V , State1 } = uniform_s (6 , State0 ),
1354- {case V of
1355- 1 -> [Z , Y , X | Acc ];
1356- 2 -> [Y , Z , X | Acc ];
1357- 3 -> [Z , X , Y | Acc ];
1358- 4 -> [X , Z , Y | Acc ];
1359- 5 -> [Y , X , Z | Acc ];
1360- 6 -> [X , Y , Z | Acc ]
1361- end , State1 };
1362- shuffle_r ([X , Y , Z , Q ], State0 , Acc ) ->
1363- {V , State1 } = uniform_s (24 , State0 ),
1364- {case V of
1365- 1 -> [Q , Z , Y , X | Acc ];
1366- 2 -> [Z , Q , Y , X | Acc ];
1367- 3 -> [Q , Y , Z , X | Acc ];
1368- 4 -> [Y , Q , Z , X | Acc ];
1369- 5 -> [Z , Y , Q , X | Acc ];
1370- 6 -> [Y , Z , Q , X | Acc ];
1371- 7 -> [Q , Z , X , Y | Acc ];
1372- 8 -> [Z , Q , X , Y | Acc ];
1373- 9 -> [Q , X , Z , Y | Acc ];
1374- 10 -> [X , Q , Z , Y | Acc ];
1375- 11 -> [Z , X , Q , Y | Acc ];
1376- 12 -> [X , Z , Q , Y | Acc ];
1377- 13 -> [Q , Y , X , Z | Acc ];
1378- 14 -> [Y , Q , X , Z | Acc ];
1379- 15 -> [Q , X , Y , Z | Acc ];
1380- 16 -> [X , Q , Y , Z | Acc ];
1381- 17 -> [Y , X , Q , Z | Acc ];
1382- 18 -> [X , Y , Q , Z | Acc ];
1383- 19 -> [Z , Y , X , Q | Acc ];
1384- 20 -> [Y , Z , X , Q | Acc ];
1385- 21 -> [Z , X , Y , Q | Acc ];
1386- 22 -> [X , Z , Y , Q | Acc ];
1387- 23 -> [Y , X , Z , Q | Acc ];
1388- 24 -> [X , Y , Z , Q | Acc ]
1389- end , State1 };
1390- % %
1371+ % % on ErlangForums, as I interpreted it... "basically a randomized
1372+ % % quicksort", shall we name it Quickshuffle?
1373+ % %
1374+ % % Randomly split the list in two lists, and recursively shuffle
1375+ % % the two smaller lists.
1376+ % %
1377+ % % How the algorithm works and why it is correct can be explained like this:
1378+ % %
1379+ % % The objective is, given a list of elements, to return a random
1380+ % % permutation of those elements so that every possible permutation
1381+ % % has the same probability to be returned.
1382+ % %
1383+ % % One of the two correct and bias free algorithms described on the Wikipedia
1384+ % % page for Fisher-Yates shuffling is to assign a random number to each
1385+ % % element in the list and order the elements according to the numbers.
1386+ % % For this to be correct the generated numbers must not have duplicates.
1387+ % %
1388+ % % This algorithm does that, but assigning a number and ordering
1389+ % % the elements is more or less the same step, which is taken
1390+ % % one binary bit at the time.
1391+ % %
1392+ % % It can be seen as, to each element, assign a fixpoint number
1393+ % % of infinite length starting with bit weight 1/2, continuing with 1/4,
1394+ % % and so on..., but reveal it incrementally.
1395+ % %
1396+ % % The list to shuffle is traversed, and a random bit is generated
1397+ % % for each element. If it is a 0, the element is assigned the zero bit
1398+ % % by moving it to the head of the list Zero, and if it is a 1,
1399+ % % to the head of the list One.
1400+ % %
1401+ % % Then the list Zero is recursively shuffled onto the accumulator list Acc,
1402+ % % after that the list One. By that all elements in Zero are ordered
1403+ % % before the ones in One, according to the generated numbers.
1404+ % % The order is actually not important as long as it is consistent.
1405+ % %
1406+ % % The algorithm recurses until the Zero or One list has length
1407+ % % 0 or 1, which is when the generated fixpoint number has no duplicate.
1408+ % % The fixpoint number in itself only exists in the guise of the
1409+ % % recursion call stack, that is whether an element belongs to list
1410+ % % Zero or One on each recursion level.
1411+ % % Here is the bare algorithm:
1412+ % %
1413+ % % quickshuffle([], Acc) -> Acc;
1414+ % % quickshuffle([X], Acc) -> [X | Acc];
1415+ % % quickshuffle([_|_] = L, Acc) ->
1416+ % % {Zero, One} = quickshuffle_split(L, [], []),
1417+ % % quickshuffle(One, quickshuffle(Zero, Acc)).
1418+ % %
1419+ % % quickshuffle_split([], Zero, One) ->
1420+ % % {Zero, One};
1421+ % % quickshuffle_split([X | L], Zero, One) ->
1422+ % % case random_bit() of
1423+ % % 0 -> quickshuffle_split(L, [X | Zero], One);
1424+ % % 1 -> quickshuffle_split(L, Zero, [X | One])
1425+ % % end.
1426+ % %
1427+ % % As an optimization, since the algorithm is equivalent to its objective
1428+ % % to randomly permute a list, we can when reaching a small enough list
1429+ % % as in 3 or 2 instead do an explicit random permutation of the list.
1430+ % %
1431+ % % The `random_bit()` can be generated with small overhead by generating
1432+ % % a random word and cache it, then shift out one bit at the time.
1433+ % %
1434+ % % Also, it is faster to do a 4-way split by 2 bits instead of,
1435+ % % as described above, a 2-way split by 1 bit.
1436+
1437+ % % Leaf cases - random permutations for 0..3 elements
1438+ shuffle_r ([], Acc , P , S ) ->
1439+ {Acc , P , S };
1440+ shuffle_r ([X ], Acc , P , S ) ->
1441+ {[X | Acc ], P , S };
1442+ shuffle_r ([X , Y ], Acc , P , S ) ->
1443+ shuffle_r_2 (X , Acc , P , S , Y );
1444+ shuffle_r ([X , Y , Z ], Acc , P , S ) ->
1445+ shuffle_r_3 (X , Acc , P , S , Y , Z );
13911446% % General case - split and recursive shuffle
1392- shuffle_r ([_ , _ , _ , _ | _ ] = List , State0 , Acc0 ) ->
1393- {Left , Right , State1 } = shuffle_split (List , State0 ),
1394- {Acc1 , State2 } = shuffle_r (Left , State1 , Acc0 ),
1395- shuffle_r (Right , State2 , Acc1 ).
1447+ shuffle_r ([_ , _ , _ | _ ] = List , Acc , P , S ) ->
1448+ % % P and S is bitstream cache and state
1449+ shuffle_r (List , Acc , P , S , [], [], [], []).
1450+ % %
1451+ % % Split L into 4 random subsets
1452+ % %
1453+ shuffle_r ([], Acc0 , P0 , S0 , Zero , One , Two , Three ) ->
1454+ % % Split done, recursively shuffle the splitted lists onto Acc
1455+ {Acc1 , P1 , S1 } = shuffle_r (Zero , Acc0 , P0 , S0 ),
1456+ {Acc2 , P2 , S2 } = shuffle_r (One , Acc1 , P1 , S1 ),
1457+ {Acc3 , P3 , S3 } = shuffle_r (Two , Acc2 , P2 , S2 ),
1458+ shuffle_r (Three , Acc3 , P3 , S3 );
1459+ shuffle_r ([X | L ], Acc , P0 , S , Zero , One , Two , Three )
1460+ when is_integer (P0 ), 3 < P0 , P0 < 1 bsl 57 ->
1461+ P1 = P0 bsr 2 ,
1462+ case P0 band 3 of
1463+ 0 -> shuffle_r (L , Acc , P1 , S , [X | Zero ], One , Two , Three );
1464+ 1 -> shuffle_r (L , Acc , P1 , S , Zero , [X | One ], Two , Three );
1465+ 2 -> shuffle_r (L , Acc , P1 , S , Zero , One , [X | Two ], Three );
1466+ 3 -> shuffle_r (L , Acc , P1 , S , Zero , One , Two , [X | Three ])
1467+ end ;
1468+ shuffle_r ([_ | _ ] = L , Acc , _P , S0 , Zero , One , Two , Three ) ->
1469+ [P |S1 ] = shuffle_new_bits (S0 ),
1470+ shuffle_r (L , Acc , P , S1 , Zero , One , Two , Three ).
1471+
1472+ % % Permute 2 elements
1473+ shuffle_r_2 (X , Acc , P , S , Y )
1474+ when is_integer (P ), 1 < P , P < 1 bsl 57 ->
1475+ {case P band 1 of
1476+ 0 -> [Y , X | Acc ];
1477+ 1 -> [X , Y | Acc ]
1478+ end , P bsr 1 , S };
1479+ shuffle_r_2 (X , Acc , _P , S0 , Y ) ->
1480+ [P |S1 ] = shuffle_new_bits (S0 ),
1481+ shuffle_r_2 (X , Acc , P , S1 , Y ).
1482+
1483+ % % Permute 3 elements
1484+ % %
1485+ % % Uses 3 random bits per iteration with a probability of 1/4
1486+ % % to reject and retry, which on average is 3 * 4/3
1487+ % % (infinite sum of (1/4)^k) = 4 bits per permutation
1488+ shuffle_r_3 (X , Acc , P0 , S , Y , Z )
1489+ when is_integer (P0 ), 7 < P0 , P0 < 1 bsl 57 ->
1490+ P1 = P0 bsr 3 ,
1491+ case P0 band 7 of
1492+ 0 -> {[Z , Y , X | Acc ], P1 , S };
1493+ 1 -> {[Y , Z , X | Acc ], P1 , S };
1494+ 2 -> {[Z , X , Y | Acc ], P1 , S };
1495+ 3 -> {[X , Z , Y | Acc ], P1 , S };
1496+ 4 -> {[Y , X , Z | Acc ], P1 , S };
1497+ 5 -> {[X , Y , Z | Acc ], P1 , S };
1498+ _ -> % Reject and retry
1499+ shuffle_r_3 (X , Acc , P1 , S , Y , Z )
1500+ end ;
1501+ shuffle_r_3 (X , Acc , _P , S0 , Y , Z ) ->
1502+ [P |S1 ] = shuffle_new_bits (S0 ),
1503+ shuffle_r_3 (X , Acc , P , S1 , Y , Z ).
13961504
1397- % % Split L into two random subsets: Left and Right
1505+ - dialyzer ({ no_improper_lists , shuffle_init_bitstream / 3 }).
13981506% %
1399- shuffle_split (L , State ) ->
1400- shuffle_split (L , State , 1 , [], []).
1507+ shuffle_init_bitstream (R , Next , WeakLowBits ) ->
1508+ P = 1 , % Marker for out of random bits
1509+ W = {Next ,WeakLowBits }, % Generator
1510+ S = [R |W ], % Generator state
1511+ [P |S ]. % Bit cash and state
1512+
1513+ -dialyzer ({no_improper_lists , shuffle_new_bits / 1 }).
14011514% %
1402- shuffle_split ([], State , _P , Left , Right ) ->
1403- {Left , Right , State };
1404- shuffle_split ([_ | _ ] = L , State0 , 1 , Left , Right ) ->
1515+ shuffle_new_bits ([R0 |{Next ,WeakLowBits }= W ]) ->
1516+ {V , R1 } = Next (R0 ),
1517+ % % Setting the top bit M here provides the marker
1518+ % % for when we are out of random bits: P =:= 1
14051519 M = 1 bsl 56 ,
1406- case rand :uniform_s (M , State0 ) of
1407- {V , State1 } when is_integer (V ), 1 =< V , V =< M ->
1408- % % Setting the top bit M here provides the marker
1409- % % for when we are out of random bits: P =:= 1
1410- shuffle_split (L , State1 , (V - 1 ) + M , Left , Right )
1411- end ;
1412- shuffle_split ([X | L ], State , P , Left , Right )
1413- when is_integer (P ), 1 =< P , P < 1 bsl 57 ->
1414- case P band 1 of
1415- 0 ->
1416- shuffle_split (L , State , P bsr 1 , [X | Left ], Right );
1417- 1 ->
1418- shuffle_split (L , State , P bsr 1 , Left , [X | Right ])
1419- end .
1520+ P = ((V bsr WeakLowBits ) band (M - 1 )) bor M ,
1521+ S = [R1 |W ],
1522+ [P |S ].
14201523
14211524% % =====================================================================
14221525% % Internal functions
0 commit comments