@@ -1328,15 +1328,50 @@ shuffle_s(List, State) when is_list(List) ->
13281328 shuffle_r (List , State , []).
13291329
13301330% % Random-split-and-shuffle algorithm suggested by Richard A. O'Keefe
1331- % % on ErlangForums, as I interpreted it...
1331+ % % on ErlangForums, as I interpreted it... "basically a randomized
1332+ % % quicksort", shall we name it Quickshuffle?
13321333% %
1333- % % Randomly split the list in two lists,
1334- % % recursively shuffle the two smaller lists,
1335- % % randomize the order between the lists according to their size.
1334+ % % Randomly split the list in two lists, and recursively shuffle
1335+ % % the two smaller lists.
13361336% %
1337- % % This is equivalent to assigning a random number to each
1338- % % element and sorting, but extending the numbers on demand
1339- % % while there still are duplicates.
1337+ % % How the algorithm works and why it is correct can be explained like this:
1338+ % %
1339+ % % The objective is, given a list of elements, to return a random
1340+ % % permutation of those elements so that every possible permutation
1341+ % % has the same probability to be returned.
1342+ % %
1343+ % % One of the two correct and bias free algorithms described on the Wikipedia
1344+ % % page for Fisher-Yates shuffling is to assign a random number to each
1345+ % % element in the list and order the elements according to the numbers.
1346+ % % For this to be correct the generated numbers must not have duplicates.
1347+ % %
1348+ % % This algorithm does that, but assigning a number and ordering
1349+ % % the elements is more or less the same step, which is taken
1350+ % % one binary bit at the time.
1351+ % %
1352+ % % It can be seen as, to each element, assign a fixpoint number
1353+ % % of infinite length starting with bit weight 1/2, continuing with 1/4,
1354+ % % and so on..., but reveal it incrementally.
1355+ % %
1356+ % % The list to shuffle is traversed, and a random bit is generated
1357+ % % for each element. If it is a 0, the element is assigned the zero bit
1358+ % % by moving it to the head of the list Zero, and if it is a 1,
1359+ % % to the head of the list One.
1360+ % %
1361+ % % Then the list Zero is recursively shuffled onto the accumulator list Acc,
1362+ % % after that the list One. By that all elements in Zero are ordered
1363+ % % before the ones in One, according to the generated numbers.
1364+ % % The order is actually not important as long as it is consistent.
1365+ % %
1366+ % % The algorithm recurses until the Zero or One list has length
1367+ % % 0 or 1, which is when the generated fixpoint number has no duplicate.
1368+ % % The fixpoint number in itself only exists in the guise of the
1369+ % % recursion call stack, that is whether an element belongs to list
1370+ % % Zero or One on each recursion level.
1371+ % %
1372+ % % As an optimization, since the algorithm is equivalent to its objective
1373+ % % to randomly permute a list, we can when reaching a small enough list
1374+ % % as in 4 or less instead do an explicit random permutation of the list.
13401375
13411376% % Leaf cases - random permutations for 0..4 elements
13421377shuffle_r ([], State , Acc ) ->
@@ -1387,35 +1422,34 @@ shuffle_r([X, Y, Z, Q], State0, Acc) ->
13871422 23 -> [Y , X , Z , Q | Acc ];
13881423 24 -> [X , Y , Z , Q | Acc ]
13891424 end , State1 };
1390- % %
13911425% % General case - split and recursive shuffle
13921426shuffle_r ([_ , _ , _ , _ | _ ] = List , State0 , Acc0 ) ->
1393- {Left , Right , State1 } = shuffle_split (List , State0 ),
1394- {Acc1 , State2 } = shuffle_r (Left , State1 , Acc0 ),
1395- shuffle_r (Right , State2 , Acc1 ).
1427+ {Zero , One , State1 } = shuffle_split (List , State0 ),
1428+ {Acc1 , State2 } = shuffle_r (Zero , State1 , Acc0 ),
1429+ shuffle_r (One , State2 , Acc1 ).
13961430
1397- % % Split L into two random subsets: Left and Right
1431+ % % Split L into two random subsets: Zero and One
13981432% %
13991433shuffle_split (L , State ) ->
14001434 shuffle_split (L , State , 1 , [], []).
14011435% %
1402- shuffle_split ([], State , _P , Left , Right ) ->
1403- {Left , Right , State };
1404- shuffle_split ([_ | _ ] = L , State0 , 1 , Left , Right ) ->
1436+ shuffle_split ([], State , _P , Zero , One ) ->
1437+ {Zero , One , State };
1438+ shuffle_split ([_ | _ ] = L , State0 , 1 , Zero , One ) ->
14051439 M = 1 bsl 56 ,
1406- case rand : uniform_s (M , State0 ) of
1440+ case uniform_s (M , State0 ) of
14071441 {V , State1 } when is_integer (V ), 1 =< V , V =< M ->
14081442 % % Setting the top bit M here provides the marker
14091443 % % for when we are out of random bits: P =:= 1
1410- shuffle_split (L , State1 , (V - 1 ) + M , Left , Right )
1444+ shuffle_split (L , State1 , (V - 1 ) + M , Zero , One )
14111445 end ;
1412- shuffle_split ([X | L ], State , P , Left , Right )
1413- when is_integer (P ), 1 = < P , P < 1 bsl 57 ->
1446+ shuffle_split ([X | L ], State , P , Zero , One )
1447+ when is_integer (P ), 1 < P , P < 1 bsl 57 ->
14141448 case P band 1 of
14151449 0 ->
1416- shuffle_split (L , State , P bsr 1 , [X | Left ], Right );
1450+ shuffle_split (L , State , P bsr 1 , [X | Zero ], One );
14171451 1 ->
1418- shuffle_split (L , State , P bsr 1 , Left , [X | Right ])
1452+ shuffle_split (L , State , P bsr 1 , Zero , [X | One ])
14191453 end .
14201454
14211455% % =====================================================================
0 commit comments