@@ -1328,15 +1328,68 @@ shuffle_s(List, State) when is_list(List) ->
13281328    shuffle_r (List , State , []).
13291329
13301330% % Random-split-and-shuffle algorithm suggested by Richard A. O'Keefe
1331- % % on ErlangForums, as I interpreted it...
1332- % %
1333- % % Randomly split the list in two lists,
1334- % % recursively shuffle the two smaller lists,
1335- % % randomize the order between the lists according to their size.
1336- % %
1337- % % This is equivalent to assigning a random number to each
1338- % % element and sorting, but extending the numbers on demand
1339- % % while there still are duplicates.
1331+ % % on ErlangForums, as I interpreted it...  "basically a randomized
1332+ % % quicksort", shall we name it Quickshuffle?
1333+ % %
1334+ % % Randomly split the list in two lists, and recursively shuffle
1335+ % % the two smaller lists.
1336+ % %
1337+ % % How the algorithm works and why it is correct can be explained like this:
1338+ % %
1339+ % % The objective is, given a list of elements, to return a random
1340+ % % permutation of those elements so that every possible permutation
1341+ % % has the same probability to be returned.
1342+ % %
1343+ % % One of the two correct and bias free algorithms described on the Wikipedia
1344+ % % page for Fisher-Yates shuffling is to assign a random number to each
1345+ % % element in the list and order the elements according to the numbers.
1346+ % % For this to be correct the generated numbers must not have duplicates.
1347+ % %
1348+ % % This algorithm does that, but assigning a number and ordering
1349+ % % the elements is more or less the same step, which is taken
1350+ % % one binary bit at the time.
1351+ % %
1352+ % % It can be seen as, to each element, assign a fixpoint number
1353+ % % of infinite length starting with bit weight 1/2, continuing with 1/4,
1354+ % % and so on..., but reveal it incrementally.
1355+ % %
1356+ % % The list to shuffle is traversed, and a random bit is generated
1357+ % % for each element.  If it is a 0, the element is assigned the zero bit
1358+ % % by moving it to the head of the list Zero, and if it is a 1,
1359+ % % to the head of the list One.
1360+ % %
1361+ % % Then the list Zero is recursively shuffled onto the accumulator list Acc,
1362+ % % after that the list One.  By that all elements in Zero are ordered
1363+ % % before the ones in One, according to the generated numbers.
1364+ % % The order is actually not important as long as it is consistent.
1365+ % %
1366+ % % The algorithm recurses until the Zero or One list has length
1367+ % % 0 or 1, which is when the generated fixpoint number has no duplicate.
1368+ % % The fixpoint number in itself only exists in the guise of the
1369+ % % recursion call stack, that is whether an element belongs to list
1370+ % % Zero or One on each recursion level.
1371+ % % Here is the bare algorithm:
1372+ % %
1373+ % % quickshuffle([], Acc) -> Acc;
1374+ % % quickshuffle([X], Acc) -> [X | Acc];
1375+ % % quickshuffle([_|_] = L, Acc) ->
1376+ % %     {Zero, One} = quickshuffle_split(L, [], []),
1377+ % %     quickshuffle(One, quickshuffle(Zero, Acc)).
1378+ % %
1379+ % % quickshuffle_split([], Zero, One) ->
1380+ % %     {Zero, One};
1381+ % % quickshuffle_split([X | L], Zero, One) ->
1382+ % %     case random_bit() of
1383+ % %         0 -> quickshuffle_split(L, [X | Zero], One);
1384+ % %         1 -> quickshuffle_split(L, Zero, [X | One])
1385+ % %     end.
1386+ % %
1387+ % % As an optimization, since the algorithm is equivalent to its objective
1388+ % % to randomly permute a list, we can when reaching a small enough list
1389+ % % as in 4 or less instead do an explicit random permutation of the list.
1390+ % %
1391+ % % The `random_bit()` can be generated with small overhead by generating
1392+ % % a random word and cache it, then shift out one bit at the time.
13401393
13411394% % Leaf cases - random permutations for 0..4 elements
13421395shuffle_r ([], State , Acc ) -> 
@@ -1387,35 +1440,34 @@ shuffle_r([X, Y, Z, Q], State0, Acc) ->
13871440         23  -> [Y , X , Z , Q  | Acc ];
13881441         24  -> [X , Y , Z , Q  | Acc ]
13891442     end , State1 };
1390- % %
13911443% % General case - split and recursive shuffle
13921444shuffle_r ([_ , _ , _ , _  | _ ] =  List , State0 , Acc0 ) -> 
1393-     {Left ,  Right , State1 } =  shuffle_split (List , State0 ),
1394-     {Acc1 , State2 } =  shuffle_r (Left , State1 , Acc0 ),
1395-     shuffle_r (Right , State2 , Acc1 ).
1445+     {Zero ,  One , State1 } =  shuffle_split (List , State0 ),
1446+     {Acc1 , State2 } =  shuffle_r (Zero , State1 , Acc0 ),
1447+     shuffle_r (One , State2 , Acc1 ).
13961448
1397- % % Split L into two random subsets: Left  and Right 
1449+ % % Split L into two random subsets: Zero  and One 
13981450% %
13991451shuffle_split (L , State ) -> 
14001452    shuffle_split (L , State , 1 , [], []).
14011453% %
1402- shuffle_split ([], State , _P , Left ,  Right ) -> 
1403-     {Left ,  Right , State };
1404- shuffle_split ([_  | _ ] =  L , State0 , 1 , Left ,  Right ) -> 
1454+ shuffle_split ([], State , _P , Zero ,  One ) -> 
1455+     {Zero ,  One , State };
1456+ shuffle_split ([_  | _ ] =  L , State0 , 1 , Zero ,  One ) -> 
14051457    M  =  1  bsl  56 ,
1406-     case  rand : uniform_s (M , State0 ) of 
1458+     case  uniform_s (M , State0 ) of 
14071459        {V , State1 } when  is_integer (V ), 1  =<  V , V  =<  M  ->
14081460            % % Setting the top bit M here provides the marker
14091461            % % for when we are out of random bits: P =:= 1
1410-             shuffle_split (L , State1 , (V  -  1 ) +  M , Left ,  Right )
1462+             shuffle_split (L , State1 , (V  -  1 ) +  M , Zero ,  One )
14111463    end ;
1412- shuffle_split ([X  | L ], State , P , Left ,  Right )
1413-   when  is_integer (P ), 1  = <P , P  <  1  bsl  57  -> 
1464+ shuffle_split ([X  | L ], State , P , Zero ,  One )
1465+   when  is_integer (P ), 1  <  P , P  <  1  bsl  57  -> 
14141466    case  P  band  1  of 
14151467        0  ->
1416-             shuffle_split (L , State , P  bsr  1 , [X  | Left ], Right );
1468+             shuffle_split (L , State , P  bsr  1 , [X  | Zero ], One );
14171469        1  ->
1418-             shuffle_split (L , State , P  bsr  1 , Left , [X  | Right ])
1470+             shuffle_split (L , State , P  bsr  1 , Zero , [X  | One ])
14191471    end .
14201472
14211473% % =====================================================================
0 commit comments