Skip to content

Commit c72c71c

Browse files
committed
Explain the shuffle algorithm
1 parent a536947 commit c72c71c

File tree

1 file changed

+55
-21
lines changed

1 file changed

+55
-21
lines changed

lib/stdlib/src/rand.erl

Lines changed: 55 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1328,15 +1328,50 @@ shuffle_s(List, State) when is_list(List) ->
13281328
shuffle_r(List, State, []).
13291329

13301330
%% Random-split-and-shuffle algorithm suggested by Richard A. O'Keefe
1331-
%% on ErlangForums, as I interpreted it...
1331+
%% on ErlangForums, as I interpreted it... "basically a randomized
1332+
%% quicksort", shall we name it Quickshuffle?
13321333
%%
1333-
%% Randomly split the list in two lists,
1334-
%% recursively shuffle the two smaller lists,
1335-
%% randomize the order between the lists according to their size.
1334+
%% Randomly split the list in two lists, and recursively shuffle
1335+
%% the two smaller lists.
13361336
%%
1337-
%% This is equivalent to assigning a random number to each
1338-
%% element and sorting, but extending the numbers on demand
1339-
%% while there still are duplicates.
1337+
%% How the algorithm works and why it is correct can be explained like this:
1338+
%%
1339+
%% The objective is, given a list of elements, to return a random
1340+
%% permutation of those elements so that every possible permutation
1341+
%% has the same probability of being returned.
1342+
%%
1343+
%% One of the two correct and bias-free algorithms described on the Wikipedia
1344+
%% page for Fisher-Yates shuffling is to assign a random number to each
1345+
%% element in the list and order the elements according to the numbers.
1346+
%% For this to be correct the generated numbers must not have duplicates.
1347+
%%
1348+
%% This algorithm does that, but assigning a number and ordering
1349+
%% the elements is more or less the same step, which is taken
1350+
%% one binary bit at a time.
1351+
%%
1352+
%% It can be seen as, to each element, assign a fixpoint number
1353+
%% of infinite length starting with bit weight 1/2, continuing with 1/4,
1354+
%% and so on..., but reveal it incrementally.
1355+
%%
1356+
%% The list to shuffle is traversed, and a random bit is generated
1357+
%% for each element. If it is a 0, the element is assigned the zero bit
1358+
%% by moving it to the head of the list Zero, and if it is a 1,
1359+
%% to the head of the list One.
1360+
%%
1361+
%% Then the list Zero is recursively shuffled onto the accumulator list Acc,
1362+
%% after that the list One. That way all elements in Zero are ordered
1363+
%% before the ones in One, according to the generated numbers.
1364+
%% The order is actually not important as long as it is consistent.
1365+
%%
1366+
%% The algorithm recurses until the Zero or One list has length
1367+
%% 0 or 1, which is when the generated fixpoint number has no duplicate.
1368+
%% The fixpoint number in itself only exists in the guise of the
1369+
%% recursion call stack, that is whether an element belongs to list
1370+
%% Zero or One on each recursion level.
1371+
%%
1372+
%% As an optimization, since the algorithm is equivalent to its objective
1373+
%% to randomly permute a list, when reaching a small enough list
1374+
%% (4 elements or fewer) we can instead do an explicit random permutation.
13401375

13411376
%% Leaf cases - random permutations for 0..4 elements
13421377
shuffle_r([], State, Acc) ->
@@ -1387,35 +1422,34 @@ shuffle_r([X, Y, Z, Q], State0, Acc) ->
13871422
23 -> [Y, X, Z, Q | Acc];
13881423
24 -> [X, Y, Z, Q | Acc]
13891424
end, State1};
1390-
%%
13911425
%% General case - split and recursive shuffle
13921426
shuffle_r([_, _, _, _ | _] = List, State0, Acc0) ->
1393-
{Left, Right, State1} = shuffle_split(List, State0),
1394-
{Acc1, State2} = shuffle_r(Left, State1, Acc0),
1395-
shuffle_r(Right, State2, Acc1).
1427+
{Zero, One, State1} = shuffle_split(List, State0),
1428+
{Acc1, State2} = shuffle_r(Zero, State1, Acc0),
1429+
shuffle_r(One, State2, Acc1).
13961430

1397-
%% Split L into two random subsets: Left and Right
1431+
%% Split L into two random subsets: Zero and One
13981432
%%
13991433
shuffle_split(L, State) ->
14001434
shuffle_split(L, State, 1, [], []).
14011435
%%
1402-
shuffle_split([], State, _P, Left, Right) ->
1403-
{Left, Right, State};
1404-
shuffle_split([_ | _] = L, State0, 1, Left, Right) ->
1436+
shuffle_split([], State, _P, Zero, One) ->
1437+
{Zero, One, State};
1438+
shuffle_split([_ | _] = L, State0, 1, Zero, One) ->
14051439
M = 1 bsl 56,
1406-
case rand:uniform_s(M, State0) of
1440+
case uniform_s(M, State0) of
14071441
{V, State1} when is_integer(V), 1 =< V, V =< M ->
14081442
%% Setting the top bit M here provides the marker
14091443
%% for when we are out of random bits: P =:= 1
1410-
shuffle_split(L, State1, (V - 1) + M, Left, Right)
1444+
shuffle_split(L, State1, (V - 1) + M, Zero, One)
14111445
end;
1412-
shuffle_split([X | L], State, P, Left, Right)
1413-
when is_integer(P), 1 =< P, P < 1 bsl 57 ->
1446+
shuffle_split([X | L], State, P, Zero, One)
1447+
when is_integer(P), 1 < P, P < 1 bsl 57 ->
14141448
case P band 1 of
14151449
0 ->
1416-
shuffle_split(L, State, P bsr 1, [X | Left], Right);
1450+
shuffle_split(L, State, P bsr 1, [X | Zero], One);
14171451
1 ->
1418-
shuffle_split(L, State, P bsr 1, Left, [X | Right])
1452+
shuffle_split(L, State, P bsr 1, Zero, [X | One])
14191453
end.
14201454

14211455
%% =====================================================================

0 commit comments

Comments
 (0)