Skip to content

Commit 3aeae41

Browse files
committed
Explain the shuffle algorithm
1 parent a536947 commit 3aeae41

File tree

1 file changed

+75
-23
lines changed

1 file changed

+75
-23
lines changed

lib/stdlib/src/rand.erl

Lines changed: 75 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1328,15 +1328,68 @@ shuffle_s(List, State) when is_list(List) ->
13281328
shuffle_r(List, State, []).
13291329

13301330
%% Random-split-and-shuffle algorithm suggested by Richard A. O'Keefe
1331-
%% on ErlangForums, as I interpreted it...
1332-
%%
1333-
%% Randomly split the list in two lists,
1334-
%% recursively shuffle the two smaller lists,
1335-
%% randomize the order between the lists according to their size.
1336-
%%
1337-
%% This is equivalent to assigning a random number to each
1338-
%% element and sorting, but extending the numbers on demand
1339-
%% while there still are duplicates.
1331+
%% on ErlangForums, as I interpreted it... "basically a randomized
1332+
%% quicksort", shall we name it Quickshuffle?
1333+
%%
1334+
%% Randomly split the list in two lists, and recursively shuffle
1335+
%% the two smaller lists.
1336+
%%
1337+
%% How the algorithm works and why it is correct can be explained like this:
1338+
%%
1339+
%% The objective is, given a list of elements, to return a random
1340+
%% permutation of those elements so that every possible permutation
1341+
%% has the same probability to be returned.
1342+
%%
1343+
%% One of the two correct and bias free algorithms described on the Wikipedia
1344+
%% page for Fisher-Yates shuffling is to assign a random number to each
1345+
%% element in the list and order the elements according to the numbers.
1346+
%% For this to be correct the generated numbers must not have duplicates.
1347+
%%
1348+
%% This algorithm does that, but assigning a number and ordering
1349+
%% the elements is more or less the same step, which is taken
1350+
%% one binary bit at the time.
1351+
%%
1352+
%% It can be seen as, to each element, assign a fixpoint number
1353+
%% of infinite length starting with bit weight 1/2, continuing with 1/4,
1354+
%% and so on..., but reveal it incrementally.
1355+
%%
1356+
%% The list to shuffle is traversed, and a random bit is generated
1357+
%% for each element. If it is a 0, the element is assigned the zero bit
1358+
%% by moving it to the head of the list Zero, and if it is a 1,
1359+
%% to the head of the list One.
1360+
%%
1361+
%% Then the list Zero is recursively shuffled onto the accumulator list Acc,
1362+
%% after that the list One. By that all elements in Zero are ordered
1363+
%% before the ones in One, according to the generated numbers.
1364+
%% The order is actually not important as long as it is consistent.
1365+
%%
1366+
%% The algorithm recurses until the Zero or One list has length
1367+
%% 0 or 1, which is when the generated fixpoint number has no duplicate.
1368+
%% The fixpoint number in itself only exists in the guise of the
1369+
%% recursion call stack, that is whether an element belongs to list
1370+
%% Zero or One on each recursion level.
1371+
%% Here is the bare algorithm:
1372+
%%
1373+
%% quickshuffle([], Acc) -> Acc;
1374+
%% quickshuffle([X], Acc) -> [X | Acc];
1375+
%% quickshuffle([_|_] = L, Acc) ->
1376+
%% {Zero, One} = quickshuffle_split(L, [], []),
1377+
%% quickshuffle(One, quickshuffle(Zero, Acc)).
1378+
%%
1379+
%% quickshuffle_split([], Zero, One) ->
1380+
%% {Zero, One};
1381+
%% quickshuffle_split([X | L], Zero, One) ->
1382+
%% case random_bit() of
1383+
%% 0 -> quickshuffle_split(L, [X | Zero], One);
1384+
%% 1 -> quickshuffle_split(L, Zero, [X | One])
1385+
%% end.
1386+
%%
1387+
%% As an optimization, since the algorithm is equivalent to its objective
1388+
%% to randomly permute a list, we can when reaching a small enough list
1389+
%% as in 4 or less instead do an explicit random permutation of the list.
1390+
%%
1391+
%% The `random_bit()` can be generated with small overhead by generating
1392+
%% a random word and cache it, then shift out one bit at the time.
13401393

13411394
%% Leaf cases - random permutations for 0..4 elements
13421395
shuffle_r([], State, Acc) ->
@@ -1387,35 +1440,34 @@ shuffle_r([X, Y, Z, Q], State0, Acc) ->
13871440
23 -> [Y, X, Z, Q | Acc];
13881441
24 -> [X, Y, Z, Q | Acc]
13891442
end, State1};
1390-
%%
13911443
%% General case - split and recursive shuffle
13921444
shuffle_r([_, _, _, _ | _] = List, State0, Acc0) ->
1393-
{Left, Right, State1} = shuffle_split(List, State0),
1394-
{Acc1, State2} = shuffle_r(Left, State1, Acc0),
1395-
shuffle_r(Right, State2, Acc1).
1445+
{Zero, One, State1} = shuffle_split(List, State0),
1446+
{Acc1, State2} = shuffle_r(Zero, State1, Acc0),
1447+
shuffle_r(One, State2, Acc1).
13961448

1397-
%% Split L into two random subsets: Left and Right
1449+
%% Split L into two random subsets: Zero and One
13981450
%%
13991451
shuffle_split(L, State) ->
14001452
shuffle_split(L, State, 1, [], []).
14011453
%%
1402-
shuffle_split([], State, _P, Left, Right) ->
1403-
{Left, Right, State};
1404-
shuffle_split([_ | _] = L, State0, 1, Left, Right) ->
1454+
shuffle_split([], State, _P, Zero, One) ->
1455+
{Zero, One, State};
1456+
shuffle_split([_ | _] = L, State0, 1, Zero, One) ->
14051457
M = 1 bsl 56,
1406-
case rand:uniform_s(M, State0) of
1458+
case uniform_s(M, State0) of
14071459
{V, State1} when is_integer(V), 1 =< V, V =< M ->
14081460
%% Setting the top bit M here provides the marker
14091461
%% for when we are out of random bits: P =:= 1
1410-
shuffle_split(L, State1, (V - 1) + M, Left, Right)
1462+
shuffle_split(L, State1, (V - 1) + M, Zero, One)
14111463
end;
1412-
shuffle_split([X | L], State, P, Left, Right)
1413-
when is_integer(P), 1 =< P, P < 1 bsl 57 ->
1464+
shuffle_split([X | L], State, P, Zero, One)
1465+
when is_integer(P), 1 < P, P < 1 bsl 57 ->
14141466
case P band 1 of
14151467
0 ->
1416-
shuffle_split(L, State, P bsr 1, [X | Left], Right);
1468+
shuffle_split(L, State, P bsr 1, [X | Zero], One);
14171469
1 ->
1418-
shuffle_split(L, State, P bsr 1, Left, [X | Right])
1470+
shuffle_split(L, State, P bsr 1, Zero, [X | One])
14191471
end.
14201472

14211473
%% =====================================================================

0 commit comments

Comments
 (0)