reverted some refactorings and extended comments

stateMachinist · stateMachinist · commit 72524f602673 · 2025-10-09T22:01:23.000+02:00
diff --git a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/CoreRow.java b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/CoreRow.java
@@ -22,22 +22,28 @@
 
 import net.automatalib.word.Word;
 
+/**
+ * Each core row represents some hypothesis state and stores its outputs for all table suffixes.
+ */
 class CoreRow<S, I, O> extends Row<S, I, O> {
 
     /**
      * Hypothesis state associated with this row.
      */
     final S state;
+
     /**
-     * Index in core row list.
+     * Index of this row in the core row list.
      */
     final int idx;
+
     /**
-     * Maps suffixes to the outputs contained in this row.
+     * Maps suffixes to their outputs.
      */
     final Map<Word<I>, Word<O>> sufToOut;
+
     /**
-     * Also store identifiers of suffix-output pairs for fast compatibility checking.
+     * Identifiers of all suffix-output pairs in this row, used for fast compatibility checking.
      */
     final Set<Integer> cellIds;
 
@@ -46,7 +52,7 @@ class CoreRow<S, I, O> extends Row<S, I, O> {
         this.state = state;
         this.idx = idx;
         sufToOut = new HashMap<>();
-        cellIds = new HashSet<>();
+        cellIds = new HashSet<>(); // use HashSet to enable fast containment checks
     }
 
     void addSuffix(Word<I> suf, Word<O> out, int cell) {
diff --git a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/FringeRow.java b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/FringeRow.java
@@ -18,7 +18,8 @@
 import net.automatalib.word.Word;
 
 /**
- * Each fringe row represents a hypothesis transition.
+ * Each fringe row represents some hypothesis transition
+ * outside the spanning tree defined by the core prefixes.
  *
  * @param <S>
  *         state type
@@ -33,17 +34,21 @@ class FringeRow<S, I, O> extends Row<S, I, O> {
      * Source state.
      */
     final S srcState;
+
     /**
      * Input symbol.
      */
     final I transIn;
+
     /**
      * Output symbol (determined dynamically).
      */
     O transOut;
+
     /**
-     * For compression, fringe rows do not store observations directly. Instead, they point to some leaf in a tree
-     * encoding their classification history. This trick avoids redundantly storing identical observations.
+     * For compression, fringe rows do not store observations directly.
+     * Instead, they point to some leaf in a tree encoding their classification history.
+     * This trick avoids redundantly storing identical observations.
      */
     Leaf<S, I, O> leaf;
 
diff --git a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/GenericSparseLearner.java b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/GenericSparseLearner.java
@@ -21,11 +21,10 @@
 import java.util.Collections;
 import java.util.Deque;
 import java.util.HashMap;
-import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
 import java.util.function.Function;
+import java.util.stream.Collectors;
 
 import de.learnlib.algorithm.LearningAlgorithm.MealyLearner;
 import de.learnlib.counterexample.LocalSuffixFinders;
@@ -35,57 +34,66 @@
 import net.automatalib.alphabet.Alphabet;
 import net.automatalib.automaton.transducer.MealyMachine;
 import net.automatalib.automaton.transducer.MutableMealyMachine;
-import net.automatalib.common.util.HashUtil;
 import net.automatalib.common.util.Pair;
 import net.automatalib.word.Word;
 
 class GenericSparseLearner<S, I, O> implements MealyLearner<I, O> {
 
     private final Alphabet<I> alphabet;
     private final MealyMembershipOracle<I, O> oracle;
+
     /**
      * Suffixes.
      */
     private final Deque<Word<I>> sufs;
+
     /**
      * Core rows.
      */
     private final List<CoreRow<S, I, O>> cRows;
+
     /**
      * Fringe rows.
      */
     private final Deque<FringeRow<S, I, O>> fRows;
+
     /**
-     * Fringe prefix to row.
+     * Maps fringe prefixes to rows.
      */
     private final Map<Word<I>, FringeRow<S, I, O>> prefToFringe;
+
     /**
-     * List of unique cells.
+     * List of unique suffix-output cells.
      */
     private final List<Pair<Word<I>, Word<O>>> cells;
+
     /**
-     * Maps each unique cell to its list index.
+     * Maps each suffix-output cell to its list index.
      */
     private final Map<Pair<Word<I>, Word<O>>, Integer> cellToIdx;
+
     /**
      * Hypothesis.
      */
     private final MutableMealyMachine<S, I, ?, O> hyp;
+
     /**
      * Maps each state to its core row prefix.
      */
     private final Map<S, Word<I>> stateToPrefix;
+
     /**
-     * Access sequences.
+     * Computes access sequences.
      */
     private final Function<Word<I>, Word<I>> accSeq;
 
     /**
-     * For fast suffix ranking, we track for each suffix how the core rows are partitioned by it.
+     * For fast suffix ranking, this map stores the core row partitions created by each suffix.
      */
     private final Map<Word<I>, List<BitSet>> sufToVecs;
+
     /**
-     * See {@link #sufToVecs}.
+     * Helper map for efficiently constructing the suffix partition map (see {@link #sufToVecs}).
      */
     private final Map<Word<I>, Map<Word<O>, Integer>> sufToOutToIdx;
 
@@ -122,7 +130,8 @@ public void startLearning() {
         stateToPrefix.put(init, c.prefix);
         extendFringe(c, init, new Leaf<>(c, 1, sufs.size(), Collections.emptyList()));
         fRows.forEach(f -> query(f, Word.epsilon())); // query transition outputs
-        // initially, transition outputs must be queried manually for later transitions, they derive from suffix queries
+        // initially, transition outputs must be queried manually;
+        // for later transitions, they derive from suffix queries
         updateHypothesis();
     }
 
@@ -188,7 +197,8 @@ private Word<I> pickSuffix(BitSet remRows) {
 
             assert sumOccur == remRows.cardinality();
             if (maxOccur < bestRank) {
-                // among equally ranked suffixes, pick youngest (mind that suffixes are stored/iterated LIFO)
+                // among equally ranked suffixes, pick youngest
+                // (mind that suffixes are stored/iterated LIFO)
                 bestSuf = s;
                 bestRank = maxOccur;
                 if (bestRank == 1) { // optimization: no better suffix is possible
@@ -219,15 +229,11 @@ private void followNode(FringeRow<S, I, O> f, Node<S, I, O> n) {
         }
 
         final BitSet remRows = new BitSet();
-        for (int i = sep.remRows.nextSetBit(0); i >= 0; i = sep.remRows.nextSetBit(i + 1)) {
-            if (cRows.get(i).cellIds.contains(cellIdx)) {
-                remRows.set(i);
-            }
-        }
-        final List<Integer> cellIds = new ArrayList<>(sep.cellsIds.size() + 1);
-        cellIds.addAll(sep.cellsIds);
+        sep.remRows.stream().filter(i -> cRows.get(i).cellIds.contains(cellIdx)).forEach(remRows::set);
+        final List<Integer> cellIds = new ArrayList<>(sep.cellsIds); // important: copy elements!
         cellIds.add(cellIdx);
-        if (remRows.isEmpty()) { // no compatible core prefix
+        if (remRows.isEmpty()) {
+            // no compatible core prefix
             f.leaf = null;
             moveToCore(f, cellIds);
         } else if (remRows.cardinality() == 1) {
@@ -252,7 +258,8 @@ private Word<O> query(Row<S, I, O> r, Word<I> suf) {
     }
 
     /**
-     * Adds suffix-output pair to index if not yet contained and returns a unique identifier representing the pair.
+     * Adds suffix-output pair to index if not yet contained,
+     * and returns a unique identifier representing the pair.
      */
     private int getUniqueCellIdx(Word<I> suf, Word<O> out) {
         assert suf.length() == out.length();
@@ -270,7 +277,7 @@ private int getUniqueCellIdx(Word<I> suf, Word<O> out) {
      * Returns index of new core row.
      */
     private int moveToCore(FringeRow<S, I, O> f, List<Integer> cellIds) {
-        boolean removed = fRows.remove(f);
+        final boolean removed = fRows.remove(f);
         assert removed;
         final S state = hyp.addState();
         final CoreRow<S, I, O> c = new CoreRow<>(f.prefix, state, cRows.size());
@@ -290,25 +297,14 @@ private int moveToCore(FringeRow<S, I, O> f, List<Integer> cellIds) {
     }
 
     /**
-     * Takes fringe row and its observations, queries the missing entries, and returns a list containing the
-     * observations for all suffixes.
+     * Takes fringe row and its observations (as cell identifiers), queries the missing suffixes, and returns
+     * a new list with the observations for all suffixes. Known suffixes are kept in a set for fast lookups.
      */
     private List<Integer> completeRowObservations(FringeRow<S, I, O> f, List<Integer> cellIds) {
-        final Set<Word<I>> sufsPresent = new HashSet<>(HashUtil.capacity(cellIds.size()));
-        for (Integer id : cellIds) {
-            sufsPresent.add(this.cells.get(id).getFirst());
-        }
-        final List<Word<I>> sufsMissing = new ArrayList<>(sufs.size());
-        for (Word<I> s : sufs) {
-            if (!sufsPresent.contains(s)) {
-                sufsMissing.add(s);
-            }
-        }
-        final List<Integer> cellIdsFull = new ArrayList<>(cellIds.size() + sufsMissing.size());
-        cellIdsFull.addAll(cellIds);
-        for (Word<I> s : sufsMissing) {
-            cellIdsFull.add(getUniqueCellIdx(s, query(f, s)));
-        }
+        final java.util.Set<Word<I>> sufsPresent = cellIds.stream().map(c -> this.cells.get(c).getFirst()).collect(Collectors.toSet());
+        final List<Word<I>> sufsMissing = sufs.stream().filter(s -> !sufsPresent.contains(s)).collect(Collectors.toList());
+        final List<Integer> cellIdsFull = new ArrayList<>(cellIds); // important: copy elements!
+        sufsMissing.forEach(s -> cellIdsFull.add(getUniqueCellIdx(s, query(f, s))));
         return cellIdsFull;
     }
 
@@ -340,11 +336,9 @@ private void identifyNewState(DefaultQuery<I, Word<O>> q) {
     private void addSuffixToTable(Word<I> suf) {
         assert !sufs.contains(suf);
         sufs.push(suf);
-        /*
-         * This might be an extension of an existing suffix -> storing/iterating suffixes in LIFO order exploits caching
-         * when filling core rows. Similarly, since core rows are prefix-closed, cache hit rate for adding suffixes is
-         * maximized by iterating core rows in LIFO order
-         */
+
+        // since core rows are prefix-closed,
+        // cache hit rate is maximized by LIFO iteration
         for (int i = cRows.size() - 1; i >= 0; i--) {
             addSuffixToCoreRow(cRows.get(i), suf);
         }
diff --git a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Leaf.java b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Leaf.java
@@ -28,9 +28,9 @@ class Leaf<S, I, O> extends Node<S, I, O> {
     private int lastNumSufs;
 
     /**
-     * Split leafs always remember how many core rows and suffixes the table contained at their last visit. This
-     * information is used as a logical timestamp to check whether the separator is still guaranteed to be optimal or if
-     * it needs to be recomputed.
+     * Split leaves always remember how many core rows and suffixes the table contained
+     * at their last visit. This information is used as a logical timestamp to check
+     * if the separator is still guaranteed to be optimal or if it needs to be recomputed.
      */
     @Nullable Separator<S, I, O> sep;
 
@@ -72,10 +72,9 @@ void update(List<CoreRow<S, I, O>> cRows, int numSufs) {
             sep = null;
         }
 
-        /*
-         * Since suffixes and core rows grow monotonically, the separator only needs to be recomputed whenever new
-         * compatible core prefixes emerge or when the suffix set grows.
-         */
+        // Since suffixes and core rows grow monotonically,
+        // the separator only needs to be recomputed whenever
+        // new compatible core prefixes emerge or the suffix set grows.
 
         for (int i = lastNumCRows; i < cRows.size(); i++) {
             final CoreRow<S, I, O> c = cRows.get(i);
diff --git a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Node.java b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Node.java
@@ -21,9 +21,14 @@
 class Node<S, I, O> { // type parameters required for safe casting
 
     /**
-     * Cell identifiers of the fringe rows at this node.
+     * Suffix-output cell identifiers of the fringe rows that share this node.
      */
     final List<Integer> cellsIds;
+
+    /**
+     * Bit vector indicating the core rows that remain compatible
+     * with the observations associated with this node.
+     */
     final BitSet remRows;
 
     protected Node(List<Integer> cellsIds) {
diff --git a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/SparseLearner.java b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/SparseLearner.java
@@ -24,9 +24,9 @@
 import net.automatalib.word.Word;
 
 /**
- * Optimized implementation of the Ls learning algorithm, as described in the paper <a
- * href="https://doi.org/10.1007/978-3-032-05792-1_10">Learning Mealy Machines with Sparse Observation Tables</a> by
- * Wolffhardt Schwabe, Paul Kogel, and Sabine Glesner.
+ * Optimized implementation of the L<sup>s</sup> learning algorithm, as described in the paper
+ * <a href="https://doi.org/10.1007/978-3-032-05792-1_10">Learning Mealy Machines with Sparse Observation Tables</a>
+ * by Wolffhardt Schwabe, Paul Kogel, and Sabine Glesner.
  */
 public class SparseLearner<I, O> extends GenericSparseLearner<Integer, I, O> {
 
diff --git a/algorithms/active/sparse/src/main/java/module-info.java b/algorithms/active/sparse/src/main/java/module-info.java
@@ -15,9 +15,9 @@
  */
 
 /**
- * This module provides the implementation of the Sparse OT learning algorithm as described in the paper <a
- * href="https://doi.org/10.1007/978-3-032-05792-1_10">Learning Mealy Machines with Sparse Observation Tables</a> by
- * Wolffhardt Schwabe, Paul Kogel, and Sabine Glesner.
+ * This module provides the implementation of the Sparse OT learning algorithm as described in the paper
+ * <a href="https://doi.org/10.1007/978-3-032-05792-1_10">Learning Mealy Machines with Sparse Observation Tables</a>
+ * by Wolffhardt Schwabe, Paul Kogel, and Sabine Glesner.
  * <p>
  * This module is provided by the following Maven dependency:
  * <pre>