Add --kernel and --launch-id to Reproducer (#209)

FindHao · meta-codesync[bot] · commit 3b40ee878f4c · 2025-12-02T20:56:59.000-08:00
Summary: This PR adds support for reproducing kernel launches by kernel name and launch ID, eliminating the need to manually find line numbers in trace files.

## Changes

- **`reproducer/cli.py`**:
  - Added `--kernel` argument (str, default=None)
  - Added `--launch-id` argument (int, default=0, 0-based)
  - Updated `--line` help text to indicate mutual exclusivity
- **`reproducer/orchestrator.py`**:
  - Extended `reproduce()` function signature:
    - `line_index: int` (required, maintains backward compatibility)
    - `out_dir: str` (required, no default value)
    - `template: str` (required, no default value)
    - `kernel_name: Optional[str] = None` (new, placed after required params)
    - `launch_id: int = 0` (new)
  - Implemented kernel lookup logic: if `kernel_name` is provided, uses `find_launch_index_by_kernel()` to find the actual `line_index`
  - Updated docstring to document support for `.ndjson`, `.ndjson.gz`, and `.bin.ndjson` formats
- **`cli.py`**:
  - Added mutual exclusivity check: error if both `--kernel` and `--line` (non-zero) are provided
  - Updated `reproduce()` call to pass new parameters using unified calling pattern
- **`tests/test_tritonparse.py`**:
  - Added helper methods to `TestTritonparseCPU` class:
    - `_get_test_ndjson_file()`: Get test file path
    - `setup_temp_reproduce_dir()`: Create temporary directory
    - `cleanup_temp_reproduce_dir()`: Cleanup temporary directory
  - Added 5 unit tests:
    - `test_reproduce_mutual_exclusivity()`: Test parameter mutual exclusivity
    - `test_reproduce_kernel_default_launch_id()`: Test default launch_id
    - `test_reproduce_kernel_launch_id()`: End-to-end integration test
    - `test_reproduce_kernel_not_found()`: Test error handling
    - `test_reproduce_launch_id_out_of_range()`: Test boundary conditions
  - Refactored tests to use helper methods, following `TestTritonparseCUDA` pattern
  - Added imports at module level: `Path` and `tritonparse.reproducer.orchestrator`

## Usage

```bash
# Existing: use line number (0-based)
tritonparseoss reproduce trace.ndjson --line 4

# NEW: use kernel name + launch id (0-based)
tritonparseoss reproduce trace.ndjson --kernel matmul_kernel --launch-id 2

# Also works with .ndjson.gz and .bin.ndjson files
tritonparseoss reproduce trace.ndjson.gz --kernel matmul_kernel --launch-id 0
```

## Testing

Tests use real data from `tests/example_output/parsed_output_complex/dedicated_log_triton_trace_findhao__mapped.ndjson.gz` when possible, mock data for edge cases.

## Notes

- All indices are 0-based for consistency with Python conventions
- Kernel name matching is case-sensitive (exact match only)
- Backward compatible: existing `--line` usage continues to work
- Error messages include helpful hints (valid range, similar kernel suggestions)

Pull Request resolved: #209

Reviewed By: wychi

Differential Revision: D88171118

Pulled By: FindHao

fbshipit-source-id: 3f82ddd3ee3d5298acabede98ca689ae233d2f6f
diff --git a/tests/test_tritonparse.py b/tests/test_tritonparse.py
@@ -14,13 +14,15 @@
 import unittest
 from collections import defaultdict
 from dataclasses import dataclass
+from pathlib import Path
 from typing import Any, Union
 
 import torch
 import torch._inductor.config as inductor_config
 import triton  # @manual=//triton:triton
 import triton.language as tl  # @manual=//triton:triton
 import tritonparse.context_manager
+import tritonparse.reproducer.orchestrator
 import tritonparse.structured_logging
 import tritonparse.utils
 from triton import knobs  # @manual=//triton:triton
@@ -138,6 +140,26 @@ def clear_all_caches(*kernels):
 class TestTritonparseCPU(unittest.TestCase):
     """CPU-only tests (no CUDA required)"""
 
+    def _get_test_ndjson_file(self):
+        """Return the path of the example .ndjson.gz trace beside this test file, asserting it exists."""
+        gz_file = (
+            Path(__file__).parent
+            / "example_output/parsed_output_complex/dedicated_log_triton_trace_findhao__mapped.ndjson.gz"
+        )
+        self.assertTrue(gz_file.exists(), f"Test file not found: {gz_file}")
+        return gz_file
+
+    def setup_temp_reproduce_dir(self):
+        """Create a temp dir and return (temp_dir, out_dir); out_dir is a 'repro_output' path inside it, not created here."""
+        temp_dir = tempfile.mkdtemp()
+        out_dir = os.path.join(temp_dir, "repro_output")
+        return temp_dir, out_dir
+
+    def cleanup_temp_reproduce_dir(self, temp_dir):
+        """Delete temp_dir recursively (errors ignored) unless TEST_KEEP_OUTPUT is truthy."""
+        if not TEST_KEEP_OUTPUT:
+            shutil.rmtree(temp_dir, ignore_errors=True)
+
     def test_callsite_parsing(self):
         """Test parsing of callsite locations in TTIR/TTGIR"""
         from tritonparse.ir_parser import extract_loc_definitions
@@ -482,6 +504,116 @@ def test_find_launch_index_out_of_range(self):
         self.assertIn("--launch-id 10", error_msg)
         self.assertIn("Valid range: 0 to 3", error_msg)
 
+    def test_reproduce_mutual_exclusivity(self):
+        """Check argparse parsing of --line/--kernel; the exclusivity error itself is raised in cli.py main()."""
+        import argparse
+
+        from tritonparse.reproducer.cli import _add_reproducer_args
+
+        parser = argparse.ArgumentParser()
+        _add_reproducer_args(parser)
+
+        # Case 1: both --line and --kernel given; argparse itself accepts this combination
+        # Uses a second, independent ArgumentParser (a plain parser, not a mock)
+        mock_parser = argparse.ArgumentParser()
+        _add_reproducer_args(mock_parser)
+        args = mock_parser.parse_args(
+            ["test.ndjson", "--line", "5", "--kernel", "matmul_kernel"]
+        )
+
+        # Rejecting this combination is done later by cli.py main(), not by argparse,
+        # so here we only verify that both values come through exactly as given
+        self.assertEqual(args.kernel, "matmul_kernel")
+        self.assertEqual(args.line, 5)
+
+        # Case 2: only --kernel given; --line keeps its default of 0
+        args = parser.parse_args(["test.ndjson", "--kernel", "matmul_kernel"])
+        self.assertEqual(args.kernel, "matmul_kernel")
+        self.assertEqual(args.line, 0)  # default; main() only rejects a non-zero --line with --kernel
+
+        # Case 3: only --line given; --kernel keeps its default of None
+        args = parser.parse_args(["test.ndjson", "--line", "5"])
+        self.assertEqual(args.line, 5)
+        self.assertIsNone(args.kernel)
+
+    def test_reproduce_kernel_launch_id(self):
+        """End-to-end test: call reproduce() selecting the launch via kernel_name and launch_id."""
+        gz_file = self._get_test_ndjson_file()
+        temp_dir, out_dir = self.setup_temp_reproduce_dir()
+
+        try:
+            # Reproduce the first launch (launch_id=0) of fused_op_kernel
+            result = tritonparse.reproducer.orchestrator.reproduce(
+                input_path=str(gz_file),
+                line_index=0,  # Placeholder; overridden by the kernel_name lookup
+                out_dir=out_dir,
+                template="example",
+                kernel_name="fused_op_kernel",
+                launch_id=0,
+            )
+
+            # The result must expose these keys, and both paths must exist on disk
+            self.assertIn("kernel", result)
+            self.assertIn("repro_script", result)
+            self.assertIn("repro_context", result)
+            self.assertTrue(os.path.exists(result["repro_script"]))
+            self.assertTrue(os.path.exists(result["repro_context"]))
+
+            # The generated script should mention the kernel by name
+            script_content = Path(result["repro_script"]).read_text()
+            self.assertIn("fused_op_kernel", script_content)
+
+        finally:
+            self.cleanup_temp_reproduce_dir(temp_dir)
+
+    def test_reproduce_kernel_not_found(self):
+        """reproduce() must raise ValueError naming the kernel when kernel_name matches nothing."""
+        gz_file = self._get_test_ndjson_file()
+        temp_dir, out_dir = self.setup_temp_reproduce_dir()
+
+        try:
+            with self.assertRaises(ValueError) as cm:
+                tritonparse.reproducer.orchestrator.reproduce(
+                    input_path=str(gz_file),
+                    line_index=0,  # Placeholder; overridden by the kernel_name lookup
+                    out_dir=out_dir,
+                    template="example",
+                    kernel_name="nonexistent_kernel",
+                    launch_id=0,
+                )
+
+            error_msg = str(cm.exception)
+            self.assertIn("not found", error_msg)
+            self.assertIn("nonexistent_kernel", error_msg)
+
+        finally:
+            self.cleanup_temp_reproduce_dir(temp_dir)
+
+    def test_reproduce_launch_id_out_of_range(self):
+        """reproduce() must raise ValueError reporting the valid range when launch_id is too large."""
+        gz_file = self._get_test_ndjson_file()
+        temp_dir, out_dir = self.setup_temp_reproduce_dir()
+
+        try:
+            # The example trace has 4 fused_op_kernel launches (ids 0-3), so launch_id=10 is invalid
+            with self.assertRaises(ValueError) as cm:
+                tritonparse.reproducer.orchestrator.reproduce(
+                    input_path=str(gz_file),
+                    line_index=0,  # Placeholder; overridden by the kernel_name lookup
+                    out_dir=out_dir,
+                    template="example",
+                    kernel_name="fused_op_kernel",
+                    launch_id=10,
+                )
+
+            error_msg = str(cm.exception)
+            self.assertIn("has only 4 launches", error_msg)
+            self.assertIn("--launch-id 10", error_msg)
+            self.assertIn("Valid range: 0 to 3", error_msg)
+
+        finally:
+            self.cleanup_temp_reproduce_dir(temp_dir)
+
 
 class TestTritonparseCUDA(unittest.TestCase):
     """CUDA tests (require GPU)"""
diff --git a/tritonparse/cli.py b/tritonparse/cli.py
@@ -68,6 +68,10 @@ def main():
         }
         unified_parse(**parse_args)
     elif args.func == "reproduce":
+        # Check mutual exclusivity between --line and --kernel/--launch-id
+        if args.kernel and args.line != 0:
+            repro_parser.error("--line and --kernel/--launch-id are mutually exclusive")
+
         replacer = None
         if args.use_fbcode:
             from tritonparse.fb.reproducer.replacer import FBCodePlaceholderReplacer
@@ -77,9 +81,11 @@ def main():
 
         reproduce(
             input_path=args.input,
-            line_index=args.line,
+            line_index=args.line if not args.kernel else 0,
             out_dir=args.out_dir,
             template=args.template,
+            kernel_name=args.kernel,
+            launch_id=args.launch_id if args.kernel else 0,
             kernel_import=args.kernel_import,
             replacer=replacer,
         )
diff --git a/tritonparse/reproducer/cli.py b/tritonparse/reproducer/cli.py
@@ -14,7 +14,26 @@ def _add_reproducer_args(parser: argparse.ArgumentParser) -> None:
         default=0,
         help=(
             "The line index (0-based) of the launch event in the input file to reproduce. "
-            "Defaults to 0 (first launch event)."
+            "Defaults to 0 (first launch event). Mutually exclusive with --kernel/--launch-id."
+        ),
+    )
+    parser.add_argument(
+        "--kernel",
+        type=str,
+        default=None,
+        help=(
+            "Kernel name (exact match, case-sensitive) to reproduce. "
+            "Use with --launch-id to specify which launch of the kernel. "
+            "Mutually exclusive with --line."
+        ),
+    )
+    parser.add_argument(
+        "--launch-id",
+        type=int,
+        default=0,
+        help=(
+            "0-based launch index for the kernel specified by --kernel. "
+            "Defaults to 0 (first launch). Only used when --kernel is provided."
         ),
     )
     parser.add_argument(
diff --git a/tritonparse/reproducer/orchestrator.py b/tritonparse/reproducer/orchestrator.py
@@ -3,6 +3,7 @@
 from pathlib import Path
 from typing import Optional
 
+from tritonparse.info.kernel_query import find_launch_index_by_kernel
 from tritonparse.reproducer.ingestion.ndjson import build_context_bundle
 from tritonparse.reproducer.placeholder_replacer import (
     DefaultPlaceholderReplacer,
@@ -20,24 +21,44 @@ def reproduce(
     line_index: int,
     out_dir: str,
     template: str,
+    kernel_name: Optional[str] = None,
+    launch_id: int = 0,
     replacer: Optional[PlaceholderReplacer] = None,
     kernel_import: KernelImportMode = KernelImportMode.DEFAULT,
 ) -> dict[str, str]:
     """
     Generate a reproducer script from NDJSON trace file.
 
+    Must provide either line_index OR (kernel_name + launch_id), not both.
+    If kernel_name is provided, the line_index parameter will be ignored and
+    recalculated from the kernel lookup.
+
     Args:
-        input_path: Path to the NDJSON trace file.
-        line_index: 0-based index of the launch event to reproduce in the events list.
+        input_path: Path to ndjson file. Supports uncompressed (.ndjson),
+            gzip compressed (.ndjson.gz), and gzip member concatenation (.bin.ndjson) formats.
+        line_index: 0-based index in events list. Ignored if kernel_name is provided.
         out_dir: Output directory for reproducer files.
         template: Template name to use for the reproducer.
+        kernel_name: Exact kernel name to match (case-sensitive). If provided, line_index will be recalculated.
+        launch_id: 0-based launch index for the kernel (default: 0, first launch).
         replacer: Optional custom PlaceholderReplacer instance. If None, uses DefaultPlaceholderReplacer.
         kernel_import: Kernel import mode (DEFAULT or COPY).
     """
-    logger.debug(f"Building bundle from {input_path} at line {line_index}")
     events = load_ndjson(Path(input_path))
     logger.debug(f"Loaded {len(events)} events")
 
+    # If kernel_name is provided, lookup the actual line_index (overrides the parameter)
+    if kernel_name is not None:
+        logger.debug(
+            f"Looking up kernel '{kernel_name}' launch_id={launch_id} in {input_path}"
+        )
+        line_index = find_launch_index_by_kernel(events, kernel_name, launch_id)
+        logger.debug(
+            f"Found kernel '{kernel_name}' launch_id={launch_id} at line {line_index}"
+        )
+
+    logger.debug(f"Building bundle from {input_path} at line {line_index}")
+
     # Build context bundle from the specified launch event
     context_bundle = build_context_bundle(events, line_index)
     logger.debug(