iree-org
diff --git a/‎lit_tests/kernel/wave/infer_index_exprs.py‎
Lines changed: 122 additions & 0 deletions b/‎lit_tests/kernel/wave/infer_index_exprs.py‎
Lines changed: 122 additions & 0 deletions
diff --git a/‎lit_tests/kernel/wave/mlir_converter.py‎
Lines changed: 12 additions & 10 deletions b/‎lit_tests/kernel/wave/mlir_converter.py‎
Lines changed: 12 additions & 10 deletions
diff --git a/‎lit_tests/kernel/wave/mlir_converter_debug_locations.py‎
Lines changed: 2 additions & 2 deletions b/‎lit_tests/kernel/wave/mlir_converter_debug_locations.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎lit_tests/kernel/wave/mlir_converter_diagnostics.py‎
Lines changed: 1 addition & 1 deletion b/‎lit_tests/kernel/wave/mlir_converter_diagnostics.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎tests/kernel/wave/common/utils.py‎
Lines changed: 1 addition & 1 deletion b/‎tests/kernel/wave/common/utils.py‎
Lines changed: 1 addition & 1 deletion
@@ -0,0 +1,122 @@
+# REQUIRES: water
+# RUN: python %s
+# This test only checks that compiling with water analysis checking enabled neither crashes nor asserts, so it is simply run under lit with no output checks.
+
+# Copyright 2025 The Wave Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+import wave_lang.kernel.lang as tkl
+import wave_lang.kernel.wave as tkw
+from wave_lang.kernel.wave.wave import LaunchableWave
+from wave_lang.kernel.wave.compile import WaveCompileOptions, wave_compile
+
+from wave_lang.kernel.lang.global_symbols import *
+from wave_lang.kernel.wave.constraints import MMAType
+from wave_lang.kernel.wave.utils.general_utils import torch_dtype_to_wave
+
+import torch
+
+
+# TODO: use the generic template, currently blocked by water not handling wave constraints.
+def _get_gemm_kernel(
+    shape: tuple[int, int, int],
+    mfma_variant: MMAType,
+    dtype: torch.dtype = torch.float16,
+    block_shape: tuple[int, int, int] | None = None,
+    waves_per_block: tuple[int, int] | None = None,
+) -> tuple[LaunchableWave, dict[tkl.IndexSymbol, tkl.IndexExpr]]:
+    """Build a GEMM wave kernel together with the hyperparameters to compile it.
+
+    Args:
+        shape: Problem sizes, bound to the symbols M, N, K (in that order).
+        mfma_variant: MMA type forwarded to the hardware constraint.
+        dtype: Torch element type of the `a` and `b` operands; converted with
+            `torch_dtype_to_wave` before use. The output `c` is always `tkl.f32`.
+        block_shape: Values for BLOCK_M, BLOCK_N, BLOCK_K. Falls back to
+            (64, 64, 32) when omitted (or otherwise falsy).
+        waves_per_block: Wave counts along M and N. Falls back to (2, 2) when
+            omitted (or otherwise falsy).
+
+    Returns:
+        A pair `(gemm, hyperparams)`: the `tkw.wave`-decorated kernel and a dict
+        mapping the symbols used here to concrete values.
+
+    Raises:
+        AssertionError: If `block_shape` is not of length 3 or `waves_per_block`
+            is not of length 2.
+    """
+    if not block_shape:
+        # BLOCK_M, BLOCK_N, BLOCK_K
+        block_shape = (64, 64, 32)
+
+    if not waves_per_block:
+        # WAVE_M, WAVE_N
+        waves_per_block = (2, 2)
+
+    assert len(block_shape) == 3, "block_shape needs to be rank 3 for M, N, K."
+    assert len(waves_per_block) == 2, "waves_per_block needs to be rank 2 for M, N."
+
+    # Input sizes
+    M = tkl.sym.M
+    N = tkl.sym.N
+    K = tkl.sym.K
+    # Workgroup tile sizes
+    BLOCK_M = tkl.sym.BLOCK_M
+    BLOCK_N = tkl.sym.BLOCK_N
+    BLOCK_K = tkl.sym.BLOCK_K
+    # Address space (for GPU, shared(1) or global(0))
+    # NOTE(review): this local symbol is only used as a key in `hyperparams`
+    # below; the kernel arguments use `GLOBAL_ADDRESS_SPACE` from the star
+    # import of `global_symbols` instead. Confirm that this is intended.
+    ADDRESS_SPACE = tkl.sym.GLOBAL_ADDRESS_SPACE
+    # From here on `dtype` holds the wave element type, not the torch dtype.
+    dtype = torch_dtype_to_wave(dtype)
+    # Expose user-constraints
+    # M and N are distributed over workgroup dimensions 0 and 1; K is tiled.
+    constraints: list[tkw.Constraint] = [tkw.WorkgroupConstraint(M, BLOCK_M, 0)]
+    constraints += [tkw.WorkgroupConstraint(N, BLOCK_N, 1)]
+    constraints += [tkw.TilingConstraint(K, BLOCK_K)]
+
+    # TODO: dialect expects waves_per_block to be rank 3, so we append a 1 to the end.
+    constraints += [
+        tkw.HardwareConstraint(
+            threads_per_wave=64,
+            mma_type=mfma_variant,
+            waves_per_block=waves_per_block + (1,),
+        )
+    ]
+
+    # Wave-level micro-kernel.
+    # Since warps are not directly addressable, there is no
+    # explicit notion of a warp id (like a workgroup or thread id).
+    # This kernel uses the input sizes M, N, K throughout, as the tiling
+    # and data movement strategy is determined during the compilation process.
+    # These can be influenced by introducing constraints.
+    @tkw.wave(constraints)
+    def gemm(
+        a: tkl.Memory[M, K, GLOBAL_ADDRESS_SPACE, dtype],
+        b: tkl.Memory[N, K, GLOBAL_ADDRESS_SPACE, dtype],
+        c: tkl.Memory[M, N, GLOBAL_ADDRESS_SPACE, tkl.f32],
+    ):
+        # Zero-initialized f32 accumulator passed to the iteration as its initial value.
+        c_reg = tkl.Register[M, N, tkl.f32](0.0)
+
+        # This microkernel encodes the fact that if the iterate
+        # dimension were tiled, then we would need to materialize a loop.
+        @tkw.iterate(K, init_args=[c_reg])
+        def repeat(acc: tkl.Register[M, N, tkl.f32]) -> tkl.Register[M, N, tkl.f32]:
+            # a_reg: tkw.Register[M, K, dtype]
+            a_reg = tkw.read(a)
+            # b_reg: tkw.Register[N, K, dtype]
+            b_reg = tkw.read(b)
+            # acc: tkw.Register[M, N, tkl.f32]
+            acc = tkw.mma(a_reg, b_reg, acc)
+            return acc
+
+        # repeat represents the results of the loop
+        tkw.write(repeat, c)
+
+    # Concrete values for every symbol introduced above.
+    hyperparams = {
+        ADDRESS_SPACE: SHARED_ADDRESS_SPACE,
+        BLOCK_M: block_shape[0],
+        BLOCK_N: block_shape[1],
+        BLOCK_K: block_shape[2],
+        M: shape[0],
+        N: shape[1],
+        K: shape[2],
+    }
+    return gemm, hyperparams
+
+
+def testGemm():
+    """Compile the GEMM kernel with `check_water_analysis` enabled.
+
+    The only checks are that `wave_compile` completes without raising and
+    returns a non-None result.
+    """
+    # Block and wave shapes are left at the defaults of `_get_gemm_kernel`.
+    gemm, hyperparams = _get_gemm_kernel(
+        shape=(1024, 1024, 1024), mfma_variant=MMAType.F32_16x16x16_F16
+    )
+    options = WaveCompileOptions(
+        subs=hyperparams,
+        run_bench=False,
+        check_water_analysis=True,
+    )
+    compiled_gemm = wave_compile(options, gemm)
+    # No output is inspected beyond the compile result existing.
+    assert compiled_gemm is not None
+
+
+# Entry point for the lit `RUN: python %s` line at the top of this file.
+if __name__ == "__main__":
+    testGemm()
@@ -80,7 +80,7 @@ def failure_to_parse_override_mlir():
 
     # Override the MLIR module after `wave_compile` so it doesn't attempt to parse it.
     options.override_mlir = "module {"
-    _, diagnostics = emit_wave_dialect(trace, constraints, options)
+    _, diagnostics, _ = emit_wave_dialect(trace, constraints, options)
 
     assert len(diagnostics) == 1
     # CHECK: Unable to parse module assembly
@@ -91,7 +91,9 @@ def failure_to_parse_override_mlir():
 @run_test
 def failure_to_parse_pipeline():
     trace, options, constraints = _get_dummy_trace_options_and_constraints()
-    _, diagnostics = emit_wave_dialect(trace, constraints, options, pipeline="module {")
+    _, diagnostics, _ = emit_wave_dialect(
+        trace, constraints, options, pipeline="module {"
+    )
 
     assert len(diagnostics) == 1
     # CHECK: Failed to apply transform script: Unable to parse module assembly
@@ -102,7 +104,7 @@ def failure_to_parse_pipeline():
 @run_test
 def pipeline_is_empty():
     trace, options, constraints = _get_dummy_trace_options_and_constraints()
-    _, diagnostics = emit_wave_dialect(
+    _, diagnostics, _ = emit_wave_dialect(
         trace, constraints, options, pipeline="module {}"
     )
 
@@ -115,7 +117,7 @@ def pipeline_is_empty():
 @run_test
 def pipeline_is_not_a_named_sequence():
     trace, options, constraints = _get_dummy_trace_options_and_constraints()
-    _, diagnostics = emit_wave_dialect(
+    _, diagnostics, _ = emit_wave_dialect(
         trace, constraints, options, pipeline="module { module {}}"
     )
 
@@ -141,7 +143,7 @@ def pipeline_is_not_a_named_sequence():
 def failure_in_pipeline():
     trace, options, constraints = _get_dummy_trace_options_and_constraints()
     options.override_mlir = "module {}"
-    _, diagnostics = emit_wave_dialect(
+    _, diagnostics, _ = emit_wave_dialect(
         trace, constraints, options, pipeline=GUARANTEED_FAIL_TRANSFORM_SCRIPT
     )
     assert len(diagnostics) == 1
@@ -158,7 +160,7 @@ def override_mlir():
 module {
   func.func private @overridden_mlir()
 }"""
-    emitted, diagnostics = emit_wave_dialect(trace, constraints, options)
+    emitted, diagnostics, _ = emit_wave_dialect(trace, constraints, options)
     assert len(diagnostics) == 0, "Did not expect errors in overridden IR."
 
     # CHECK: func.func private @overridden_mlir()
@@ -218,7 +220,7 @@ def mlir_converter_matrix_add():
     constraints = matrix_add.constraints
 
     # Use the mlir_converter to emit wave MLIR dialect
-    mlir_output, diagnostics = emit_wave_dialect(trace, constraints, options)
+    mlir_output, diagnostics, _ = emit_wave_dialect(trace, constraints, options)
 
     if diagnostics:
         for diagnostic in diagnostics:
@@ -374,7 +376,7 @@ def pipeline(root: OpHandle):
 
     # Use the mlir_converter to emit wave MLIR dialect and apply the empty
     # pipeline.
-    mlir_output, diagnostics = emit_wave_dialect(
+    mlir_output, diagnostics, _ = emit_wave_dialect(
         trace, constraints, options, pipeline=pipeline_asm
     )
 
@@ -528,7 +530,7 @@ def mixed_memory_kernel(
     constraints = mixed_memory_kernel.constraints
 
     with Context(), Location.unknown():
-        mlir_output, diagnostics = emit_wave_dialect(trace, constraints, options)
+        mlir_output, diagnostics, _ = emit_wave_dialect(trace, constraints, options)
 
     assert len(diagnostics) == 0, f"Should have no diagnostics, got: {diagnostics}"
 
@@ -582,7 +584,7 @@ def invalid_hyperparameter_kernel(
     # This should raise a RuntimeError due to invalid non-int hyperparameter
     try:
         with Context(), Location.unknown():
-            mlir_output, diagnostics = emit_wave_dialect(trace, constraints, options)
+            mlir_output, diagnostics, _ = emit_wave_dialect(trace, constraints, options)
         assert False, "Expected RuntimeError for invalid non-int hyperparameter"
     except RuntimeError as e:
         # Verify the error message is what we expect
 
@@ -95,7 +95,7 @@ def mlir_converter_location():
     constraints = matrix_add.constraints
 
     # Use the mlir_converter to emit wave MLIR dialect
-    mlir_output, diagnostics = emit_wave_dialect(trace, constraints, options)
+    mlir_output, diagnostics, _ = emit_wave_dialect(trace, constraints, options)
 
     if diagnostics:
         print(diagnostics)
@@ -210,7 +210,7 @@ def repeat(acc: tkl.Register[M, N, tkl.f32]) -> tkl.Register[M, N, tkl.f32]:
     constraints = matmul.constraints
 
     # Use the mlir_converter to emit wave MLIR dialect
-    mlir_output, diagnostics = emit_wave_dialect(trace, constraints, options)
+    mlir_output, diagnostics, _ = emit_wave_dialect(trace, constraints, options)
 
     if diagnostics:
         print(diagnostics)
 
@@ -85,7 +85,7 @@ def mlir_converter_diagnostics_emission():
     constraints = matrix_add.constraints
 
     # Use the mlir_converter to emit wave MLIR dialect
-    _, diagnostics = emit_wave_dialect(
+    _, diagnostics, _ = emit_wave_dialect(
         trace, constraints, options, test_diagnostic_emission=True
     )
 
 
@@ -70,7 +70,7 @@ def param_bool(name, shortname=None, values=None):
 
 
 def _is_water_and_ee_available() -> bool:
-    from wave_lang.kernel.wave.water import is_water_available
+    from wave_lang.support.detect_water import is_water_available
     from wave_lang.kernel.wave.execution_engine import is_execution_engine_available
 
     return is_water_available() and is_execution_engine_available()
Original file line number	Diff line number	Diff line change
`@@ -85,7 +85,7 @@ def mlir_converter_diagnostics_emission():`
`85`	`85`	`constraints = matrix_add.constraints`
`86`	`86`
`87`	`87`	`# Use the mlir_converter to emit wave MLIR dialect`
`88`		`- _, diagnostics = emit_wave_dialect(`
	`88`	`+ _, diagnostics, _ = emit_wave_dialect(`
`89`	`89`	`trace, constraints, options, test_diagnostic_emission=True`
`90`	`90`	`)`
`91`	`91`