Merge pull request #2087 from j2kun:64x64-reduce

copybara-github · copybara-github · commit 384e09ccfdfc · 2025-08-12T13:59:44.000-07:00
PiperOrigin-RevId: 794259650
diff --git a/tests/Examples/common/roberts_cross_16x16.mlir b/tests/Examples/common/roberts_cross_16x16.mlir
@@ -0,0 +1,61 @@
+func.func @roberts_cross(%img: tensor<256xi16> {secret.secret}) -> tensor<256xi16> {
+  %c256 = arith.constant 256 : index  // number of elements in the flattened image tensor
+  %c16 = arith.constant 16 : index  // row stride: flat index = x * 16 + y
+  %c1 = arith.constant 1 : index  // NOTE(review): not referenced anywhere in this function
+  %c0 = arith.constant 0 : index  // NOTE(review): not referenced anywhere in this function
+  %c-1 =  arith.constant -1 : index  // offset used to reach the previous row / previous column
+  // Roberts cross filter over a 16x16 image stored as a flat tensor<256xi16>; one result per (x, y).
+  // Each point p = img[x][y], where x is the row and y is the column, in the new image will equal:
+  // (img[x-1][y-1] - img[x][y])^2 + (img[x-1][y] - img[x][y-1])^2
+  %r = affine.for %x = 0 to 16 iter_args(%imgx = %img) -> tensor<256xi16> {  // rows; %imgx carries the result built so far
+    %1 = affine.for %y = 0 to 16 iter_args(%imgy = %imgx) -> tensor<256xi16> {  // columns; %imgy carries the result built so far
+
+      // fetch img[x-1][y-1]; all reads use the original input %img, not the loop-carried result
+      %4 = arith.addi %x, %c-1 : index  // NOTE(review): at x = 0 this is -1; presumably meant to wrap to the last row via remui below - confirm
+      %5 = arith.muli %4, %c16 : index
+      %6 = arith.addi %y, %c-1 : index
+      %7 = arith.addi %5, %6 : index
+      %8 = arith.remui %7, %c256 : index  // unsigned remainder keeps the flat index below 256
+      %9 = tensor.extract %img[%8] : tensor<256xi16>
+
+      // fetch img[x][y]
+      %10 = arith.muli %x, %c16 : index
+      %11 = arith.addi %10, %y : index
+      %12 = arith.remui %11, %c256 : index  // also reused below as the destination index of the result
+      %13 = tensor.extract %img[%12] : tensor<256xi16>
+
+      // subtract those two: img[x-1][y-1] - img[x][y]
+      %14 = arith.subi %9, %13 : i16
+
+      // fetch img[x-1][y]
+      %15 = arith.addi %x, %c-1 : index
+      %16 = arith.muli %15, %c16 : index
+      %18 = arith.addi %16, %y : index
+      %19 = arith.remui %18, %c256 : index
+      %20 = tensor.extract %img[%19] : tensor<256xi16>
+
+      // fetch img[x][y-1]
+      %21 = arith.muli %x, %c16 : index
+      %22 = arith.addi %y, %c-1 : index
+      %23 = arith.addi %21, %22 : index
+      %24 = arith.remui %23, %c256 : index
+      %25 = tensor.extract %img[%24] : tensor<256xi16>
+
+      // subtract those two: img[x-1][y] - img[x][y-1]
+      %26 = arith.subi %20, %25 : i16
+
+      // square each difference (computed in i16, like the rest of the pixel arithmetic)
+      %27 = arith.muli %14, %14 :  i16
+      %28 = arith.muli %26, %26 :  i16
+
+      // add the squares
+      %29 = arith.addi %27, %28 : i16
+
+      // save to result[x][y]: insert into the loop-carried tensor at the flat index of img[x][y]
+      %30 = tensor.insert %29 into %imgy[%12] : tensor<256xi16>
+      affine.yield %30: tensor<256xi16>
+    }
+    affine.yield %1 : tensor<256xi16>  // hand the row's updated tensor to the next outer iteration
+  }
+  return %r : tensor<256xi16>
+}
diff --git a/tests/Examples/common/roberts_cross_64x64.mlir b/tests/Examples/common/roberts_cross_64x64.mlir
diff --git a/tests/Examples/lattigo/bgv/roberts_cross/BUILD b/tests/Examples/lattigo/bgv/roberts_cross/BUILD
@@ -13,7 +13,7 @@ heir_lattigo_lib(
         "--mlir-to-bgv=ciphertext-degree=4096 plaintext-modulus=536903681",
         "--scheme-to-lattigo",
     ],
-    mlir_src = "@heir//tests/Examples/common:roberts_cross_64x64.mlir",
+    mlir_src = "@heir//tests/Examples/common:roberts_cross_16x16.mlir",
 )
 
 # For Google-internal reasons we must separate the go_test rules from the macro
diff --git a/tests/Examples/lattigo/bgv/roberts_cross/roberts_cross_test.go b/tests/Examples/lattigo/bgv/roberts_cross/roberts_cross_test.go
@@ -7,34 +7,34 @@ import (
 func TestBinops(t *testing.T) {
 	evaluator, params, ecd, enc, dec := roberts_cross__configure()
 
-	input := make([]int16, 4096)
-	expected := make([]int16, 4096)
+	input := make([]int16, 256)
+	expected := make([]int16, 256)
 
-	for i := 0; i < 4096; i++ {
+	for i := 0; i < 256; i++ {
 		input[i] = int16(i)
 	}
 
-	for row := 0; row < 64; row++ {
-		for col := 0; col < 64; col++ {
-			xY := (row*64 + col) % 4096
-			xYm1 := (row*64 + col - 1) % 4096
-			xm1Y := ((row-1)*64 + col) % 4096
-			xm1Ym1 := ((row-1)*64 + col - 1) % 4096
+	for row := 0; row < 16; row++ {
+		for col := 0; col < 16; col++ {
+			xY := (row*16 + col) % 256
+			xYm1 := (row*16 + col - 1) % 256
+			xm1Y := ((row-1)*16 + col) % 256
+			xm1Ym1 := ((row-1)*16 + col - 1) % 256
 
 			if xYm1 < 0 {
-				xYm1 += 4096
+				xYm1 += 256
 			}
 			if xm1Y < 0 {
-				xm1Y += 4096
+				xm1Y += 256
 			}
 			if xm1Ym1 < 0 {
-				xm1Ym1 += 4096
+				xm1Ym1 += 256
 			}
 
 			v1 := input[xm1Ym1] - input[xY]
 			v2 := input[xm1Y] - input[xYm1]
 			sum := v1*v1 + v2*v2
-			expected[row*64+col] = sum
+			expected[row*16+col] = sum
 		}
 	}
 
@@ -44,7 +44,7 @@ func TestBinops(t *testing.T) {
 
 	result := roberts_cross__decrypt__result0(evaluator, params, ecd, dec, resultCt)
 
-	for i := 0; i < 4096; i++ {
+	for i := 0; i < 256; i++ {
 		if result[i] != expected[i] {
 			t.Errorf("Decryption error at %d: %d != %d", i, result[i], expected[i])
 		}
diff --git a/tests/Examples/openfhe/bgv/roberts_cross/BUILD b/tests/Examples/openfhe/bgv/roberts_cross/BUILD
@@ -5,14 +5,14 @@ load("@heir//tests/Examples/openfhe:test.bzl", "openfhe_end_to_end_test")
 package(default_applicable_licenses = ["@heir//:license"])
 
 openfhe_end_to_end_test(
-    name = "roberts_cross_64x64_test",
-    generated_lib_header = "roberts_cross_64x64_lib.h",
+    name = "roberts_cross_16x16_test",
+    generated_lib_header = "roberts_cross_16x16_lib.h",
     heir_opt_flags = [
         "--annotate-module=backend=openfhe scheme=bgv",
         "--mlir-to-bgv=ciphertext-degree=4096 plaintext-modulus=536903681",
         "--scheme-to-openfhe",
     ],
-    mlir_src = "@heir//tests/Examples/common:roberts_cross_64x64.mlir",
+    mlir_src = "@heir//tests/Examples/common:roberts_cross_16x16.mlir",
     tags = ["notap"],
     test_src = "roberts_cross_test.cpp",
 )
diff --git a/tests/Examples/openfhe/bgv/roberts_cross/roberts_cross_test.cpp b/tests/Examples/openfhe/bgv/roberts_cross/roberts_cross_test.cpp
@@ -5,7 +5,7 @@
 #include "gtest/gtest.h"  // from @googletest
 
 // Generated headers (block clang-format from messing up order)
-#include "tests/Examples/openfhe/bgv/roberts_cross/roberts_cross_64x64_lib.h"
+#include "tests/Examples/openfhe/bgv/roberts_cross/roberts_cross_16x16_lib.h"
 
 using ::testing::ContainerEq;
 
@@ -23,24 +23,24 @@ TEST(RobertsCrossTest, TestInput1) {
 
   std::vector<int16_t> input;
   std::vector<int16_t> expected;
-  input.reserve(4096);
-  expected.reserve(4096);
+  input.reserve(256);
+  expected.reserve(256);
 
-  for (int i = 0; i < 4096; ++i) {
+  for (int i = 0; i < 256; ++i) {
     input.push_back(i);
   }
 
-  for (int row = 0; row < 64; ++row) {
-    for (int col = 0; col < 64; ++col) {
+  for (int row = 0; row < 16; ++row) {
+    for (int col = 0; col < 16; ++col) {
       // (img[x-1][y-1] - img[x][y])^2 + (img[x-1][y] - img[x][y-1])^2
-      int xY = (row * 64 + col) % 4096;
-      int xYm1 = (row * 64 + col - 1) % 4096;
-      int xm1Y = ((row - 1) * 64 + col) % 4096;
-      int xm1Ym1 = ((row - 1) * 64 + col - 1) % 4096;
-
-      if (xYm1 < 0) xYm1 += 4096;
-      if (xm1Y < 0) xm1Y += 4096;
-      if (xm1Ym1 < 0) xm1Ym1 += 4096;
+      int xY = (row * 16 + col) % 256;
+      int xYm1 = (row * 16 + col - 1) % 256;
+      int xm1Y = ((row - 1) * 16 + col) % 256;
+      int xm1Ym1 = ((row - 1) * 16 + col - 1) % 256;
+
+      if (xYm1 < 0) xYm1 += 256;
+      if (xm1Y < 0) xm1Y += 256;
+      if (xm1Ym1 < 0) xm1Ym1 += 256;
 
       int16_t v1 = (input[xm1Ym1] - input[xY]);
       int16_t v2 = (input[xm1Y] - input[xYm1]);
diff --git a/tests/Examples/plaintext/roberts_cross/BUILD b/tests/Examples/plaintext/roberts_cross/BUILD
@@ -33,7 +33,7 @@ plaintext_test(
     heir_opt_flags = [
         "--mlir-to-plaintext-backend=plaintext-size=4096",
     ],
-    mlir_src = "@heir//tests/Examples/common:roberts_cross_64x64.mlir",
+    mlir_src = "@heir//tests/Examples/common:roberts_cross_16x16.mlir",
     deps = [
         ":roberts_cross_test",
         "@heir//tests/Examples/plaintext:memrefCopy",
@@ -46,7 +46,7 @@ plaintext_test(
     heir_opt_flags = [
         "--mlir-to-plaintext-backend=plaintext-size=4096 plaintext-modulus=536903681",
     ],
-    mlir_src = "@heir//tests/Examples/common:roberts_cross_64x64.mlir",
+    mlir_src = "@heir//tests/Examples/common:roberts_cross_16x16.mlir",
     deps = [
         ":roberts_cross_mod_test",
         "@heir//tests/Examples/plaintext:memrefCopy",
@@ -59,7 +59,7 @@ plaintext_test(
     heir_opt_flags = [
         "--mlir-to-plaintext-backend=plaintext-size=4096 plaintext-modulus=786433",
     ],
-    mlir_src = "@heir//tests/Examples/common:roberts_cross_64x64.mlir",
+    mlir_src = "@heir//tests/Examples/common:roberts_cross_16x16.mlir",
     deps = [
         ":roberts_cross_mod_failure_test",
         "@heir//tests/Examples/plaintext:memrefCopy",
diff --git a/tests/Transforms/heir_simd_vectorizer/BUILD b/tests/Transforms/heir_simd_vectorizer/BUILD
@@ -9,17 +9,5 @@ glob_lit_tests(
     name = "all_tests",
     data = ["@heir//tests:test_utilities"],
     driver = "@heir//tests:run_lit.sh",
-    size_override = {
-        "box_blur_64x64.mlir": "large",
-        "roberts_cross_64x64.mlir": "enormous",
-        "gx_kernel_64x64.mlir": "large",
-    },
-    tags_override = {
-        "gx_kernel_64x64.mlir": [
-            "nofastbuild",
-            "notap",
-            "manual",
-        ],
-    },
     test_file_exts = ["mlir"],
 )
diff --git a/tests/Transforms/heir_simd_vectorizer/box_blur_16x16.mlir b/tests/Transforms/heir_simd_vectorizer/box_blur_16x16.mlir
@@ -3,60 +3,60 @@
 
 module  {
   // CHECK: @box_blur
-  // CHECK-SAME: %[[arg0:.*]]: !secret.secret<tensor<4096xi16>>) -> !secret.secret<tensor<4096xi16>> {
-  // CHECK-DAG:    %[[c127:.*]] = arith.constant 127 : index
-  // CHECK-DAG:    %[[c3968:.*]] = arith.constant 3968 : index
-  // CHECK-DAG:    %[[c4032:.*]] = arith.constant 4032 : index
-  // CHECK-DAG:    %[[c63:.*]] = arith.constant 63 : index
-  // CHECK-DAG:    %[[c65:.*]] = arith.constant 65 : index
-  // CHECK-NEXT:   %[[v0:.*]] = secret.generic(%[[arg0]]: !secret.secret<tensor<4096xi16>>) {
-  // CHECK-NEXT:   ^body(%[[arg1:.*]]: tensor<4096xi16>):
-  // CHECK-NEXT:     %[[v1:.*]] = tensor_ext.rotate %[[arg1]], %[[c3968]]
-  // CHECK-NEXT:     %[[v2:.*]] = tensor_ext.rotate %[[arg1]], %[[c4032]]
+  // CHECK-SAME: %[[arg0:.*]]: !secret.secret<tensor<256xi16>>) -> !secret.secret<tensor<256xi16>> {
+  // CHECK-DAG:    %[[c31:.*]] = arith.constant 31 : index
+  // CHECK-DAG:    %[[c240:.*]] = arith.constant 240 : index
+  // CHECK-DAG:    %[[c224:.*]] = arith.constant 224 : index
+  // CHECK-DAG:    %[[c15:.*]] = arith.constant 15 : index
+  // CHECK-DAG:    %[[c17:.*]] = arith.constant 17 : index
+  // CHECK-NEXT:   %[[v0:.*]] = secret.generic(%[[arg0]]: !secret.secret<tensor<256xi16>>) {
+  // CHECK-NEXT:   ^body(%[[arg1:.*]]: tensor<256xi16>):
+  // CHECK-NEXT:     %[[v1:.*]] = tensor_ext.rotate %[[arg1]], %[[c224]]
+  // CHECK-NEXT:     %[[v2:.*]] = tensor_ext.rotate %[[arg1]], %[[c240]]
   // CHECK-NEXT:     %[[v3:.*]] = arith.addi %[[v1]], %[[v2]]
   // CHECK-NEXT:     %[[v4:.*]] = arith.addi %[[v3]], %[[arg1]]
-  // CHECK-NEXT:     %[[v5:.*]] = tensor_ext.rotate %[[v4]], %[[c63]]
+  // CHECK-NEXT:     %[[v5:.*]] = tensor_ext.rotate %[[v4]], %[[c15]]
   // CHECK-NEXT:     %[[v6:.*]] = arith.addi %[[v5]], %[[v2]]
   // CHECK-NEXT:     %[[v7:.*]] = arith.addi %[[v6]], %[[arg1]]
-  // CHECK-NEXT:     %[[v8:.*]] = tensor_ext.rotate %[[v7]], %[[c63]]
-  // CHECK-NEXT:     %[[v9:.*]] = tensor_ext.rotate %[[arg1]], %[[c127]]
+  // CHECK-NEXT:     %[[v8:.*]] = tensor_ext.rotate %[[v7]], %[[c15]]
+  // CHECK-NEXT:     %[[v9:.*]] = tensor_ext.rotate %[[arg1]], %[[c31]]
   // CHECK-NEXT:     %[[v10:.*]] = arith.addi %[[v8]], %[[v9]]
   // CHECK-NEXT:     %[[v11:.*]] = arith.addi %[[v10]], %[[arg1]]
-  // CHECK-NEXT:     %[[v12:.*]] = tensor_ext.rotate %[[v11]], %[[c3968]]
+  // CHECK-NEXT:     %[[v12:.*]] = tensor_ext.rotate %[[v11]], %[[c224]]
   // CHECK-NEXT:     %[[v13:.*]] = arith.addi %[[v12]], %[[v2]]
   // CHECK-NEXT:     %[[v14:.*]] = arith.addi %[[v13]], %[[arg1]]
-  // CHECK-NEXT:     %[[v15:.*]] = tensor_ext.rotate %[[v14]], %[[c65]]
+  // CHECK-NEXT:     %[[v15:.*]] = tensor_ext.rotate %[[v14]], %[[c17]]
   // CHECK-NEXT:     secret.yield %[[v15]]
-  // CHECK-NEXT:   } -> !secret.secret<tensor<4096xi16>>
+  // CHECK-NEXT:   } -> !secret.secret<tensor<256xi16>>
   // CHECK-NEXT:   return %[[v0]]
 
-  func.func @box_blur(%arg0: tensor<4096xi16>) -> tensor<4096xi16> {
-    %c4096 = arith.constant 4096 : index
-    %c64 = arith.constant 64 : index
-    %0 = affine.for %x = 0 to 64 iter_args(%arg0_x = %arg0) -> (tensor<4096xi16>) {
-      %1 = affine.for %y = 0 to 64 iter_args(%arg0_y = %arg0_x) -> (tensor<4096xi16>) {
+  func.func @box_blur(%arg0: tensor<256xi16>) -> tensor<256xi16> {
+    %c256 = arith.constant 256 : index
+    %c16 = arith.constant 16 : index
+    %0 = affine.for %x = 0 to 16 iter_args(%arg0_x = %arg0) -> (tensor<256xi16>) {
+      %1 = affine.for %y = 0 to 16 iter_args(%arg0_y = %arg0_x) -> (tensor<256xi16>) {
         %c0_si16 = arith.constant 0 : i16
         %2 = affine.for %j = -1 to 2 iter_args(%value_j = %c0_si16) -> (i16) {
           %6 = affine.for %i = -1 to 2 iter_args(%value_i = %value_j) -> (i16) {
             %7 = arith.addi %x, %i : index
-            %8 = arith.muli %7, %c64 : index
+            %8 = arith.muli %7, %c16 : index
             %9 = arith.addi %y, %j : index
             %10 = arith.addi %8, %9 : index
-            %11 = arith.remui %10, %c4096 : index
-            %12 = tensor.extract %arg0[%11] : tensor<4096xi16>
+            %11 = arith.remui %10, %c256 : index
+            %12 = tensor.extract %arg0[%11] : tensor<256xi16>
             %13 = arith.addi %value_i, %12 : i16
             affine.yield %13 : i16
           }
           affine.yield %6 : i16
         }
-        %3 = arith.muli %c64, %x : index
+        %3 = arith.muli %c16, %x : index
         %4 = arith.addi %3, %y : index
-        %5 = arith.remui %4, %c4096 : index
-        %6 = tensor.insert %2 into %arg0_y[%5] : tensor<4096xi16>
-        affine.yield %6 : tensor<4096xi16>
+        %5 = arith.remui %4, %c256 : index
+        %6 = tensor.insert %2 into %arg0_y[%5] : tensor<256xi16>
+        affine.yield %6 : tensor<256xi16>
       }
-      affine.yield %1 : tensor<4096xi16>
+      affine.yield %1 : tensor<256xi16>
     }
-    return %0 : tensor<4096xi16>
+    return %0 : tensor<256xi16>
   }
 }
diff --git a/tests/Transforms/heir_simd_vectorizer/gx_kernel_16x16.mlir b/tests/Transforms/heir_simd_vectorizer/gx_kernel_16x16.mlir
diff --git a/tests/Transforms/heir_simd_vectorizer/roberts_cross_16x16.mlir b/tests/Transforms/heir_simd_vectorizer/roberts_cross_16x16.mlir

Original file line number	Diff line number	Diff line change
`@@ -13,7 +13,7 @@ heir_lattigo_lib(`
`13`	`13`	`"--mlir-to-bgv=ciphertext-degree=4096 plaintext-modulus=536903681",`
`14`	`14`	`"--scheme-to-lattigo",`
`15`	`15`	`],`
`16`		`- mlir_src = "@heir//tests/Examples/common:roberts_cross_64x64.mlir",`
	`16`	`+ mlir_src = "@heir//tests/Examples/common:roberts_cross_16x16.mlir",`
`17`	`17`	`)`
`18`	`18`
`19`	`19`	`# For Google-internal reasons we must separate the go_test rules from the macro`