enable layout propagation for tensor.extract_slice

asraa · copybara-github · commit 24c0a487b498 · 2025-11-03T07:13:13.000-08:00
PiperOrigin-RevId: 827483403
diff --git a/lib/Transforms/ConvertToCiphertextSemantics/ConvertToCiphertextSemantics.cpp b/lib/Transforms/ConvertToCiphertextSemantics/ConvertToCiphertextSemantics.cpp
@@ -1130,21 +1130,23 @@ class ConvertTensorInsertSlice
       sizes.push_back(b.getIndexAttr(1));
       sizes.push_back(b.getIndexAttr(slots));
       SmallVector<OpFoldResult> strides(2, b.getIndexAttr(1));
-      Value extractedDest = tensor::ExtractSliceOp::create(
+      Operation* extractedDest = tensor::ExtractSliceOp::create(
           b, op.getLoc(), cast<RankedTensorType>(convertedSource.getType()),
           adaptor.getDest(), ctOffsets, sizes, strides);
       Operation* scalarMul = makeAppropriatelyTypedMulOp(
           b, op.getLoc(), scalarMask, convertedSource, {getArithFMF(b)});
       Operation* destMul = makeAppropriatelyTypedMulOp(
-          b, op.getLoc(), destMask, extractedDest, {getArithFMF(b)});
+          b, op.getLoc(), destMask, extractedDest->getResult(0),
+          {getArithFMF(b)});
       Operation* finalAdd =
           makeAppropriatelyTypedAddOp(b, op.getLoc(), scalarMul->getResult(0),
                                       destMul->getResult(0), {getArithFMF(b)});
 
       // Insert the final result into the ciphertext at position ct.
       Operation* insertOp = tensor::InsertSliceOp::create(
           b, finalAdd->getResult(0), result, ctOffsets, sizes, strides);
-      setMaterializedAttr({scalarMul, destMul, finalAdd, insertOp});
+      setMaterializedAttr(
+          {extractedDest, scalarMul, destMul, finalAdd, insertOp});
       result = insertOp->getResult(0);
     }
 
@@ -1472,6 +1474,145 @@ class ConvertTensorInsertLayout
   }
 };
 
+// Lowers tensor.extract_slice to ciphertext semantics. When the source and
+// the result both carry a layout, the converted source is remapped so the
+// slice data sits where the result layout expects it, and the result-shaped
+// window starting at offset (0, 0) of the remapped tensor is extracted.
+class ConvertTensorExtractSlice
+    : public ContextAwareOpConversionPattern<tensor::ExtractSliceOp> {
+ public:
+  using ContextAwareOpConversionPattern<
+      tensor::ExtractSliceOp>::ContextAwareOpConversionPattern;
+
+  // Rewrites `op` when both its source and its result have a layout: emits a
+  // tensor_ext.remap on the converted source followed by a
+  // tensor.extract_slice to the converted result type, then replaces `op`.
+  // Emits an error and fails if a layout is missing or the slice relation
+  // cannot be built.
+  LogicalResult secretSourceSecretResult(
+      tensor::ExtractSliceOp op, OpAdaptor adaptor,
+      ContextAwareConversionPatternRewriter& rewriter) const {
+    MLIRContext* ctx = op.getContext();
+
+    FailureOr<Attribute> sourceLayoutResult =
+        getTypeConverter()->getContextualAttr(adaptor.getSource());
+    FailureOr<Attribute> resultLayoutResult =
+        getTypeConverter()->getContextualAttr(op.getResult());
+    // This method is public, so do not rely on the caller having checked the
+    // lookups before calling value() on them.
+    if (failed(sourceLayoutResult) || failed(resultLayoutResult)) {
+      return op.emitError()
+             << "source and result tensors must both have a layout";
+    }
+    LayoutAttr resultLayout = cast<LayoutAttr>(resultLayoutResult.value());
+    IntegerRelation sourceRel =
+        cast<LayoutAttr>(sourceLayoutResult.value()).getIntegerRelation();
+    IntegerRelation resultRel = resultLayout.getIntegerRelation();
+
+    // Compute the layout relation of the extract_slice operation.
+    auto extractSliceLayout =
+        getSliceExtractionRelation(op.getSourceType(), op.getResultType(),
+                                   SmallVector<int64_t>(op.getStaticOffsets()),
+                                   SmallVector<int64_t>(op.getStaticSizes()),
+                                   SmallVector<int64_t>(op.getStaticStrides()));
+    if (failed(extractSliceLayout)) {
+      return op.emitError() << "failed to get layout for extract slice";
+    }
+
+    // Remap the source ciphertext semantic tensor to the result ciphertext
+    // semantic tensor layout. To do this, we compose the relations to traverse
+    // the following diagram, starting from the source ciphertext tensor to the
+    // extracted slice ciphertext tensor.
+    //  Source tensor ─────────> Slice tensor
+    //     \                        │
+    //    /│\                       │
+    //     │                       \│ /
+    //     │                        \/
+    // Source ciphertext        Slice ciphertext
+    // (ct, slot)               (ct, slot)
+    sourceRel.inverse();
+    sourceRel.compose(extractSliceLayout.value());
+    sourceRel.compose(resultRel);
+
+    // tensor_ext.remap constrains its input and output types to be the same,
+    // i.e., remap occurs within one set of ciphertexts. The output of an
+    // extract_slice, however, may have a layout that has fewer ciphertexts
+    // in it. For example, extracting one row from a data-semantic matrix that
+    // is packed with one row per ciphertext would result in a single output
+    // ciphertext, and the expected layout of the result will reflect that.
+    // To bridge this gap, this kernel post-processes the remap's output to
+    // extract the subset ciphertexts relevant to the layout of the output
+    // slice.
+    LayoutAttr remapPermutation =
+        LayoutAttr::getFromIntegerRelation(ctx, sourceRel);
+    RankedTensorType sourceCiphertextSemanticType =
+        cast<RankedTensorType>(adaptor.getSource().getType());
+    auto remapSource = tensor_ext::RemapOp::create(
+        rewriter, op.getLoc(), sourceCiphertextSemanticType,
+        adaptor.getSource(), remapPermutation);
+
+    auto resultCiphertextSemanticType = cast<RankedTensorType>(
+        getTypeConverter()->convertType(op.getResultType(), resultLayout));
+    SmallVector<OpFoldResult> strides(2, rewriter.getIndexAttr(1));
+    SmallVector<OpFoldResult> offsets(2, rewriter.getIndexAttr(0));
+    SmallVector<OpFoldResult> sizes;
+    sizes.push_back(
+        rewriter.getIndexAttr(resultCiphertextSemanticType.getDimSize(0)));
+    sizes.push_back(
+        rewriter.getIndexAttr(resultCiphertextSemanticType.getDimSize(1)));
+    auto extractRemap = tensor::ExtractSliceOp::create(
+        rewriter, op.getLoc(), resultCiphertextSemanticType,
+        remapSource.getResult(), offsets, sizes, strides);
+
+    setMaterializedAttr({remapSource, extractRemap});
+    // The replacement value holds the slice packed according to the result
+    // layout, so that is the layout to record on it. The remap permutation
+    // relates (ct, slot) to (ct, slot) and does not describe how the
+    // data-semantic slice is packed.
+    setAttributeAssociatedWith(extractRemap.getResult(), kLayoutAttrName,
+                               resultLayout);
+    rewriter.replaceOp(op, extractRemap.getResult());
+    return success();
+  }
+
+  // Dispatches on whether the source and the result have a layout. A layout
+  // on only one of the two is reported as an error; if neither has one the
+  // op is marked materialized and left in place.
+  LogicalResult matchAndRewrite(
+      tensor::ExtractSliceOp op, OpAdaptor adaptor,
+      ContextAwareConversionPatternRewriter& rewriter) const final {
+    // Look up the layouts of the converted source and of the original result.
+    FailureOr<Attribute> sourceLayoutResult =
+        getTypeConverter()->getContextualAttr(adaptor.getSource());
+    FailureOr<Attribute> resultLayoutResult =
+        getTypeConverter()->getContextualAttr(op.getResult());
+
+    bool isSecretSource = succeeded(sourceLayoutResult);
+    bool isSecretResult = succeeded(resultLayoutResult);
+
+    if (isSecretSource && isSecretResult) {
+      return secretSourceSecretResult(op, adaptor, rewriter);
+    }
+
+    if (isSecretSource && !isSecretResult) {
+      return op.emitError()
+             << "result tensor should have been assigned a layout "
+                "by layout-propagation";
+    }
+
+    if (!isSecretSource && isSecretResult) {
+      return op.emitError()
+             << "source tensor should have been assigned a layout "
+                "by layout-propagation";
+    }
+
+    // Cleartext source and cleartext result means this is a cleartext op
+    // that needs no conversion.
+    setMaterializedAttr(op);
+    return success();
+  }
+};
+
 class ConvertCollapseShape
     : public ContextAwareOpConversionPattern<tensor::CollapseShapeOp> {
  public:
@@ -1680,17 +1799,18 @@ struct ConvertToCiphertextSemantics
       return isa<ModuleOp>(op) || hasMaterializedAttr(op);
     });
 
-    patterns.add<
-        ConvertFunc, ConvertGeneric,
-        // tensor_ext ops
-        ConvertConvertLayout,
-        // linalg ops
-        ConvertLinalgReduce, ConvertLinalgMatvecLayout, ConvertLinalgConv2D,
-        // tensor ops
-        ConvertTensorExtractLayout, ConvertTensorInsertLayout,
-        ConvertCollapseShape, ConvertExpandShape, ConvertTensorInsertSlice,
-        // default
-        ConvertAnyAddingMaterializedAttr>(typeConverter, context);
+    patterns.add<ConvertFunc, ConvertGeneric,
+                 // tensor_ext ops
+                 ConvertConvertLayout,
+                 // linalg ops
+                 ConvertLinalgReduce, ConvertLinalgMatvecLayout,
+                 ConvertLinalgConv2D,
+                 // tensor ops
+                 ConvertTensorExtractLayout, ConvertTensorInsertLayout,
+                 ConvertCollapseShape, ConvertExpandShape,
+                 ConvertTensorInsertSlice, ConvertTensorExtractSlice,
+                 // default
+                 ConvertAnyAddingMaterializedAttr>(typeConverter, context);
     patterns.add<ConvertAssignLayout>(typeConverter, context, ciphertextSize);
 
     ConversionConfig config;
diff --git a/lib/Transforms/LayoutPropagation/LayoutPropagation.cpp b/lib/Transforms/LayoutPropagation/LayoutPropagation.cpp
@@ -139,6 +139,7 @@ struct LayoutPropagation : impl::LayoutPropagationBase<LayoutPropagation> {
   LogicalResult visitOperation(tensor::ExtractOp op);
   LogicalResult visitOperation(tensor::InsertOp op);
   LogicalResult visitOperation(tensor::InsertSliceOp op);
+  LogicalResult visitOperation(tensor::ExtractSliceOp op);
 
   // Determine if the operation arguments have compatible layouts for the
   // given op. If the check fails, the CompatibilityResult::compatible field
@@ -277,16 +278,8 @@ LogicalResult LayoutPropagation::visitOperation(Operation* op) {
       // affine ops
       .Case<affine::AffineForOp>([&](auto op) { return visitOperation(op); })
       // tensor ops
-      .Case<tensor::ExtractOp, tensor::InsertOp, tensor::InsertSliceOp>(
-          [&](auto op) { return visitOperation(op); })
-      .Case<tensor::ExtractSliceOp>([&](auto op) {
-        // TODO(#2028): Support tensor.extract_slice and tensor.insert_slice in
-        // layout.
-        return op->emitError()
-               << "Layout propagation not supported for this op";
-      })
-      // tensor ops
-      .Case<CollapseShapeOp, ExpandShapeOp>(
+      .Case<tensor::ExtractOp, tensor::InsertOp, tensor::InsertSliceOp,
+            tensor::ExtractSliceOp, CollapseShapeOp, ExpandShapeOp>(
           [&](auto op) { return visitOperation(op); })
       // AddI, AddF, mgmt.* all pass the layout through unchanged.
       .Default([&](Operation* op) {
@@ -790,6 +783,59 @@ LogicalResult LayoutPropagation::visitOperation(tensor::InsertSliceOp op) {
   return success();
 }
 
+// Assigns the result of a tensor.extract_slice the layout induced by its
+// source: the inverted slice extraction relation composed with the source
+// layout, with the ciphertext index shifted to start at zero. Emits an error
+// and fails if the source has no layout, the slice relation cannot be built,
+// or the ciphertext index has no constant lower bound.
+LogicalResult LayoutPropagation::visitOperation(tensor::ExtractSliceOp op) {
+  // Assign the induced layout from extracting a slice from the source tensor.
+  if (!assignedLayouts.contains(op.getSource())) {
+    return op->emitError() << "Source tensor has no assigned layout";
+  }
+  IntegerRelation sourceLayout =
+      assignedLayouts.at(op.getSource()).getIntegerRelation();
+
+  FailureOr<IntegerRelation> maybeSliceExtractionLayout =
+      getSliceExtractionRelation(op.getSourceType(), op.getResultType(),
+                                 SmallVector<int64_t>(op.getStaticOffsets()),
+                                 SmallVector<int64_t>(op.getStaticSizes()),
+                                 SmallVector<int64_t>(op.getStaticStrides()));
+  if (failed(maybeSliceExtractionLayout)) {
+    // Attach a diagnostic, as the other failure paths in this function do.
+    return op->emitError() << "failed to get layout for extract slice";
+  }
+  IntegerRelation sliceExtractionLayout = maybeSliceExtractionLayout.value();
+
+  // Compose the inverted slice extraction layout with the source layout to
+  // get the result slice layout.
+  sliceExtractionLayout.inverse();
+  sliceExtractionLayout.compose(sourceLayout);
+  // If the slice extracted was not at offset zero, then the resulting slice may
+  // be indexed at a non-zero ciphertext. For example, imagine extracting a
+  // slice out of the second ciphertext. Then computing the inverse of the slice
+  // extraction layout and composing that with the source relation would mean
+  // that the slice would map to the second ciphertext. But a slice extracted
+  // from a tensor.extract_slice op is always indexed starting from zero.
+  // Reindex the resulting relation to start from ciphertext zero.
+  auto ctVarOffset =
+      sliceExtractionLayout.getVarKindOffset(presburger::VarKind::Range);
+  auto ctLowerBound = sliceExtractionLayout.getConstantBound64(
+      presburger::BoundType::LB, ctVarOffset);
+  if (!ctLowerBound) {
+    return op.emitError() << "failed to get constant bound on ciphertext index";
+  }
+  auto zeroIndexedSliceLayout =
+      shiftVar(sliceExtractionLayout, ctVarOffset, -ctLowerBound.value());
+
+  LayoutAttr outputLayout = LayoutAttr::getFromIntegerRelation(
+      op.getContext(), zeroIndexedSliceLayout);
+  assignedLayouts.insert({op.getResult(), outputLayout});
+  debugAssignLayout(op.getResult(), outputLayout);
+  setResultLayoutAttr(op);
+  return success();
+}
+
 CompatibilityResult LayoutPropagation::hasCompatibleArgumentLayouts(
     Operation* op) {
   return TypeSwitch<Operation*, CompatibilityResult>(op)
@@ -917,7 +957,7 @@ CompatibilityResult LayoutPropagation::hasCompatibleArgumentLayouts(
     tensor::InsertSliceOp op) {
   // The arguments of a tensor::InsertSliceOp are the tensors to insert and the
   // tensor to insert into.
-  auto insert = op.getOperands()[0];
+  auto insert = op.getSource();
   auto dest = op.getDest();
 
   if (!assignedLayouts.contains(insert)) {
diff --git a/lib/Utils/Layout/Utils.cpp b/lib/Utils/Layout/Utils.cpp
@@ -747,5 +747,94 @@ presburger::IntegerRelation shiftVar(
   return *shiftedRelation;
 }
 
+// Builds the relation between the indices of a source tensor (domain) and the
+// indices of a slice taken from it by tensor.extract_slice (range):
+//   source_i = offsets[i] + strides[i] * slice_j   for a kept dimension i
+//   source_i = offsets[i]                          for a dropped dimension i
+// Source dimension i is kept when sizes[i] equals the size of the next
+// unmatched result dimension j; otherwise it must have size 1 and is treated
+// as dropped by rank reduction.
+//
+// Returns failure if offsets, sizes and strides do not each have one static
+// entry per source dimension, if either tensor shape is dynamic, or if the
+// result shape cannot be obtained from `sizes` by dropping unit dimensions.
+FailureOr<presburger::IntegerRelation> getSliceExtractionRelation(
+    RankedTensorType sourceType, RankedTensorType resultType,
+    SmallVector<int64_t> offsets, SmallVector<int64_t> sizes,
+    SmallVector<int64_t> strides) {
+  const int64_t sourceRank = sourceType.getRank();
+  const int64_t resultRank = resultType.getRank();
+
+  // The loops below index offsets/sizes/strides by source dimension.
+  if (static_cast<int64_t>(offsets.size()) != sourceRank ||
+      static_cast<int64_t>(sizes.size()) != sourceRank ||
+      static_cast<int64_t>(strides.size()) != sourceRank) {
+    return failure();
+  }
+  // Dynamic values are stored as a sentinel and cannot be used as constant
+  // bounds or coefficients.
+  if (!sourceType.hasStaticShape() || !resultType.hasStaticShape()) {
+    return failure();
+  }
+  for (int i = 0; i < sourceRank; ++i) {
+    if (ShapedType::isDynamic(offsets[i]) || ShapedType::isDynamic(sizes[i]) ||
+        ShapedType::isDynamic(strides[i])) {
+      return failure();
+    }
+  }
+
+  IntegerRelation result(PresburgerSpace::getRelationSpace(
+      sourceRank, /*numRange=*/resultRank, /*numSymbol=*/0,
+      /*numLocals=*/0));
+
+  // Add bounds for the source dimensions.
+  auto domainOffset = result.getVarKindOffset(VarKind::Domain);
+  for (int i = 0; i < sourceRank; ++i) {
+    addBounds(result, domainOffset + i, 0, sourceType.getDimSize(i) - 1);
+  }
+
+  // Add bounds for the result dimensions.
+  auto rangeOffset = result.getVarKindOffset(VarKind::Range);
+  for (int i = 0; i < resultRank; ++i) {
+    addBounds(result, rangeOffset + i, 0, resultType.getDimSize(i) - 1);
+  }
+
+  // Walk the source dimensions in order, pairing each kept dimension with the
+  // next result dimension. Deciding by the result shape rather than by
+  // `sizes[i] > 1` keeps unit dimensions that the result type retains (for
+  // example a 4x4 -> 1x4 slice) paired with the right result dimension.
+  auto constOffset = result.getNumCols() - 1;
+  unsigned int resultDim = 0;
+  for (auto sourceDim = 0; sourceDim < sourceRank; ++sourceDim) {
+    bool isKept = resultDim < resultRank &&
+                  resultType.getDimSize(resultDim) == sizes[sourceDim];
+    if (isKept) {
+      // source = offset + stride * slice
+      addConstraint(result,
+                    {{domainOffset + sourceDim, -1},
+                     {constOffset, offsets[sourceDim]},
+                     {rangeOffset + resultDim, strides[sourceDim]}},
+                    /*equality=*/true);
+      ++resultDim;
+      continue;
+    }
+    // Only unit dimensions can be dropped by rank reduction.
+    if (sizes[sourceDim] != 1) {
+      return failure();
+    }
+    // This is a dropped dimension, fixed at the offset: source = offset
+    addConstraint(
+        result,
+        {{domainOffset + sourceDim, -1}, {constOffset, offsets[sourceDim]}},
+        /*equality=*/true);
+  }
+  // Every result dimension must have been paired with a source dimension.
+  if (resultDim != resultRank) {
+    return failure();
+  }
+
+  return result;
+}
+
 }  // namespace heir
 }  // namespace mlir
diff --git a/lib/Utils/Layout/Utils.h b/lib/Utils/Layout/Utils.h
@@ -206,6 +206,12 @@ presburger::IntegerRelation shiftVar(
     const presburger::IntegerRelation& relation, unsigned int pos,
     int64_t offset);
 
+// Relation from source indices (domain) to extract_slice indices (range).
+FailureOr<presburger::IntegerRelation> getSliceExtractionRelation(
+    RankedTensorType sourceType, RankedTensorType resultType,
+    SmallVector<int64_t> offsets, SmallVector<int64_t> sizes,
+    SmallVector<int64_t> strides);
+
 }  // namespace heir
 }  // namespace mlir
 
diff --git a/lib/Utils/Layout/UtilsTest.cpp b/lib/Utils/Layout/UtilsTest.cpp
@@ -519,6 +519,47 @@ TEST(UtilsTest, TestShiftVarRangeOffset) {
   EXPECT_TRUE(shiftedRel.containsPointNoLocal({8, 1, 19}).has_value());
 }
 
+// Checks membership of sample (source, slice) index points in the relation
+// built for a rank-reducing slice.
+TEST(UtilsTest, TestGetSliceExtractionRelation) {
+  MLIRContext context;
+  // Extract a 3x4 slice from a 2x1x3x4 matrix at (1, 0, 0, 0).
+  RankedTensorType sourceType =
+      RankedTensorType::get({2, 1, 3, 4}, IndexType::get(&context));
+  RankedTensorType sliceType =
+      RankedTensorType::get({3, 4}, IndexType::get(&context));
+  SmallVector<int64_t> offsets = {1, 0, 0, 0};
+  SmallVector<int64_t> sizes = {1, 1, 3, 4};
+  SmallVector<int64_t> strides = {1, 1, 1, 1};
+
+  auto sliceRelation = getSliceExtractionRelation(sourceType, sliceType,
+                                                  offsets, sizes, strides);
+  ASSERT_TRUE(succeeded(sliceRelation));
+
+  // Test a few points.
+  // The relation maps from source indices to slice indices.
+  // For example, source (1,0,0,0) maps to slice (0,0)
+  std::vector<std::vector<int64_t>> expectedPoints = {
+      {1, 0, 0, 0, 0, 0}, {1, 0, 0, 1, 0, 1}, {1, 0, 1, 0, 1, 0},
+      {1, 0, 1, 1, 1, 1}, {1, 0, 2, 2, 2, 2},
+  };
+  for (const auto& point : expectedPoints) {
+    auto maybeExists = sliceRelation.value().containsPointNoLocal(point);
+    EXPECT_TRUE(maybeExists.has_value());
+  }
+
+  // Points that violate the relation must be rejected: a source index away
+  // from the slice offset in a dropped dimension, and source/slice indices
+  // that disagree in a kept dimension.
+  std::vector<std::vector<int64_t>> unexpectedPoints = {
+      {0, 0, 0, 0, 0, 0}, {1, 0, 0, 1, 0, 0}, {1, 0, 2, 0, 1, 0},
+  };
+  for (const auto& point : unexpectedPoints) {
+    auto maybeExists = sliceRelation.value().containsPointNoLocal(point);
+    EXPECT_FALSE(maybeExists.has_value());
+  }
+}
+
 }  // namespace
 }  // namespace heir
 }  // namespace mlir
diff --git a/tests/Transforms/convert_to_ciphertext_semantics/extract_slice.mlir b/tests/Transforms/convert_to_ciphertext_semantics/extract_slice.mlir
@@ -0,0 +1,25 @@
+// RUN: heir-opt --convert-to-ciphertext-semantics=ciphertext-size=32 --split-input-file %s | FileCheck %s
+
+#layout1 = #tensor_ext.layout<"{ [i0, i1, i2, i3] -> [ct, slot] : i0 = 0 and ct = i1 and (-4i2 - i3 + slot) mod 16 = 0 and 0 <= i1 <= 1 and 0 <= i2 <= 3 and 0 <= i3 <= 3 and 0 <= slot <= 31 }">
+#layout = #tensor_ext.layout<"{ [i0, i1] -> [ct, slot] : ct = 0 and (-4i0 - i1 + slot) mod 16 = 0 and 0 <= i0 <= 3 and 0 <= i1 <= 3 and 0 <= slot <= 31 }">
+module {
+  // #layout1 puts each i1 index in its own ciphertext (ct = i1), so the slice at i1 = 1 is exactly one ciphertext of %input0.
+  // CHECK: func.func @trivial_insert
+  // CHECK-SAME: (%[[arg0:.*]]: !secret.secret<tensor<2x32xf32>>
+  func.func @trivial_insert(%arg0: !secret.secret<tensor<1x2x4x4xf32>> {tensor_ext.layout = #layout1}) -> (!secret.secret<tensor<4x4xf32>> {tensor_ext.layout = #layout}) {
+    %1 = secret.generic(%arg0: !secret.secret<tensor<1x2x4x4xf32>> {tensor_ext.layout = #layout1}) {
+    ^body(%input0: tensor<1x2x4x4xf32>):
+    // CHECK: secret.generic(%[[arg0]]: !secret.secret<tensor<2x32xf32>>)
+    // CHECK-NEXT: ^body(%[[input0:.*]]: tensor<2x32xf32>)
+    // CHECK: %[[v1:.*]] = tensor_ext.remap %[[input0]]
+    // CHECK-NEXT: %[[extracted:.*]] = tensor.extract_slice %[[v1]][0, 0] [1, 32] [1, 1]
+    // CHECK-NEXT: %[[v2:.*]] = arith.addf %[[extracted]], %[[extracted]]
+    // CHECK-NEXT: secret.yield %[[v2]]
+      %extract_slice = tensor.extract_slice %input0 [0, 1, 0, 0] [1, 1, 4, 4] [1, 1, 1, 1] {tensor_ext.layout = #layout}
+           : tensor<1x2x4x4xf32> to tensor<4x4xf32>
+      %3 = arith.addf %extract_slice, %extract_slice {tensor_ext.layout = #layout} : tensor<4x4xf32>
+      secret.yield %3 : tensor<4x4xf32>
+    } -> (!secret.secret<tensor<4x4xf32>> {tensor_ext.layout = #layout})
+    return %1 : !secret.secret<tensor<4x4xf32>>
+  }
+}
diff --git a/tests/Transforms/layout_propagation/extract_slice.mlir b/tests/Transforms/layout_propagation/extract_slice.mlir
diff --git a/tests/Transforms/layout_propagation/slice.mlir b/tests/Transforms/layout_propagation/slice.mlir