-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[LP] Assign weights when peeling last iteration. #166858
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
[LP] Assign weights when peeling last iteration. #166858
Conversation
|
@llvm/pr-subscribers-llvm-transforms Author: Mircea Trofin (mtrofin) Changes: Assign branch weights when we peel the last iteration. The probability of going into the loop or exiting should stay the same. Comparing the BFI before and after the peel, we notice that, indeed, the total count of the blocks corresponding to, e.g., the header (the original block plus its peeled copy) stays the same. Full diff: https://github.com/llvm/llvm-project/pull/166858.diff 3 Files Affected:
diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp
index e1dcaa85a5780..3c3ce7b73f305 100644
--- a/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -54,6 +54,7 @@ using namespace llvm::SCEVPatternMatch;
STATISTIC(NumPeeled, "Number of loops peeled");
STATISTIC(NumPeeledEnd, "Number of loops peeled from end");
+namespace llvm {
static cl::opt<unsigned> UnrollPeelCount(
"unroll-peel-count", cl::Hidden,
cl::desc("Set the unroll peeling count, for testing purposes"));
@@ -87,6 +88,9 @@ static cl::opt<bool> EnablePeelingForIV(
static const char *PeeledCountMetaData = "llvm.loop.peeled.count";
+extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+} // namespace llvm
+
// Check whether we are capable of peeling this loop.
bool llvm::canPeel(const Loop *L) {
// Make sure the loop is in simplified form
@@ -1190,7 +1194,19 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
IRBuilder<> B(PreHeaderBR);
Value *Cond =
B.CreateICmpNE(BTCValue, ConstantInt::get(BTCValue->getType(), 0));
- B.CreateCondBr(Cond, NewPreHeader, InsertTop);
+ auto *BI = B.CreateCondBr(Cond, NewPreHeader, InsertTop);
+ SmallVector<uint32_t> Weights;
+ auto *OrigLatchBr = Latch->getTerminator();
+ auto HasBranchWeights = !ProfcheckDisableMetadataFixes &&
+ extractBranchWeights(*OrigLatchBr, Weights);
+ if (HasBranchWeights) {
+ // The probability of going into the loop or exiting should stay the
+ // same, but we may need to flip the weights. For BI, InsertTop
+ // (position 1) is towards the exit.
+ if (L->getExitBlock() == OrigLatchBr->getSuccessor(0))
+ std::swap(Weights[0], Weights[1]);
+ setBranchWeights(*BI, Weights, /*IsExpected=*/false);
+ }
PreHeaderBR->eraseFromParent();
// PreHeader now dominates InsertTop.
diff --git a/llvm/test/Transforms/LoopUnroll/peel-last-iteration-bfi.ll b/llvm/test/Transforms/LoopUnroll/peel-last-iteration-bfi.ll
new file mode 100644
index 0000000000000..38fece4d56e34
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/peel-last-iteration-bfi.ll
@@ -0,0 +1,64 @@
+; RUN: opt -p "print<block-freq>,loop-unroll,print<block-freq>" -scev-cheap-expansion-budget=3 -S %s -profcheck-disable-metadata-fixes 2>&1 | FileCheck %s --check-prefixes=COMMON,BAD
+; RUN: opt -p "print<block-freq>,loop-unroll,print<block-freq>" -scev-cheap-expansion-budget=3 -S %s 2>&1 | FileCheck %s --check-prefixes=COMMON,GOOD
+
+define i32 @test_expansion_cost_2(i32 %start, i32 %end) !prof !0 {
+entry:
+ %sub = add i32 %end, -1
+ br label %loop.header
+
+loop.header:
+ %iv = phi i32 [ %start, %entry ], [ %iv.next, %loop.latch ]
+ %c = icmp eq i32 %iv, %sub
+ br i1 %c, label %then, label %loop.latch, !prof !1
+
+then:
+ br label %loop.latch
+
+loop.latch:
+ %iv.next = add nsw i32 %iv, 1
+ %ec = icmp eq i32 %iv.next, %end
+ br i1 %ec, label %exit, label %loop.header, !prof !2
+
+exit:
+ ret i32 0
+}
+
+!0 = !{!"function_entry_count", i32 10}
+!1 = !{!"branch_weights", i32 2, i32 3}
+!2 = !{!"branch_weights", i32 1, i32 50}
+
+; COMMON: block-frequency-info: test_expansion_cost_2
+; COMMON-NEXT: entry: float = 1.0
+; COMMON-NEXT: loop.header: float = 51.0
+; COMMON-NEXT: then: float = 20.4
+; COMMON-NEXT: loop.latch: float = 51.0
+; COMMON-NEXT: exit: float = 1.0
+
+; COMMON: block-frequency-info: test_expansion_cost_2
+; GOOD-NEXT: entry: float = 1.0
+; GOOD-NEXT: entry.split: float = 0.98039
+; GOOD-NEXT: loop.header: float = 50.0
+; GOOD-NEXT: then: float = 20.0
+; GOOD-NEXT: loop.latch: float = 50.0
+; GOOD-NEXT: exit.peel.begin.loopexit: float = 0.98039
+; GOOD-NEXT: exit.peel.begin: float = 1.0
+; GOOD-NEXT: loop.header.peel: float = 1.0
+; GOOD-NEXT: then.peel: float = 0.4
+; GOOD-NEXT: loop.latch.peel: float = 1.0
+; GOOD-NEXT: exit.peel.next: float = 1.0
+; GOOD-NEXT: loop.header.peel.next: float = 1.0
+; GOOD-NEXT: exit: float = 1.0
+
+; BAD-NEXT: entry: float = 1.0
+; BAD-NEXT: entry.split: float = 0.625
+; BAD-NEXT: loop.header: float = 31.875
+; BAD-NEXT: then: float = 12.75
+; BAD-NEXT: loop.latch: float = 31.875
+; BAD-NEXT: exit.peel.begin.loopexit: float = 0.625
+; BAD-NEXT: exit.peel.begin: float = 1.0
+; BAD-NEXT: loop.header.peel: float = 1.0
+; BAD-NEXT: then.peel: float = 0.4
+; BAD-NEXT: loop.latch.peel: float = 1.0
+; BAD-NEXT: exit.peel.next: float = 1.0
+; BAD-NEXT: loop.header.peel.next: float = 1.0
+; BAD-NEXT: exit: float = 1.0
\ No newline at end of file
diff --git a/llvm/test/Transforms/LoopUnroll/peel-last-iteration-expansion-cost.ll b/llvm/test/Transforms/LoopUnroll/peel-last-iteration-expansion-cost.ll
index f3910f9bfc399..9b1e08c8ca526 100644
--- a/llvm/test/Transforms/LoopUnroll/peel-last-iteration-expansion-cost.ll
+++ b/llvm/test/Transforms/LoopUnroll/peel-last-iteration-expansion-cost.ll
@@ -1,46 +1,46 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
; RUN: opt -p loop-unroll -scev-cheap-expansion-budget=2 -S %s | FileCheck --check-prefix=BUDGET2 %s
; RUN: opt -p loop-unroll -scev-cheap-expansion-budget=3 -S %s | FileCheck --check-prefix=BUDGET3 %s
-define i32 @test_expansion_cost_2(i32 %start, i32 %end) {
+define i32 @test_expansion_cost_2(i32 %start, i32 %end) !prof !0 {
; BUDGET2-LABEL: define i32 @test_expansion_cost_2(
-; BUDGET2-SAME: i32 [[START:%.*]], i32 [[END:%.*]]) {
+; BUDGET2-SAME: i32 [[START:%.*]], i32 [[END:%.*]]) !prof [[PROF0:![0-9]+]] {
; BUDGET2-NEXT: [[ENTRY:.*]]:
; BUDGET2-NEXT: [[SUB:%.*]] = add i32 [[END]], -1
; BUDGET2-NEXT: br label %[[LOOP_HEADER:.*]]
; BUDGET2: [[LOOP_HEADER]]:
; BUDGET2-NEXT: [[IV:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; BUDGET2-NEXT: [[C:%.*]] = icmp eq i32 [[IV]], [[SUB]]
-; BUDGET2-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]]
+; BUDGET2-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]], !prof [[PROF1:![0-9]+]]
; BUDGET2: [[THEN]]:
; BUDGET2-NEXT: br label %[[LOOP_LATCH]]
; BUDGET2: [[LOOP_LATCH]]:
; BUDGET2-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1
; BUDGET2-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], [[END]]
-; BUDGET2-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]]
+; BUDGET2-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]], !prof [[PROF2:![0-9]+]]
; BUDGET2: [[EXIT]]:
; BUDGET2-NEXT: ret i32 0
;
; BUDGET3-LABEL: define i32 @test_expansion_cost_2(
-; BUDGET3-SAME: i32 [[START:%.*]], i32 [[END:%.*]]) {
+; BUDGET3-SAME: i32 [[START:%.*]], i32 [[END:%.*]]) !prof [[PROF0:![0-9]+]] {
; BUDGET3-NEXT: [[ENTRY:.*]]:
; BUDGET3-NEXT: [[SUB:%.*]] = add i32 [[END]], -1
; BUDGET3-NEXT: [[TMP0:%.*]] = sub i32 [[SUB]], [[START]]
; BUDGET3-NEXT: [[TMP1:%.*]] = icmp ne i32 [[TMP0]], 0
-; BUDGET3-NEXT: br i1 [[TMP1]], label %[[ENTRY_SPLIT:.*]], label %[[EXIT_PEEL_BEGIN:.*]]
+; BUDGET3-NEXT: br i1 [[TMP1]], label %[[ENTRY_SPLIT:.*]], label %[[EXIT_PEEL_BEGIN:.*]], !prof [[PROF1:![0-9]+]]
; BUDGET3: [[ENTRY_SPLIT]]:
; BUDGET3-NEXT: br label %[[LOOP_HEADER:.*]]
; BUDGET3: [[LOOP_HEADER]]:
; BUDGET3-NEXT: [[IV:%.*]] = phi i32 [ [[START]], %[[ENTRY_SPLIT]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; BUDGET3-NEXT: [[C:%.*]] = icmp eq i32 [[IV]], [[SUB]]
-; BUDGET3-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]]
+; BUDGET3-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]], !prof [[PROF2:![0-9]+]]
; BUDGET3: [[THEN]]:
; BUDGET3-NEXT: br label %[[LOOP_LATCH]]
; BUDGET3: [[LOOP_LATCH]]:
; BUDGET3-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1
; BUDGET3-NEXT: [[TMP2:%.*]] = sub i32 [[END]], 1
; BUDGET3-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], [[TMP2]]
-; BUDGET3-NEXT: br i1 [[EC]], label %[[EXIT_PEEL_BEGIN_LOOPEXIT:.*]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP0:![0-9]+]]
+; BUDGET3-NEXT: br i1 [[EC]], label %[[EXIT_PEEL_BEGIN_LOOPEXIT:.*]], label %[[LOOP_HEADER]], !prof [[PROF3:![0-9]+]], !llvm.loop [[LOOP4:![0-9]+]]
; BUDGET3: [[EXIT_PEEL_BEGIN_LOOPEXIT]]:
; BUDGET3-NEXT: [[DOTPH:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP_LATCH]] ]
; BUDGET3-NEXT: br label %[[EXIT_PEEL_BEGIN]]
@@ -49,13 +49,13 @@ define i32 @test_expansion_cost_2(i32 %start, i32 %end) {
; BUDGET3-NEXT: br label %[[LOOP_HEADER_PEEL:.*]]
; BUDGET3: [[LOOP_HEADER_PEEL]]:
; BUDGET3-NEXT: [[C_PEEL:%.*]] = icmp eq i32 [[TMP3]], [[SUB]]
-; BUDGET3-NEXT: br i1 [[C_PEEL]], label %[[THEN_PEEL:.*]], label %[[LOOP_LATCH_PEEL:.*]]
+; BUDGET3-NEXT: br i1 [[C_PEEL]], label %[[THEN_PEEL:.*]], label %[[LOOP_LATCH_PEEL:.*]], !prof [[PROF2]]
; BUDGET3: [[THEN_PEEL]]:
; BUDGET3-NEXT: br label %[[LOOP_LATCH_PEEL]]
; BUDGET3: [[LOOP_LATCH_PEEL]]:
; BUDGET3-NEXT: [[IV_NEXT_PEEL:%.*]] = add nsw i32 [[TMP3]], 1
; BUDGET3-NEXT: [[EC_PEEL:%.*]] = icmp eq i32 [[IV_NEXT_PEEL]], [[END]]
-; BUDGET3-NEXT: br i1 [[EC_PEEL]], label %[[EXIT_PEEL_NEXT:.*]], label %[[EXIT_PEEL_NEXT]]
+; BUDGET3-NEXT: br i1 [[EC_PEEL]], label %[[EXIT_PEEL_NEXT:.*]], label %[[EXIT_PEEL_NEXT]], !prof [[PROF3]]
; BUDGET3: [[EXIT_PEEL_NEXT]]:
; BUDGET3-NEXT: br label %[[LOOP_HEADER_PEEL_NEXT:.*]]
; BUDGET3: [[LOOP_HEADER_PEEL_NEXT]]:
@@ -70,7 +70,7 @@ entry:
loop.header:
%iv = phi i32 [ %start, %entry ], [ %iv.next, %loop.latch ]
%c = icmp eq i32 %iv, %sub
- br i1 %c, label %then, label %loop.latch
+ br i1 %c, label %then, label %loop.latch, !prof !1
then:
br label %loop.latch
@@ -78,12 +78,25 @@ then:
loop.latch:
%iv.next = add nsw i32 %iv, 1
%ec = icmp eq i32 %iv.next, %end
- br i1 %ec, label %exit, label %loop.header
+ br i1 %ec, label %exit, label %loop.header, !prof !2
exit:
ret i32 0
}
+
+!0 = !{!"function_entry_count", i32 10}
+!1 = !{!"branch_weights", i32 2, i32 3}
+!2 = !{!"branch_weights", i32 1, i32 10}
+;.
+; BUDGET2: [[PROF0]] = !{!"function_entry_count", i32 10}
+; BUDGET2: [[PROF1]] = !{!"branch_weights", i32 2, i32 3}
+; BUDGET2: [[PROF2]] = !{!"branch_weights", i32 1, i32 10}
;.
-; BUDGET3: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]}
-; BUDGET3: [[META1]] = !{!"llvm.loop.peeled.count", i32 1}
+; BUDGET3: [[PROF0]] = !{!"function_entry_count", i32 10}
+; BUDGET3: [[PROF1]] = !{!"branch_weights", i32 10, i32 1}
+; BUDGET3: [[PROF2]] = !{!"branch_weights", i32 2, i32 3}
+; BUDGET3: [[PROF3]] = !{!"branch_weights", i32 1, i32 10}
+; BUDGET3: [[LOOP4]] = distinct !{[[LOOP4]], [[META5:![0-9]+]], [[META6:![0-9]+]]}
+; BUDGET3: [[META5]] = !{!"llvm.loop.peeled.count", i32 1}
+; BUDGET3: [[META6]] = !{!"llvm.loop.estimated_trip_count", i32 10}
;.
|
0823bb2 to
1da3c16
Compare

Assign branch weights when we peel the last iteration. The probability of going into the loop or exiting should stay the same. Comparing the BFI before and after the peel, we notice that, indeed, the total count of the blocks corresponding to, e.g., the header (the original block plus its peeled copy) stays the same.