From 2112adc3ec82d547ab8733896481c40e1ab5fc72 Mon Sep 17 00:00:00 2001 From: Michel Weber Date: Thu, 23 Jan 2025 12:47:42 +0100 Subject: [PATCH 1/5] make instrumentation flexible --- .../llvm/Analysis/CFFunctionAnalysis.h | 15 ++ llvm/lib/Analysis/CFFunctionAnalysis.cpp | 47 ++++- llvm/lib/Passes/PassRegistry.def | 1 + .../Utils/CFFunctionInstrumentation.cpp | 3 + print_example/instrumented.ll | 180 ++++++++---------- 5 files changed, 139 insertions(+), 107 deletions(-) diff --git a/llvm/include/llvm/Analysis/CFFunctionAnalysis.h b/llvm/include/llvm/Analysis/CFFunctionAnalysis.h index 4998158b896dc..c30573a5265d6 100644 --- a/llvm/include/llvm/Analysis/CFFunctionAnalysis.h +++ b/llvm/include/llvm/Analysis/CFFunctionAnalysis.h @@ -9,6 +9,7 @@ #include "llvm/Passes/PassBuilder.h" #include "llvm/Passes/PassPlugin.h" #include "llvm/Support/Debug.h" +#include namespace llvm { @@ -34,6 +35,20 @@ struct CFFunctionAnalysisPrinterPass PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); }; +struct CFFunctionAnalysisStorePass + : PassInfoMixin { + + std::string Filename; + +public: + explicit CFFunctionAnalysisStorePass(std::string Filename) + : Filename(std::move(Filename)) {} + + explicit CFFunctionAnalysisStorePass() : Filename("called_functions.txt") {} + + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +}; + } // namespace llvm #endif // LLVM_ANALYSIS_CF_FUNCTION_ANALYSIS_H diff --git a/llvm/lib/Analysis/CFFunctionAnalysis.cpp b/llvm/lib/Analysis/CFFunctionAnalysis.cpp index 16ed818574640..c759143975a60 100644 --- a/llvm/lib/Analysis/CFFunctionAnalysis.cpp +++ b/llvm/lib/Analysis/CFFunctionAnalysis.cpp @@ -7,9 +7,7 @@ using namespace llvm; // Provide a definition for the static object used to identify passes. AnalysisKey CFFunctionAnalysis::Key; -CFFunctionAnalysisInfo CFFunctionAnalysis::run(Module &M, - ModuleAnalysisManager &AM) { - +CFFunctionAnalysisInfo analyse(Module &M) { CFFunctionAnalysisInfo CalledFunctions; for (auto &F : M) { if (F.isDeclaration()) { @@ -75,6 +73,27 @@ CFFunctionAnalysisInfo CFFunctionAnalysis::run(Module &M, return CalledFunctions; } +CFFunctionAnalysisInfo CFFunctionAnalysis::run(Module &M, + ModuleAnalysisManager &AM) { + + CFFunctionAnalysisInfo CalledFunctions; + + // check if called functions file exists + std::ifstream file("called_functions.txt"); + if (file.good()) { + std::string line; + while (std::getline(file, line)) { + char *cstr = new char[line.length() + 1]; + strcpy(cstr, line.c_str()); + CalledFunctions.insert(cstr); + } + } else { + CalledFunctions = analyse(M); + } + file.close(); + return CalledFunctions; +} + PreservedAnalyses CFFunctionAnalysisPrinterPass::run(Module &M, ModuleAnalysisManager &AM) { OS << "Called functions for " << M.getName() << ":\n"; @@ -84,3 +103,25 @@ CFFunctionAnalysisPrinterPass::run(Module &M, ModuleAnalysisManager &AM) { OS << "\n"; return PreservedAnalyses::all(); } + +PreservedAnalyses CFFunctionAnalysisStorePass::run(Module &M, + ModuleAnalysisManager &AM) { + CFFunctionAnalysisInfo CalledFunctions = AM.getResult(M); + + // store called functions to called_functions.txt + std::ofstream out; + out.open(Filename, std::ios::app); + + if (!out) { + errs() << "Error: cannot open file " << Filename << "\n"; + return PreservedAnalyses::none(); + } + + for (auto &F : CalledFunctions) { + out << F.str() << "\n"; + } + + out.close(); + + return PreservedAnalyses::all(); +} diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index cde1615937bab..53328e623a0b9 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -42,6 +42,7 @@ MODULE_ALIAS_ANALYSIS("globals-aa", GlobalsAA()) #ifndef MODULE_PASS #define MODULE_PASS(NAME, CREATE_PASS) #endif +MODULE_PASS("store", CFFunctionAnalysisStorePass()) MODULE_PASS("print", CFFunctionAnalysisPrinterPass(errs())) MODULE_PASS("cffunction-instrumentation", CFFunctionInstrumentationPass()) MODULE_PASS("always-inline", AlwaysInlinerPass()) diff --git a/llvm/lib/Transforms/Utils/CFFunctionInstrumentation.cpp b/llvm/lib/Transforms/Utils/CFFunctionInstrumentation.cpp index 7c90db2841f2c..0a7e1b5bfe1b9 100644 --- a/llvm/lib/Transforms/Utils/CFFunctionInstrumentation.cpp +++ b/llvm/lib/Transforms/Utils/CFFunctionInstrumentation.cpp @@ -10,6 +10,9 @@ PreservedAnalyses CFFunctionInstrumentationPass::run(Module &M, ModuleAnalysisManager &AM) { CFFunctionAnalysisInfo CalledFunctions = AM.getResult(M); + for (auto &F : CalledFunctions) { + LLVM_DEBUG(dbgs() << "Called function: " << F << "\n"); + } int permissions_created = 0; Value *WritePermission = nullptr; Value *FileName = nullptr; diff --git a/print_example/instrumented.ll b/print_example/instrumented.ll index 873e18c01d935..0863922ac60f1 100644 --- a/print_example/instrumented.ll +++ b/print_example/instrumented.ll @@ -4,143 +4,115 @@ source_filename = "test.ll" @0 = private unnamed_addr constant [19 x i8] c"function_trace.txt\00", align 1 @1 = private unnamed_addr constant [2 x i8] c"a\00", align 1 @2 = private unnamed_addr constant [10 x i8] c"foo %lld\0A\00", align 1 -@3 = private unnamed_addr constant [10 x i8] c"bat %lld\0A\00", align 1 -@4 = private unnamed_addr constant [10 x i8] c"baz %lld\0A\00", align 1 +@3 = private unnamed_addr constant [10 x i8] c"baz %lld\0A\00", align 1 -define noundef i1 @foo() local_unnamed_addr { -access: - %0 = tail call i32 @access(ptr nonnull @0, i32 0) - %1 = icmp eq i32 %0, 0 - br i1 %1, label %print, label %return +define i1 @foo() { + br label %access + +access: ; preds = %0 + %1 = call i32 @access(ptr @0, i32 0) + %2 = icmp eq i32 %1, 0 + br i1 %2, label %print, label %return print: ; preds = %access - %2 = tail call ptr @fopen(ptr nonnull @0, ptr nonnull @1) - tail call void (ptr, ...) @fprintf(ptr %2, ptr nonnull @2, i1 false) - %3 = tail call i32 @fclose(ptr %2) + %3 = call ptr @fopen(ptr @0, ptr @1) + call void (ptr, ...) @fprintf(ptr %3, ptr @2, i1 false) + %4 = call i32 @fclose(ptr %3) br label %return return: ; preds = %print, %access ret i1 false } -define noundef i32 @bat() local_unnamed_addr { -access: - %0 = tail call i32 @access(ptr nonnull @0, i32 0) - %1 = icmp eq i32 %0, 0 - br i1 %1, label %print, label %return - -print: ; preds = %access - %2 = tail call ptr @fopen(ptr nonnull @0, ptr nonnull @1) - tail call void (ptr, ...) @fprintf(ptr %2, ptr nonnull @3, i32 0) - %3 = tail call i32 @fclose(ptr %2) - br label %return - -return: ; preds = %print, %access +define i32 @bat() { ret i32 0 } -; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) -define void @bar() local_unnamed_addr #0 { +define void @bar() { ret void } -define noundef i32 @baz() local_unnamed_addr { -end: - %0 = tail call i32 @access(ptr nonnull @0, i32 0) - %1 = icmp eq i32 %0, 0 - br i1 %1, label %print.i, label %foo.exit - -print.i: ; preds = %end - %2 = tail call ptr @fopen(ptr nonnull @0, ptr nonnull @1) - tail call void (ptr, ...) @fprintf(ptr %2, ptr nonnull @2, i1 false) - %3 = tail call i32 @fclose(ptr %2) - br label %foo.exit - -foo.exit: ; preds = %end, %print.i - %4 = tail call i32 @access(ptr nonnull @0, i32 0) - %5 = icmp eq i32 %4, 0 - br i1 %5, label %print, label %return - -print: ; preds = %foo.exit - %6 = tail call ptr @fopen(ptr nonnull @0, ptr nonnull @1) - tail call void (ptr, ...) @fprintf(ptr %6, ptr nonnull @4, i32 5) - %7 = tail call i32 @fclose(ptr %6) +define i32 @baz() { + %1 = call i1 @foo() + br i1 %1, label %then, label %else + +then: ; preds = %0 + br label %end + +else: ; preds = %0 + br label %end + +end: ; preds = %else, %then + br label %access + +access: ; preds = %end + %2 = call i32 @access(ptr @0, i32 0) + %3 = icmp eq i32 %2, 0 + br i1 %3, label %print, label %return + +print: ; preds = %access + %4 = call ptr @fopen(ptr @0, ptr @1) + call void (ptr, ...) @fprintf(ptr %4, ptr @3, i32 5) + %5 = call i32 @fclose(ptr %4) br label %return -return: ; preds = %print, %foo.exit +return: ; preds = %print, %access ret i32 5 } -define noundef i32 @qux() local_unnamed_addr { -end: - %0 = tail call i32 @access(ptr nonnull @0, i32 0) - %1 = icmp eq i32 %0, 0 - br i1 %1, label %print.i, label %bat.exit +define i32 @qux() { + %1 = call i32 @bat() + %2 = icmp eq i32 %1, 0 + br i1 %2, label %then, label %else + +then: ; preds = %0 + br label %end -print.i: ; preds = %end - %2 = tail call ptr @fopen(ptr nonnull @0, ptr nonnull @1) - tail call void (ptr, ...) @fprintf(ptr %2, ptr nonnull @3, i32 0) - %3 = tail call i32 @fclose(ptr %2) - br label %bat.exit +else: ; preds = %0 + br label %end -bat.exit: ; preds = %end, %print.i +end: ; preds = %else, %then ret i32 5 } -define noundef i32 @quuz() local_unnamed_addr { -end: - %0 = tail call i32 @baz() - %1 = tail call i32 @access(ptr nonnull @0, i32 0) - %2 = icmp eq i32 %1, 0 - br i1 %2, label %print.i, label %bat.exit +define i32 @quuz() { + %1 = call i32 @baz() + %2 = call i32 @bat() + %3 = add i32 %1, %2 + %4 = icmp eq i32 %3, 0 + br i1 %4, label %then, label %else -print.i: ; preds = %end - %3 = tail call ptr @fopen(ptr nonnull @0, ptr nonnull @1) - tail call void (ptr, ...) @fprintf(ptr %3, ptr nonnull @3, i32 0) - %4 = tail call i32 @fclose(ptr %3) - br label %bat.exit +then: ; preds = %0 + br label %end -bat.exit: ; preds = %end, %print.i +else: ; preds = %0 + br label %end + +end: ; preds = %else, %then ret i32 5 } -define noundef i32 @quux() local_unnamed_addr { -end: - %0 = tail call i32 @access(ptr nonnull @0, i32 0) - %1 = icmp eq i32 %0, 0 - br i1 %1, label %print.i, label %bat.exit - -print.i: ; preds = %end - %2 = tail call ptr @fopen(ptr nonnull @0, ptr nonnull @1) - tail call void (ptr, ...) @fprintf(ptr %2, ptr nonnull @3, i32 0) - %3 = tail call i32 @fclose(ptr %2) - br label %bat.exit - -bat.exit: ; preds = %end, %print.i - %4 = tail call i32 @access(ptr nonnull @0, i32 0) - %5 = icmp eq i32 %4, 0 - br i1 %5, label %print.i1, label %bat.exit2 - -print.i1: ; preds = %bat.exit - %6 = tail call ptr @fopen(ptr nonnull @0, ptr nonnull @1) - tail call void (ptr, ...) @fprintf(ptr %6, ptr nonnull @3, i32 0) - %7 = tail call i32 @fclose(ptr %6) - br label %bat.exit2 - -bat.exit2: ; preds = %bat.exit, %print.i1 +define i32 @quux() { + %1 = call i32 @bat() + %2 = call i32 @bat() + %3 = add i32 %1, %2 + %4 = icmp eq i32 %3, 0 + br i1 %4, label %then, label %else + +then: ; preds = %0 + br label %end + +else: ; preds = %0 + br label %end + +end: ; preds = %else, %then ret i32 5 } -; Function Attrs: nofree nounwind -declare noundef i32 @access(ptr nocapture noundef readonly, i32 noundef) local_unnamed_addr #1 - -declare void @fprintf(ptr, ...) local_unnamed_addr +declare i32 @access(ptr, i32) -; Function Attrs: nofree nounwind -declare noundef i32 @fclose(ptr nocapture noundef) local_unnamed_addr #1 +declare void @fprintf(ptr, ...) -; Function Attrs: nofree nounwind -declare noalias noundef ptr @fopen(ptr nocapture noundef readonly, ptr nocapture noundef readonly) local_unnamed_addr #1 +declare i32 @fclose(ptr) -attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) } -attributes #1 = { nofree nounwind } +declare ptr @fopen(ptr, ptr) From 311126227692ce7672ba19ff4ddfc46da5c8b236 Mon Sep 17 00:00:00 2001 From: Michel Weber Date: Thu, 23 Jan 2025 12:52:45 +0100 Subject: [PATCH 2/5] add store pass to O3 --- llvm/lib/Passes/PassBuilderPipelines.cpp | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index c269d94829347..743d54624f510 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/CFFunctionAnalysis.h" #include "llvm/Analysis/CGSCCPassManager.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InlineAdvisor.h" @@ -123,9 +124,9 @@ #include "llvm/Transforms/Scalar/WarnMissedTransforms.h" #include "llvm/Transforms/Utils/AddDiscriminators.h" #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" +#include "llvm/Transforms/Utils/CFFunctionInstrumentation.h" #include "llvm/Transforms/Utils/CanonicalizeAliases.h" #include "llvm/Transforms/Utils/CountVisits.h" -#include "llvm/Transforms/Utils/CFFunctionInstrumentation.h" #include "llvm/Transforms/Utils/InjectTLIMappings.h" #include "llvm/Transforms/Utils/LibCallsShrinkWrap.h" #include "llvm/Transforms/Utils/Mem2Reg.h" @@ -181,9 +182,9 @@ static cl::opt EnablePostPGOLoopRotation( "enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden, cl::desc("Run the loop rotation transformation after PGO instrumentation")); -static cl::opt EnableGlobalAnalyses( - "enable-global-analyses", cl::init(true), cl::Hidden, - cl::desc("Enable inter-procedural analyses")); +static cl::opt + EnableGlobalAnalyses("enable-global-analyses", cl::init(true), cl::Hidden, + cl::desc("Enable inter-procedural analyses")); static cl::opt RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden, @@ -1084,11 +1085,10 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, // and prior to optimizing globals. // FIXME: This position in the pipeline hasn't been carefully considered in // years, it should be re-analyzed. - MPM.addPass(IPSCCPPass( - IPSCCPOptions(/*AllowFuncSpec=*/ - Level != OptimizationLevel::Os && - Level != OptimizationLevel::Oz && - !isLTOPreLink(Phase)))); + MPM.addPass(IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/ + Level != OptimizationLevel::Os && + Level != OptimizationLevel::Oz && + !isLTOPreLink(Phase)))); // Attach metadata to indirect call sites indicating the set of functions // they may target at run-time. This should follow IPSCCP. @@ -1498,6 +1498,7 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, ModulePassManager MPM; + MPM.addPass(CFFunctionAnalysisStorePass("cffunction-analysis-store.txt")); MPM.addPass(CFFunctionInstrumentationPass()); // Convert @llvm.global.annotations to !annotation metadata. @@ -1559,7 +1560,7 @@ PassBuilder::buildFatLTODefaultPipeline(OptimizationLevel Level, bool ThinLTO, ModulePassManager PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) { if (Level == OptimizationLevel::O0) - return buildO0DefaultPipeline(Level, /*LTOPreLink*/true); + return buildO0DefaultPipeline(Level, /*LTOPreLink*/ true); ModulePassManager MPM; From 47d2c6bfa4261653b02d0549e7acfbae9ce150b7 Mon Sep 17 00:00:00 2001 From: Michel Weber Date: Wed, 29 Jan 2025 09:33:42 +0100 Subject: [PATCH 3/5] remove useless loop --- llvm/lib/Transforms/Utils/CFFunctionInstrumentation.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CFFunctionInstrumentation.cpp b/llvm/lib/Transforms/Utils/CFFunctionInstrumentation.cpp index 0a7e1b5bfe1b9..128d30a5f3206 100644 --- a/llvm/lib/Transforms/Utils/CFFunctionInstrumentation.cpp +++ b/llvm/lib/Transforms/Utils/CFFunctionInstrumentation.cpp @@ -10,9 +10,9 @@ PreservedAnalyses CFFunctionInstrumentationPass::run(Module &M, ModuleAnalysisManager &AM) { CFFunctionAnalysisInfo CalledFunctions = AM.getResult(M); - for (auto &F : CalledFunctions) { - LLVM_DEBUG(dbgs() << "Called function: " << F << "\n"); - } + LLVM_DEBUG( + for (auto &F + : CalledFunctions) { dbgs() << "Called function: " << F << "\n"; }); int permissions_created = 0; Value *WritePermission = nullptr; Value *FileName = nullptr; From fb5a4b37feb5d8358644b6486f68ff691e2e28c0 Mon Sep 17 00:00:00 2001 From: Michel Weber Date: Wed, 29 Jan 2025 18:40:38 +0100 Subject: [PATCH 4/5] track handled blocks --- llvm/lib/Transforms/Utils/CFFunctionInstrumentation.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/llvm/lib/Transforms/Utils/CFFunctionInstrumentation.cpp b/llvm/lib/Transforms/Utils/CFFunctionInstrumentation.cpp index 128d30a5f3206..c07a9971172d2 100644 --- a/llvm/lib/Transforms/Utils/CFFunctionInstrumentation.cpp +++ b/llvm/lib/Transforms/Utils/CFFunctionInstrumentation.cpp @@ -36,10 +36,14 @@ CFFunctionInstrumentationPass::run(Module &M, ModuleAnalysisManager &AM) { StringRef funcFileName = StringRef(fileName); // for all return instructions, print the return value to a file with the // name of the function + + // store already handled blocks + std::set HandledBlocks; for (auto &BB : F) { // Do NOT reinstrument the inserted blocks if (BB.getName() == "return" || BB.getName() == "print" || BB.getName() == "open") { + HandledBlocks.insert(&BB); continue; } if (auto *RI = dyn_cast(BB.getTerminator())) { @@ -118,6 +122,7 @@ CFFunctionInstrumentationPass::run(Module &M, ModuleAnalysisManager &AM) { ReturnBB->moveAfter(PrintBB); } } + HandledBlocks.insert(&BB); } } From 374227524fb8a318dd8817815d6308b1b9f12c92 Mon Sep 17 00:00:00 2001 From: Michel Weber Date: Wed, 29 Jan 2025 18:42:29 +0100 Subject: [PATCH 5/5] track handled blocks2 --- llvm/lib/Transforms/Utils/CFFunctionInstrumentation.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/llvm/lib/Transforms/Utils/CFFunctionInstrumentation.cpp b/llvm/lib/Transforms/Utils/CFFunctionInstrumentation.cpp index c07a9971172d2..b0d2ac72e5878 100644 --- a/llvm/lib/Transforms/Utils/CFFunctionInstrumentation.cpp +++ b/llvm/lib/Transforms/Utils/CFFunctionInstrumentation.cpp @@ -40,6 +40,9 @@ CFFunctionInstrumentationPass::run(Module &M, ModuleAnalysisManager &AM) { // store already handled blocks std::set HandledBlocks; for (auto &BB : F) { + if (HandledBlocks.count(&BB)) { + continue; + } // Do NOT reinstrument the inserted blocks if (BB.getName() == "return" || BB.getName() == "print" || BB.getName() == "open") { @@ -120,6 +123,10 @@ CFFunctionInstrumentationPass::run(Module &M, ModuleAnalysisManager &AM) { // place new BBs in the correct order ReturnBB->moveAfter(PrintBB); + + HandledBlocks.insert(AccessBB); + HandledBlocks.insert(PrintBB); + HandledBlocks.insert(ReturnBB); } } HandledBlocks.insert(&BB);