"""Summarise which functions were recorded for which modules.

The input is either a text log with one ``<func> <word> <word> <module>``
record per line (see ``parse_called_funcs``) or a file holding the literal
representation of a ``{module: [func, ...]}`` dictionary (see
``get_stored_funcs_dict``).
"""
import ast
from argparse import ArgumentParser

# Checkout root that is stripped from module paths before they are grouped
# by folder. Kept as the default so existing invocations behave as before.
DEFAULT_SOURCE_ROOT = '/home/webmiche/questions/llvm-project/'


def unique_funcs(funcs_per_module):
    """Return the set of all function names that occur in any module.

    Args:
        funcs_per_module: mapping of module name to an iterable of function
            names.

    Returns:
        A ``set`` with every function name, duplicates removed.
    """
    unique_functions = set()
    for funcs in funcs_per_module.values():
        unique_functions.update(funcs)
    return unique_functions


def parse_called_funcs(filename):
    """Parse a log file into a ``{module: [func, ...]}`` dictionary.

    Only lines that consist of exactly four space-separated fields are used;
    the first field is taken as the function name and the fourth as the
    module name. All other lines are ignored.

    Args:
        filename: path of the log file to read.

    Returns:
        The parsed dictionary. If the file does not exist, a message is
        printed and an empty dictionary is returned, so callers can always
        treat the result as a mapping.
    """
    print(f'Parsing {filename}')

    funcs_per_module = {}
    try:
        with open(filename, 'r') as f:
            for line in f:
                fields = line.strip().split(' ')
                if len(fields) != 4:
                    continue
                func, module = fields[0], fields[3]
                funcs_per_module.setdefault(module, []).append(func)
    except FileNotFoundError:
        print(f'File {filename} not found')
        # An empty mapping (not a list) keeps ``.items()``/``.values()``
        # working in the callers.
        return {}

    return funcs_per_module


def get_unique_funcs(filename):
    """Return the set of unique function names found in log file *filename*."""
    return unique_funcs(parse_called_funcs(filename))


def get_stored_funcs_dict(filename):
    """Load a ``{module: [func, ...]}`` dictionary stored as a Python literal.

    The file is parsed with ``ast.literal_eval``, which only accepts literal
    syntax (dicts, lists, sets, tuples, strings, numbers, ...). Unlike
    ``eval`` it never executes code found in the file.

    Args:
        filename: path of the file holding the dictionary literal.

    Returns:
        The object described by the file's contents.

    Raises:
        ValueError, SyntaxError: if the contents are not a valid literal.
    """
    with open(filename, 'r') as stored:
        return ast.literal_eval(stored.read())


def print_stats(funcs_per_module):
    """Print the number of modules and of unique functions."""
    unique_functions = unique_funcs(funcs_per_module)

    print(f'Found {len(funcs_per_module)} modules')
    print(f'Found {len(unique_functions)} unique functions')


def build_folder_hierarchy(modules, prefix=DEFAULT_SOURCE_ROOT):
    """Arrange module paths into a nested dictionary of path components.

    Args:
        modules: iterable of '/'-separated module paths.
        prefix: leading string removed from each path (when present) before
            the path is split into components.

    Returns:
        A nested ``dict`` in which every key is one path component and every
        value is the dictionary of its children; leaves map to ``{}``.
    """
    hierarchy = {}
    for module in modules:
        current = hierarchy
        for part in module.removeprefix(prefix).split('/'):
            current = current.setdefault(part, {})
    return hierarchy


def print_hierarchy(hierarchy, indent=0):
    """Print a nested hierarchy as an indented tree of any depth.

    Each level is indented by two further spaces, and a blank line is printed
    after every top-level subtree.

    Args:
        hierarchy: nested dictionary as built by ``build_folder_hierarchy``.
        indent: nesting level of *hierarchy*; callers normally leave this at
            its default of 0.
    """
    for name, children in hierarchy.items():
        print('  ' * indent + name)
        # Recurse instead of hard-coding a fixed number of nested loops, so
        # entries deeper than five levels are no longer dropped.
        print_hierarchy(children, indent + 1)
        if indent == 0:
            print()


def main():
    """Print the folder tree and function statistics for a stored dictionary."""
    parser = ArgumentParser()
    parser.add_argument('filename')
    parser.add_argument(
        '--root',
        default=DEFAULT_SOURCE_ROOT,
        help='source checkout root stripped from module paths',
    )
    args = parser.parse_args()
    root = args.root if args.root.endswith('/') else args.root + '/'

    funcs_per_module = get_stored_funcs_dict(args.filename)

    print_hierarchy(build_folder_hierarchy(funcs_per_module.keys(), prefix=root))

    # Add up all the functions in the TableGen folder.
    tablegen_prefix = root + 'llvm/utils/TableGen'
    functions_in_tablegen = {
        module: funcs
        for module, funcs in funcs_per_module.items()
        if module.startswith(tablegen_prefix)
    }
    total_functions = len(unique_funcs(functions_in_tablegen))

    print_stats(funcs_per_module)

    print(f'Functions in TableGen: {total_functions}')


if __name__ == '__main__':
    main()
from subprocess import run, PIPE


def demangle_function_name(function_name, tool="llvm-cxxfilt"):
    """Demangle a symbol name by running an external demangler on it.

    The name is passed to the tool as a single command-line argument without
    going through a shell, so characters such as ``;``, ``$`` or backticks in
    *function_name* are never interpreted as shell syntax.

    Args:
        function_name: the (possibly mangled) symbol name.
        tool: executable to run; defaults to ``llvm-cxxfilt``.

    Returns:
        The tool's standard output decoded as UTF-8 with surrounding
        whitespace stripped. If the tool cannot be started, a warning is
        written to stderr and *function_name* is returned unchanged.
    """
    try:
        result = run([tool, function_name], stdout=PIPE)
    except OSError as err:
        # Raised when the executable is missing or not runnable; report it
        # and fall back to the unmodified name instead of crashing.
        import sys

        print(f"warning: could not run {tool}: {err}", file=sys.stderr)
        return function_name
    return result.stdout.decode('utf-8').strip()
module name to traced_functions.txt + std::ofstream out; + out.open("traced_functions.txt", std::ios::app); + + if (!out) { + errs() << "Error: cannot open file traced_functions.txt \n"; + return PreservedAnalyses::none(); + } + out << F.getName().str() << " instrumented in " << M.getName().str() + << "\n"; + out.close(); + std::string outputString = F.getName().str() + " %lld\n"; StringRef funcFormatStr = StringRef(outputString); std::string fileName = "function_trace.txt"; @@ -37,6 +49,11 @@ CFFunctionInstrumentationPass::run(Module &M, ModuleAnalysisManager &AM) { // for all return instructions, print the return value to a file with the // name of the function + auto GV = new GlobalVariable( + M, Type::getInt32Ty(M.getContext()), true, + GlobalValue::LinkageTypes::PrivateLinkage, + ConstantInt::get(IntegerType::getInt32Ty(M.getContext()), 0), + "init" + F.getName()); // store already handled blocks std::set HandledBlocks; for (auto &BB : F) { @@ -46,7 +63,7 @@ CFFunctionInstrumentationPass::run(Module &M, ModuleAnalysisManager &AM) { // Do NOT reinstrument the inserted blocks if (BB.getName() == "return" || BB.getName() == "print" || BB.getName() == "open") { - HandledBlocks.insert(&BB); + HandledBlocks.insert(&BB); continue; } if (auto *RI = dyn_cast(BB.getTerminator())) { @@ -76,10 +93,44 @@ CFFunctionInstrumentationPass::run(Module &M, ModuleAnalysisManager &AM) { // split at return BasicBlock *ReturnBB = BB.splitBasicBlock(RI, "return", false); + BasicBlock *CheckBB = + BasicBlock::Create(M.getContext(), "access_check", &F); + BasicBlock *Check2BB = + BasicBlock::Create(M.getContext(), "no_init_check", &F); BasicBlock *AccessBB = BasicBlock::Create(M.getContext(), "access", &F); + BasicBlock *UpdateGVBB = + BasicBlock::Create(M.getContext(), "update", &F); + BasicBlock *NoAccessBB = + BasicBlock::Create(M.getContext(), "no_access", &F); + BasicBlock *PrintBB = BasicBlock::Create(M.getContext(), "print", &F); + LLVM_DEBUG(dbgs() << "Created BBs\n"); + + 
IRBuilder<> CheckBuilder(CheckBB); + + Value *GV_value = CheckBuilder.CreateLoad( + IntegerType::getInt32Ty(M.getContext()), GV); + + // check if GV is 0 (not yet accessed) or -1 (no access) or 1 (access) + Value *CmpGV = CheckBuilder.CreateICmpEQ( + GV_value, + ConstantInt::get(IntegerType::getInt32Ty(M.getContext()), 1)); + + CheckBuilder.CreateCondBr(CmpGV, PrintBB, Check2BB); + + LLVM_DEBUG(dbgs() << "Created check BB\n"); + + IRBuilder<> Check2Builder(Check2BB); + Value *CmpGV2 = Check2Builder.CreateICmpEQ( + GV_value, + ConstantInt::get(IntegerType::getInt32Ty(M.getContext()), -1)); + + Check2Builder.CreateCondBr(CmpGV2, ReturnBB, AccessBB); + + LLVM_DEBUG(dbgs() << "Created check2 BB\n"); + IRBuilder<> AccessBuilder(AccessBB); // insert call to access function with filename and 0 Value *status = AccessBuilder.CreateCall( @@ -88,7 +139,25 @@ CFFunctionInstrumentationPass::run(Module &M, ModuleAnalysisManager &AM) { Value *Cmp = AccessBuilder.CreateICmpEQ( status, ConstantInt::get(IntegerType::getInt32Ty(M.getContext()), 0)); - AccessBuilder.CreateCondBr(Cmp, PrintBB, ReturnBB); + AccessBuilder.CreateCondBr(Cmp, UpdateGVBB, NoAccessBB); + + LLVM_DEBUG(dbgs() << "Created access BB\n"); + + IRBuilder<> NoAccessBuilder(NoAccessBB); + NoAccessBuilder.CreateStore( + ConstantInt::get(IntegerType::getInt32Ty(M.getContext()), -1), + GV); + NoAccessBuilder.CreateBr(ReturnBB); + + LLVM_DEBUG(dbgs() << "Created no access BB\n"); + + IRBuilder<> UpdateGVBuilder(UpdateGVBB); + UpdateGVBuilder.CreateStore( + ConstantInt::get(IntegerType::getInt32Ty(M.getContext()), 1), GV); + + UpdateGVBuilder.CreateBr(PrintBB); + + LLVM_DEBUG(dbgs() << "Created update BB\n"); IRBuilder<> PrintBuilder(PrintBB); FunctionCallee PrintFunc = M.getOrInsertFunction( @@ -119,7 +188,7 @@ CFFunctionInstrumentationPass::run(Module &M, ModuleAnalysisManager &AM) { PrintBuilder.CreateCall(CloseFunc, write_fptr); PrintBuilder.CreateBr(ReturnBB); - BB.getTerminator()->setSuccessor(0, AccessBB); + 
BB.getTerminator()->setSuccessor(0, CheckBB); // place new BBs in the correct order ReturnBB->moveAfter(PrintBB); @@ -127,6 +196,10 @@ CFFunctionInstrumentationPass::run(Module &M, ModuleAnalysisManager &AM) { HandledBlocks.insert(AccessBB); HandledBlocks.insert(PrintBB); HandledBlocks.insert(ReturnBB); + HandledBlocks.insert(CheckBB); + HandledBlocks.insert(Check2BB); + HandledBlocks.insert(UpdateGVBB); + HandledBlocks.insert(NoAccessBB); } } HandledBlocks.insert(&BB); diff --git a/print_example/instrumented.ll b/print_example/instrumented.ll index 0863922ac60f1..9e988e056cb01 100644 --- a/print_example/instrumented.ll +++ b/print_example/instrumented.ll @@ -1,30 +1,82 @@ ; ModuleID = 'test.ll' source_filename = "test.ll" +@initfoo = private constant i32 0 @0 = private unnamed_addr constant [19 x i8] c"function_trace.txt\00", align 1 @1 = private unnamed_addr constant [2 x i8] c"a\00", align 1 @2 = private unnamed_addr constant [10 x i8] c"foo %lld\0A\00", align 1 -@3 = private unnamed_addr constant [10 x i8] c"baz %lld\0A\00", align 1 +@initbat = private constant i32 0 +@3 = private unnamed_addr constant [10 x i8] c"bat %lld\0A\00", align 1 +@initbaz = private constant i32 0 +@4 = private unnamed_addr constant [10 x i8] c"baz %lld\0A\00", align 1 define i1 @foo() { - br label %access + br label %access_check -access: ; preds = %0 - %1 = call i32 @access(ptr @0, i32 0) - %2 = icmp eq i32 %1, 0 - br i1 %2, label %print, label %return +access_check: ; preds = %0 + %1 = load i32, ptr @initfoo, align 4 + %2 = icmp eq i32 %1, 1 + br i1 %2, label %print, label %no_init_check + +no_init_check: ; preds = %access_check + %3 = icmp eq i32 %1, -1 + br i1 %3, label %return, label %access + +access: ; preds = %no_init_check + %4 = call i32 @access(ptr @0, i32 0) + %5 = icmp eq i32 %4, 0 + br i1 %5, label %update, label %no_access + +update: ; preds = %access + store i32 1, ptr @initfoo, align 4 + br label %print + +no_access: ; preds = %access + store i32 -1, ptr @initfoo, 
align 4 + br label %return -print: ; preds = %access - %3 = call ptr @fopen(ptr @0, ptr @1) - call void (ptr, ...) @fprintf(ptr %3, ptr @2, i1 false) - %4 = call i32 @fclose(ptr %3) +print: ; preds = %update, %access_check + %6 = call ptr @fopen(ptr @0, ptr @1) + call void (ptr, ...) @fprintf(ptr %6, ptr @2, i1 false) + %7 = call i32 @fclose(ptr %6) br label %return -return: ; preds = %print, %access +return: ; preds = %print, %no_access, %no_init_check ret i1 false } define i32 @bat() { + br label %access_check + +access_check: ; preds = %0 + %1 = load i32, ptr @initbat, align 4 + %2 = icmp eq i32 %1, 1 + br i1 %2, label %print, label %no_init_check + +no_init_check: ; preds = %access_check + %3 = icmp eq i32 %1, -1 + br i1 %3, label %return, label %access + +access: ; preds = %no_init_check + %4 = call i32 @access(ptr @0, i32 0) + %5 = icmp eq i32 %4, 0 + br i1 %5, label %update, label %no_access + +update: ; preds = %access + store i32 1, ptr @initbat, align 4 + br label %print + +no_access: ; preds = %access + store i32 -1, ptr @initbat, align 4 + br label %return + +print: ; preds = %update, %access_check + %6 = call ptr @fopen(ptr @0, ptr @1) + call void (ptr, ...) 
@fprintf(ptr %6, ptr @3, i32 0) + %7 = call i32 @fclose(ptr %6) + br label %return + +return: ; preds = %print, %no_access, %no_init_check ret i32 0 } @@ -43,20 +95,37 @@ else: ; preds = %0 br label %end end: ; preds = %else, %then - br label %access + br label %access_check -access: ; preds = %end - %2 = call i32 @access(ptr @0, i32 0) - %3 = icmp eq i32 %2, 0 - br i1 %3, label %print, label %return +access_check: ; preds = %end + %2 = load i32, ptr @initbaz, align 4 + %3 = icmp eq i32 %2, 1 + br i1 %3, label %print, label %no_init_check + +no_init_check: ; preds = %access_check + %4 = icmp eq i32 %2, -1 + br i1 %4, label %return, label %access + +access: ; preds = %no_init_check + %5 = call i32 @access(ptr @0, i32 0) + %6 = icmp eq i32 %5, 0 + br i1 %6, label %update, label %no_access + +update: ; preds = %access + store i32 1, ptr @initbaz, align 4 + br label %print + +no_access: ; preds = %access + store i32 -1, ptr @initbaz, align 4 + br label %return -print: ; preds = %access - %4 = call ptr @fopen(ptr @0, ptr @1) - call void (ptr, ...) @fprintf(ptr %4, ptr @3, i32 5) - %5 = call i32 @fclose(ptr %4) +print: ; preds = %update, %access_check + %7 = call ptr @fopen(ptr @0, ptr @1) + call void (ptr, ...) @fprintf(ptr %7, ptr @4, i32 5) + %8 = call i32 @fclose(ptr %7) br label %return -return: ; preds = %print, %access +return: ; preds = %print, %no_access, %no_init_check ret i32 5 } diff --git a/print_example/test.c b/print_example/test.c deleted file mode 100644 index d38ae81af67f7..0000000000000 --- a/print_example/test.c +++ /dev/null @@ -1,2 +0,0 @@ - -int main() { return 0; }