From f5817049a93ce1c037b36fb8970a4c0cfdaf30e5 Mon Sep 17 00:00:00 2001
From: NAKAMURA Takumi
Date: Wed, 19 Jun 2024 14:56:13 +0900
Subject: [PATCH] [MC/DC][Coverage] Make tvbitmapupdate capable of atomic write

This also introduces a "test and conditional read-modify-write" sequence:
the bitmap byte is loaded and tested first, and the branch that performs the
`atomicrmw or` is marked as `unlikely`.

This includes #96040.
---
 .../Instrumentation/InstrProfiling.cpp     | 86 ++++++++++++++++---
 .../Instrumentation/InstrProfiling/mcdc.ll | 21 ++++-
 2 files changed, 92 insertions(+), 15 deletions(-)

diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index b9f1fcdd9c2339..3d973b28cfb3ef 100644
--- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -38,6 +38,7 @@
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Type.h"
 #include "llvm/InitializePasses.h"
@@ -237,6 +238,10 @@
   GlobalVariable *NamesVar = nullptr;
   size_t NamesSize = 0;
 
+  /// The instance of [[alwaysinline]] rmw_or(ptr, i8).
+  /// Its symbol name is not significant.
+  Function *RMWOrFunc = nullptr;
+
   // vector of counter load/store pairs to be register promoted.
   std::vector<LoadStorePair> PromotionCandidates;
 
@@ -297,6 +302,14 @@
                                       StringRef Name,
                                       GlobalValue::LinkageTypes Linkage);
 
+  /// Create [[alwaysinline]] rmw_or(ptr, i8).
+  /// This doesn't update `RMWOrFunc`.
+  Function *createRMWOrFunc();
+
+  /// Create a call to `rmw_or`, creating the function instance first if it
+  /// doesn't exist yet.
+  CallInst *getRMWOrCall(Value *Addr, Value *Val);
+
   /// Compute the address of the test vector bitmap that this profiling
   /// instruction acts on.
   Value *getBitmapAddress(InstrProfMCDCTVBitmapUpdate *I);
@@ -937,6 +950,67 @@ Value *InstrLowerer::getCounterAddress(InstrProfCntrInstBase *I) {
   return Builder.CreateIntToPtr(Add, Addr->getType());
 }
 
+Function *InstrLowerer::createRMWOrFunc() {
+  auto &Ctx = M.getContext();
+  auto *Int8Ty = Type::getInt8Ty(Ctx);
+  // void alwaysinline rmw_or(ptr, i8)
+  Function *Fn = Function::Create(
+      FunctionType::get(Type::getVoidTy(Ctx),
+                        {PointerType::getUnqual(Ctx), Int8Ty}, false),
+      Function::LinkageTypes::PrivateLinkage, "rmw_or", M);
+  Fn->addFnAttr(Attribute::AlwaysInline);
+  auto *ArgAddr = Fn->getArg(0);
+  auto *ArgVal = Fn->getArg(1);
+  IRBuilder<> Builder(BasicBlock::Create(Ctx, "", Fn));
+
+  // Load profile bitmap byte.
+  // %mcdc.bits = load i8, ptr %4, align 1
+  auto *Bitmap = Builder.CreateLoad(Int8Ty, ArgAddr, "mcdc.bits");
+
+  if (Options.Atomic || AtomicCounterUpdateAll) {
+    // If ((Bitmap & Val) != Val), then execute atomic (Bitmap |= Val).
+    // Note that the just-loaded Bitmap may already be stale; it is used only
+    // for the early test.
+    auto *Masked = Builder.CreateAnd(Bitmap, ArgVal);
+    auto *ShouldStore = Builder.CreateICmpNE(Masked, ArgVal);
+    auto *ThenTerm = BasicBlock::Create(Ctx, "", Fn);
+    auto *ElseTerm = BasicBlock::Create(Ctx, "", Fn);
+    // Assume updating will be rare.
+    auto *Unlikely = MDBuilder(Ctx).createUnlikelyBranchWeights();
+    Builder.CreateCondBr(ShouldStore, ThenTerm, ElseTerm, Unlikely);
+
+    IRBuilder<> ThenBuilder(ThenTerm);
+    ThenBuilder.CreateAtomicRMW(AtomicRMWInst::Or, ArgAddr, ArgVal,
+                                MaybeAlign(), AtomicOrdering::Monotonic);
+    ThenBuilder.CreateRetVoid();
+
+    IRBuilder<> ElseBuilder(ElseTerm);
+    ElseBuilder.CreateRetVoid();
+
+    return Fn;
+  }
+
+  // Perform logical OR of profile bitmap byte and shifted bit offset.
+  // %8 = or i8 %mcdc.bits, %7
+  auto *Result = Builder.CreateOr(Bitmap, ArgVal);
+
+  // Store the updated profile bitmap byte.
+  // store i8 %8, ptr %3, align 1
+  Builder.CreateStore(Result, ArgAddr);
+
+  // Terminator
+  Builder.CreateRetVoid();
+
+  return Fn;
+}
+
+CallInst *InstrLowerer::getRMWOrCall(Value *Addr, Value *Val) {
+  if (!RMWOrFunc)
+    RMWOrFunc = createRMWOrFunc();
+
+  return CallInst::Create(RMWOrFunc, {Addr, Val});
+}
+
 Value *InstrLowerer::getBitmapAddress(InstrProfMCDCTVBitmapUpdate *I) {
   auto *Bitmaps = getOrCreateRegionBitmaps(I);
   IRBuilder<> Builder(I);
@@ -1044,17 +1118,7 @@ void InstrLowerer::lowerMCDCTestVectorBitmapUpdate(
   // %7 = shl i8 1, %6
   auto *ShiftedVal = Builder.CreateShl(Builder.getInt8(0x1), BitToSet);
 
-  // Load profile bitmap byte.
-  // %mcdc.bits = load i8, ptr %4, align 1
-  auto *Bitmap = Builder.CreateLoad(Int8Ty, BitmapByteAddr, "mcdc.bits");
-
-  // Perform logical OR of profile bitmap byte and shifted bit offset.
-  // %8 = or i8 %mcdc.bits, %7
-  auto *Result = Builder.CreateOr(Bitmap, ShiftedVal);
-
-  // Store the updated profile bitmap byte.
-  // store i8 %8, ptr %3, align 1
-  Builder.CreateStore(Result, BitmapByteAddr);
+  Builder.Insert(getRMWOrCall(BitmapByteAddr, ShiftedVal));
 
   Update->eraseFromParent();
 }
diff --git a/llvm/test/Instrumentation/InstrProfiling/mcdc.ll b/llvm/test/Instrumentation/InstrProfiling/mcdc.ll
index 4980b45f90c50a..d3adc590d37794 100644
--- a/llvm/test/Instrumentation/InstrProfiling/mcdc.ll
+++ b/llvm/test/Instrumentation/InstrProfiling/mcdc.ll
@@ -1,5 +1,6 @@
 ; Check that MC/DC intrinsics are properly lowered
-; RUN: opt < %s -passes=instrprof -S | FileCheck %s
+; RUN: opt < %s -passes=instrprof -S | FileCheck %s --check-prefixes=CHECK,BASIC
+; RUN: opt < %s -passes=instrprof -S -instrprof-atomic-counter-update-all | FileCheck %s --check-prefixes=CHECK,ATOMIC
 ; RUN: opt < %s -passes=instrprof -runtime-counter-relocation -S 2>&1 | FileCheck %s --check-prefix RELOC
 
 ; RELOC: Runtime counter relocation is presently not supported for MC/DC bitmaps
@@ -30,12 +31,24 @@ entry:
   ; CHECK-NEXT: %[[LAB8:[0-9]+]] = and i32 %[[TEMP]], 7
   ; CHECK-NEXT: %[[LAB9:[0-9]+]] = trunc i32 %[[LAB8]] to i8
   ; CHECK-NEXT: %[[LAB10:[0-9]+]] = shl i8 1, %[[LAB9]]
-  ; CHECK-NEXT: %[[BITS:mcdc.*]] = load i8, ptr %[[LAB7]], align 1
-  ; CHECK-NEXT: %[[LAB11:[0-9]+]] = or i8 %[[BITS]], %[[LAB10]]
-  ; CHECK-NEXT: store i8 %[[LAB11]], ptr %[[LAB7]], align 1
+  ; CHECK-NEXT: call void @[[RMW_OR:.+]](ptr %[[LAB7]], i8 %[[LAB10]])
   ret void
 }
 
+; CHECK: define private void @[[RMW_OR]](ptr %[[ARGPTR:.+]], i8 %[[ARGVAL:.+]])
+; CHECK: %[[BITS:.+]] = load i8, ptr %[[ARGPTR]], align 1
+; BASIC-NEXT: %[[LAB11:[0-9]+]] = or i8 %[[BITS]], %[[ARGVAL]]
+; BASIC-NEXT: store i8 %[[LAB11]], ptr %[[ARGPTR]], align 1
+; ATOMIC-NEXT: %[[MASKED:.+]] = and i8 %[[BITS]], %[[ARGVAL]]
+; ATOMIC-NEXT: %[[SHOULDWRITE:.+]] = icmp ne i8 %[[MASKED]], %[[ARGVAL]]
+; ATOMIC-NEXT: br i1 %[[SHOULDWRITE]], label %[[WRITE:.+]], label %[[SKIP:.+]], !prof ![[MDPROF:[0-9]+]]
+; ATOMIC: [[WRITE]]:
+; ATOMIC-NEXT: %{{.+}} = atomicrmw or ptr %[[ARGPTR]], i8 %[[ARGVAL]] monotonic, align 1
+; ATOMIC-NEXT: ret void
+; ATOMIC: [[SKIP]]:
+; CHECK-NEXT: ret void
+; ATOMIC: ![[MDPROF]] = !{!"branch_weights", i32 1, i32 1048575}
+
 declare void @llvm.instrprof.cover(ptr, i64, i32, i32)
 
 declare void @llvm.instrprof.mcdc.parameters(ptr, i64, i32)
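
A rough C-level sketch of what the generated `rmw_or` helper does in the
atomic configuration (illustration only, not part of the patch: the pass
emits the IR directly, the helper name is taken from the patch, and the
`__atomic_fetch_or` builtin stands in for the emitted `atomicrmw or`, with
LLVM's `monotonic` ordering corresponding to C's relaxed ordering):

  #include <stdint.h>

  // Test first, then conditionally perform the atomic read-modify-write.
  static inline void rmw_or(uint8_t *Addr, uint8_t Val) {
    uint8_t Bits = *Addr;     // plain load; the value may already be stale
    if ((Bits & Val) != Val)  // unlikely: the bit still needs to be set
      __atomic_fetch_or(Addr, Val, __ATOMIC_RELAXED);
  }

In the non-atomic configuration the helper degenerates to the plain
load/or/store of the bitmap byte, as before.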