From ddb75ca6062c9312896647c206f8b96574bef11d Mon Sep 17 00:00:00 2001
From: Chaitanya
Date: Wed, 24 Jul 2024 12:19:57 +0530
Subject: [PATCH] [AMDGPU] Utilities to asan instrument memory instructions.
 (#98863)

This change adds the utilities required to ASan-instrument memory
instructions. In the "amdgpu-sw-lower-lds" pass (#87265), lowering LDS
accesses to global memory creates new global-memory instructions, which
need to be ASan-instrumented.
---
 .../Utils/AMDGPUAsanInstrumentation.cpp       | 332 ++++++++++++++++++
 .../AMDGPU/Utils/AMDGPUAsanInstrumentation.h  |  60 ++++
 llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt   |   1 +
 3 files changed, 393 insertions(+)
 create mode 100644 llvm/lib/Target/AMDGPU/Utils/AMDGPUAsanInstrumentation.cpp
 create mode 100644 llvm/lib/Target/AMDGPU/Utils/AMDGPUAsanInstrumentation.h

diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsanInstrumentation.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsanInstrumentation.cpp
new file mode 100644
index 00000000000000..593fca5bc3ed68
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsanInstrumentation.cpp
@@ -0,0 +1,332 @@
+//===AMDGPUAsanInstrumentation.cpp - ASAN related helper functions===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===-------------------------------------------------------------===//
+
+#include "AMDGPUAsanInstrumentation.h"
+
+#define DEBUG_TYPE "amdgpu-asan-instrumentation"
+
+using namespace llvm;
+
+namespace llvm {
+namespace AMDGPU {
+
+static uint64_t getRedzoneSizeForScale(int AsanScale) {
+  // Redzone used for stack and globals is at least 32 bytes.
+  // For scales 6 and 7, the redzone has to be 64 and 128 bytes respectively.
+  return std::max(32U, 1U << AsanScale);
+}
+
+static uint64_t getMinRedzoneSizeForGlobal(int AsanScale) {
+  return getRedzoneSizeForScale(AsanScale);
+}
+
+uint64_t getRedzoneSizeForGlobal(int AsanScale, uint64_t SizeInBytes) {
+  constexpr uint64_t kMaxRZ = 1 << 18;
+  const uint64_t MinRZ = getMinRedzoneSizeForGlobal(AsanScale);
+
+  uint64_t RZ = 0;
+  if (SizeInBytes <= MinRZ / 2) {
+    // Reduce redzone size for small size objects, e.g. int, char[1]. MinRZ is
+    // at least 32 bytes, optimize when SizeInBytes is less than or equal to
+    // half of MinRZ.
+    RZ = MinRZ - SizeInBytes;
+  } else {
+    // Calculate RZ, where MinRZ <= RZ <= MaxRZ, and RZ ~ 1/4 * SizeInBytes.
+    RZ = std::clamp((SizeInBytes / MinRZ / 4) * MinRZ, MinRZ, kMaxRZ);
+
+    // Round up to multiple of MinRZ.
+    if (SizeInBytes % MinRZ)
+      RZ += MinRZ - (SizeInBytes % MinRZ);
+  }
+
+  assert((RZ + SizeInBytes) % MinRZ == 0);
+
+  return RZ;
+}
+
+static size_t TypeStoreSizeToSizeIndex(uint32_t TypeSize) {
+  size_t Res = llvm::countr_zero(TypeSize / 8);
+  return Res;
+}
+
+static Instruction *genAMDGPUReportBlock(Module &M, IRBuilder<> &IRB,
+                                         Value *Cond, bool Recover) {
+  Value *ReportCond = Cond;
+  if (!Recover) {
+    auto *Ballot =
+        IRB.CreateIntrinsic(Intrinsic::amdgcn_ballot, IRB.getInt64Ty(), {Cond});
+    ReportCond = IRB.CreateIsNotNull(Ballot);
+  }
+
+  auto *Trm = SplitBlockAndInsertIfThen(
+      ReportCond, &*IRB.GetInsertPoint(), false,
+      MDBuilder(M.getContext()).createUnlikelyBranchWeights());
+  Trm->getParent()->setName("asan.report");
+
+  if (Recover)
+    return Trm;
+
+  Trm = SplitBlockAndInsertIfThen(Cond, Trm, false);
+  IRB.SetInsertPoint(Trm);
+  return IRB.CreateIntrinsic(Intrinsic::amdgcn_unreachable, {}, {});
+}
+
+static Value *createSlowPathCmp(Module &M, IRBuilder<> &IRB, Type *IntptrTy,
+                                Value *AddrLong, Value *ShadowValue,
+                                uint32_t TypeStoreSize, int AsanScale) {
+  uint64_t Granularity = static_cast<uint64_t>(1) << AsanScale;
+  // Addr & (Granularity - 1)
+  Value *LastAccessedByte =
+      IRB.CreateAnd(AddrLong, ConstantInt::get(IntptrTy, Granularity - 1));
+  // (Addr & (Granularity - 1)) + size - 1
+  if (TypeStoreSize / 8 > 1)
+    LastAccessedByte = IRB.CreateAdd(
+        LastAccessedByte, ConstantInt::get(IntptrTy, TypeStoreSize / 8 - 1));
+  // (uint8_t) ((Addr & (Granularity-1)) + size - 1)
+  LastAccessedByte =
+      IRB.CreateIntCast(LastAccessedByte, ShadowValue->getType(), false);
+  // ((uint8_t) ((Addr & (Granularity-1)) + size - 1)) >= ShadowValue
+  return IRB.CreateICmpSGE(LastAccessedByte, ShadowValue);
+}
+
+static Instruction *generateCrashCode(Module &M, IRBuilder<> &IRB,
+                                      Type *IntptrTy, Instruction *InsertBefore,
+                                      Value *Addr, bool IsWrite,
+                                      size_t AccessSizeIndex,
+                                      Value *SizeArgument, bool Recover) {
+  IRB.SetInsertPoint(InsertBefore);
+  CallInst *Call = nullptr;
+  SmallString<128> kAsanReportErrorTemplate{"__asan_report_"};
+  SmallString<64> TypeStr{IsWrite ? "store" : "load"};
+  SmallString<64> EndingStr{Recover ? "_noabort" : ""};
"_noabort" : ""}; + + SmallString<128> AsanErrorCallbackSizedString; + raw_svector_ostream AsanErrorCallbackSizedOS(AsanErrorCallbackSizedString); + AsanErrorCallbackSizedOS << kAsanReportErrorTemplate << TypeStr << "_n" + << EndingStr; + + SmallVector Args2 = {IntptrTy, IntptrTy}; + AttributeList AL2; + FunctionCallee AsanErrorCallbackSized = M.getOrInsertFunction( + AsanErrorCallbackSizedOS.str(), + FunctionType::get(IRB.getVoidTy(), Args2, false), AL2); + SmallVector Args1{1, IntptrTy}; + AttributeList AL1; + + SmallString<128> AsanErrorCallbackString; + raw_svector_ostream AsanErrorCallbackOS(AsanErrorCallbackString); + AsanErrorCallbackOS << kAsanReportErrorTemplate << TypeStr + << (1ULL << AccessSizeIndex) << EndingStr; + + FunctionCallee AsanErrorCallback = M.getOrInsertFunction( + AsanErrorCallbackOS.str(), + FunctionType::get(IRB.getVoidTy(), Args1, false), AL1); + if (SizeArgument) { + Call = IRB.CreateCall(AsanErrorCallbackSized, {Addr, SizeArgument}); + } else { + Call = IRB.CreateCall(AsanErrorCallback, Addr); + } + + Call->setCannotMerge(); + return Call; +} + +static Value *memToShadow(Module &M, IRBuilder<> &IRB, Type *IntptrTy, + Value *Shadow, int AsanScale, uint32_t AsanOffset) { + // Shadow >> scale + Shadow = IRB.CreateLShr(Shadow, AsanScale); + if (AsanOffset == 0) + return Shadow; + // (Shadow >> scale) | offset + Value *ShadowBase = ConstantInt::get(IntptrTy, AsanOffset); + return IRB.CreateAdd(Shadow, ShadowBase); +} + +void instrumentAddress(Module &M, IRBuilder<> &IRB, Instruction *OrigIns, + Instruction *InsertBefore, Value *Addr, + MaybeAlign Alignment, uint32_t TypeStoreSize, + bool IsWrite, Value *SizeArgument, bool UseCalls, + bool Recover, int AsanScale, int AsanOffset) { + Type *AddrTy = Addr->getType(); + Type *IntptrTy = M.getDataLayout().getIntPtrType( + M.getContext(), AddrTy->getPointerAddressSpace()); + IRB.SetInsertPoint(InsertBefore); + size_t AccessSizeIndex = TypeStoreSizeToSizeIndex(TypeStoreSize); + Type *ShadowTy = IntegerType::get(M.getContext(), + std::max(8U, TypeStoreSize >> AsanScale)); + Type *ShadowPtrTy = PointerType::get(ShadowTy, 0); + Value *AddrLong = IRB.CreatePtrToInt(Addr, IntptrTy); + Value *ShadowPtr = + memToShadow(M, IRB, IntptrTy, AddrLong, AsanScale, AsanOffset); + const uint64_t ShadowAlign = + std::max(Alignment.valueOrOne().value() >> AsanScale, 1); + Value *ShadowValue = IRB.CreateAlignedLoad( + ShadowTy, IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy), Align(ShadowAlign)); + Value *Cmp = IRB.CreateIsNotNull(ShadowValue); + auto *Cmp2 = createSlowPathCmp(M, IRB, IntptrTy, AddrLong, ShadowValue, + TypeStoreSize, AsanScale); + Cmp = IRB.CreateAnd(Cmp, Cmp2); + Instruction *CrashTerm = genAMDGPUReportBlock(M, IRB, Cmp, Recover); + Instruction *Crash = + generateCrashCode(M, IRB, IntptrTy, CrashTerm, AddrLong, IsWrite, + AccessSizeIndex, SizeArgument, Recover); + Crash->setDebugLoc(OrigIns->getDebugLoc()); + return; +} + +void getInterestingMemoryOperands( + Module &M, Instruction *I, + SmallVectorImpl &Interesting) { + const DataLayout &DL = M.getDataLayout(); + if (LoadInst *LI = dyn_cast(I)) { + Interesting.emplace_back(I, LI->getPointerOperandIndex(), false, + LI->getType(), LI->getAlign()); + } else if (StoreInst *SI = dyn_cast(I)) { + Interesting.emplace_back(I, SI->getPointerOperandIndex(), true, + SI->getValueOperand()->getType(), SI->getAlign()); + } else if (AtomicRMWInst *RMW = dyn_cast(I)) { + Interesting.emplace_back(I, RMW->getPointerOperandIndex(), true, + RMW->getValOperand()->getType(), std::nullopt); + } else 
+    Interesting.emplace_back(I, XCHG->getPointerOperandIndex(), true,
+                             XCHG->getCompareOperand()->getType(),
+                             std::nullopt);
+  } else if (auto CI = dyn_cast<IntrinsicInst>(I)) {
+    switch (CI->getIntrinsicID()) {
+    case Intrinsic::masked_load:
+    case Intrinsic::masked_store:
+    case Intrinsic::masked_gather:
+    case Intrinsic::masked_scatter: {
+      bool IsWrite = CI->getType()->isVoidTy();
+      // Masked store has an initial operand for the value.
+      unsigned OpOffset = IsWrite ? 1 : 0;
+      Type *Ty = IsWrite ? CI->getArgOperand(0)->getType() : CI->getType();
+      MaybeAlign Alignment = Align(1);
+      // Otherwise no alignment guarantees. We probably got Undef.
+      if (auto *Op = dyn_cast<ConstantInt>(CI->getOperand(1 + OpOffset)))
+        Alignment = Op->getMaybeAlignValue();
+      Value *Mask = CI->getOperand(2 + OpOffset);
+      Interesting.emplace_back(I, OpOffset, IsWrite, Ty, Alignment, Mask);
+      break;
+    }
+    case Intrinsic::masked_expandload:
+    case Intrinsic::masked_compressstore: {
+      bool IsWrite = CI->getIntrinsicID() == Intrinsic::masked_compressstore;
+      unsigned OpOffset = IsWrite ? 1 : 0;
+      auto BasePtr = CI->getOperand(OpOffset);
+      MaybeAlign Alignment = BasePtr->getPointerAlignment(DL);
+      Type *Ty = IsWrite ? CI->getArgOperand(0)->getType() : CI->getType();
+      IRBuilder<> IB(I);
+      Value *Mask = CI->getOperand(1 + OpOffset);
+      Type *IntptrTy = M.getDataLayout().getIntPtrType(
+          M.getContext(), BasePtr->getType()->getPointerAddressSpace());
+      // Use the popcount of Mask as the effective vector length.
+      Type *ExtTy = VectorType::get(IntptrTy, cast<VectorType>(Ty));
+      Value *ExtMask = IB.CreateZExt(Mask, ExtTy);
+      Value *EVL = IB.CreateAddReduce(ExtMask);
+      Value *TrueMask = ConstantInt::get(Mask->getType(), 1);
+      Interesting.emplace_back(I, OpOffset, IsWrite, Ty, Alignment, TrueMask,
+                               EVL);
+      break;
+    }
+    case Intrinsic::vp_load:
+    case Intrinsic::vp_store:
+    case Intrinsic::experimental_vp_strided_load:
+    case Intrinsic::experimental_vp_strided_store: {
+      auto *VPI = cast<VPIntrinsic>(CI);
+      unsigned IID = CI->getIntrinsicID();
+      bool IsWrite = CI->getType()->isVoidTy();
+      unsigned PtrOpNo = *VPI->getMemoryPointerParamPos(IID);
+      Type *Ty = IsWrite ? CI->getArgOperand(0)->getType() : CI->getType();
+      MaybeAlign Alignment = VPI->getOperand(PtrOpNo)->getPointerAlignment(DL);
+      Value *Stride = nullptr;
+      if (IID == Intrinsic::experimental_vp_strided_store ||
+          IID == Intrinsic::experimental_vp_strided_load) {
+        Stride = VPI->getOperand(PtrOpNo + 1);
+        // Use the pointer alignment as the element alignment if the stride is
+        // a multiple of the pointer alignment. Otherwise, the element
+        // alignment should be Align(1).
+        unsigned PointerAlign = Alignment.valueOrOne().value();
+        if (!isa<ConstantInt>(Stride) ||
+            cast<ConstantInt>(Stride)->getZExtValue() % PointerAlign != 0)
+          Alignment = Align(1);
+      }
+      Interesting.emplace_back(I, PtrOpNo, IsWrite, Ty, Alignment,
+                               VPI->getMaskParam(), VPI->getVectorLengthParam(),
+                               Stride);
+      break;
+    }
+    case Intrinsic::vp_gather:
+    case Intrinsic::vp_scatter: {
+      auto *VPI = cast<VPIntrinsic>(CI);
+      unsigned IID = CI->getIntrinsicID();
+      bool IsWrite = IID == Intrinsic::vp_scatter;
+      unsigned PtrOpNo = *VPI->getMemoryPointerParamPos(IID);
+      Type *Ty = IsWrite ? CI->getArgOperand(0)->getType() : CI->getType();
+      MaybeAlign Alignment = VPI->getPointerAlignment();
+      Interesting.emplace_back(I, PtrOpNo, IsWrite, Ty, Alignment,
+                               VPI->getMaskParam(),
+                               VPI->getVectorLengthParam());
+      break;
+    }
+    case Intrinsic::amdgcn_raw_buffer_load:
+    case Intrinsic::amdgcn_raw_ptr_buffer_load:
+    case Intrinsic::amdgcn_raw_buffer_load_format:
+    case Intrinsic::amdgcn_raw_ptr_buffer_load_format:
+    case Intrinsic::amdgcn_raw_tbuffer_load:
+    case Intrinsic::amdgcn_raw_ptr_tbuffer_load:
+    case Intrinsic::amdgcn_struct_buffer_load:
+    case Intrinsic::amdgcn_struct_ptr_buffer_load:
+    case Intrinsic::amdgcn_struct_buffer_load_format:
+    case Intrinsic::amdgcn_struct_ptr_buffer_load_format:
+    case Intrinsic::amdgcn_struct_tbuffer_load:
+    case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
+    case Intrinsic::amdgcn_s_buffer_load:
+    case Intrinsic::amdgcn_global_load_tr_b64:
+    case Intrinsic::amdgcn_global_load_tr_b128: {
+      unsigned PtrOpNo = 0;
+      bool IsWrite = false;
+      Type *Ty = CI->getType();
+      Value *Ptr = CI->getArgOperand(PtrOpNo);
+      MaybeAlign Alignment = Ptr->getPointerAlignment(DL);
+      Interesting.emplace_back(I, PtrOpNo, IsWrite, Ty, Alignment);
+      break;
+    }
+    case Intrinsic::amdgcn_raw_tbuffer_store:
+    case Intrinsic::amdgcn_raw_ptr_tbuffer_store:
+    case Intrinsic::amdgcn_raw_buffer_store:
+    case Intrinsic::amdgcn_raw_ptr_buffer_store:
+    case Intrinsic::amdgcn_raw_buffer_store_format:
+    case Intrinsic::amdgcn_raw_ptr_buffer_store_format:
+    case Intrinsic::amdgcn_struct_buffer_store:
+    case Intrinsic::amdgcn_struct_ptr_buffer_store:
+    case Intrinsic::amdgcn_struct_buffer_store_format:
+    case Intrinsic::amdgcn_struct_ptr_buffer_store_format:
+    case Intrinsic::amdgcn_struct_tbuffer_store:
+    case Intrinsic::amdgcn_struct_ptr_tbuffer_store: {
+      unsigned PtrOpNo = 1;
+      bool IsWrite = true;
+      Value *Ptr = CI->getArgOperand(PtrOpNo);
+      Type *Ty = Ptr->getType();
+      MaybeAlign Alignment = Ptr->getPointerAlignment(DL);
+      Interesting.emplace_back(I, PtrOpNo, IsWrite, Ty, Alignment);
+      break;
+    }
+    default:
+      for (unsigned ArgNo = 0; ArgNo < CI->arg_size(); ArgNo++) {
+        if (Type *Ty = CI->getParamByRefType(ArgNo)) {
+          Interesting.emplace_back(I, ArgNo, false, Ty, Align(1));
+        } else if (Type *Ty = CI->getParamByValType(ArgNo)) {
+          Interesting.emplace_back(I, ArgNo, false, Ty, Align(1));
+        }
+      }
+    }
+  }
+}
+} // end namespace AMDGPU
+} // end namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsanInstrumentation.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsanInstrumentation.h
new file mode 100644
index 00000000000000..b438004aa6ce79
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsanInstrumentation.h
@@ -0,0 +1,60 @@
+//===AMDGPUAsanInstrumentation.h - ASAN helper functions -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPU_ASAN_INSTRUMENTATION_H
+#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPU_ASAN_INSTRUMENTATION_H
+
+#include "AMDGPU.h"
+#include "AMDGPUBaseInfo.h"
+#include "AMDGPUMemoryUtils.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/OptimizedStructLayout.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Instrumentation/AddressSanitizer.h"
+#include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+namespace llvm {
+namespace AMDGPU {
+
+/// Given SizeInBytes of the Value to be instrumented,
+/// returns the redzone size corresponding to it.
+uint64_t getRedzoneSizeForGlobal(int Scale, uint64_t SizeInBytes);
+
+/// Instrument the memory operand Addr.
+/// Generates report blocks that catch the addressing errors.
+void instrumentAddress(Module &M, IRBuilder<> &IRB, Instruction *OrigIns,
+                       Instruction *InsertBefore, Value *Addr,
+                       MaybeAlign Alignment, uint32_t TypeStoreSize,
+                       bool IsWrite, Value *SizeArgument, bool UseCalls,
+                       bool Recover, int Scale, int Offset);
+
+/// Get all the memory operands from the instruction
+/// that need to be instrumented.
+void getInterestingMemoryOperands(
+    Module &M, Instruction *I,
+    SmallVectorImpl<InterestingMemoryOperand> &Interesting);
+
+} // end namespace AMDGPU
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPU_ASAN_INSTRUMENTATION_H
diff --git a/llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt b/llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt
index 09b8da9f5dd48a..4d69fb67db8602 100644
--- a/llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt
@@ -1,4 +1,5 @@
 add_llvm_component_library(LLVMAMDGPUUtils
+  AMDGPUAsanInstrumentation.cpp
   AMDGPUAsmUtils.cpp
   AMDGPUBaseInfo.cpp
   AMDGPUDelayedMCExpr.cpp
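Usage note (not part of the patch): the sketch below shows how a caller such as the "amdgpu-sw-lower-lds" pass might drive these two helpers, collecting the interesting memory operands of every instruction first and then instrumenting each recorded access. The function name instrumentFunction and the way AsanScale/AsanOffset are supplied are illustrative assumptions; the real pass derives the shadow scale and offset from the ASan mapping it targets.

// Minimal sketch, assuming the caller provides the ASan shadow scale/offset.
#include "AMDGPUAsanInstrumentation.h"
#include "llvm/IR/InstIterator.h"

using namespace llvm;

static void instrumentFunction(Module &M, Function &F, int AsanScale,
                               int AsanOffset) {
  // Collect first so that instructions inserted by the instrumentation are
  // not themselves revisited.
  SmallVector<InterestingMemoryOperand, 16> Operands;
  for (Instruction &I : instructions(F))
    AMDGPU::getInterestingMemoryOperands(M, &I, Operands);

  for (InterestingMemoryOperand &O : Operands) {
    Instruction *Insn = O.getInsn();
    IRBuilder<> IRB(Insn);
    // Emit the shadow-memory check and report block in front of the access.
    AMDGPU::instrumentAddress(M, IRB, Insn, Insn, O.getPtr(), O.Alignment,
                              O.TypeStoreSize.getFixedValue(), O.IsWrite,
                              /*SizeArgument=*/nullptr, /*UseCalls=*/false,
                              /*Recover=*/true, AsanScale, AsanOffset);
  }
}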