- 
                Notifications
    You must be signed in to change notification settings 
- Fork 15k
[IR][DSE] Support non-malloc functions in malloc+memset->calloc fold #138299
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The way to do this is probably to add an extra attribute along the lines of "alloc-variant-zeroed"="calloc"?
6e08081    to
    3ca30dc      
    Compare
  
    3ca30dc    to
    f94c92c      
    Compare
  
    | @llvm/pr-subscribers-llvm-ir @llvm/pr-subscribers-llvm-transforms Author: None (clubby789) ChangesAdd a  Full diff: https://github.com/llvm/llvm-project/pull/138299.diff 3 Files Affected: 
 diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 568843a4486e5..78718f102e19c 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -1954,6 +1954,10 @@ For example:
     The first three options are mutually exclusive, and the remaining options
     describe more details of how the function behaves. The remaining options
     are invalid for "free"-type functions.
+``"alloc-variant-zeroed"="FUNCTION"``
+    This attribute indicates that another function is equivalent to an allocator function,
+    but returns zeroed memory. The function must have "zeroed" allocation behavior,
+    the same ``alloc-family``, and take exactly the same arguments.
 ``allocsize(<EltSizeParam>[, <NumEltsParam>])``
     This attribute indicates that the annotated function will always return at
     least a given number of bytes (or null). Its arguments are zero-indexed
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index e318ec94db4c3..9ba13450c1fdb 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -2028,9 +2028,19 @@ struct DSEState {
     if (!InnerCallee)
       return false;
     LibFunc Func;
+    std::optional<StringRef> ZeroedVariantName = std::nullopt;
     if (!TLI.getLibFunc(*InnerCallee, Func) || !TLI.has(Func) ||
-        Func != LibFunc_malloc)
-      return false;
+        Func != LibFunc_malloc) {
+      if (!Malloc->hasFnAttr("alloc-variant-zeroed") ||
+          Malloc->getFnAttr("alloc-variant-zeroed")
+              .getValueAsString()
+              .empty()) {
+        return false;
+      }
+      ZeroedVariantName =
+          Malloc->getFnAttr("alloc-variant-zeroed").getValueAsString();
+    }
+
     // Gracefully handle malloc with unexpected memory attributes.
     auto *MallocDef = dyn_cast_or_null<MemoryDef>(MSSA.getMemoryAccess(Malloc));
     if (!MallocDef)
@@ -2057,15 +2067,38 @@ struct DSEState {
 
     if (Malloc->getOperand(0) != MemSet->getLength())
       return false;
-    if (!shouldCreateCalloc(Malloc, MemSet) ||
-        !DT.dominates(Malloc, MemSet) ||
+    if (!shouldCreateCalloc(Malloc, MemSet) || !DT.dominates(Malloc, MemSet) ||
         !memoryIsNotModifiedBetween(Malloc, MemSet, BatchAA, DL, &DT))
       return false;
     IRBuilder<> IRB(Malloc);
-    Type *SizeTTy = Malloc->getArgOperand(0)->getType();
-    auto *Calloc =
-        emitCalloc(ConstantInt::get(SizeTTy, 1), Malloc->getArgOperand(0), IRB,
-                   TLI, Malloc->getType()->getPointerAddressSpace());
+    assert(Func == LibFunc_malloc || ZeroedVariantName.has_value());
+    Value *Calloc = nullptr;
+    if (ZeroedVariantName.has_value()) {
+      auto *ZeroedVariant =
+          Malloc->getModule()->getFunction(*ZeroedVariantName);
+      if (!ZeroedVariant)
+        return false;
+      auto Attributes = ZeroedVariant->getAttributes();
+      auto MallocFamily = getAllocationFamily(Malloc, &TLI);
+      if (MallocFamily &&
+          *MallocFamily !=
+              Attributes.getFnAttr("alloc-family").getValueAsString())
+        return false;
+      if (!Attributes.hasFnAttr(Attribute::AllocKind) ||
+          (Attributes.getAllocKind() & AllocFnKind::Zeroed) ==
+              AllocFnKind::Unknown)
+        return false;
+
+      SmallVector<Value *, 3> Args;
+      for (unsigned I = 0; I < Malloc->arg_size(); I++)
+        Args.push_back(Malloc->getArgOperand(I));
+      Calloc = IRB.CreateCall(ZeroedVariant, Args, *ZeroedVariantName);
+    } else {
+      Type *SizeTTy = Malloc->getArgOperand(0)->getType();
+      Calloc =
+          emitCalloc(ConstantInt::get(SizeTTy, 1), Malloc->getArgOperand(0),
+                     IRB, TLI, Malloc->getType()->getPointerAddressSpace());
+    }
     if (!Calloc)
       return false;
 
diff --git a/llvm/test/Transforms/DeadStoreElimination/noop-stores.ll b/llvm/test/Transforms/DeadStoreElimination/noop-stores.ll
index 9fc20d76da5eb..e54d93015d587 100644
--- a/llvm/test/Transforms/DeadStoreElimination/noop-stores.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/noop-stores.ll
@@ -374,6 +374,19 @@ define ptr @notmalloc_memset(i64 %size, ptr %notmalloc) {
   ret ptr %call1
 }
 
+; This should create a customalloc_zeroed
+define ptr @customalloc_memset(i64 %size, i64 %align) {
+; CHECK-LABEL: @customalloc_memset
+; CHECK-NEXT:  [[CALL:%.*]] = call ptr @customalloc_zeroed(i64 [[SIZE:%.*]], i64 [[ALIGN:%.*]])
+; CHECK-NEXT:  ret ptr [[CALL]]
+  %call = call ptr @customalloc(i64 %size, i64 %align)
+  call void @llvm.memset.p0.i64(ptr %call, i8 0, i64 %size, i1 false)
+  ret ptr %call
+}
+
+declare ptr @customalloc(i64, i64) allockind("alloc") "alloc-family"="customalloc" "alloc-variant-zeroed"="customalloc_zeroed"
+declare ptr @customalloc_zeroed(i64, i64) allockind("alloc,zeroed") "alloc-family"="customalloc"
+
 ; This should not create recursive call to calloc.
 define ptr @calloc(i64 %nmemb, i64 %size) inaccessiblememonly {
 ; CHECK-LABEL: @calloc(
 | 
f94c92c    to
    c346cf7      
    Compare
  
    | 
 I think it is worth it because there are some programs out there that use xmalloc and it might be useful to optimize that to xcalloc. | 
| 
 Sorry, I'm not sure I understand this. I think the original question is about extending the system to be smart enough to transform  | 
| 
 xcalloc still takes the same arguments as calloc. Likewise of xmalloc. | 
| My mistake, I misread the definition. Would it make more sense to have a different attribute for calloc-like functions that have their arguments remapped ( | 
| As far as I know, Clang currently doesn't support declaring allocators in C using attributes (it only supports noalias via  | 
c346cf7    to
    89d8649      
    Compare
  
    89d8649    to
    910c21e      
    Compare
  
    | This seems fine to me - it's a little bit of a bummer there's no way to just find the matching  | 
26fca54    to
    eba5e9e      
    Compare
  
    | Hi, are any more changes desired here? | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This needs a rebase, but generally looks good to me.
eba5e9e    to
    e1729a7      
    Compare
  
    | ✅ With the latest revision this PR passed the C/C++ code formatter. | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
e1729a7    to
    c7e7901      
    Compare
  
    | 
 | 
Introduced by accident in #138299 (https://lab.llvm.org/buildbot/#/builders/164/builds/10604)
…oed variant function (#149336) cc #138299 rustc sets `allockind("uninitialized")` - if we copy the attributes as-is when creating a dummy function, Verify complains about `allockind("uninitialized,zeroed")` conflicting, so we need to clear the flag. Co-authored-by: Jamie Hill-Daniel <jamie@osec.io>
…g dummy zeroed variant function (#149336) cc llvm/llvm-project#138299 rustc sets `allockind("uninitialized")` - if we copy the attributes as-is when creating a dummy function, Verify complains about `allockind("uninitialized,zeroed")` conflicting, so we need to clear the flag. Co-authored-by: Jamie Hill-Daniel <jamie@osec.io>
…oed variant function (llvm#149336) cc llvm#138299 rustc sets `allockind("uninitialized")` - if we copy the attributes as-is when creating a dummy function, Verify complains about `allockind("uninitialized,zeroed")` conflicting, so we need to clear the flag. Co-authored-by: Jamie Hill-Daniel <jamie@osec.io> (cherry picked from commit 74c396a)
…g dummy zeroed variant function (#149336) cc llvm/llvm-project#138299 rustc sets `allockind("uninitialized")` - if we copy the attributes as-is when creating a dummy function, Verify complains about `allockind("uninitialized,zeroed")` conflicting, so we need to clear the flag. Co-authored-by: Jamie Hill-Daniel <jamie@osec.io> (cherry picked from commit 74c396a)
Pass `alloc-variant-zeroed` to LLVM Makes use of llvm/llvm-project#138299 (once we pull in a version of LLVM with this attribute). ~~Unfortunately also requires llvm/llvm-project#149336 to work.~~ Closes #104847
Pass `alloc-variant-zeroed` to LLVM Makes use of llvm/llvm-project#138299 (once we pull in a version of LLVM with this attribute). ~~Unfortunately also requires llvm/llvm-project#149336 to work.~~ Closes #104847
Pass `alloc-variant-zeroed` to LLVM Makes use of llvm/llvm-project#138299 (once we pull in a version of LLVM with this attribute). ~~Unfortunately also requires llvm/llvm-project#149336 to work.~~ Closes #104847
Pass `alloc-variant-zeroed` to LLVM Makes use of llvm/llvm-project#138299 (once we pull in a version of LLVM with this attribute). ~~Unfortunately also requires llvm/llvm-project#149336 to work.~~ Closes rust-lang/rust#104847
Pass `alloc-variant-zeroed` to LLVM Makes use of llvm/llvm-project#138299 (once we pull in a version of LLVM with this attribute). ~~Unfortunately also requires llvm/llvm-project#149336 to work.~~ Closes rust-lang/rust#104847
Pass `alloc-variant-zeroed` to LLVM Makes use of llvm/llvm-project#138299 (once we pull in a version of LLVM with this attribute). ~~Unfortunately also requires llvm/llvm-project#149336 to work.~~ Closes rust-lang#104847
Add a
alloc-variant-zeroedfunction attribute which can be used to inform folding allocation+memset. This addresses rust-lang/rust#104847, where LLVM does not know how to perform this transformation for non-C languages.