Closed
Description
Apologies for the unreduced module, but it's not easy to make minimal triggers for this kind of bug. The function below, when optimized at -O2, appears to make opt hang indefinitely. All of the cases triggering this have contained a bitreverse intrinsic, so I suspect that's part of the problem.
I attached a profiler to the hung process, and here is what shows up at the top of the profile:
Samples: 479K of event 'cycles', Event count (approx.): 561733173365
Overhead Command Shared Object Symbol
3.69% opt libLLVMAnalysis.so.16git [.] programUndefinedIfUndefOrPoison
3.60% opt libLLVMAnalysis.so.16git [.] computeKnownBits
2.82% opt libLLVMAnalysis.so.16git [.] llvm::DataLayout::getTypeSizeInBits
2.54% opt libLLVMCore.so.16git [.] llvm::Type::getPrimitiveSizeInBits
2.54% opt libLLVMAnalysis.so.16git [.] llvm::mustTriggerUB
2.38% opt libLLVMAnalysis.so.16git [.] computeKnownBitsFromOperator
1.86% opt libLLVMInstCombine.so.16git [.] llvm::InstCombinerImpl::run
1.78% opt libLLVMAnalysis.so.16git [.] llvm::getGuaranteedWellDefinedOps
1.72% opt libLLVMInstCombine.so.16git [.] llvm::InstCombinerImpl::SimplifyDemandedUseBits
1.29% opt libLLVMCore.so.16git [.] llvm::Value::setValueName
1.22% opt libLLVMAnalysis.so.16git [.] computeKnownBitsFromAssume
1.19% opt libLLVMCore.so.16git [.] llvm::Type::getScalarSizeInBits
1.18% opt libLLVMAnalysis.so.16git [.] llvm::isGuaranteedToTransferExecutionToSuccessor
To trigger this, run opt -O2
on the following code:
source_filename = "M2"
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-linux-gnu"
; Reproducer function: takes two i64 arguments and returns an i64.
; Following the data flow below, the returned value is
;   (%0 & 1) & ((%1 & 1) ^ 1)
; computed on the frozen arguments and zero-extended to i64. The many
; trunc/zext round trips and the `or ..., 0` are redundant as written; they
; are kept verbatim because this exact instruction sequence is the trigger.
define i64 @f-tgt(i64 %0, i64 %1) {
f:
; %X0_4 = bit 0 of the frozen first argument, widened i1 -> i32 -> i64.
%X0_1 = freeze i64 %0
%X0_2 = trunc i64 %X0_1 to i1
%X0_3 = zext i1 %X0_2 to i32
%X0_4 = zext i32 %X0_3 to i64
; %X1_4 = bit 0 of the frozen second argument, widened i1 -> i32 -> i64.
%X1_1 = freeze i64 %1
%X1_2 = trunc i64 %X1_1 to i1
%X1_3 = zext i1 %X1_2 to i32
%X1_4 = zext i32 %X1_3 to i64
; %X2_* .. %X17_*: each pair ORs poison with 0 and freezes the result.
; None of these i64 values is referenced again in this function.
%X2_3 = or i64 poison, 0
%X2_4 = freeze i64 %X2_3
%X3_3 = or i64 poison, 0
%X3_4 = freeze i64 %X3_3
%X4_3 = or i64 poison, 0
%X4_4 = freeze i64 %X4_3
%X5_3 = or i64 poison, 0
%X5_4 = freeze i64 %X5_3
%X6_3 = or i64 poison, 0
%X6_4 = freeze i64 %X6_3
%X7_3 = or i64 poison, 0
%X7_4 = freeze i64 %X7_3
%X8_3 = or i64 poison, 0
%X8_4 = freeze i64 %X8_3
%X9_3 = or i64 poison, 0
%X9_4 = freeze i64 %X9_3
%X10_3 = or i64 poison, 0
%X10_4 = freeze i64 %X10_3
%X11_3 = or i64 poison, 0
%X11_4 = freeze i64 %X11_3
%X12_3 = or i64 poison, 0
%X12_4 = freeze i64 %X12_3
%X13_3 = or i64 poison, 0
%X13_4 = freeze i64 %X13_3
%X14_3 = or i64 poison, 0
%X14_4 = freeze i64 %X14_3
%X15_3 = or i64 poison, 0
%X15_4 = freeze i64 %X15_3
%X16_3 = or i64 poison, 0
%X16_4 = freeze i64 %X16_3
%X17_3 = or i64 poison, 0
%X17_4 = freeze i64 %X17_3
; %Q0_* .. %Q3_*: same frozen-poison pattern at i128; also unused below.
%Q0_3 = or i128 poison, 0
%Q0_4 = freeze i128 %Q0_3
%Q1_3 = or i128 poison, 0
%Q1_4 = freeze i128 %Q1_3
%Q2_3 = or i128 poison, 0
%Q2_4 = freeze i128 %Q2_3
%Q3_3 = or i128 poison, 0
%Q3_4 = freeze i128 %Q3_3
; Bit-reverse the 32-bit value of %X1_4 (0 or 1), which moves its only
; possibly-set bit from position 0 to position 31.
%X8_3x1x2x0 = trunc i64 %X1_4 to i32
%X8_3x2x2x0 = call i32 @llvm.bitreverse.i32(i32 %X8_3x1x2x0)
%X8_3x3x2x0 = zext i32 %X8_3x2x2x0 to i64
; Bitwise NOT (xor with -1); the following `or 0, ...` leaves the value unchanged.
%X8_4x1x3x0 = trunc i64 %X8_3x3x2x0 to i32
%X8_4x2x3x0 = xor i32 %X8_4x1x3x0, -1
%X8_4x3x3x0 = or i32 0, %X8_4x2x3x0
%X8_4x4x3x0 = zext i32 %X8_4x3x3x0 to i64
; Shift bit 31 down to bit 0: this yields the inverse of bit 0 of %1.
; AND it with bit 0 of %0 (%X0_4 truncated back to i32).
%X0_3x1x4x0 = trunc i64 %X8_4x4x3x0 to i32
%X0_3x2x4x0 = lshr i32 %X0_3x1x4x0, 31
%X0_3x3x4x0 = trunc i64 %X0_4 to i32
%X0_3x4x4x0 = and i32 %X0_3x3x4x0, %X0_3x2x4x0
%X0_3x5x4x0 = zext i32 %X0_3x4x4x0 to i64
; Final trunc/zext round trip through i32 before returning the i64 result.
%X0_3x1x5x0 = trunc i64 %X0_3x5x4x0 to i32
%X0_3x2x5x0 = zext i32 %X0_3x1x5x0 to i64
ret i64 %X0_3x2x5x0
}
; Declaration of the 32-bit bitreverse intrinsic called once from @f-tgt.
; Its attributes come from attribute group #0, defined on the last line.
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i32 @llvm.bitreverse.i32(i32) #0
attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }