Skip to content

apparent infinite loop in InstCombine #59897

Closed
@regehr

Description

@regehr

Apologies for the unreduced module, but it's not easy to make minimal triggers for this kind of bug. The function below, when optimized at -O2, appears to hang opt indefinitely. All of the cases triggering this have contained a bitreverse intrinsic, so I suspect that's part of the problem.

I attached a profiler to the process, and here is what shows up at the top of the profile:

Samples: 479K of event 'cycles', Event count (approx.): 561733173365
Overhead  Command  Shared Object                   Symbol
   3.69%  opt      libLLVMAnalysis.so.16git        [.] programUndefinedIfUndefOrPoison                   ▒
   3.60%  opt      libLLVMAnalysis.so.16git        [.] computeKnownBits                                  ◆
   2.82%  opt      libLLVMAnalysis.so.16git        [.] llvm::DataLayout::getTypeSizeInBits               ▒
   2.54%  opt      libLLVMCore.so.16git            [.] llvm::Type::getPrimitiveSizeInBits                ▒
   2.54%  opt      libLLVMAnalysis.so.16git        [.] llvm::mustTriggerUB                               ▒
   2.38%  opt      libLLVMAnalysis.so.16git        [.] computeKnownBitsFromOperator                      ▒
   1.86%  opt      libLLVMInstCombine.so.16git     [.] llvm::InstCombinerImpl::run                       ▒
   1.78%  opt      libLLVMAnalysis.so.16git        [.] llvm::getGuaranteedWellDefinedOps                 ▒
   1.72%  opt      libLLVMInstCombine.so.16git     [.] llvm::InstCombinerImpl::SimplifyDemandedUseBits   ▒
   1.29%  opt      libLLVMCore.so.16git            [.] llvm::Value::setValueName                         ▒
   1.22%  opt      libLLVMAnalysis.so.16git        [.] computeKnownBitsFromAssume                        ▒
   1.19%  opt      libLLVMCore.so.16git            [.] llvm::Type::getScalarSizeInBits                   ▒
   1.18%  opt      libLLVMAnalysis.so.16git        [.] llvm::isGuaranteedToTransferExecutionToSuccessor  ▒

To trigger this, run opt -O2 on the following code:

; Module header for the reproducer: source file name, data layout string,
; and target triple (aarch64-linux-gnu).
source_filename = "M2"
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-linux-gnu"

; Reproducer function: takes two i64 arguments and returns an i64.
; Following only the instructions that feed the return value, the result is
; bit 0 of %0 ANDed with the inverse of bit 0 of %1, zero-extended to i64.
; The repeated trunc/zext round trips and the unused frozen values below are
; part of the unreduced test case; keep the instruction sequence unchanged.
define i64 @f-tgt(i64 %0, i64 %1) {
f:
  ; Freeze each argument, keep only bit 0, and widen it back to i64.
  %X0_1 = freeze i64 %0
  %X0_2 = trunc i64 %X0_1 to i1
  %X0_3 = zext i1 %X0_2 to i32
  %X0_4 = zext i32 %X0_3 to i64
  %X1_1 = freeze i64 %1
  %X1_2 = trunc i64 %X1_1 to i1
  %X1_3 = zext i1 %X1_2 to i32
  %X1_4 = zext i32 %X1_3 to i64
  ; %X2_* .. %X17_* (i64) and %Q0_* .. %Q3_* (i128): each is `or poison, 0`
  ; followed by a freeze. None of these values is used by any later
  ; instruction in this function.
  %X2_3 = or i64 poison, 0
  %X2_4 = freeze i64 %X2_3
  %X3_3 = or i64 poison, 0
  %X3_4 = freeze i64 %X3_3
  %X4_3 = or i64 poison, 0
  %X4_4 = freeze i64 %X4_3
  %X5_3 = or i64 poison, 0
  %X5_4 = freeze i64 %X5_3
  %X6_3 = or i64 poison, 0
  %X6_4 = freeze i64 %X6_3
  %X7_3 = or i64 poison, 0
  %X7_4 = freeze i64 %X7_3
  %X8_3 = or i64 poison, 0
  %X8_4 = freeze i64 %X8_3
  %X9_3 = or i64 poison, 0
  %X9_4 = freeze i64 %X9_3
  %X10_3 = or i64 poison, 0
  %X10_4 = freeze i64 %X10_3
  %X11_3 = or i64 poison, 0
  %X11_4 = freeze i64 %X11_3
  %X12_3 = or i64 poison, 0
  %X12_4 = freeze i64 %X12_3
  %X13_3 = or i64 poison, 0
  %X13_4 = freeze i64 %X13_3
  %X14_3 = or i64 poison, 0
  %X14_4 = freeze i64 %X14_3
  %X15_3 = or i64 poison, 0
  %X15_4 = freeze i64 %X15_3
  %X16_3 = or i64 poison, 0
  %X16_4 = freeze i64 %X16_3
  %X17_3 = or i64 poison, 0
  %X17_4 = freeze i64 %X17_3
  %Q0_3 = or i128 poison, 0
  %Q0_4 = freeze i128 %Q0_3
  %Q1_3 = or i128 poison, 0
  %Q1_4 = freeze i128 %Q1_3
  %Q2_3 = or i128 poison, 0
  %Q2_4 = freeze i128 %Q2_3
  %Q3_3 = or i128 poison, 0
  %Q3_4 = freeze i128 %Q3_3
  ; Bit-reverse the low 32 bits of %X1_4 (bit 0 of %1 moves to bit 31).
  %X8_3x1x2x0 = trunc i64 %X1_4 to i32
  %X8_3x2x2x0 = call i32 @llvm.bitreverse.i32(i32 %X8_3x1x2x0)
  %X8_3x3x2x0 = zext i32 %X8_3x2x2x0 to i64
  ; Bitwise NOT of the reversed value (xor with -1); the `or 0, x` is a no-op.
  %X8_4x1x3x0 = trunc i64 %X8_3x3x2x0 to i32
  %X8_4x2x3x0 = xor i32 %X8_4x1x3x0, -1
  %X8_4x3x3x0 = or i32 0, %X8_4x2x3x0
  %X8_4x4x3x0 = zext i32 %X8_4x3x3x0 to i64
  ; Shift bit 31 down to bit 0, then AND it with the low-bit value from %0.
  %X0_3x1x4x0 = trunc i64 %X8_4x4x3x0 to i32
  %X0_3x2x4x0 = lshr i32 %X0_3x1x4x0, 31
  %X0_3x3x4x0 = trunc i64 %X0_4 to i32
  %X0_3x4x4x0 = and i32 %X0_3x3x4x0, %X0_3x2x4x0
  %X0_3x5x4x0 = zext i32 %X0_3x4x4x0 to i64
  ; Final trunc/zext round trip through i32 before returning.
  %X0_3x1x5x0 = trunc i64 %X0_3x5x4x0 to i32
  %X0_3x2x5x0 = zext i32 %X0_3x1x5x0 to i64
  ret i64 %X0_3x2x5x0
}

; Declaration of the i32 bitreverse intrinsic called from @f-tgt.
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i32 @llvm.bitreverse.i32(i32) #0

; Attribute group #0, referenced by the declaration above.
attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }

Metadata

Metadata

Assignees

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions