From d78c4d9d00c15c52f0463c8dbb03945b036d53eb Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 26 Sep 2020 23:26:40 -0700 Subject: [PATCH 1/5] [X86] Add more test cases to inline-asm-flag-output.ll. NFC These are tests to make sure we are able to use the flag directly in a conditional branch after the inline asm. --- .../CodeGen/X86/inline-asm-flag-output.ll | 1122 +++++++++++++++++ 1 file changed, 1122 insertions(+) diff --git a/llvm/test/CodeGen/X86/inline-asm-flag-output.ll b/llvm/test/CodeGen/X86/inline-asm-flag-output.ll index d9ae4f8d202745..8b0d573d8cb7c7 100644 --- a/llvm/test/CodeGen/X86/inline-asm-flag-output.ll +++ b/llvm/test/CodeGen/X86/inline-asm-flag-output.ll @@ -868,3 +868,1125 @@ entry: %rv = zext i1 %tobool to i32 ret i32 %rv } + +declare void @bar() + +define void @test_cca_branch(i64 %nr, i64* %addr) nounwind { +; X32-LABEL: test_cca_branch: +; X32: # %bb.0: # %entry +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: #APP +; X32-NEXT: cmp %eax,(%edx) +; X32-NEXT: #NO_APP +; X32-NEXT: jbe .LBB28_2 +; X32-NEXT: # %bb.1: # %then +; X32-NEXT: calll bar +; X32-NEXT: .LBB28_2: # %exit +; X32-NEXT: retl +; +; X64-LABEL: test_cca_branch: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: #APP +; X64-NEXT: cmp %rdi,(%rsi) +; X64-NEXT: #NO_APP +; X64-NEXT: jbe .LBB28_2 +; X64-NEXT: # %bb.1: # %then +; X64-NEXT: callq bar +; X64-NEXT: .LBB28_2: # %exit +; X64-NEXT: popq %rax +; X64-NEXT: retq +entry: + %cc = tail call i8 asm "cmp $2,$1", "={@cca},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) nounwind + %tobool = icmp ne i8 %cc, 0 + br i1 %tobool, label %then, label %exit + +then: + call void @bar() + br label %exit + +exit: + ret void +} + +define void @test_ccae_branch(i64 %nr, i64* %addr) nounwind { +; X32-LABEL: test_ccae_branch: +; X32: # %bb.0: # %entry +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: #APP +; X32-NEXT: cmp %eax,(%edx) +; X32-NEXT: #NO_APP +; X32-NEXT: jb .LBB29_2 +; X32-NEXT: # %bb.1: # %then +; X32-NEXT: calll bar +; X32-NEXT: .LBB29_2: # %exit +; X32-NEXT: retl +; +; X64-LABEL: test_ccae_branch: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: #APP +; X64-NEXT: cmp %rdi,(%rsi) +; X64-NEXT: #NO_APP +; X64-NEXT: jb .LBB29_2 +; X64-NEXT: # %bb.1: # %then +; X64-NEXT: callq bar +; X64-NEXT: .LBB29_2: # %exit +; X64-NEXT: popq %rax +; X64-NEXT: retq +entry: + %cc = tail call i8 asm "cmp $2,$1", "={@ccae},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) nounwind + %tobool = icmp ne i8 %cc, 0 + br i1 %tobool, label %then, label %exit + +then: + call void @bar() + br label %exit + +exit: + ret void +} + +define void @test_ccb_branch(i64 %nr, i64* %addr) nounwind { +; X32-LABEL: test_ccb_branch: +; X32: # %bb.0: # %entry +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: #APP +; X32-NEXT: cmp %eax,(%edx) +; X32-NEXT: #NO_APP +; X32-NEXT: jae .LBB30_2 +; X32-NEXT: # %bb.1: # %then +; X32-NEXT: calll bar +; X32-NEXT: .LBB30_2: # %exit +; X32-NEXT: retl +; +; X64-LABEL: test_ccb_branch: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: #APP +; X64-NEXT: cmp %rdi,(%rsi) +; X64-NEXT: #NO_APP +; X64-NEXT: jae .LBB30_2 +; X64-NEXT: # %bb.1: # %then +; X64-NEXT: callq bar +; X64-NEXT: .LBB30_2: # %exit +; X64-NEXT: popq %rax +; X64-NEXT: retq 
+entry: + %cc = tail call i8 asm "cmp $2,$1", "={@ccb},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) nounwind + %tobool = icmp ne i8 %cc, 0 + br i1 %tobool, label %then, label %exit + +then: + call void @bar() + br label %exit + +exit: + ret void +} + +define void @test_ccbe_branch(i64 %nr, i64* %addr) nounwind { +; X32-LABEL: test_ccbe_branch: +; X32: # %bb.0: # %entry +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: #APP +; X32-NEXT: cmp %eax,(%edx) +; X32-NEXT: #NO_APP +; X32-NEXT: ja .LBB31_2 +; X32-NEXT: # %bb.1: # %then +; X32-NEXT: calll bar +; X32-NEXT: .LBB31_2: # %exit +; X32-NEXT: retl +; +; X64-LABEL: test_ccbe_branch: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: #APP +; X64-NEXT: cmp %rdi,(%rsi) +; X64-NEXT: #NO_APP +; X64-NEXT: ja .LBB31_2 +; X64-NEXT: # %bb.1: # %then +; X64-NEXT: callq bar +; X64-NEXT: .LBB31_2: # %exit +; X64-NEXT: popq %rax +; X64-NEXT: retq +entry: + %cc = tail call i8 asm "cmp $2,$1", "={@ccbe},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) nounwind + %tobool = icmp ne i8 %cc, 0 + br i1 %tobool, label %then, label %exit + +then: + call void @bar() + br label %exit + +exit: + ret void +} + +define void @test_ccc_branch(i64 %nr, i64* %addr) nounwind { +; X32-LABEL: test_ccc_branch: +; X32: # %bb.0: # %entry +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: #APP +; X32-NEXT: cmp %eax,(%edx) +; X32-NEXT: #NO_APP +; X32-NEXT: jae .LBB32_2 +; X32-NEXT: # %bb.1: # %then +; X32-NEXT: calll bar +; X32-NEXT: .LBB32_2: # %exit +; X32-NEXT: retl +; +; X64-LABEL: test_ccc_branch: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: #APP +; X64-NEXT: cmp %rdi,(%rsi) +; X64-NEXT: #NO_APP +; X64-NEXT: jae .LBB32_2 +; X64-NEXT: # %bb.1: # %then +; X64-NEXT: callq bar +; X64-NEXT: .LBB32_2: # %exit +; X64-NEXT: popq %rax +; X64-NEXT: retq +entry: + %cc = tail call i8 asm "cmp $2,$1", "={@ccc},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) nounwind + %tobool = icmp ne i8 %cc, 0 + br i1 %tobool, label %then, label %exit + +then: + call void @bar() + br label %exit + +exit: + ret void +} + +define void @test_cce_branch(i64 %nr, i64* %addr) nounwind { +; X32-LABEL: test_cce_branch: +; X32: # %bb.0: # %entry +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: #APP +; X32-NEXT: cmp %eax,(%edx) +; X32-NEXT: #NO_APP +; X32-NEXT: jne .LBB33_2 +; X32-NEXT: # %bb.1: # %then +; X32-NEXT: calll bar +; X32-NEXT: .LBB33_2: # %exit +; X32-NEXT: retl +; +; X64-LABEL: test_cce_branch: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: #APP +; X64-NEXT: cmp %rdi,(%rsi) +; X64-NEXT: #NO_APP +; X64-NEXT: jne .LBB33_2 +; X64-NEXT: # %bb.1: # %then +; X64-NEXT: callq bar +; X64-NEXT: .LBB33_2: # %exit +; X64-NEXT: popq %rax +; X64-NEXT: retq +entry: + %cc = tail call i8 asm "cmp $2,$1", "={@cce},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) nounwind + %tobool = icmp ne i8 %cc, 0 + br i1 %tobool, label %then, label %exit + +then: + call void @bar() + br label %exit + +exit: + ret void +} + +define void @test_ccz_branch(i64 %nr, i64* %addr) nounwind { +; X32-LABEL: test_ccz_branch: +; X32: # %bb.0: # %entry +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: 
#APP +; X32-NEXT: cmp %eax,(%edx) +; X32-NEXT: #NO_APP +; X32-NEXT: jne .LBB34_2 +; X32-NEXT: # %bb.1: # %then +; X32-NEXT: calll bar +; X32-NEXT: .LBB34_2: # %exit +; X32-NEXT: retl +; +; X64-LABEL: test_ccz_branch: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: #APP +; X64-NEXT: cmp %rdi,(%rsi) +; X64-NEXT: #NO_APP +; X64-NEXT: jne .LBB34_2 +; X64-NEXT: # %bb.1: # %then +; X64-NEXT: callq bar +; X64-NEXT: .LBB34_2: # %exit +; X64-NEXT: popq %rax +; X64-NEXT: retq +entry: + %cc = tail call i8 asm "cmp $2,$1", "={@ccz},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) nounwind + %tobool = icmp ne i8 %cc, 0 + br i1 %tobool, label %then, label %exit + +then: + call void @bar() + br label %exit + +exit: + ret void +} + +define void @test_ccg_branch(i64 %nr, i64* %addr) nounwind { +; X32-LABEL: test_ccg_branch: +; X32: # %bb.0: # %entry +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: #APP +; X32-NEXT: cmp %eax,(%edx) +; X32-NEXT: #NO_APP +; X32-NEXT: jle .LBB35_2 +; X32-NEXT: # %bb.1: # %then +; X32-NEXT: calll bar +; X32-NEXT: .LBB35_2: # %exit +; X32-NEXT: retl +; +; X64-LABEL: test_ccg_branch: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: #APP +; X64-NEXT: cmp %rdi,(%rsi) +; X64-NEXT: #NO_APP +; X64-NEXT: jle .LBB35_2 +; X64-NEXT: # %bb.1: # %then +; X64-NEXT: callq bar +; X64-NEXT: .LBB35_2: # %exit +; X64-NEXT: popq %rax +; X64-NEXT: retq +entry: + %cc = tail call i8 asm "cmp $2,$1", "={@ccg},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) nounwind + %tobool = icmp ne i8 %cc, 0 + br i1 %tobool, label %then, label %exit + +then: + call void @bar() + br label %exit + +exit: + ret void +} + +define void @test_ccge_branch(i64 %nr, i64* %addr) nounwind { +; X32-LABEL: test_ccge_branch: +; X32: # %bb.0: # %entry +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: #APP +; X32-NEXT: cmp %eax,(%edx) +; X32-NEXT: #NO_APP +; X32-NEXT: jl .LBB36_2 +; X32-NEXT: # %bb.1: # %then +; X32-NEXT: calll bar +; X32-NEXT: .LBB36_2: # %exit +; X32-NEXT: retl +; +; X64-LABEL: test_ccge_branch: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: #APP +; X64-NEXT: cmp %rdi,(%rsi) +; X64-NEXT: #NO_APP +; X64-NEXT: jl .LBB36_2 +; X64-NEXT: # %bb.1: # %then +; X64-NEXT: callq bar +; X64-NEXT: .LBB36_2: # %exit +; X64-NEXT: popq %rax +; X64-NEXT: retq +entry: + %cc = tail call i8 asm "cmp $2,$1", "={@ccge},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) nounwind + %tobool = icmp ne i8 %cc, 0 + br i1 %tobool, label %then, label %exit + +then: + call void @bar() + br label %exit + +exit: + ret void +} + +define void @test_ccl_branch(i64 %nr, i64* %addr) nounwind { +; X32-LABEL: test_ccl_branch: +; X32: # %bb.0: # %entry +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: #APP +; X32-NEXT: cmp %eax,(%edx) +; X32-NEXT: #NO_APP +; X32-NEXT: jge .LBB37_2 +; X32-NEXT: # %bb.1: # %then +; X32-NEXT: calll bar +; X32-NEXT: .LBB37_2: # %exit +; X32-NEXT: retl +; +; X64-LABEL: test_ccl_branch: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: #APP +; X64-NEXT: cmp %rdi,(%rsi) +; X64-NEXT: #NO_APP +; X64-NEXT: jge .LBB37_2 +; X64-NEXT: # %bb.1: # %then +; X64-NEXT: callq bar +; X64-NEXT: .LBB37_2: # %exit +; X64-NEXT: popq %rax +; X64-NEXT: retq +entry: + %cc = tail call i8 asm "cmp 
$2,$1", "={@ccl},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) nounwind + %tobool = icmp ne i8 %cc, 0 + br i1 %tobool, label %then, label %exit + +then: + call void @bar() + br label %exit + +exit: + ret void +} + +define void @test_ccle_branch(i64 %nr, i64* %addr) nounwind { +; X32-LABEL: test_ccle_branch: +; X32: # %bb.0: # %entry +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: #APP +; X32-NEXT: cmp %eax,(%edx) +; X32-NEXT: #NO_APP +; X32-NEXT: jg .LBB38_2 +; X32-NEXT: # %bb.1: # %then +; X32-NEXT: calll bar +; X32-NEXT: .LBB38_2: # %exit +; X32-NEXT: retl +; +; X64-LABEL: test_ccle_branch: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: #APP +; X64-NEXT: cmp %rdi,(%rsi) +; X64-NEXT: #NO_APP +; X64-NEXT: jg .LBB38_2 +; X64-NEXT: # %bb.1: # %then +; X64-NEXT: callq bar +; X64-NEXT: .LBB38_2: # %exit +; X64-NEXT: popq %rax +; X64-NEXT: retq +entry: + %cc = tail call i8 asm "cmp $2,$1", "={@ccle},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) nounwind + %tobool = icmp ne i8 %cc, 0 + br i1 %tobool, label %then, label %exit + +then: + call void @bar() + br label %exit + +exit: + ret void +} + +define void @test_ccna_branch(i64 %nr, i64* %addr) nounwind { +; X32-LABEL: test_ccna_branch: +; X32: # %bb.0: # %entry +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: #APP +; X32-NEXT: cmp %eax,(%edx) +; X32-NEXT: #NO_APP +; X32-NEXT: ja .LBB39_2 +; X32-NEXT: # %bb.1: # %then +; X32-NEXT: calll bar +; X32-NEXT: .LBB39_2: # %exit +; X32-NEXT: retl +; +; X64-LABEL: test_ccna_branch: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: #APP +; X64-NEXT: cmp %rdi,(%rsi) +; X64-NEXT: #NO_APP +; X64-NEXT: ja .LBB39_2 +; X64-NEXT: # %bb.1: # %then +; X64-NEXT: callq bar +; X64-NEXT: .LBB39_2: # %exit +; X64-NEXT: popq %rax +; X64-NEXT: retq +entry: + %cc = tail call i8 asm "cmp $2,$1", "={@ccna},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) nounwind + %tobool = icmp ne i8 %cc, 0 + br i1 %tobool, label %then, label %exit + +then: + call void @bar() + br label %exit + +exit: + ret void +} + +define void @test_ccnae_branch(i64 %nr, i64* %addr) nounwind { +; X32-LABEL: test_ccnae_branch: +; X32: # %bb.0: # %entry +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: #APP +; X32-NEXT: cmp %eax,(%edx) +; X32-NEXT: #NO_APP +; X32-NEXT: jae .LBB40_2 +; X32-NEXT: # %bb.1: # %then +; X32-NEXT: calll bar +; X32-NEXT: .LBB40_2: # %exit +; X32-NEXT: retl +; +; X64-LABEL: test_ccnae_branch: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: #APP +; X64-NEXT: cmp %rdi,(%rsi) +; X64-NEXT: #NO_APP +; X64-NEXT: jae .LBB40_2 +; X64-NEXT: # %bb.1: # %then +; X64-NEXT: callq bar +; X64-NEXT: .LBB40_2: # %exit +; X64-NEXT: popq %rax +; X64-NEXT: retq +entry: + %cc = tail call i8 asm "cmp $2,$1", "={@ccnae},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) nounwind + %tobool = icmp ne i8 %cc, 0 + br i1 %tobool, label %then, label %exit + +then: + call void @bar() + br label %exit + +exit: + ret void +} + +define void @test_ccnb_branch(i64 %nr, i64* %addr) nounwind { +; X32-LABEL: test_ccnb_branch: +; X32: # %bb.0: # %entry +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: #APP +; X32-NEXT: cmp 
%eax,(%edx) +; X32-NEXT: #NO_APP +; X32-NEXT: jb .LBB41_2 +; X32-NEXT: # %bb.1: # %then +; X32-NEXT: calll bar +; X32-NEXT: .LBB41_2: # %exit +; X32-NEXT: retl +; +; X64-LABEL: test_ccnb_branch: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: #APP +; X64-NEXT: cmp %rdi,(%rsi) +; X64-NEXT: #NO_APP +; X64-NEXT: jb .LBB41_2 +; X64-NEXT: # %bb.1: # %then +; X64-NEXT: callq bar +; X64-NEXT: .LBB41_2: # %exit +; X64-NEXT: popq %rax +; X64-NEXT: retq +entry: + %cc = tail call i8 asm "cmp $2,$1", "={@ccnb},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) nounwind + %tobool = icmp ne i8 %cc, 0 + br i1 %tobool, label %then, label %exit + +then: + call void @bar() + br label %exit + +exit: + ret void +} + +define void @test_ccnbe_branch(i64 %nr, i64* %addr) nounwind { +; X32-LABEL: test_ccnbe_branch: +; X32: # %bb.0: # %entry +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: #APP +; X32-NEXT: cmp %eax,(%edx) +; X32-NEXT: #NO_APP +; X32-NEXT: jbe .LBB42_2 +; X32-NEXT: # %bb.1: # %then +; X32-NEXT: calll bar +; X32-NEXT: .LBB42_2: # %exit +; X32-NEXT: retl +; +; X64-LABEL: test_ccnbe_branch: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: #APP +; X64-NEXT: cmp %rdi,(%rsi) +; X64-NEXT: #NO_APP +; X64-NEXT: jbe .LBB42_2 +; X64-NEXT: # %bb.1: # %then +; X64-NEXT: callq bar +; X64-NEXT: .LBB42_2: # %exit +; X64-NEXT: popq %rax +; X64-NEXT: retq +entry: + %cc = tail call i8 asm "cmp $2,$1", "={@ccnbe},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) nounwind + %tobool = icmp ne i8 %cc, 0 + br i1 %tobool, label %then, label %exit + +then: + call void @bar() + br label %exit + +exit: + ret void +} + +define void @test_ccnc_branch(i64 %nr, i64* %addr) nounwind { +; X32-LABEL: test_ccnc_branch: +; X32: # %bb.0: # %entry +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: #APP +; X32-NEXT: cmp %eax,(%edx) +; X32-NEXT: #NO_APP +; X32-NEXT: jb .LBB43_2 +; X32-NEXT: # %bb.1: # %then +; X32-NEXT: calll bar +; X32-NEXT: .LBB43_2: # %exit +; X32-NEXT: retl +; +; X64-LABEL: test_ccnc_branch: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: #APP +; X64-NEXT: cmp %rdi,(%rsi) +; X64-NEXT: #NO_APP +; X64-NEXT: jb .LBB43_2 +; X64-NEXT: # %bb.1: # %then +; X64-NEXT: callq bar +; X64-NEXT: .LBB43_2: # %exit +; X64-NEXT: popq %rax +; X64-NEXT: retq +entry: + %cc = tail call i8 asm "cmp $2,$1", "={@ccnc},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) nounwind + %tobool = icmp ne i8 %cc, 0 + br i1 %tobool, label %then, label %exit + +then: + call void @bar() + br label %exit + +exit: + ret void +} + +define void @test_ccne_branch(i64 %nr, i64* %addr) nounwind { +; X32-LABEL: test_ccne_branch: +; X32: # %bb.0: # %entry +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: #APP +; X32-NEXT: cmp %eax,(%edx) +; X32-NEXT: #NO_APP +; X32-NEXT: je .LBB44_2 +; X32-NEXT: # %bb.1: # %then +; X32-NEXT: calll bar +; X32-NEXT: .LBB44_2: # %exit +; X32-NEXT: retl +; +; X64-LABEL: test_ccne_branch: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: #APP +; X64-NEXT: cmp %rdi,(%rsi) +; X64-NEXT: #NO_APP +; X64-NEXT: je .LBB44_2 +; X64-NEXT: # %bb.1: # %then +; X64-NEXT: callq bar +; X64-NEXT: .LBB44_2: # %exit +; X64-NEXT: popq %rax +; X64-NEXT: retq +entry: + %cc = tail call i8 asm "cmp $2,$1", 
"={@ccne},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) nounwind + %tobool = icmp ne i8 %cc, 0 + br i1 %tobool, label %then, label %exit + +then: + call void @bar() + br label %exit + +exit: + ret void +} + +define void @test_ccnz_branch(i64 %nr, i64* %addr) nounwind { +; X32-LABEL: test_ccnz_branch: +; X32: # %bb.0: # %entry +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: #APP +; X32-NEXT: cmp %eax,(%edx) +; X32-NEXT: #NO_APP +; X32-NEXT: je .LBB45_2 +; X32-NEXT: # %bb.1: # %then +; X32-NEXT: calll bar +; X32-NEXT: .LBB45_2: # %exit +; X32-NEXT: retl +; +; X64-LABEL: test_ccnz_branch: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: #APP +; X64-NEXT: cmp %rdi,(%rsi) +; X64-NEXT: #NO_APP +; X64-NEXT: je .LBB45_2 +; X64-NEXT: # %bb.1: # %then +; X64-NEXT: callq bar +; X64-NEXT: .LBB45_2: # %exit +; X64-NEXT: popq %rax +; X64-NEXT: retq +entry: + %cc = tail call i8 asm "cmp $2,$1", "={@ccnz},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) nounwind + %tobool = icmp ne i8 %cc, 0 + br i1 %tobool, label %then, label %exit + +then: + call void @bar() + br label %exit + +exit: + ret void +} + +define void @test_ccng_branch(i64 %nr, i64* %addr) nounwind { +; X32-LABEL: test_ccng_branch: +; X32: # %bb.0: # %entry +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: #APP +; X32-NEXT: cmp %eax,(%edx) +; X32-NEXT: #NO_APP +; X32-NEXT: jg .LBB46_2 +; X32-NEXT: # %bb.1: # %then +; X32-NEXT: calll bar +; X32-NEXT: .LBB46_2: # %exit +; X32-NEXT: retl +; +; X64-LABEL: test_ccng_branch: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: #APP +; X64-NEXT: cmp %rdi,(%rsi) +; X64-NEXT: #NO_APP +; X64-NEXT: jg .LBB46_2 +; X64-NEXT: # %bb.1: # %then +; X64-NEXT: callq bar +; X64-NEXT: .LBB46_2: # %exit +; X64-NEXT: popq %rax +; X64-NEXT: retq +entry: + %cc = tail call i8 asm "cmp $2,$1", "={@ccng},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) nounwind + %tobool = icmp ne i8 %cc, 0 + br i1 %tobool, label %then, label %exit + +then: + call void @bar() + br label %exit + +exit: + ret void +} + +define void @test_ccnge_branch(i64 %nr, i64* %addr) nounwind { +; X32-LABEL: test_ccnge_branch: +; X32: # %bb.0: # %entry +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: #APP +; X32-NEXT: cmp %eax,(%edx) +; X32-NEXT: #NO_APP +; X32-NEXT: jge .LBB47_2 +; X32-NEXT: # %bb.1: # %then +; X32-NEXT: calll bar +; X32-NEXT: .LBB47_2: # %exit +; X32-NEXT: retl +; +; X64-LABEL: test_ccnge_branch: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: #APP +; X64-NEXT: cmp %rdi,(%rsi) +; X64-NEXT: #NO_APP +; X64-NEXT: jge .LBB47_2 +; X64-NEXT: # %bb.1: # %then +; X64-NEXT: callq bar +; X64-NEXT: .LBB47_2: # %exit +; X64-NEXT: popq %rax +; X64-NEXT: retq +entry: + %cc = tail call i8 asm "cmp $2,$1", "={@ccnge},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) nounwind + %tobool = icmp ne i8 %cc, 0 + br i1 %tobool, label %then, label %exit + +then: + call void @bar() + br label %exit + +exit: + ret void +} + +define void @test_ccnl_branch(i64 %nr, i64* %addr) nounwind { +; X32-LABEL: test_ccnl_branch: +; X32: # %bb.0: # %entry +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: #APP +; X32-NEXT: cmp %eax,(%edx) 
+; X32-NEXT: #NO_APP +; X32-NEXT: jl .LBB48_2 +; X32-NEXT: # %bb.1: # %then +; X32-NEXT: calll bar +; X32-NEXT: .LBB48_2: # %exit +; X32-NEXT: retl +; +; X64-LABEL: test_ccnl_branch: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: #APP +; X64-NEXT: cmp %rdi,(%rsi) +; X64-NEXT: #NO_APP +; X64-NEXT: jl .LBB48_2 +; X64-NEXT: # %bb.1: # %then +; X64-NEXT: callq bar +; X64-NEXT: .LBB48_2: # %exit +; X64-NEXT: popq %rax +; X64-NEXT: retq +entry: + %cc = tail call i8 asm "cmp $2,$1", "={@ccnl},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) nounwind + %tobool = icmp ne i8 %cc, 0 + br i1 %tobool, label %then, label %exit + +then: + call void @bar() + br label %exit + +exit: + ret void +} + +define void @test_ccnle_branch(i64 %nr, i64* %addr) nounwind { +; X32-LABEL: test_ccnle_branch: +; X32: # %bb.0: # %entry +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: #APP +; X32-NEXT: cmp %eax,(%edx) +; X32-NEXT: #NO_APP +; X32-NEXT: jle .LBB49_2 +; X32-NEXT: # %bb.1: # %then +; X32-NEXT: calll bar +; X32-NEXT: .LBB49_2: # %exit +; X32-NEXT: retl +; +; X64-LABEL: test_ccnle_branch: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: #APP +; X64-NEXT: cmp %rdi,(%rsi) +; X64-NEXT: #NO_APP +; X64-NEXT: jle .LBB49_2 +; X64-NEXT: # %bb.1: # %then +; X64-NEXT: callq bar +; X64-NEXT: .LBB49_2: # %exit +; X64-NEXT: popq %rax +; X64-NEXT: retq +entry: + %cc = tail call i8 asm "cmp $2,$1", "={@ccnle},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) nounwind + %tobool = icmp ne i8 %cc, 0 + br i1 %tobool, label %then, label %exit + +then: + call void @bar() + br label %exit + +exit: + ret void +} + +define void @test_ccno_branch(i64 %nr, i64* %addr) nounwind { +; X32-LABEL: test_ccno_branch: +; X32: # %bb.0: # %entry +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: #APP +; X32-NEXT: cmp %eax,(%edx) +; X32-NEXT: #NO_APP +; X32-NEXT: jo .LBB50_2 +; X32-NEXT: # %bb.1: # %then +; X32-NEXT: calll bar +; X32-NEXT: .LBB50_2: # %exit +; X32-NEXT: retl +; +; X64-LABEL: test_ccno_branch: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: #APP +; X64-NEXT: cmp %rdi,(%rsi) +; X64-NEXT: #NO_APP +; X64-NEXT: jo .LBB50_2 +; X64-NEXT: # %bb.1: # %then +; X64-NEXT: callq bar +; X64-NEXT: .LBB50_2: # %exit +; X64-NEXT: popq %rax +; X64-NEXT: retq +entry: + %cc = tail call i8 asm "cmp $2,$1", "={@ccno},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) nounwind + %tobool = icmp ne i8 %cc, 0 + br i1 %tobool, label %then, label %exit + +then: + call void @bar() + br label %exit + +exit: + ret void +} + +define void @test_ccnp_branch(i64 %nr, i64* %addr) nounwind { +; X32-LABEL: test_ccnp_branch: +; X32: # %bb.0: # %entry +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: #APP +; X32-NEXT: cmp %eax,(%edx) +; X32-NEXT: #NO_APP +; X32-NEXT: jp .LBB51_2 +; X32-NEXT: # %bb.1: # %then +; X32-NEXT: calll bar +; X32-NEXT: .LBB51_2: # %exit +; X32-NEXT: retl +; +; X64-LABEL: test_ccnp_branch: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: #APP +; X64-NEXT: cmp %rdi,(%rsi) +; X64-NEXT: #NO_APP +; X64-NEXT: jp .LBB51_2 +; X64-NEXT: # %bb.1: # %then +; X64-NEXT: callq bar +; X64-NEXT: .LBB51_2: # %exit +; X64-NEXT: popq %rax +; X64-NEXT: retq +entry: + %cc = tail call i8 asm "cmp $2,$1", 
"={@ccnp},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) nounwind + %tobool = icmp ne i8 %cc, 0 + br i1 %tobool, label %then, label %exit + +then: + call void @bar() + br label %exit + +exit: + ret void +} + +define void @test_ccns_branch(i64 %nr, i64* %addr) nounwind { +; X32-LABEL: test_ccns_branch: +; X32: # %bb.0: # %entry +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: #APP +; X32-NEXT: cmp %eax,(%edx) +; X32-NEXT: #NO_APP +; X32-NEXT: js .LBB52_2 +; X32-NEXT: # %bb.1: # %then +; X32-NEXT: calll bar +; X32-NEXT: .LBB52_2: # %exit +; X32-NEXT: retl +; +; X64-LABEL: test_ccns_branch: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: #APP +; X64-NEXT: cmp %rdi,(%rsi) +; X64-NEXT: #NO_APP +; X64-NEXT: js .LBB52_2 +; X64-NEXT: # %bb.1: # %then +; X64-NEXT: callq bar +; X64-NEXT: .LBB52_2: # %exit +; X64-NEXT: popq %rax +; X64-NEXT: retq +entry: + %cc = tail call i8 asm "cmp $2,$1", "={@ccns},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) nounwind + %tobool = icmp ne i8 %cc, 0 + br i1 %tobool, label %then, label %exit + +then: + call void @bar() + br label %exit + +exit: + ret void +} + +define void @test_cco_branch(i64 %nr, i64* %addr) nounwind { +; X32-LABEL: test_cco_branch: +; X32: # %bb.0: # %entry +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: #APP +; X32-NEXT: cmp %eax,(%edx) +; X32-NEXT: #NO_APP +; X32-NEXT: jno .LBB53_2 +; X32-NEXT: # %bb.1: # %then +; X32-NEXT: calll bar +; X32-NEXT: .LBB53_2: # %exit +; X32-NEXT: retl +; +; X64-LABEL: test_cco_branch: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: #APP +; X64-NEXT: cmp %rdi,(%rsi) +; X64-NEXT: #NO_APP +; X64-NEXT: jno .LBB53_2 +; X64-NEXT: # %bb.1: # %then +; X64-NEXT: callq bar +; X64-NEXT: .LBB53_2: # %exit +; X64-NEXT: popq %rax +; X64-NEXT: retq +entry: + %cc = tail call i8 asm "cmp $2,$1", "={@cco},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) nounwind + %tobool = icmp ne i8 %cc, 0 + br i1 %tobool, label %then, label %exit + +then: + call void @bar() + br label %exit + +exit: + ret void +} + +define void @test_ccp_branch(i64 %nr, i64* %addr) nounwind { +; X32-LABEL: test_ccp_branch: +; X32: # %bb.0: # %entry +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: #APP +; X32-NEXT: cmp %eax,(%edx) +; X32-NEXT: #NO_APP +; X32-NEXT: jnp .LBB54_2 +; X32-NEXT: # %bb.1: # %then +; X32-NEXT: calll bar +; X32-NEXT: .LBB54_2: # %exit +; X32-NEXT: retl +; +; X64-LABEL: test_ccp_branch: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: #APP +; X64-NEXT: cmp %rdi,(%rsi) +; X64-NEXT: #NO_APP +; X64-NEXT: jnp .LBB54_2 +; X64-NEXT: # %bb.1: # %then +; X64-NEXT: callq bar +; X64-NEXT: .LBB54_2: # %exit +; X64-NEXT: popq %rax +; X64-NEXT: retq +entry: + %cc = tail call i8 asm "cmp $2,$1", "={@ccp},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) nounwind + %tobool = icmp ne i8 %cc, 0 + br i1 %tobool, label %then, label %exit + +then: + call void @bar() + br label %exit + +exit: + ret void +} + +define void @test_ccs_branch(i64 %nr, i64* %addr) nounwind { +; X32-LABEL: test_ccs_branch: +; X32: # %bb.0: # %entry +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: #APP +; X32-NEXT: cmp %eax,(%edx) +; X32-NEXT: 
#NO_APP +; X32-NEXT: jns .LBB55_2 +; X32-NEXT: # %bb.1: # %then +; X32-NEXT: calll bar +; X32-NEXT: .LBB55_2: # %exit +; X32-NEXT: retl +; +; X64-LABEL: test_ccs_branch: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: #APP +; X64-NEXT: cmp %rdi,(%rsi) +; X64-NEXT: #NO_APP +; X64-NEXT: jns .LBB55_2 +; X64-NEXT: # %bb.1: # %then +; X64-NEXT: callq bar +; X64-NEXT: .LBB55_2: # %exit +; X64-NEXT: popq %rax +; X64-NEXT: retq +entry: + %cc = tail call i8 asm "cmp $2,$1", "={@ccs},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %addr, i64 %nr) nounwind + %tobool = icmp ne i8 %cc, 0 + br i1 %tobool, label %then, label %exit + +then: + call void @bar() + br label %exit + +exit: + ret void +} From 82420b4e49ff92c49c2b548bf541a5655e97d197 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sun, 27 Sep 2020 01:04:30 -0700 Subject: [PATCH 2/5] [DivRemPairs] Use DenseMapBase::find instead of operator[]. NFC --- llvm/lib/Transforms/Scalar/DivRemPairs.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/DivRemPairs.cpp b/llvm/lib/Transforms/Scalar/DivRemPairs.cpp index d44a5979a8b257..42a5fb1135a6be 100644 --- a/llvm/lib/Transforms/Scalar/DivRemPairs.cpp +++ b/llvm/lib/Transforms/Scalar/DivRemPairs.cpp @@ -151,8 +151,8 @@ static DivRemWorklistTy getWorklist(Function &F) { // rare than division. for (auto &RemPair : RemMap) { // Find the matching division instruction from the division map. - Instruction *DivInst = DivMap[RemPair.first]; - if (!DivInst) + auto It = DivMap.find(RemPair.first); + if (It == DivMap.end()) continue; // We have a matching pair of div/rem instructions. @@ -160,7 +160,7 @@ static DivRemWorklistTy getWorklist(Function &F) { Instruction *RemInst = RemPair.second; // Place it in the worklist. - Worklist.emplace_back(DivInst, RemInst); + Worklist.emplace_back(It->second, RemInst); } return Worklist; From 7156938be26405156e17aa29e1c04e1afde88b04 Mon Sep 17 00:00:00 2001 From: Amara Emerson Date: Sun, 27 Sep 2020 01:22:55 -0700 Subject: [PATCH 3/5] [AArch64][GlobalISel] Use the look-through constant helper for the shift s32->s64 custom legalization. Almost NFC, except it catches more cases and gives a 0.1% CTMark -O0 size win. --- llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp | 10 ++++------ .../AArch64/GlobalISel/legalize-unmerge-values.mir | 5 +++-- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 5274d643e62477..5a0bb178818567 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -756,16 +756,14 @@ bool AArch64LegalizerInfo::legalizeShlAshrLshr( // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the // imported patterns can select it later. Either way, it will be legal. Register AmtReg = MI.getOperand(2).getReg(); - auto *CstMI = MRI.getVRegDef(AmtReg); - assert(CstMI && "expected to find a vreg def"); - if (CstMI->getOpcode() != TargetOpcode::G_CONSTANT) + auto VRegAndVal = getConstantVRegValWithLookThrough(AmtReg, MRI); + if (!VRegAndVal) return true; // Check the shift amount is in range for an immediate form. - unsigned Amount = CstMI->getOperand(1).getCImm()->getZExtValue(); + int64_t Amount = VRegAndVal->Value; if (Amount > 31) return true; // This will have to remain a register variant. 
- assert(MRI.getType(AmtReg).getSizeInBits() == 32); - auto ExtCst = MIRBuilder.buildZExt(LLT::scalar(64), AmtReg); + auto ExtCst = MIRBuilder.buildConstant(LLT::scalar(64), Amount); MI.getOperand(2).setReg(ExtCst.getReg(0)); return true; } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-unmerge-values.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-unmerge-values.mir index 56c5b8a8f1e275..9c1f6fc6f41b43 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-unmerge-values.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-unmerge-values.mir @@ -24,9 +24,10 @@ body: | ; CHECK-LABEL: name: test_unmerge_s4 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[COPY]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 4 ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UV]](s8) - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT]], [[C]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT]], [[C1]](s64) ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV]](s8) ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[LSHR]](s32) ; CHECK: $x0 = COPY [[ANYEXT]](s64) From f229bf2e12461be55446e6b08ccb931308586031 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sat, 12 Sep 2020 23:10:15 +0200 Subject: [PATCH 4/5] [Legalize][X86] Improve nnan fmin/fmax vector reduction Use +/-Inf or +/-Largest as neutral element for nnan fmin/fmax reductions. This avoids dropping any FMF flags. Preserving the nnan flag in particular is important to get a good lowering on X86. Differential Revision: https://reviews.llvm.org/D87586 --- .../SelectionDAG/LegalizeVectorTypes.cpp | 24 +++--- .../AArch64/vecreduce-fmax-legalization.ll | 4 +- .../AArch64/vecreduce-fmin-legalization.ll | 4 +- .../CodeGen/X86/vector-reduce-fmax-nnan.ll | 85 +++++-------------- .../CodeGen/X86/vector-reduce-fmin-nnan.ll | 81 ++++-------------- 5 files changed, 50 insertions(+), 148 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index e67717b81e4e66..b1952225ca10c6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -4794,20 +4794,18 @@ SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) { case ISD::VECREDUCE_FMUL: NeutralElem = DAG.getConstantFP(1.0, dl, ElemVT); break; - case ISD::VECREDUCE_FMAX: - // This has maxnum semantics, so NaN represents missing data. We must clear - // 'nnan' if it was set because the NaN would be a poison value. - NeutralElem = DAG.getConstantFP( - std::numeric_limits::quiet_NaN(), dl, ElemVT); - Flags.setNoNaNs(false); - break; case ISD::VECREDUCE_FMIN: - // This has minnum semantics, so NaN represents missing data. We must clear - // 'nnan' if it was set because the NaN would be a poison value. - NeutralElem = DAG.getConstantFP( - std::numeric_limits::quiet_NaN(), dl, ElemVT); - Flags.setNoNaNs(false); - break; + case ISD::VECREDUCE_FMAX: { + // Neutral element for fminnum is NaN, Inf or FLT_MAX, depending on FMF. + const fltSemantics &Semantics = DAG.EVTToAPFloatSemantics(ElemVT); + APFloat NeutralAF = !Flags.hasNoNaNs() ? APFloat::getQNaN(Semantics) : + !Flags.hasNoInfs() ? 
APFloat::getInf(Semantics) : + APFloat::getLargest(Semantics); + if (N->getOpcode() == ISD::VECREDUCE_FMAX) + NeutralAF.changeSign(); + + NeutralElem = DAG.getConstantFP(NeutralAF, dl, ElemVT); + } } // Pad the vector with the neutral element. diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll index 5fd7116e9068b2..89cee4f0a06006 100644 --- a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll @@ -47,7 +47,7 @@ define fp128 @test_v1f128(<1 x fp128> %a) nounwind { define float @test_v3f32(<3 x float> %a) nounwind { ; CHECK-LABEL: test_v3f32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #2143289344 +; CHECK-NEXT: mov w8, #-8388608 ; CHECK-NEXT: fmov s1, w8 ; CHECK-NEXT: mov v0.s[3], v1.s[0] ; CHECK-NEXT: fmaxnmv s0, v0.4s @@ -59,7 +59,7 @@ define float @test_v3f32(<3 x float> %a) nounwind { define float @test_v3f32_ninf(<3 x float> %a) nounwind { ; CHECK-LABEL: test_v3f32_ninf: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #2143289344 +; CHECK-NEXT: mov w8, #-8388609 ; CHECK-NEXT: fmov s1, w8 ; CHECK-NEXT: mov v0.s[3], v1.s[0] ; CHECK-NEXT: fmaxnmv s0, v0.4s diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll index 7a37c0d047a134..bb2d6b75bcd2a4 100644 --- a/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll @@ -47,7 +47,7 @@ define fp128 @test_v1f128(<1 x fp128> %a) nounwind { define float @test_v3f32(<3 x float> %a) nounwind { ; CHECK-LABEL: test_v3f32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #2143289344 +; CHECK-NEXT: mov w8, #2139095040 ; CHECK-NEXT: fmov s1, w8 ; CHECK-NEXT: mov v0.s[3], v1.s[0] ; CHECK-NEXT: fminnmv s0, v0.4s @@ -59,7 +59,7 @@ define float @test_v3f32(<3 x float> %a) nounwind { define float @test_v3f32_ninf(<3 x float> %a) nounwind { ; CHECK-LABEL: test_v3f32_ninf: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #2143289344 +; CHECK-NEXT: mov w8, #2139095039 ; CHECK-NEXT: fmov s1, w8 ; CHECK-NEXT: mov v0.s[3], v1.s[0] ; CHECK-NEXT: fminnmv s0, v0.4s diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll b/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll index dd3378411ecc82..f4539c572375a1 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll @@ -13,46 +13,27 @@ define float @test_v2f32(<2 x float> %a0) { ; SSE2-LABEL: test_v2f32: ; SSE2: # %bb.0: -; SSE2-NEXT: movaps %xmm0, %xmm2 -; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[1,1] ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: cmpunordss %xmm0, %xmm1 -; SSE2-NEXT: movaps %xmm1, %xmm3 -; SSE2-NEXT: andps %xmm2, %xmm3 -; SSE2-NEXT: maxss %xmm0, %xmm2 -; SSE2-NEXT: andnps %xmm2, %xmm1 -; SSE2-NEXT: orps %xmm3, %xmm1 -; SSE2-NEXT: movaps %xmm1, %xmm0 +; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1] +; SSE2-NEXT: maxss %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_v2f32: ; SSE41: # %bb.0: -; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: cmpunordss %xmm0, %xmm1 -; SSE41-NEXT: movaps %xmm1, %xmm3 -; SSE41-NEXT: andps %xmm2, %xmm3 -; SSE41-NEXT: maxss %xmm0, %xmm2 -; SSE41-NEXT: andnps %xmm2, %xmm1 -; SSE41-NEXT: orps %xmm3, %xmm1 -; SSE41-NEXT: movaps %xmm1, %xmm0 +; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; SSE41-NEXT: maxss %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: test_v2f32: ; AVX: # 
%bb.0: ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX-NEXT: vmaxss %xmm0, %xmm1, %xmm2 -; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0 +; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v2f32: ; AVX512: # %bb.0: -; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; AVX512-NEXT: vmaxss %xmm0, %xmm2, %xmm1 -; AVX512-NEXT: vcmpunordss %xmm0, %xmm0, %k1 -; AVX512-NEXT: vmovss %xmm2, %xmm1, %xmm1 {%k1} -; AVX512-NEXT: vmovaps %xmm1, %xmm0 +; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq %1 = call nnan float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float> %a0) ret float %1 @@ -302,65 +283,37 @@ define double @test_v3f64(<3 x double> %a0) { ; SSE2: # %bb.0: ; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],mem[1] -; SSE2-NEXT: movapd %xmm2, %xmm1 -; SSE2-NEXT: maxpd %xmm0, %xmm1 -; SSE2-NEXT: cmpunordpd %xmm0, %xmm0 -; SSE2-NEXT: andpd %xmm0, %xmm2 -; SSE2-NEXT: andnpd %xmm1, %xmm0 -; SSE2-NEXT: orpd %xmm2, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; SSE2-NEXT: maxpd %xmm2, %xmm0 ; SSE2-NEXT: movapd %xmm0, %xmm1 -; SSE2-NEXT: cmpunordsd %xmm0, %xmm1 -; SSE2-NEXT: movapd %xmm1, %xmm3 -; SSE2-NEXT: andpd %xmm2, %xmm3 -; SSE2-NEXT: maxsd %xmm0, %xmm2 -; SSE2-NEXT: andnpd %xmm2, %xmm1 -; SSE2-NEXT: orpd %xmm3, %xmm1 -; SSE2-NEXT: movapd %xmm1, %xmm0 +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] +; SSE2-NEXT: maxsd %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_v3f64: ; SSE41: # %bb.0: ; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE41-NEXT: blendpd {{.*#+}} xmm2 = xmm2[0],mem[1] -; SSE41-NEXT: movapd %xmm2, %xmm1 -; SSE41-NEXT: maxpd %xmm0, %xmm1 -; SSE41-NEXT: cmpunordpd %xmm0, %xmm0 -; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1 -; SSE41-NEXT: movapd %xmm1, %xmm2 -; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] -; SSE41-NEXT: movapd %xmm1, %xmm0 -; SSE41-NEXT: cmpunordsd %xmm1, %xmm0 -; SSE41-NEXT: movapd %xmm0, %xmm3 -; SSE41-NEXT: andpd %xmm2, %xmm3 -; SSE41-NEXT: maxsd %xmm1, %xmm2 -; SSE41-NEXT: andnpd %xmm2, %xmm0 -; SSE41-NEXT: orpd %xmm3, %xmm0 +; SSE41-NEXT: maxpd %xmm2, %xmm0 +; SSE41-NEXT: movapd %xmm0, %xmm1 +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] +; SSE41-NEXT: maxsd %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: test_v3f64: ; AVX: # %bb.0: ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX-NEXT: vmaxsd %xmm0, %xmm1, %xmm2 -; AVX-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm3 -; AVX-NEXT: vblendvpd %xmm3, %xmm1, %xmm2, %xmm1 -; AVX-NEXT: vcmpunordsd %xmm1, %xmm1, %xmm2 -; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0 ; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm1 -; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX-NEXT: vmaxsd %xmm0, %xmm1, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v3f64: ; AVX512: # %bb.0: ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX512-NEXT: vmaxsd %xmm0, %xmm1, %xmm2 -; AVX512-NEXT: vcmpunordsd %xmm0, %xmm0, %k1 -; AVX512-NEXT: vmovsd %xmm1, %xmm2, %xmm2 {%k1} -; AVX512-NEXT: vcmpunordsd %xmm2, %xmm2, %k1 -; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vmaxsd %xmm2, %xmm1, %xmm0 -; AVX512-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1} +; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm1 +; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX512-NEXT: vmaxsd %xmm0, %xmm1, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %1 = 
call nnan double @llvm.experimental.vector.reduce.fmax.v3f64(<3 x double> %a0) diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll b/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll index 4354463dfdc282..5846f588581d0a 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll @@ -21,46 +21,27 @@ define float @test_v1f32(<1 x float> %a0) { define float @test_v2f32(<2 x float> %a0) { ; SSE2-LABEL: test_v2f32: ; SSE2: # %bb.0: -; SSE2-NEXT: movaps %xmm0, %xmm2 -; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[1,1] ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: cmpunordss %xmm0, %xmm1 -; SSE2-NEXT: movaps %xmm1, %xmm3 -; SSE2-NEXT: andps %xmm2, %xmm3 -; SSE2-NEXT: minss %xmm0, %xmm2 -; SSE2-NEXT: andnps %xmm2, %xmm1 -; SSE2-NEXT: orps %xmm3, %xmm1 -; SSE2-NEXT: movaps %xmm1, %xmm0 +; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1] +; SSE2-NEXT: minss %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_v2f32: ; SSE41: # %bb.0: -; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: cmpunordss %xmm0, %xmm1 -; SSE41-NEXT: movaps %xmm1, %xmm3 -; SSE41-NEXT: andps %xmm2, %xmm3 -; SSE41-NEXT: minss %xmm0, %xmm2 -; SSE41-NEXT: andnps %xmm2, %xmm1 -; SSE41-NEXT: orps %xmm3, %xmm1 -; SSE41-NEXT: movaps %xmm1, %xmm0 +; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; SSE41-NEXT: minss %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: test_v2f32: ; AVX: # %bb.0: ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX-NEXT: vminss %xmm0, %xmm1, %xmm2 -; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0 +; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v2f32: ; AVX512: # %bb.0: -; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; AVX512-NEXT: vminss %xmm0, %xmm2, %xmm1 -; AVX512-NEXT: vcmpunordss %xmm0, %xmm0, %k1 -; AVX512-NEXT: vmovss %xmm2, %xmm1, %xmm1 {%k1} -; AVX512-NEXT: vmovaps %xmm1, %xmm0 +; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq %1 = call nnan float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float> %a0) ret float %1 @@ -72,20 +53,9 @@ define float @test_v3f32(<3 x float> %a0) { ; SSE2-NEXT: movaps %xmm0, %xmm2 ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[1,1] ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: cmpunordss %xmm0, %xmm1 -; SSE2-NEXT: movaps %xmm1, %xmm3 -; SSE2-NEXT: andps %xmm2, %xmm3 -; SSE2-NEXT: minss %xmm0, %xmm2 -; SSE2-NEXT: andnps %xmm2, %xmm1 -; SSE2-NEXT: orps %xmm3, %xmm1 +; SSE2-NEXT: minss %xmm2, %xmm1 ; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] -; SSE2-NEXT: movaps %xmm0, %xmm2 -; SSE2-NEXT: minss %xmm1, %xmm2 -; SSE2-NEXT: cmpunordss %xmm1, %xmm1 -; SSE2-NEXT: movaps %xmm1, %xmm3 -; SSE2-NEXT: andnps %xmm2, %xmm3 -; SSE2-NEXT: andps %xmm0, %xmm1 -; SSE2-NEXT: orps %xmm3, %xmm1 +; SSE2-NEXT: minss %xmm0, %xmm1 ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: retq ; @@ -93,45 +63,26 @@ define float @test_v3f32(<3 x float> %a0) { ; SSE41: # %bb.0: ; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: cmpunordss %xmm0, %xmm1 -; SSE41-NEXT: movaps %xmm1, %xmm3 -; SSE41-NEXT: andps %xmm2, %xmm3 -; SSE41-NEXT: minss %xmm0, %xmm2 -; SSE41-NEXT: andnps %xmm2, %xmm1 -; SSE41-NEXT: orps %xmm3, %xmm1 +; SSE41-NEXT: minss %xmm2, %xmm1 ; SSE41-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] -; SSE41-NEXT: movaps %xmm0, %xmm2 -; SSE41-NEXT: minss %xmm1, 
%xmm2 -; SSE41-NEXT: cmpunordss %xmm1, %xmm1 -; SSE41-NEXT: movaps %xmm1, %xmm3 -; SSE41-NEXT: andnps %xmm2, %xmm3 -; SSE41-NEXT: andps %xmm0, %xmm1 -; SSE41-NEXT: orps %xmm3, %xmm1 +; SSE41-NEXT: minss %xmm0, %xmm1 ; SSE41-NEXT: movaps %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: test_v3f32: ; AVX: # %bb.0: ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX-NEXT: vminss %xmm0, %xmm1, %xmm2 -; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm3 -; AVX-NEXT: vblendvps %xmm3, %xmm1, %xmm2, %xmm1 -; AVX-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 -; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; AVX-NEXT: vminss %xmm1, %xmm0, %xmm1 -; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; AVX-NEXT: vminss %xmm0, %xmm1, %xmm0 ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v3f32: ; AVX512: # %bb.0: ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX512-NEXT: vminss %xmm0, %xmm1, %xmm2 -; AVX512-NEXT: vcmpunordss %xmm0, %xmm0, %k1 -; AVX512-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1} -; AVX512-NEXT: vcmpunordss %xmm2, %xmm2, %k1 -; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX512-NEXT: vminss %xmm2, %xmm1, %xmm0 -; AVX512-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} +; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm1 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; AVX512-NEXT: vminss %xmm0, %xmm1, %xmm0 ; AVX512-NEXT: retq %1 = call nnan float @llvm.experimental.vector.reduce.fmin.v3f32(<3 x float> %a0) ret float %1 From 5811d723998a3abdd3cb95dc579d28f48c57c2fa Mon Sep 17 00:00:00 2001 From: Amara Emerson Date: Sun, 27 Sep 2020 01:45:09 -0700 Subject: [PATCH 5/5] [AArch64][GlobalISel] Promote scalar G_SHL constant shift amounts to s64. This was supposed to be done in the first place as is currently the case for G_ASHR and G_LSHR but was forgotten when the original shift legalization overhaul was done last year. This was exposed because we started falling back on s32 = s32, s64 SHLs due to a recent combiner change. Gives a very minor (0.1%) code size -O0 improvement on consumer-typeset. 
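As a rough illustration (not part of the change itself), the legalized MIR for a
constant 32-bit shift now looks like the updated shl_cimm_32 test further down:
instead of keeping the shift amount as an s32 constant,

    %c:_(s32) = G_CONSTANT i32 8
    %shl:_(s32) = G_SHL %x, %c(s32)

the amount is rebuilt as an s64 constant so the imported immediate-form patterns
can select it directly:

    %c:_(s64) = G_CONSTANT i64 8
    %shl:_(s32) = G_SHL %x, %c(s64)

The %x and %c names above are placeholders standing in for the vregs used in the
actual tests.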
--- llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp | 7 +++++++ .../CodeGen/AArch64/GlobalISel/legalize-merge-values.mir | 5 +++-- .../AArch64/GlobalISel/legalize-non-pow2-load-store.mir | 7 +++---- llvm/test/CodeGen/AArch64/GlobalISel/legalize-shift.mir | 4 ++-- llvm/test/CodeGen/AArch64/arm64-clrsb.ll | 4 +--- 5 files changed, 16 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 5a0bb178818567..f17e4cc661161d 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -98,8 +98,15 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .moreElementsToNextPow2(0); getActionDefinitionsBuilder(G_SHL) + .customIf([=](const LegalityQuery &Query) { + const auto &SrcTy = Query.Types[0]; + const auto &AmtTy = Query.Types[1]; + return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 && + AmtTy.getSizeInBits() == 32; + }) .legalFor({ {s32, s32}, + {s32, s64}, {s64, s64}, {v16s8, v16s8}, {v4s16, v4s16}, diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-merge-values.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-merge-values.mir index 09ae228b4f1df4..a802baca4c8d29 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-merge-values.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-merge-values.mir @@ -6,11 +6,12 @@ name: test_merge_s4 body: | bb.0: ; CHECK-LABEL: name: test_merge_s4 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 4 ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[C2]], [[C1]] - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C3]](s64) ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C2]](s32) ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SHL]](s32) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-non-pow2-load-store.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-non-pow2-load-store.mir index 7d7b77aa753521..6dc28e738dbcbe 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-non-pow2-load-store.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-non-pow2-load-store.mir @@ -28,12 +28,11 @@ body: | ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from %ir.ptr + 2, align 4) - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C2]](s32) + ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C2]](s64) ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR]](s32) - ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C3]](s64) + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s64) ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64) ; CHECK: G_STORE [[COPY2]](s32), [[COPY1]](p0) :: (store 2 into %ir.ptr2, align 4) ; CHECK: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p0) :: (store 1 into %ir.ptr2 + 2, align 4) diff --git 
a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shift.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shift.mir index e2b9d27051ee28..467f38672b7065 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shift.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shift.mir @@ -235,8 +235,8 @@ body: | ; CHECK-LABEL: name: shl_cimm_32 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s64) ; CHECK: $w0 = COPY [[SHL]](s32) ; CHECK: RET_ReallyLR implicit $w0 %0:_(s32) = COPY $w0 diff --git a/llvm/test/CodeGen/AArch64/arm64-clrsb.ll b/llvm/test/CodeGen/AArch64/arm64-clrsb.ll index 64673f2e096b0f..149a466a114700 100644 --- a/llvm/test/CodeGen/AArch64/arm64-clrsb.ll +++ b/llvm/test/CodeGen/AArch64/arm64-clrsb.ll @@ -21,10 +21,8 @@ entry: ; CHECK-LABEL: clrsb32 ; CHECK: cls [[TEMP:w[0-9]+]], [[TEMP]] -; FIXME: We should produce the same result here to save some code size. After -; that, we can remove the GISEL special casing. ; GISEL-LABEL: clrsb32 -; GISEL: clz +; GISEL: cls [[TEMP:w[0-9]+]], [[TEMP]] } ; Function Attrs: nounwind ssp