This repository was archived by the owner on Dec 1, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 116
This repository was archived by the owner on Dec 1, 2021. It is now read-only.
Vectorized loop crashing? #24
Copy link
Copy link
Open
Description
I tried compile the following C code w/ clang 7.0.0 (trunk 338352) w/ the following command:
clang -O3 -masm=intel -mno-red-zone -mstackrealign -mllvm -inline-threshold=1000 -fno-asynchronous-unwind-tables -fno-exceptions -fno-rtti -S count.c
int CountFilledEntries(char* entries, int len) {
int result = 0;
for (int i = 0; i < len; i++) {
if (entries[i] != 0) result++;
}
return result;
}
Given the stub file:
//go:noescape
func _CountFilledEntries(entries unsafe.Pointer, len uint64) (count uint64)
func CountFilledEntries(entries []byte) uint {
return uint(_CountFilledEntries(
unsafe.Pointer((*reflect.SliceHeader)(unsafe.Pointer(&entries)).Data),
uint64(len(entries))),
)
}
Running the test below causes the program to crash. Any possible insight as to why?
func TestCount(t *testing.T) {
a := []byte{0, 1, 2, 9, 4, 0, 3, 0, 0}
fmt.Println(CountFilledEntries(a))
}
Assembly:
//+build !noasm !appengine
// AUTO-GENERATED BY C2GOASM -- DO NOT EDIT
TEXT ·_CountFilledEntries(SB), $0-24
MOVQ entries+0(FP), DI
MOVQ len+8(FP), SI
WORD $0xf685 // test esi, esi
JLE LBB0_1
WORD $0xf189 // mov ecx, esi
WORD $0xfe83; BYTE $0x07 // cmp esi, 7
JA LBB0_4
WORD $0xd231 // xor edx, edx
WORD $0xc031 // xor eax, eax
JMP LBB0_11
LBB0_1:
WORD $0xc031 // xor eax, eax
MOVQ AX, count+16(FP)
RET
LBB0_4:
WORD $0xca89 // mov edx, ecx
WORD $0xe283; BYTE $0xf8 // and edx, -8
LONG $0xf8728d48 // lea rsi, [rdx - 8]
WORD $0x8948; BYTE $0xf0 // mov rax, rsi
LONG $0x03e8c148 // shr rax, 3
LONG $0x01c08348 // add rax, 1
WORD $0x8941; BYTE $0xc0 // mov r8d, eax
LONG $0x01e08341 // and r8d, 1
WORD $0x8548; BYTE $0xf6 // test rsi, rsi
JE LBB0_5
LONG $0x000001be; BYTE $0x00 // mov esi, 1
WORD $0x2948; BYTE $0xc6 // sub rsi, rax
WORD $0x014c; BYTE $0xc6 // add rsi, r8
LONG $0xffc68348 // add rsi, -1
LONG $0xd2ef0f66 // pxor xmm2, xmm2
WORD $0xc031 // xor eax, eax
LONG $0xdb760f66 // pcmpeqd xmm3, xmm3
LONG $0xc0ef0f66 // pxor xmm0, xmm0
LONG $0xc9ef0f66 // pxor xmm1, xmm1
LBB0_7:
LONG $0x246e0f66; BYTE $0x07 // movd xmm4, dword [rdi + rax]
LONG $0xe2600f66 // punpcklbw xmm4, xmm2
LONG $0xe2610f66 // punpcklwd xmm4, xmm2
LONG $0x6c6e0f66; WORD $0x0407 // movd xmm5, dword [rdi + rax + 4]
LONG $0xea600f66 // punpcklbw xmm5, xmm2
LONG $0xea610f66 // punpcklwd xmm5, xmm2
LONG $0xe2760f66 // pcmpeqd xmm4, xmm2
LONG $0xe3ef0f66 // pxor xmm4, xmm3
LONG $0xc4fa0f66 // psubd xmm0, xmm4
LONG $0xea760f66 // pcmpeqd xmm5, xmm2
LONG $0xebef0f66 // pxor xmm5, xmm3
LONG $0xcdfa0f66 // psubd xmm1, xmm5
LONG $0x646e0f66; WORD $0x0807 // movd xmm4, dword [rdi + rax + 8]
LONG $0xe2600f66 // punpcklbw xmm4, xmm2
LONG $0xe2610f66 // punpcklwd xmm4, xmm2
LONG $0x6c6e0f66; WORD $0x0c07 // movd xmm5, dword [rdi + rax + 12]
LONG $0xea600f66 // punpcklbw xmm5, xmm2
LONG $0xea610f66 // punpcklwd xmm5, xmm2
LONG $0xe2760f66 // pcmpeqd xmm4, xmm2
LONG $0xe3ef0f66 // pxor xmm4, xmm3
LONG $0xc4fa0f66 // psubd xmm0, xmm4
LONG $0xea760f66 // pcmpeqd xmm5, xmm2
LONG $0xebef0f66 // pxor xmm5, xmm3
LONG $0xcdfa0f66 // psubd xmm1, xmm5
LONG $0x10c08348 // add rax, 16
LONG $0x02c68348 // add rsi, 2
JNE LBB0_7
WORD $0x854d; BYTE $0xc0 // test r8, r8
JE LBB0_10
LBB0_9:
LONG $0x546e0f66; WORD $0x0407 // movd xmm2, dword [rdi + rax + 4]
LONG $0xdbef0f66 // pxor xmm3, xmm3
LONG $0xd3600f66 // punpcklbw xmm2, xmm3
LONG $0xd3610f66 // punpcklwd xmm2, xmm3
LONG $0xd3760f66 // pcmpeqd xmm2, xmm3
LONG $0xe4760f66 // pcmpeqd xmm4, xmm4
LONG $0xd4ef0f66 // pxor xmm2, xmm4
LONG $0xcafa0f66 // psubd xmm1, xmm2
LONG $0x146e0f66; BYTE $0x07 // movd xmm2, dword [rdi + rax]
LONG $0xd3600f66 // punpcklbw xmm2, xmm3
LONG $0xd3610f66 // punpcklwd xmm2, xmm3
LONG $0xd3760f66 // pcmpeqd xmm2, xmm3
LONG $0xd4ef0f66 // pxor xmm2, xmm4
LONG $0xc2fa0f66 // psubd xmm0, xmm2
LBB0_10:
LONG $0xc1fe0f66 // paddd xmm0, xmm1
LONG $0xc8700f66; BYTE $0x4e // pshufd xmm1, xmm0, 78
LONG $0xc8fe0f66 // paddd xmm1, xmm0
LONG $0xc1700f66; BYTE $0xe5 // pshufd xmm0, xmm1, 229
LONG $0xc1fe0f66 // paddd xmm0, xmm1
LONG $0xc07e0f66 // movd eax, xmm0
WORD $0x3948; BYTE $0xca // cmp rdx, rcx
JE LBB0_12
LBB0_11:
LONG $0x01173c80 // cmp byte [rdi + rdx], 1
WORD $0xd883; BYTE $0xff // sbb eax, -1
LONG $0x01c28348 // add rdx, 1
WORD $0x3948; BYTE $0xd1 // cmp rcx, rdx
JNE LBB0_11
LBB0_12:
WORD $0x8948; BYTE $0xec // mov rsp, rbp
BYTE $0x5d // pop rbp
BYTE $0xc3 // ret
LBB0_5:
LONG $0xc0ef0f66 // pxor xmm0, xmm0
WORD $0xc031 // xor eax, eax
LONG $0xc9ef0f66 // pxor xmm1, xmm1
WORD $0x854d; BYTE $0xc0 // test r8, r8
JNE LBB0_9
JMP LBB0_10
Metadata
Metadata
Assignees
Labels
No labels