; early_exit_costs.ll -- test file taken from a fork of llvm/llvm-project.
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; REQUIRES: asserts
; RUN: opt -S < %s -p loop-vectorize -enable-early-exit-vectorization -disable-output \
; RUN: -debug-only=loop-vectorize 2>&1 | FileCheck %s --check-prefixes=CHECK
; Every function in this file is compiled for AArch64 Linux.
target triple = "aarch64-unknown-linux-gnu"
; External helper whose definition is not visible in this file. Each test calls
; it once per stack buffer, passing the buffer pointer and 1024 (the byte size
; of the buffer).
declare void @init_mem(ptr, i64);
; Early-exit cost test, SVE variant. The function carries attribute group #1,
; which this file defines as "+sve" with vscale_range(1,16).
;
; The loop compares p1[index] against p2[index] one byte at a time for
; index = 3..66 (at most 64 iterations). It leaves early on the first mismatch,
; or through the latch once %index.next reaches 67. Both exits branch to
; %loop.end, and the pre-increment induction values are used there.
define i64 @same_exit_block_pre_inc_use1_sve() #1 {
; Expected loop-vectorize debug output for this function: a scalable VF of
; vscale x 16 is selected; in exit block vector.early.exit each of the two
; first-active-lane recipes costs 4 and each of the two extractelement recipes
; costs 2; the minimum trip count for profitable runtime checks is 32.
; CHECK-LABEL: LV: Checking a loop in 'same_exit_block_pre_inc_use1_sve'
; CHECK: LV: Selecting VF: vscale x 16
; CHECK: Calculating cost of work in exit block vector.early.exit
; CHECK-NEXT: Cost of 4 for VF vscale x 16: EMIT vp<{{.*}}> = first-active-lane vp<{{.*}}>
; CHECK-NEXT: Cost of 2 for VF vscale x 16: EMIT vp<{{.*}}> = extractelement ir<{{.*}}>, vp<{{.*}}>
; CHECK-NEXT: Cost of 4 for VF vscale x 16: EMIT vp<{{.*}}>.1 = first-active-lane vp<{{.*}}>
; CHECK-NEXT: Cost of 2 for VF vscale x 16: EMIT vp<{{.*}}>.1 = extractelement ir<{{.*}}>, vp<{{.*}}>.1
; CHECK: LV: Minimum required TC for runtime checks to be profitable:32
entry:
; Two 1024-byte stack buffers, each handed to the external @init_mem helper
; before the loop starts.
%p1 = alloca [1024 x i8]
%p2 = alloca [1024 x i8]
call void @init_mem(ptr %p1, i64 1024)
call void @init_mem(ptr %p2, i64 1024)
br label %loop
loop:
; Primary induction variable: starts at 3 and is advanced by 1 in %loop.inc.
%index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
; Secondary induction variable: starts at 15 and is advanced by 2 in %loop.inc.
%index2 = phi i64 [ %index2.next, %loop.inc ], [ 15, %entry ]
; Load one byte from each buffer at the same offset (%index).
%arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
%ld1 = load i8, ptr %arrayidx, align 1
%arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
%ld2 = load i8, ptr %arrayidx1, align 1
; Equal bytes continue to the latch; a mismatch takes the early exit.
%cmp3 = icmp eq i8 %ld1, %ld2
br i1 %cmp3, label %loop.inc, label %loop.end
loop.inc:
%index.next = add i64 %index, 1
%index2.next = add i64 %index2, 2
; Counted exit: keep looping until the next index equals 67.
%exitcond = icmp ne i64 %index.next, 67
br i1 %exitcond, label %loop, label %loop.end
loop.end:
; On the early exit (from %loop) the current, not-yet-incremented induction
; values are forwarded; on the latch exit (from %loop.inc) the constants 67 and
; 98 are used instead.
%val1 = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
%val2 = phi i64 [ %index2, %loop ], [ 98, %loop.inc ]
; The result is the sum of the two exit values.
%retval = add i64 %val1, %val2
ret i64 %retval
}
; Early-exit cost test, variant declared without attribute group #1 (so the
; "+sve" feature string from that group is not attached to this function).
;
; The loop body is the same byte-compare loop as in the _sve test: it compares
; p1[index] against p2[index] for index = 3..66 (at most 64 iterations), exits
; early on the first mismatch, or through the latch once %index.next reaches 67.
define i64 @same_exit_block_pre_inc_use1_nosve() {
; Expected loop-vectorize debug output for this function: a fixed VF of 16 is
; selected; in exit block vector.early.exit each first-active-lane recipe costs
; 48 and each extractelement recipe costs 2; the minimum trip count for
; profitable runtime checks is 176, which exceeds the expected trip count of
; 64, so vectorization is reported as not beneficial.
; CHECK-LABEL: LV: Checking a loop in 'same_exit_block_pre_inc_use1_nosve'
; CHECK: LV: Selecting VF: 16
; CHECK: Calculating cost of work in exit block vector.early.exit
; CHECK-NEXT: Cost of 48 for VF 16: EMIT vp<{{.*}}> = first-active-lane vp<{{.*}}>
; CHECK-NEXT: Cost of 2 for VF 16: EMIT vp<{{.*}}> = extractelement ir<{{.*}}>, vp<{{.*}}>
; CHECK-NEXT: Cost of 48 for VF 16: EMIT vp<{{.*}}>.1 = first-active-lane vp<{{.*}}>
; CHECK-NEXT: Cost of 2 for VF 16: EMIT vp<{{.*}}>.1 = extractelement ir<{{.*}}>, vp<{{.*}}>.1
; CHECK: LV: Minimum required TC for runtime checks to be profitable:176
; CHECK-NEXT: LV: Vectorization is not beneficial: expected trip count < minimum profitable VF (64 < 176)
; CHECK-NEXT: LV: Too many memory checks needed.
entry:
; Two 1024-byte stack buffers, each handed to the external @init_mem helper
; before the loop starts.
%p1 = alloca [1024 x i8]
%p2 = alloca [1024 x i8]
call void @init_mem(ptr %p1, i64 1024)
call void @init_mem(ptr %p2, i64 1024)
br label %loop
loop:
; Primary induction variable: starts at 3 and is advanced by 1 in %loop.inc.
%index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
; Secondary induction variable: starts at 15 and is advanced by 2 in %loop.inc.
%index2 = phi i64 [ %index2.next, %loop.inc ], [ 15, %entry ]
; Load one byte from each buffer at the same offset (%index).
%arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
%ld1 = load i8, ptr %arrayidx, align 1
%arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
%ld2 = load i8, ptr %arrayidx1, align 1
; Equal bytes continue to the latch; a mismatch takes the early exit.
%cmp3 = icmp eq i8 %ld1, %ld2
br i1 %cmp3, label %loop.inc, label %loop.end
loop.inc:
%index.next = add i64 %index, 1
%index2.next = add i64 %index2, 2
; Counted exit: keep looping until the next index equals 67.
%exitcond = icmp ne i64 %index.next, 67
br i1 %exitcond, label %loop, label %loop.end
loop.end:
; On the early exit (from %loop) the current, not-yet-incremented induction
; values are forwarded; on the latch exit (from %loop.inc) the constants 67 and
; 98 are used instead.
%val1 = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
%val2 = phi i64 [ %index2, %loop ], [ 98, %loop.inc ]
; The result is the sum of the two exit values.
%retval = add i64 %val1, %val2
ret i64 %retval
}
; Attribute group #1, referenced by @same_exit_block_pre_inc_use1_sve: adds the
; "+sve" target feature and constrains vscale to the range [1, 16].
attributes #1 = { "target-features"="+sve" vscale_range(1,16) }