Skip to content

Commit cc943a6

Browse files
committed
[SLP] Fix PR106626: try several attempts to look up values, if not found.
If the value is used in Scalars several times, the first attempt to find its position in the node (if ReuseShuffleIndices and ReorderIndices are not empty) may fail. In this case, we need to find another copy of the same value and try again. Fixes #106626
1 parent e51fc36 commit cc943a6

File tree

2 files changed

+249
-8
lines changed

2 files changed

+249
-8
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3237,15 +3237,25 @@ class BoUpSLP {
32373237
/// When ReuseReorderShuffleIndices is empty it just returns position of \p
32383238
/// V within vector of Scalars. Otherwise, try to remap on its reuse index.
32393239
int findLaneForValue(Value *V) const {
3240-
unsigned FoundLane = std::distance(Scalars.begin(), find(Scalars, V));
3241-
assert(FoundLane < Scalars.size() && "Couldn't find extract lane");
3242-
if (!ReorderIndices.empty())
3243-
FoundLane = ReorderIndices[FoundLane];
3244-
assert(FoundLane < Scalars.size() && "Couldn't find extract lane");
3245-
if (!ReuseShuffleIndices.empty()) {
3246-
FoundLane = std::distance(ReuseShuffleIndices.begin(),
3247-
find(ReuseShuffleIndices, FoundLane));
3240+
unsigned FoundLane = getVectorFactor();
3241+
for (auto *It = find(Scalars, V), *End = Scalars.end(); It != End;
3242+
std::advance(It, 1)) {
3243+
if (*It != V)
3244+
continue;
3245+
FoundLane = std::distance(Scalars.begin(), It);
3246+
assert(FoundLane < Scalars.size() && "Couldn't find extract lane");
3247+
if (!ReorderIndices.empty())
3248+
FoundLane = ReorderIndices[FoundLane];
3249+
assert(FoundLane < Scalars.size() && "Couldn't find extract lane");
3250+
if (ReuseShuffleIndices.empty())
3251+
break;
3252+
if (auto *RIt = find(ReuseShuffleIndices, FoundLane);
3253+
RIt != ReuseShuffleIndices.end()) {
3254+
FoundLane = std::distance(ReuseShuffleIndices.begin(), RIt);
3255+
break;
3256+
}
32483257
}
3258+
assert(FoundLane < getVectorFactor() && "Unable to find given value.");
32493259
return FoundLane;
32503260
}
32513261

Lines changed: 231 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,231 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
3+
4+
define void @test() {
5+
; CHECK-LABEL: define void @test() {
6+
; CHECK-NEXT: [[ENTRY:.*:]]
7+
; CHECK-NEXT: br label %[[BB61:.*]]
8+
; CHECK: [[BB61]]:
9+
; CHECK-NEXT: br label %[[BB64:.*]]
10+
; CHECK: [[BB62:.*]]:
11+
; CHECK-NEXT: br i1 poison, label %[[BB63:.*]], label %[[BB64]]
12+
; CHECK: [[BB63]]:
13+
; CHECK-NEXT: br label %[[BB64]]
14+
; CHECK: [[BB64]]:
15+
; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x float> [ poison, %[[BB61]] ], [ poison, %[[BB63]] ], [ poison, %[[BB62]] ]
16+
; CHECK-NEXT: [[I66:%.*]] = load float, ptr poison, align 16
17+
; CHECK-NEXT: [[I67:%.*]] = load float, ptr poison, align 4
18+
; CHECK-NEXT: [[I68:%.*]] = load float, ptr poison, align 8
19+
; CHECK-NEXT: [[I69:%.*]] = load float, ptr poison, align 4
20+
; CHECK-NEXT: [[I70:%.*]] = load float, ptr poison, align 4
21+
; CHECK-NEXT: [[I71:%.*]] = load float, ptr poison, align 16
22+
; CHECK-NEXT: [[I72:%.*]] = load float, ptr poison, align 4
23+
; CHECK-NEXT: [[I73:%.*]] = load float, ptr poison, align 8
24+
; CHECK-NEXT: [[I74:%.*]] = load float, ptr poison, align 4
25+
; CHECK-NEXT: [[I75:%.*]] = load float, ptr poison, align 16
26+
; CHECK-NEXT: [[I76:%.*]] = load float, ptr poison, align 4
27+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x float> poison, float [[I76]], i32 0
28+
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <16 x float> [[TMP1]], float [[I75]], i32 1
29+
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x float> [[TMP2]], float [[I74]], i32 2
30+
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <16 x float> [[TMP3]], float [[I73]], i32 3
31+
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <16 x float> [[TMP4]], float [[I71]], i32 4
32+
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <16 x float> [[TMP5]], float [[I70]], i32 5
33+
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x float> [[TMP6]], float [[I68]], i32 6
34+
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x float> [[TMP7]], float [[I66]], i32 7
35+
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x float> [[TMP8]], float [[I72]], i32 13
36+
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x float> [[TMP9]], float [[I69]], i32 14
37+
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <16 x float> [[TMP10]], float [[I67]], i32 15
38+
; CHECK-NEXT: br i1 poison, label %[[BB167:.*]], label %[[BB77:.*]]
39+
; CHECK: [[BB77]]:
40+
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x float> [[TMP11]], <16 x float> poison, <8 x i32> <i32 poison, i32 5, i32 6, i32 7, i32 15, i32 15, i32 14, i32 15>
41+
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x float> [[TMP0]], <2 x float> poison, <16 x i32> <i32 poison, i32 poison, i32 1, i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 poison, i32 poison>
42+
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x float> [[TMP0]], <2 x float> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 poison, i32 poison>
43+
; CHECK-NEXT: br label %[[BB78:.*]]
44+
; CHECK: [[BB78]]:
45+
; CHECK-NEXT: [[TMP15:%.*]] = phi <8 x float> [ [[TMP12]], %[[BB77]] ], [ [[TMP30:%.*]], %[[BB78]] ]
46+
; CHECK-NEXT: [[TMP16:%.*]] = phi <2 x float> [ poison, %[[BB77]] ], [ [[TMP31:%.*]], %[[BB78]] ]
47+
; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> <i32 0, i32 3, i32 1, i32 2, i32 3, i32 0, i32 2, i32 3, i32 2, i32 7, i32 2, i32 3, i32 0, i32 6, i32 7, i32 7>
48+
; CHECK-NEXT: [[TMP18:%.*]] = fmul fast <16 x float> [[TMP17]], [[TMP13]]
49+
; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> <i32 1, i32 poison, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 7, i32 6, i32 6>
50+
; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x float> [[TMP16]], <2 x float> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
51+
; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <16 x float> [[TMP19]], <16 x float> [[TMP20]], <16 x i32> <i32 0, i32 17, i32 2, i32 16, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 12, i32 13, i32 14, i32 15>
52+
; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
53+
; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <16 x float> [[TMP21]], <16 x float> [[TMP22]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 17, i32 6, i32 7, i32 8, i32 22, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
54+
; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <16 x float> [[TMP23]], <16 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 1, i32 5, i32 3, i32 1, i32 3, i32 9, i32 3, i32 1, i32 12, i32 13, i32 14, i32 15>
55+
; CHECK-NEXT: [[TMP25:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v2f32(<16 x float> [[TMP14]], <2 x float> [[TMP0]], i64 2)
56+
; CHECK-NEXT: [[TMP26:%.*]] = fmul fast <16 x float> [[TMP24]], [[TMP25]]
57+
; CHECK-NEXT: [[TMP27:%.*]] = fadd fast <16 x float> [[TMP26]], [[TMP18]]
58+
; CHECK-NEXT: [[TMP28:%.*]] = fadd fast <16 x float> [[TMP27]], poison
59+
; CHECK-NEXT: [[TMP29:%.*]] = fadd fast <16 x float> [[TMP28]], poison
60+
; CHECK-NEXT: [[TMP30]] = shufflevector <16 x float> [[TMP29]], <16 x float> poison, <8 x i32> <i32 12, i32 5, i32 6, i32 7, i32 15, i32 15, i32 14, i32 15>
61+
; CHECK-NEXT: [[TMP31]] = shufflevector <16 x float> [[TMP29]], <16 x float> poison, <2 x i32> <i32 10, i32 11>
62+
; CHECK-NEXT: br i1 poison, label %[[BB78]], label %[[BB167]]
63+
; CHECK: [[BB167]]:
64+
; CHECK-NEXT: [[TMP32:%.*]] = phi <16 x float> [ [[TMP11]], %[[BB64]] ], [ [[TMP29]], %[[BB78]] ]
65+
; CHECK-NEXT: [[TMP33:%.*]] = extractelement <16 x float> [[TMP32]], i32 15
66+
; CHECK-NEXT: store float [[TMP33]], ptr poison, align 1
67+
; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x float> [[TMP32]], i32 13
68+
; CHECK-NEXT: store float [[TMP34]], ptr poison, align 1
69+
; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x float> [[TMP32]], i32 14
70+
; CHECK-NEXT: br i1 poison, label %[[BB186:.*]], label %[[BB184:.*]]
71+
; CHECK: [[BB184]]:
72+
; CHECK-NEXT: br label %[[BB185:.*]]
73+
; CHECK: [[BB185]]:
74+
; CHECK-NEXT: br i1 poison, label %[[BB185]], label %[[BB186]]
75+
; CHECK: [[BB186]]:
76+
; CHECK-NEXT: [[I187:%.*]] = phi nsz float [ [[TMP35]], %[[BB167]] ], [ poison, %[[BB185]] ]
77+
; CHECK-NEXT: ret void
78+
;
79+
entry:
80+
br label %bb61
81+
82+
bb61:
83+
br label %bb64
84+
85+
bb62:
86+
br i1 poison, label %bb63, label %bb64
87+
88+
bb63:
89+
br label %bb64
90+
91+
bb64:
92+
%i = phi nsz float [ poison, %bb61 ], [ poison, %bb63 ], [ poison, %bb62 ]
93+
%i65 = phi nsz float [ poison, %bb61 ], [ poison, %bb63 ], [ poison, %bb62 ]
94+
%i66 = load float, ptr poison, align 16
95+
%i67 = load float, ptr poison, align 4
96+
%i68 = load float, ptr poison, align 8
97+
%i69 = load float, ptr poison, align 4
98+
%i70 = load float, ptr poison, align 4
99+
%i71 = load float, ptr poison, align 16
100+
%i72 = load float, ptr poison, align 4
101+
%i73 = load float, ptr poison, align 8
102+
%i74 = load float, ptr poison, align 4
103+
%i75 = load float, ptr poison, align 16
104+
%i76 = load float, ptr poison, align 4
105+
br i1 poison, label %bb167, label %bb77
106+
107+
bb77:
108+
br label %bb78
109+
110+
bb78:
111+
%i79 = phi nsz float [ %i66, %bb77 ], [ %i103, %bb78 ]
112+
%i80 = phi nsz float [ %i67, %bb77 ], [ %i104, %bb78 ]
113+
%i81 = phi nsz float [ %i68, %bb77 ], [ %i105, %bb78 ]
114+
%i82 = phi nsz float [ poison, %bb77 ], [ %i106, %bb78 ]
115+
%i83 = phi nsz float [ poison, %bb77 ], [ %i123, %bb78 ]
116+
%i84 = phi nsz float [ %i69, %bb77 ], [ %i124, %bb78 ]
117+
%i85 = phi nsz float [ poison, %bb77 ], [ %i125, %bb78 ]
118+
%i86 = phi nsz float [ %i70, %bb77 ], [ %i126, %bb78 ]
119+
%i87 = fmul fast float %i79, poison
120+
%i88 = fmul fast float %i80, poison
121+
%i89 = fmul fast float %i81, poison
122+
%i90 = fmul fast float %i82, poison
123+
%i91 = fmul fast float %i83, poison
124+
%i92 = fadd fast float %i91, %i87
125+
%i93 = fmul fast float %i84, poison
126+
%i94 = fadd fast float %i93, %i88
127+
%i95 = fmul fast float %i85, poison
128+
%i96 = fadd fast float %i95, %i89
129+
%i97 = fmul fast float %i86, poison
130+
%i98 = fadd fast float %i97, %i90
131+
%i99 = fadd fast float %i92, poison
132+
%i100 = fadd fast float %i94, poison
133+
%i101 = fadd fast float %i96, poison
134+
%i102 = fadd fast float %i98, poison
135+
%i103 = fadd fast float %i99, poison
136+
%i104 = fadd fast float %i100, poison
137+
%i105 = fadd fast float %i101, poison
138+
%i106 = fadd fast float %i102, poison
139+
%i107 = fmul fast float %i79, poison
140+
%i108 = fmul fast float %i80, poison
141+
%i109 = fmul fast float %i81, poison
142+
%i110 = fmul fast float %i82, poison
143+
%i111 = fmul fast float %i83, poison
144+
%i112 = fadd fast float %i111, %i107
145+
%i113 = fmul fast float %i84, poison
146+
%i114 = fadd fast float %i113, %i108
147+
%i115 = fmul fast float %i85, poison
148+
%i116 = fadd fast float %i115, %i109
149+
%i117 = fmul fast float %i86, poison
150+
%i118 = fadd fast float %i117, %i110
151+
%i119 = fadd fast float %i112, poison
152+
%i120 = fadd fast float %i114, poison
153+
%i121 = fadd fast float %i116, poison
154+
%i122 = fadd fast float %i118, poison
155+
%i123 = fadd fast float %i119, poison
156+
%i124 = fadd fast float %i120, poison
157+
%i125 = fadd fast float %i121, poison
158+
%i126 = fadd fast float %i122, poison
159+
%i127 = fmul fast float %i79, %i
160+
%i128 = fmul fast float %i80, %i
161+
%i129 = fmul fast float %i81, %i
162+
%i130 = fmul fast float %i82, %i
163+
%i131 = fmul fast float %i83, %i65
164+
%i132 = fadd fast float %i131, %i127
165+
%i133 = fmul fast float %i84, %i65
166+
%i134 = fadd fast float %i133, %i128
167+
%i135 = fmul fast float %i85, %i65
168+
%i136 = fadd fast float %i135, %i129
169+
%i137 = fmul fast float %i86, %i65
170+
%i138 = fadd fast float %i137, %i130
171+
%i139 = fadd fast float %i132, poison
172+
%i140 = fadd fast float %i134, poison
173+
%i141 = fadd fast float %i136, poison
174+
%i142 = fadd fast float %i138, poison
175+
%i143 = fadd fast float %i139, poison
176+
%i144 = fadd fast float %i140, poison
177+
%i145 = fadd fast float %i141, poison
178+
%i146 = fadd fast float %i142, poison
179+
%i147 = fmul fast float %i79, poison
180+
%i148 = fmul fast float %i80, poison
181+
%i149 = fmul fast float %i81, poison
182+
%i150 = fmul fast float %i82, poison
183+
%i151 = fmul fast float %i83, poison
184+
%i152 = fadd fast float %i151, %i147
185+
%i153 = fmul fast float %i84, poison
186+
%i154 = fadd fast float %i153, %i148
187+
%i155 = fmul fast float %i85, poison
188+
%i156 = fadd fast float %i155, %i149
189+
%i157 = fmul fast float %i86, poison
190+
%i158 = fadd fast float %i157, %i150
191+
%i159 = fadd fast float %i152, poison
192+
%i160 = fadd fast float %i154, poison
193+
%i161 = fadd fast float %i156, poison
194+
%i162 = fadd fast float %i158, poison
195+
%i163 = fadd fast float %i159, poison
196+
%i164 = fadd fast float %i160, poison
197+
%i165 = fadd fast float %i161, poison
198+
%i166 = fadd fast float %i162, poison
199+
br i1 poison, label %bb78, label %bb167
200+
201+
bb167:
202+
%i168 = phi nsz float [ %i76, %bb64 ], [ %i166, %bb78 ]
203+
%i169 = phi nsz float [ poison, %bb64 ], [ %i165, %bb78 ]
204+
%i170 = phi nsz float [ poison, %bb64 ], [ %i164, %bb78 ]
205+
%i171 = phi nsz float [ %i75, %bb64 ], [ %i163, %bb78 ]
206+
%i172 = phi nsz float [ %i74, %bb64 ], [ %i146, %bb78 ]
207+
%i173 = phi nsz float [ %i73, %bb64 ], [ %i145, %bb78 ]
208+
%i174 = phi nsz float [ %i72, %bb64 ], [ %i144, %bb78 ]
209+
%i175 = phi nsz float [ %i71, %bb64 ], [ %i143, %bb78 ]
210+
%i176 = phi nsz float [ %i70, %bb64 ], [ %i126, %bb78 ]
211+
%i177 = phi nsz float [ poison, %bb64 ], [ %i125, %bb78 ]
212+
%i178 = phi nsz float [ %i69, %bb64 ], [ %i124, %bb78 ]
213+
%i179 = phi nsz float [ poison, %bb64 ], [ %i123, %bb78 ]
214+
%i180 = phi nsz float [ poison, %bb64 ], [ %i106, %bb78 ]
215+
%i181 = phi nsz float [ %i68, %bb64 ], [ %i105, %bb78 ]
216+
%i182 = phi nsz float [ %i67, %bb64 ], [ %i104, %bb78 ]
217+
%i183 = phi nsz float [ %i66, %bb64 ], [ %i103, %bb78 ]
218+
store float %i182, ptr poison, align 1
219+
store float %i174, ptr poison, align 1
220+
br i1 poison, label %bb186, label %bb184
221+
222+
bb184:
223+
br label %bb185
224+
225+
bb185:
226+
br i1 poison, label %bb185, label %bb186
227+
228+
bb186:
229+
%i187 = phi nsz float [ %i178, %bb167 ], [ poison, %bb185 ]
230+
ret void
231+
}

0 commit comments

Comments
 (0)