@@ -120,14 +120,26 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
120
120
.clampMaxNumElements (0 , s64, 2 )
121
121
.clampMaxNumElements (0 , p0, 2 );
122
122
123
- getActionDefinitionsBuilder (G_BSWAP)
124
- .legalFor ({s32, s64, v4s16, v8s16, v2s32, v4s32, v2s64})
125
- .widenScalarOrEltToNextPow2 (0 , 16 )
123
+ getActionDefinitionsBuilder (G_INSERT)
124
+ .legalIf (all (typeInSet (0 , {s32, s64, p0}), typeInSet (1 , {s8, s16, s32}),
125
+ smallerThan (1 , 0 )))
126
+ .widenScalarToNextPow2 (0 )
126
127
.clampScalar (0 , s32, s64)
127
- .clampNumElements (0 , v4s16, v8s16)
128
- .clampNumElements (0 , v2s32, v4s32)
129
- .clampNumElements (0 , v2s64, v2s64)
130
- .moreElementsToNextPow2 (0 );
128
+ .widenScalarToNextPow2 (1 )
129
+ .minScalar (1 , s8)
130
+ .maxScalarIf (typeInSet (0 , {s32}), 1 , s16)
131
+ .maxScalarIf (typeInSet (0 , {s64, p0}), 1 , s32);
132
+
133
+ getActionDefinitionsBuilder (G_EXTRACT)
134
+ .legalIf (all (typeInSet (0 , {s16, s32, s64, p0}),
135
+ typeInSet (1 , {s32, s64, s128, p0}), smallerThan (0 , 1 )))
136
+ .widenScalarToNextPow2 (1 )
137
+ .clampScalar (1 , s32, s128)
138
+ .widenScalarToNextPow2 (0 )
139
+ .minScalar (0 , s16)
140
+ .maxScalarIf (typeInSet (1 , {s32}), 0 , s16)
141
+ .maxScalarIf (typeInSet (1 , {s64, p0}), 0 , s32)
142
+ .maxScalarIf (typeInSet (1 , {s128}), 0 , s64);
131
143
132
144
getActionDefinitionsBuilder ({G_ADD, G_SUB, G_AND, G_OR, G_XOR})
133
145
.legalFor ({s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64})
@@ -253,13 +265,112 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
253
265
.clampNumElements (0 , v2s32, v4s32)
254
266
.lower ();
255
267
268
+ // FIXME: Legal vector types are only legal with NEON.
269
+ getActionDefinitionsBuilder (G_ABS)
270
+ .legalFor (HasCSSC, {s32, s64})
271
+ .legalFor (PackedVectorAllTypeList)
272
+ .customIf ([=](const LegalityQuery &Q) {
273
+ // TODO: Fix suboptimal codegen for 128+ bit types.
274
+ LLT SrcTy = Q.Types [0 ];
275
+ return SrcTy.isScalar () && SrcTy.getSizeInBits () < 128 ;
276
+ })
277
+ .widenScalarIf (
278
+ [=](const LegalityQuery &Query) { return Query.Types [0 ] == v4s8; },
279
+ [=](const LegalityQuery &Query) { return std::make_pair (0 , v4s16); })
280
+ .widenScalarIf (
281
+ [=](const LegalityQuery &Query) { return Query.Types [0 ] == v2s16; },
282
+ [=](const LegalityQuery &Query) { return std::make_pair (0 , v2s32); })
283
+ .clampNumElements (0 , v8s8, v16s8)
284
+ .clampNumElements (0 , v4s16, v8s16)
285
+ .clampNumElements (0 , v2s32, v4s32)
286
+ .clampNumElements (0 , v2s64, v2s64)
287
+ .moreElementsToNextPow2 (0 )
288
+ .lower ();
289
+
256
290
getActionDefinitionsBuilder (
257
291
{G_SADDE, G_SSUBE, G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO})
258
292
.legalFor ({{s32, s32}, {s64, s32}})
259
293
.clampScalar (0 , s32, s64)
260
294
.clampScalar (1 , s32, s64)
261
295
.widenScalarToNextPow2 (0 );
262
296
297
+ getActionDefinitionsBuilder ({G_FSHL, G_FSHR})
298
+ .customFor ({{s32, s32}, {s32, s64}, {s64, s64}})
299
+ .lower ();
300
+
301
+ getActionDefinitionsBuilder (G_ROTR)
302
+ .legalFor ({{s32, s64}, {s64, s64}})
303
+ .customIf ([=](const LegalityQuery &Q) {
304
+ return Q.Types [0 ].isScalar () && Q.Types [1 ].getScalarSizeInBits () < 64 ;
305
+ })
306
+ .lower ();
307
+ getActionDefinitionsBuilder (G_ROTL).lower ();
308
+
309
+ getActionDefinitionsBuilder ({G_SBFX, G_UBFX})
310
+ .customFor ({{s32, s32}, {s64, s64}});
311
+
312
+ auto always = [=](const LegalityQuery &Q) { return true ; };
313
+ getActionDefinitionsBuilder (G_CTPOP)
314
+ .legalFor (HasCSSC, {{s32, s32}, {s64, s64}})
315
+ .legalFor ({{v8s8, v8s8}, {v16s8, v16s8}})
316
+ .customFor (!HasCSSC, {{s32, s32}, {s64, s64}})
317
+ .customFor ({{s128, s128},
318
+ {v4s16, v4s16},
319
+ {v8s16, v8s16},
320
+ {v2s32, v2s32},
321
+ {v4s32, v4s32},
322
+ {v2s64, v2s64}})
323
+ .clampScalar (0 , s32, s128)
324
+ .widenScalarToNextPow2 (0 )
325
+ .minScalarEltSameAsIf (always, 1 , 0 )
326
+ .maxScalarEltSameAsIf (always, 1 , 0 );
327
+
328
+ getActionDefinitionsBuilder (G_CTLZ)
329
+ .legalForCartesianProduct (
330
+ {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
331
+ .scalarize (1 )
332
+ .widenScalarToNextPow2 (1 , /* Min=*/ 32 )
333
+ .clampScalar (1 , s32, s64)
334
+ .scalarSameSizeAs (0 , 1 );
335
+ getActionDefinitionsBuilder (G_CTLZ_ZERO_UNDEF).lower ();
336
+
337
+ getActionDefinitionsBuilder (G_CTTZ)
338
+ .lowerIf (isVector (0 ))
339
+ .widenScalarToNextPow2 (1 , /* Min=*/ 32 )
340
+ .clampScalar (1 , s32, s64)
341
+ .scalarSameSizeAs (0 , 1 )
342
+ .legalFor (HasCSSC, {s32, s64})
343
+ .customFor (!HasCSSC, {s32, s64});
344
+
345
+ getActionDefinitionsBuilder (G_CTTZ_ZERO_UNDEF).lower ();
346
+
347
+ // TODO: Custom lowering for v2s32, v4s32, v2s64.
348
+ getActionDefinitionsBuilder (G_BITREVERSE)
349
+ .legalFor ({s32, s64, v8s8, v16s8})
350
+ .widenScalarToNextPow2 (0 , /* Min = */ 32 )
351
+ .clampScalar (0 , s32, s64)
352
+ .lower ();
353
+
354
+ getActionDefinitionsBuilder (G_BSWAP)
355
+ .legalFor ({s32, s64, v4s16, v8s16, v2s32, v4s32, v2s64})
356
+ .widenScalarOrEltToNextPow2 (0 , 16 )
357
+ .clampScalar (0 , s32, s64)
358
+ .clampNumElements (0 , v4s16, v8s16)
359
+ .clampNumElements (0 , v2s32, v4s32)
360
+ .clampNumElements (0 , v2s64, v2s64)
361
+ .moreElementsToNextPow2 (0 );
362
+
363
+ getActionDefinitionsBuilder ({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT})
364
+ .legalFor ({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64})
365
+ .legalFor (HasSVE, {nxv16s8, nxv8s16, nxv4s32, nxv2s64})
366
+ .clampNumElements (0 , v8s8, v16s8)
367
+ .clampNumElements (0 , v4s16, v8s16)
368
+ .clampNumElements (0 , v2s32, v4s32)
369
+ .clampMaxNumElements (0 , s64, 2 )
370
+ .scalarizeIf (scalarOrEltWiderThan (0 , 64 ), 0 )
371
+ .moreElementsToNextPow2 (0 )
372
+ .lower ();
373
+
263
374
getActionDefinitionsBuilder (
264
375
{G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FSQRT, G_FMAXNUM, G_FMINNUM,
265
376
G_FMAXIMUM, G_FMINIMUM, G_FCEIL, G_FFLOOR, G_FRINT, G_FNEARBYINT,
@@ -309,27 +420,22 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
309
420
.minScalar (0 , s32)
310
421
.libcallFor ({{s32, s32}, {s64, s32}, {s128, s32}});
311
422
312
- getActionDefinitionsBuilder (G_INSERT)
313
- .legalIf (all (typeInSet (0 , {s32, s64, p0}),
314
- typeInSet (1 , {s8, s16, s32}), smallerThan (1 , 0 )))
315
- .widenScalarToNextPow2 (0 )
316
- .clampScalar (0 , s32, s64)
317
- .widenScalarToNextPow2 (1 )
318
- .minScalar (1 , s8)
319
- .maxScalarIf (typeInSet (0 , {s32}), 1 , s16)
320
- .maxScalarIf (typeInSet (0 , {s64, p0}), 1 , s32);
423
+ // TODO: Libcall support for s128.
424
+ // TODO: s16 should be legal with full FP16 support.
425
+ getActionDefinitionsBuilder ({G_LROUND, G_LLROUND})
426
+ .legalFor ({{s64, s32}, {s64, s64}});
321
427
322
- getActionDefinitionsBuilder (G_EXTRACT)
323
- .legalIf (all (typeInSet (0 , {s16, s32, s64, p0}),
324
- typeInSet (1 , {s32, s64, s128, p0}), smallerThan (0 , 1 )))
325
- .widenScalarToNextPow2 (1 )
326
- .clampScalar (1 , s32, s128)
327
- .widenScalarToNextPow2 (0 )
328
- .minScalar (0 , s16)
329
- .maxScalarIf (typeInSet (1 , {s32}), 0 , s16)
330
- .maxScalarIf (typeInSet (1 , {s64, p0}), 0 , s32)
331
- .maxScalarIf (typeInSet (1 , {s128}), 0 , s64);
428
+ // TODO: Custom legalization for mismatched types.
429
+ getActionDefinitionsBuilder (G_FCOPYSIGN)
430
+ .moreElementsIf (
431
+ [](const LegalityQuery &Query) { return Query.Types [0 ].isScalar (); },
432
+ [=](const LegalityQuery &Query) {
433
+ const LLT Ty = Query.Types [0 ];
434
+ return std::pair (0 , LLT::fixed_vector (Ty == s16 ? 4 : 2 , Ty));
435
+ })
436
+ .lower ();
332
437
438
+ getActionDefinitionsBuilder (G_FMAD).lower ();
333
439
334
440
for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
335
441
auto &Actions = getActionDefinitionsBuilder (Op);
@@ -1035,32 +1141,6 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
1035
1141
1036
1142
getActionDefinitionsBuilder (G_BUILD_VECTOR_TRUNC).lower ();
1037
1143
1038
- getActionDefinitionsBuilder (G_CTLZ)
1039
- .legalForCartesianProduct (
1040
- {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
1041
- .scalarize (1 )
1042
- .widenScalarToNextPow2 (1 , /* Min=*/ 32 )
1043
- .clampScalar (1 , s32, s64)
1044
- .scalarSameSizeAs (0 , 1 );
1045
- getActionDefinitionsBuilder (G_CTLZ_ZERO_UNDEF).lower ();
1046
-
1047
- // TODO: Custom lowering for v2s32, v4s32, v2s64.
1048
- getActionDefinitionsBuilder (G_BITREVERSE)
1049
- .legalFor ({s32, s64, v8s8, v16s8})
1050
- .widenScalarToNextPow2 (0 , /* Min = */ 32 )
1051
- .clampScalar (0 , s32, s64)
1052
- .lower ();
1053
-
1054
- getActionDefinitionsBuilder (G_CTTZ_ZERO_UNDEF).lower ();
1055
-
1056
- getActionDefinitionsBuilder (G_CTTZ)
1057
- .lowerIf (isVector (0 ))
1058
- .widenScalarToNextPow2 (1 , /* Min=*/ 32 )
1059
- .clampScalar (1 , s32, s64)
1060
- .scalarSameSizeAs (0 , 1 )
1061
- .legalFor (HasCSSC, {s32, s64})
1062
- .customFor (!HasCSSC, {s32, s64});
1063
-
1064
1144
getActionDefinitionsBuilder (G_SHUFFLE_VECTOR)
1065
1145
.legalIf ([=](const LegalityQuery &Query) {
1066
1146
const LLT &DstTy = Query.Types [0 ];
@@ -1122,6 +1202,15 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
1122
1202
SrcTy.getNumElements ())));
1123
1203
});
1124
1204
1205
+ getActionDefinitionsBuilder (G_EXTRACT_SUBVECTOR)
1206
+ .legalFor ({{v8s8, v16s8}, {v4s16, v8s16}, {v2s32, v4s32}})
1207
+ .widenScalarOrEltToNextPow2 (0 )
1208
+ .immIdx (0 ); // Inform verifier imm idx 0 is handled.
1209
+
1210
+ // TODO: {nxv16s8, s8}, {nxv8s16, s16}
1211
+ getActionDefinitionsBuilder (G_SPLAT_VECTOR)
1212
+ .legalFor (HasSVE, {{nxv4s32, s32}, {nxv2s64, s64}});
1213
+
1125
1214
getActionDefinitionsBuilder (G_JUMP_TABLE).legalFor ({p0});
1126
1215
1127
1216
getActionDefinitionsBuilder (G_BRJT).legalFor ({{p0, s64}});
@@ -1153,28 +1242,6 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
1153
1242
.libcall ();
1154
1243
}
1155
1244
1156
- // FIXME: Legal vector types are only legal with NEON.
1157
- getActionDefinitionsBuilder (G_ABS)
1158
- .legalFor (HasCSSC, {s32, s64})
1159
- .legalFor (PackedVectorAllTypeList)
1160
- .customIf ([=](const LegalityQuery &Q) {
1161
- // TODO: Fix suboptimal codegen for 128+ bit types.
1162
- LLT SrcTy = Q.Types [0 ];
1163
- return SrcTy.isScalar () && SrcTy.getSizeInBits () < 128 ;
1164
- })
1165
- .widenScalarIf (
1166
- [=](const LegalityQuery &Query) { return Query.Types [0 ] == v4s8; },
1167
- [=](const LegalityQuery &Query) { return std::make_pair (0 , v4s16); })
1168
- .widenScalarIf (
1169
- [=](const LegalityQuery &Query) { return Query.Types [0 ] == v2s16; },
1170
- [=](const LegalityQuery &Query) { return std::make_pair (0 , v2s32); })
1171
- .clampNumElements (0 , v8s8, v16s8)
1172
- .clampNumElements (0 , v4s16, v8s16)
1173
- .clampNumElements (0 , v2s32, v4s32)
1174
- .clampNumElements (0 , v2s64, v2s64)
1175
- .moreElementsToNextPow2 (0 )
1176
- .lower ();
1177
-
1178
1245
// For fadd reductions we have pairwise operations available. We treat the
1179
1246
// usual legal types as legal and handle the lowering to pairwise instructions
1180
1247
// later.
@@ -1284,65 +1351,6 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
1284
1351
// TODO: Update this to correct handling when adding AArch64/SVE support.
1285
1352
getActionDefinitionsBuilder (G_VECTOR_COMPRESS).lower ();
1286
1353
1287
- getActionDefinitionsBuilder ({G_FSHL, G_FSHR})
1288
- .customFor ({{s32, s32}, {s32, s64}, {s64, s64}})
1289
- .lower ();
1290
-
1291
- getActionDefinitionsBuilder (G_ROTR)
1292
- .legalFor ({{s32, s64}, {s64, s64}})
1293
- .customIf ([=](const LegalityQuery &Q) {
1294
- return Q.Types [0 ].isScalar () && Q.Types [1 ].getScalarSizeInBits () < 64 ;
1295
- })
1296
- .lower ();
1297
- getActionDefinitionsBuilder (G_ROTL).lower ();
1298
-
1299
- getActionDefinitionsBuilder ({G_SBFX, G_UBFX})
1300
- .customFor ({{s32, s32}, {s64, s64}});
1301
-
1302
- auto always = [=](const LegalityQuery &Q) { return true ; };
1303
- getActionDefinitionsBuilder (G_CTPOP)
1304
- .legalFor (HasCSSC, {{s32, s32}, {s64, s64}})
1305
- .legalFor ({{v8s8, v8s8}, {v16s8, v16s8}})
1306
- .customFor (!HasCSSC, {{s32, s32}, {s64, s64}})
1307
- .customFor ({{s128, s128},
1308
- {v2s32, v2s32},
1309
- {v4s32, v4s32},
1310
- {v4s16, v4s16},
1311
- {v8s16, v8s16},
1312
- {v2s64, v2s64}})
1313
- .clampScalar (0 , s32, s128)
1314
- .widenScalarToNextPow2 (0 )
1315
- .minScalarEltSameAsIf (always, 1 , 0 )
1316
- .maxScalarEltSameAsIf (always, 1 , 0 );
1317
-
1318
- getActionDefinitionsBuilder ({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT})
1319
- .legalFor ({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64})
1320
- .legalFor (HasSVE, {nxv16s8, nxv8s16, nxv4s32, nxv2s64})
1321
- .clampNumElements (0 , v8s8, v16s8)
1322
- .clampNumElements (0 , v4s16, v8s16)
1323
- .clampNumElements (0 , v2s32, v4s32)
1324
- .clampMaxNumElements (0 , s64, 2 )
1325
- .scalarizeIf (scalarOrEltWiderThan (0 , 64 ), 0 )
1326
- .moreElementsToNextPow2 (0 )
1327
- .lower ();
1328
-
1329
- // TODO: Libcall support for s128.
1330
- // TODO: s16 should be legal with full FP16 support.
1331
- getActionDefinitionsBuilder ({G_LROUND, G_LLROUND})
1332
- .legalFor ({{s64, s32}, {s64, s64}});
1333
-
1334
- // TODO: Custom legalization for mismatched types.
1335
- getActionDefinitionsBuilder (G_FCOPYSIGN)
1336
- .moreElementsIf (
1337
- [](const LegalityQuery &Query) { return Query.Types [0 ].isScalar (); },
1338
- [=](const LegalityQuery &Query) {
1339
- const LLT Ty = Query.Types [0 ];
1340
- return std::pair (0 , LLT::fixed_vector (Ty == s16 ? 4 : 2 , Ty));
1341
- })
1342
- .lower ();
1343
-
1344
- getActionDefinitionsBuilder (G_FMAD).lower ();
1345
-
1346
1354
// Access to floating-point environment.
1347
1355
getActionDefinitionsBuilder ({G_GET_FPENV, G_SET_FPENV, G_RESET_FPENV,
1348
1356
G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE})
@@ -1354,15 +1362,6 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
1354
1362
1355
1363
getActionDefinitionsBuilder ({G_SCMP, G_UCMP}).lower ();
1356
1364
1357
- getActionDefinitionsBuilder (G_EXTRACT_SUBVECTOR)
1358
- .legalFor ({{v8s8, v16s8}, {v4s16, v8s16}, {v2s32, v4s32}})
1359
- .widenScalarOrEltToNextPow2 (0 )
1360
- .immIdx (0 ); // Inform verifier imm idx 0 is handled.
1361
-
1362
- // TODO: {nxv16s8, s8}, {nxv8s16, s16}
1363
- getActionDefinitionsBuilder (G_SPLAT_VECTOR)
1364
- .legalFor (HasSVE, {{nxv4s32, s32}, {nxv2s64, s64}});
1365
-
1366
1365
getLegacyLegalizerInfo ().computeTables ();
1367
1366
verify (*ST.getInstrInfo ());
1368
1367
}
0 commit comments