Skip to content

Commit 4abb722

Browse files
committed
[RISCV] Add tests for opportunities to reassociate to form more shXadd instructions. NFC
These tests consist of patterns like (sh3add Z, (add X, (slli Y, 6))) that can be reassociated to form (sh3add (sh3add Y, Z), X).
1 parent 0a6a40d commit 4abb722

File tree

1 file changed

+361
-0
lines changed

1 file changed

+361
-0
lines changed

llvm/test/CodeGen/RISCV/rv64zba.ll

Lines changed: 361 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2036,3 +2036,364 @@ define i64 @pack_i64_disjoint_2(i32 signext %a, i64 %b) nounwind {
20362036
%or = or disjoint i64 %b, %zexta
20372037
ret i64 %or
20382038
}
2039+
2040+
; Loads the i8 at %p + %idx1 * 2 + %idx2 (rows are [2 x i8], elements are i8).
; With Zba the row scale of 2 is folded into sh1add; the element index needs
; no scaling, so it remains a plain add in both runs.
define i8 @array_index_sh1_sh0(ptr %p, i64 %idx1, i64 %idx2) {
2041+
; RV64I-LABEL: array_index_sh1_sh0:
2042+
; RV64I: # %bb.0:
2043+
; RV64I-NEXT: slli a1, a1, 1
2044+
; RV64I-NEXT: add a0, a0, a2
2045+
; RV64I-NEXT: add a0, a0, a1
2046+
; RV64I-NEXT: lbu a0, 0(a0)
2047+
; RV64I-NEXT: ret
2048+
;
2049+
; RV64ZBA-LABEL: array_index_sh1_sh0:
2050+
; RV64ZBA: # %bb.0:
2051+
; RV64ZBA-NEXT: sh1add a0, a1, a0
2052+
; RV64ZBA-NEXT: add a0, a0, a2
2053+
; RV64ZBA-NEXT: lbu a0, 0(a0)
2054+
; RV64ZBA-NEXT: ret
2055+
%a = getelementptr inbounds [2 x i8], ptr %p, i64 %idx1, i64 %idx2
2056+
%b = load i8, ptr %a, align 1
2057+
ret i8 %b
2058+
}
2059+
2060+
; Loads the i16 at %p + %idx1 * 4 + %idx2 * 2 (rows are [2 x i16]).
; Both scales fit a shXadd shift amount, so the Zba run uses sh2add for the
; row offset and sh1add for the element offset.
define i16 @array_index_sh1_sh1(ptr %p, i64 %idx1, i64 %idx2) {
2061+
; RV64I-LABEL: array_index_sh1_sh1:
2062+
; RV64I: # %bb.0:
2063+
; RV64I-NEXT: slli a1, a1, 2
2064+
; RV64I-NEXT: add a0, a0, a1
2065+
; RV64I-NEXT: slli a2, a2, 1
2066+
; RV64I-NEXT: add a0, a0, a2
2067+
; RV64I-NEXT: lh a0, 0(a0)
2068+
; RV64I-NEXT: ret
2069+
;
2070+
; RV64ZBA-LABEL: array_index_sh1_sh1:
2071+
; RV64ZBA: # %bb.0:
2072+
; RV64ZBA-NEXT: sh2add a0, a1, a0
2073+
; RV64ZBA-NEXT: sh1add a0, a2, a0
2074+
; RV64ZBA-NEXT: lh a0, 0(a0)
2075+
; RV64ZBA-NEXT: ret
2076+
%a = getelementptr inbounds [2 x i16], ptr %p, i64 %idx1, i64 %idx2
2077+
%b = load i16, ptr %a, align 2
2078+
ret i16 %b
2079+
}
2080+
2081+
; Loads the i32 at %p + %idx1 * 8 + %idx2 * 4 (rows are [2 x i32]).
; Both scales fit a shXadd shift amount, so the Zba run uses sh3add for the
; row offset and sh2add for the element offset.
define i32 @array_index_sh1_sh2(ptr %p, i64 %idx1, i64 %idx2) {
2082+
; RV64I-LABEL: array_index_sh1_sh2:
2083+
; RV64I: # %bb.0:
2084+
; RV64I-NEXT: slli a1, a1, 3
2085+
; RV64I-NEXT: add a0, a0, a1
2086+
; RV64I-NEXT: slli a2, a2, 2
2087+
; RV64I-NEXT: add a0, a0, a2
2088+
; RV64I-NEXT: lw a0, 0(a0)
2089+
; RV64I-NEXT: ret
2090+
;
2091+
; RV64ZBA-LABEL: array_index_sh1_sh2:
2092+
; RV64ZBA: # %bb.0:
2093+
; RV64ZBA-NEXT: sh3add a0, a1, a0
2094+
; RV64ZBA-NEXT: sh2add a0, a2, a0
2095+
; RV64ZBA-NEXT: lw a0, 0(a0)
2096+
; RV64ZBA-NEXT: ret
2097+
%a = getelementptr inbounds [2 x i32], ptr %p, i64 %idx1, i64 %idx2
2098+
%b = load i32, ptr %a, align 4
2099+
ret i32 %b
2100+
}
2101+
2102+
; Loads the i64 at %p + %idx1 * 16 + %idx2 * 8 (rows are [2 x i64]).
; The row scale needs a shift of 4, so the recorded Zba output keeps slli+add
; for the row offset and only forms sh3add for the element offset.
; The same address equals ((((%idx1 << 1) + %idx2) << 3) + %p), i.e. it could
; be expressed as a sh1add feeding a sh3add.
define i64 @array_index_sh1_sh3(ptr %p, i64 %idx1, i64 %idx2) {
2103+
; RV64I-LABEL: array_index_sh1_sh3:
2104+
; RV64I: # %bb.0:
2105+
; RV64I-NEXT: slli a1, a1, 4
2106+
; RV64I-NEXT: add a0, a0, a1
2107+
; RV64I-NEXT: slli a2, a2, 3
2108+
; RV64I-NEXT: add a0, a0, a2
2109+
; RV64I-NEXT: ld a0, 0(a0)
2110+
; RV64I-NEXT: ret
2111+
;
2112+
; RV64ZBA-LABEL: array_index_sh1_sh3:
2113+
; RV64ZBA: # %bb.0:
2114+
; RV64ZBA-NEXT: slli a1, a1, 4
2115+
; RV64ZBA-NEXT: add a0, a0, a1
2116+
; RV64ZBA-NEXT: sh3add a0, a2, a0
2117+
; RV64ZBA-NEXT: ld a0, 0(a0)
2118+
; RV64ZBA-NEXT: ret
2119+
%a = getelementptr inbounds [2 x i64], ptr %p, i64 %idx1, i64 %idx2
2120+
%b = load i64, ptr %a, align 8
2121+
ret i64 %b
2122+
}
2123+
2124+
; Loads the i8 at %p + %idx1 * 4 + %idx2 (rows are [4 x i8], elements are i8).
; With Zba the row scale of 4 is folded into sh2add; the element index needs
; no scaling, so it remains a plain add in both runs.
define i8 @array_index_sh2_sh0(ptr %p, i64 %idx1, i64 %idx2) {
2125+
; RV64I-LABEL: array_index_sh2_sh0:
2126+
; RV64I: # %bb.0:
2127+
; RV64I-NEXT: slli a1, a1, 2
2128+
; RV64I-NEXT: add a0, a0, a2
2129+
; RV64I-NEXT: add a0, a0, a1
2130+
; RV64I-NEXT: lbu a0, 0(a0)
2131+
; RV64I-NEXT: ret
2132+
;
2133+
; RV64ZBA-LABEL: array_index_sh2_sh0:
2134+
; RV64ZBA: # %bb.0:
2135+
; RV64ZBA-NEXT: sh2add a0, a1, a0
2136+
; RV64ZBA-NEXT: add a0, a0, a2
2137+
; RV64ZBA-NEXT: lbu a0, 0(a0)
2138+
; RV64ZBA-NEXT: ret
2139+
%a = getelementptr inbounds [4 x i8], ptr %p, i64 %idx1, i64 %idx2
2140+
%b = load i8, ptr %a, align 1
2141+
ret i8 %b
2142+
}
2143+
2144+
; Loads the i16 at %p + %idx1 * 8 + %idx2 * 2 (rows are [4 x i16]).
; Both scales fit a shXadd shift amount, so the Zba run uses sh3add for the
; row offset and sh1add for the element offset.
define i16 @array_index_sh2_sh1(ptr %p, i64 %idx1, i64 %idx2) {
2145+
; RV64I-LABEL: array_index_sh2_sh1:
2146+
; RV64I: # %bb.0:
2147+
; RV64I-NEXT: slli a1, a1, 3
2148+
; RV64I-NEXT: add a0, a0, a1
2149+
; RV64I-NEXT: slli a2, a2, 1
2150+
; RV64I-NEXT: add a0, a0, a2
2151+
; RV64I-NEXT: lh a0, 0(a0)
2152+
; RV64I-NEXT: ret
2153+
;
2154+
; RV64ZBA-LABEL: array_index_sh2_sh1:
2155+
; RV64ZBA: # %bb.0:
2156+
; RV64ZBA-NEXT: sh3add a0, a1, a0
2157+
; RV64ZBA-NEXT: sh1add a0, a2, a0
2158+
; RV64ZBA-NEXT: lh a0, 0(a0)
2159+
; RV64ZBA-NEXT: ret
2160+
%a = getelementptr inbounds [4 x i16], ptr %p, i64 %idx1, i64 %idx2
2161+
%b = load i16, ptr %a, align 2
2162+
ret i16 %b
2163+
}
2164+
2165+
; Loads the i32 at %p + %idx1 * 16 + %idx2 * 4 (rows are [4 x i32]).
; The row scale needs a shift of 4, so the recorded Zba output keeps slli+add
; for the row offset and only forms sh2add for the element offset.
; The same address equals ((((%idx1 << 2) + %idx2) << 2) + %p), i.e. it could
; be expressed as a sh2add feeding a sh2add.
define i32 @array_index_sh2_sh2(ptr %p, i64 %idx1, i64 %idx2) {
2166+
; RV64I-LABEL: array_index_sh2_sh2:
2167+
; RV64I: # %bb.0:
2168+
; RV64I-NEXT: slli a1, a1, 4
2169+
; RV64I-NEXT: add a0, a0, a1
2170+
; RV64I-NEXT: slli a2, a2, 2
2171+
; RV64I-NEXT: add a0, a0, a2
2172+
; RV64I-NEXT: lw a0, 0(a0)
2173+
; RV64I-NEXT: ret
2174+
;
2175+
; RV64ZBA-LABEL: array_index_sh2_sh2:
2176+
; RV64ZBA: # %bb.0:
2177+
; RV64ZBA-NEXT: slli a1, a1, 4
2178+
; RV64ZBA-NEXT: add a0, a0, a1
2179+
; RV64ZBA-NEXT: sh2add a0, a2, a0
2180+
; RV64ZBA-NEXT: lw a0, 0(a0)
2181+
; RV64ZBA-NEXT: ret
2182+
%a = getelementptr inbounds [4 x i32], ptr %p, i64 %idx1, i64 %idx2
2183+
%b = load i32, ptr %a, align 4
2184+
ret i32 %b
2185+
}
2186+
2187+
; Loads the i64 at %p + %idx1 * 32 + %idx2 * 8 (rows are [4 x i64]).
; The row scale needs a shift of 5, so the recorded Zba output keeps slli+add
; for the row offset and only forms sh3add for the element offset.
; The same address equals ((((%idx1 << 2) + %idx2) << 3) + %p), i.e. it could
; be expressed as a sh2add feeding a sh3add.
define i64 @array_index_sh2_sh3(ptr %p, i64 %idx1, i64 %idx2) {
2188+
; RV64I-LABEL: array_index_sh2_sh3:
2189+
; RV64I: # %bb.0:
2190+
; RV64I-NEXT: slli a1, a1, 5
2191+
; RV64I-NEXT: add a0, a0, a1
2192+
; RV64I-NEXT: slli a2, a2, 3
2193+
; RV64I-NEXT: add a0, a0, a2
2194+
; RV64I-NEXT: ld a0, 0(a0)
2195+
; RV64I-NEXT: ret
2196+
;
2197+
; RV64ZBA-LABEL: array_index_sh2_sh3:
2198+
; RV64ZBA: # %bb.0:
2199+
; RV64ZBA-NEXT: slli a1, a1, 5
2200+
; RV64ZBA-NEXT: add a0, a0, a1
2201+
; RV64ZBA-NEXT: sh3add a0, a2, a0
2202+
; RV64ZBA-NEXT: ld a0, 0(a0)
2203+
; RV64ZBA-NEXT: ret
2204+
%a = getelementptr inbounds [4 x i64], ptr %p, i64 %idx1, i64 %idx2
2205+
%b = load i64, ptr %a, align 8
2206+
ret i64 %b
2207+
}
2208+
2209+
; Loads the i8 at %p + %idx1 * 8 + %idx2 (rows are [8 x i8], elements are i8).
; With Zba the row scale of 8 is folded into sh3add; the element index needs
; no scaling, so it remains a plain add in both runs.
define i8 @array_index_sh3_sh0(ptr %p, i64 %idx1, i64 %idx2) {
2210+
; RV64I-LABEL: array_index_sh3_sh0:
2211+
; RV64I: # %bb.0:
2212+
; RV64I-NEXT: slli a1, a1, 3
2213+
; RV64I-NEXT: add a0, a0, a2
2214+
; RV64I-NEXT: add a0, a0, a1
2215+
; RV64I-NEXT: lbu a0, 0(a0)
2216+
; RV64I-NEXT: ret
2217+
;
2218+
; RV64ZBA-LABEL: array_index_sh3_sh0:
2219+
; RV64ZBA: # %bb.0:
2220+
; RV64ZBA-NEXT: sh3add a0, a1, a0
2221+
; RV64ZBA-NEXT: add a0, a0, a2
2222+
; RV64ZBA-NEXT: lbu a0, 0(a0)
2223+
; RV64ZBA-NEXT: ret
2224+
%a = getelementptr inbounds [8 x i8], ptr %p, i64 %idx1, i64 %idx2
2225+
%b = load i8, ptr %a, align 1
2226+
ret i8 %b
2227+
}
2228+
2229+
; Loads the i16 at %p + %idx1 * 16 + %idx2 * 2 (rows are [8 x i16]).
; The row scale needs a shift of 4, so the recorded Zba output keeps slli+add
; for the row offset and only forms sh1add for the element offset.
; The same address equals ((((%idx1 << 3) + %idx2) << 1) + %p), i.e. it could
; be expressed as a sh3add feeding a sh1add.
define i16 @array_index_sh3_sh1(ptr %p, i64 %idx1, i64 %idx2) {
2230+
; RV64I-LABEL: array_index_sh3_sh1:
2231+
; RV64I: # %bb.0:
2232+
; RV64I-NEXT: slli a1, a1, 4
2233+
; RV64I-NEXT: add a0, a0, a1
2234+
; RV64I-NEXT: slli a2, a2, 1
2235+
; RV64I-NEXT: add a0, a0, a2
2236+
; RV64I-NEXT: lh a0, 0(a0)
2237+
; RV64I-NEXT: ret
2238+
;
2239+
; RV64ZBA-LABEL: array_index_sh3_sh1:
2240+
; RV64ZBA: # %bb.0:
2241+
; RV64ZBA-NEXT: slli a1, a1, 4
2242+
; RV64ZBA-NEXT: add a0, a0, a1
2243+
; RV64ZBA-NEXT: sh1add a0, a2, a0
2244+
; RV64ZBA-NEXT: lh a0, 0(a0)
2245+
; RV64ZBA-NEXT: ret
2246+
%a = getelementptr inbounds [8 x i16], ptr %p, i64 %idx1, i64 %idx2
2247+
%b = load i16, ptr %a, align 2
2248+
ret i16 %b
2249+
}
2250+
2251+
; Loads the i32 at %p + %idx1 * 32 + %idx2 * 4 (rows are [8 x i32]).
; The row scale needs a shift of 5, so the recorded Zba output keeps slli+add
; for the row offset and only forms sh2add for the element offset.
; The same address equals ((((%idx1 << 3) + %idx2) << 2) + %p), i.e. it could
; be expressed as a sh3add feeding a sh2add.
define i32 @array_index_sh3_sh2(ptr %p, i64 %idx1, i64 %idx2) {
2252+
; RV64I-LABEL: array_index_sh3_sh2:
2253+
; RV64I: # %bb.0:
2254+
; RV64I-NEXT: slli a1, a1, 5
2255+
; RV64I-NEXT: add a0, a0, a1
2256+
; RV64I-NEXT: slli a2, a2, 2
2257+
; RV64I-NEXT: add a0, a0, a2
2258+
; RV64I-NEXT: lw a0, 0(a0)
2259+
; RV64I-NEXT: ret
2260+
;
2261+
; RV64ZBA-LABEL: array_index_sh3_sh2:
2262+
; RV64ZBA: # %bb.0:
2263+
; RV64ZBA-NEXT: slli a1, a1, 5
2264+
; RV64ZBA-NEXT: add a0, a0, a1
2265+
; RV64ZBA-NEXT: sh2add a0, a2, a0
2266+
; RV64ZBA-NEXT: lw a0, 0(a0)
2267+
; RV64ZBA-NEXT: ret
2268+
%a = getelementptr inbounds [8 x i32], ptr %p, i64 %idx1, i64 %idx2
2269+
%b = load i32, ptr %a, align 4
2270+
ret i32 %b
2271+
}
2272+
2273+
; Loads the i64 at %p + %idx1 * 64 + %idx2 * 8 (rows are [8 x i64]).
; The row scale needs a shift of 6, so the recorded Zba output keeps slli+add
; for the row offset and only forms sh3add for the element offset.
; The same address equals ((((%idx1 << 3) + %idx2) << 3) + %p), i.e. it could
; be expressed as a sh3add feeding a sh3add.
define i64 @array_index_sh3_sh3(ptr %p, i64 %idx1, i64 %idx2) {
2274+
; RV64I-LABEL: array_index_sh3_sh3:
2275+
; RV64I: # %bb.0:
2276+
; RV64I-NEXT: slli a1, a1, 6
2277+
; RV64I-NEXT: add a0, a0, a1
2278+
; RV64I-NEXT: slli a2, a2, 3
2279+
; RV64I-NEXT: add a0, a0, a2
2280+
; RV64I-NEXT: ld a0, 0(a0)
2281+
; RV64I-NEXT: ret
2282+
;
2283+
; RV64ZBA-LABEL: array_index_sh3_sh3:
2284+
; RV64ZBA: # %bb.0:
2285+
; RV64ZBA-NEXT: slli a1, a1, 6
2286+
; RV64ZBA-NEXT: add a0, a0, a1
2287+
; RV64ZBA-NEXT: sh3add a0, a2, a0
2288+
; RV64ZBA-NEXT: ld a0, 0(a0)
2289+
; RV64ZBA-NEXT: ret
2290+
%a = getelementptr inbounds [8 x i64], ptr %p, i64 %idx1, i64 %idx2
2291+
%b = load i64, ptr %a, align 8
2292+
ret i64 %b
2293+
}
2294+
2295+
; Similar to above, but with an lshr on one of the indices. This requires
2296+
; special handling during isel to form a shift pair.
; Loads the i64 at %p + (%idx1 >> 58) * 64 + %idx2 * 8 (rows are [8 x i64]).
; Both runs keep the srli 58 / slli 6 pair for the row offset; with Zba only
; the element offset is folded into sh3add.
2297+
define i64 @array_index_lshr_sh3_sh3(ptr %p, i64 %idx1, i64 %idx2) {
2298+
; RV64I-LABEL: array_index_lshr_sh3_sh3:
2299+
; RV64I: # %bb.0:
2300+
; RV64I-NEXT: srli a1, a1, 58
2301+
; RV64I-NEXT: slli a1, a1, 6
2302+
; RV64I-NEXT: slli a2, a2, 3
2303+
; RV64I-NEXT: add a0, a0, a2
2304+
; RV64I-NEXT: add a0, a0, a1
2305+
; RV64I-NEXT: ld a0, 0(a0)
2306+
; RV64I-NEXT: ret
2307+
;
2308+
; RV64ZBA-LABEL: array_index_lshr_sh3_sh3:
2309+
; RV64ZBA: # %bb.0:
2310+
; RV64ZBA-NEXT: srli a1, a1, 58
2311+
; RV64ZBA-NEXT: slli a1, a1, 6
2312+
; RV64ZBA-NEXT: add a0, a0, a1
2313+
; RV64ZBA-NEXT: sh3add a0, a2, a0
2314+
; RV64ZBA-NEXT: ld a0, 0(a0)
2315+
; RV64ZBA-NEXT: ret
2316+
%shr = lshr i64 %idx1, 58
2317+
%a = getelementptr inbounds [8 x i64], ptr %p, i64 %shr, i64 %idx2
2318+
%b = load i64, ptr %a, align 8
2319+
ret i64 %b
2320+
}
2321+
2322+
; Loads the i8 at %p + %idx1 * 16 + %idx2 (rows are [16 x i8], elements are i8).
; The row scale needs a shift of 4, which is larger than any shXadd shift
; amount, and the element index is unscaled; a single shared check prefix is
; used because the recorded output is slli plus two adds for every run.
define i8 @array_index_sh4_sh0(ptr %p, i64 %idx1, i64 %idx2) {
2323+
; CHECK-LABEL: array_index_sh4_sh0:
2324+
; CHECK: # %bb.0:
2325+
; CHECK-NEXT: slli a1, a1, 4
2326+
; CHECK-NEXT: add a0, a0, a2
2327+
; CHECK-NEXT: add a0, a0, a1
2328+
; CHECK-NEXT: lbu a0, 0(a0)
2329+
; CHECK-NEXT: ret
2330+
%a = getelementptr inbounds [16 x i8], ptr %p, i64 %idx1, i64 %idx2
2331+
%b = load i8, ptr %a, align 1
2332+
ret i8 %b
2333+
}
2334+
2335+
; Loads the i16 at %p + %idx1 * 32 + %idx2 * 2 (rows are [16 x i16]).
; The Zba run keeps slli+add for the row offset and forms sh1add for the
; element offset. Factoring the element shift out leaves
; ((%idx1 << 4) + %idx2) << 1, and a shift of 4 is larger than any shXadd
; shift amount, so a second shXadd cannot absorb the row offset here.
define i16 @array_index_sh4_sh1(ptr %p, i64 %idx1, i64 %idx2) {
2336+
; RV64I-LABEL: array_index_sh4_sh1:
2337+
; RV64I: # %bb.0:
2338+
; RV64I-NEXT: slli a1, a1, 5
2339+
; RV64I-NEXT: add a0, a0, a1
2340+
; RV64I-NEXT: slli a2, a2, 1
2341+
; RV64I-NEXT: add a0, a0, a2
2342+
; RV64I-NEXT: lh a0, 0(a0)
2343+
; RV64I-NEXT: ret
2344+
;
2345+
; RV64ZBA-LABEL: array_index_sh4_sh1:
2346+
; RV64ZBA: # %bb.0:
2347+
; RV64ZBA-NEXT: slli a1, a1, 5
2348+
; RV64ZBA-NEXT: add a0, a0, a1
2349+
; RV64ZBA-NEXT: sh1add a0, a2, a0
2350+
; RV64ZBA-NEXT: lh a0, 0(a0)
2351+
; RV64ZBA-NEXT: ret
2352+
%a = getelementptr inbounds [16 x i16], ptr %p, i64 %idx1, i64 %idx2
2353+
%b = load i16, ptr %a, align 2
2354+
ret i16 %b
2355+
}
2356+
2357+
; Loads the i32 at %p + %idx1 * 64 + %idx2 * 4 (rows are [16 x i32]).
; The Zba run keeps slli+add for the row offset and forms sh2add for the
; element offset. Factoring the element shift out leaves
; ((%idx1 << 4) + %idx2) << 2, and a shift of 4 is larger than any shXadd
; shift amount, so a second shXadd cannot absorb the row offset here.
define i32 @array_index_sh4_sh2(ptr %p, i64 %idx1, i64 %idx2) {
2358+
; RV64I-LABEL: array_index_sh4_sh2:
2359+
; RV64I: # %bb.0:
2360+
; RV64I-NEXT: slli a1, a1, 6
2361+
; RV64I-NEXT: add a0, a0, a1
2362+
; RV64I-NEXT: slli a2, a2, 2
2363+
; RV64I-NEXT: add a0, a0, a2
2364+
; RV64I-NEXT: lw a0, 0(a0)
2365+
; RV64I-NEXT: ret
2366+
;
2367+
; RV64ZBA-LABEL: array_index_sh4_sh2:
2368+
; RV64ZBA: # %bb.0:
2369+
; RV64ZBA-NEXT: slli a1, a1, 6
2370+
; RV64ZBA-NEXT: add a0, a0, a1
2371+
; RV64ZBA-NEXT: sh2add a0, a2, a0
2372+
; RV64ZBA-NEXT: lw a0, 0(a0)
2373+
; RV64ZBA-NEXT: ret
2374+
%a = getelementptr inbounds [16 x i32], ptr %p, i64 %idx1, i64 %idx2
2375+
%b = load i32, ptr %a, align 4
2376+
ret i32 %b
2377+
}
2378+
2379+
; Loads the i64 at %p + %idx1 * 128 + %idx2 * 8 (rows are [16 x i64]).
; The Zba run keeps slli+add for the row offset and forms sh3add for the
; element offset. Factoring the element shift out leaves
; ((%idx1 << 4) + %idx2) << 3, and a shift of 4 is larger than any shXadd
; shift amount, so a second shXadd cannot absorb the row offset here.
define i64 @array_index_sh4_sh3(ptr %p, i64 %idx1, i64 %idx2) {
2380+
; RV64I-LABEL: array_index_sh4_sh3:
2381+
; RV64I: # %bb.0:
2382+
; RV64I-NEXT: slli a1, a1, 7
2383+
; RV64I-NEXT: add a0, a0, a1
2384+
; RV64I-NEXT: slli a2, a2, 3
2385+
; RV64I-NEXT: add a0, a0, a2
2386+
; RV64I-NEXT: ld a0, 0(a0)
2387+
; RV64I-NEXT: ret
2388+
;
2389+
; RV64ZBA-LABEL: array_index_sh4_sh3:
2390+
; RV64ZBA: # %bb.0:
2391+
; RV64ZBA-NEXT: slli a1, a1, 7
2392+
; RV64ZBA-NEXT: add a0, a0, a1
2393+
; RV64ZBA-NEXT: sh3add a0, a2, a0
2394+
; RV64ZBA-NEXT: ld a0, 0(a0)
2395+
; RV64ZBA-NEXT: ret
2396+
%a = getelementptr inbounds [16 x i64], ptr %p, i64 %idx1, i64 %idx2
2397+
%b = load i64, ptr %a, align 8
2398+
ret i64 %b
2399+
}

0 commit comments

Comments
 (0)