Skip to content

Commit f807f7e

Browse files
eme64 and pull[bot]
authored and committed
8330819: C2 SuperWord: bad dominance after pre-loop limit adjustment with base that has CastLL after pre-loop
Reviewed-by: chagedorn, kvn
1 parent c63376c commit f807f7e

File tree

2 files changed

+118
-1
lines changed

2 files changed

+118
-1
lines changed

src/hotspot/share/opto/vectorization.cpp

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -461,7 +461,10 @@ VPointer::VPointer(const MemNode* mem, const VLoop& vloop,
461461
break; // stop looking at addp's
462462
}
463463
}
464-
if (is_loop_member(adr)) {
464+
if (!invariant(adr)) {
465+
// The address must be invariant for the current loop. But if we are in a main-loop,
466+
// it must also be invariant of the pre-loop, otherwise we cannot use this address
467+
// for the pre-loop limit adjustment required for main-loop alignment.
465468
assert(!valid(), "adr is loop variant");
466469
return;
467470
}
Lines changed: 114 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,114 @@
1+
/*
2+
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
3+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
*
5+
* This code is free software; you can redistribute it and/or modify it
6+
* under the terms of the GNU General Public License version 2 only, as
7+
* published by the Free Software Foundation.
8+
*
9+
* This code is distributed in the hope that it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12+
* version 2 for more details (a copy is included in the LICENSE file that
13+
* accompanied this code).
14+
*
15+
* You should have received a copy of the GNU General Public License version
16+
* 2 along with this work; if not, write to the Free Software Foundation,
17+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18+
*
19+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20+
* or visit www.oracle.com if you need additional information or have any
21+
* questions.
22+
*/
23+
24+
/*
25+
* @test
26+
* @bug 8330819
27+
* @summary Case where VPointer finds an "adr" CastX2P, which contains a CastLL,
28+
* that has a ctrl after the pre-loop. This value cannot be used in the
29+
* pre-loop limit for main-loop adjustment.
30+
* @modules java.base/jdk.internal.misc
31+
* @modules java.base/jdk.internal.util
32+
* @run main/othervm -Xbatch compiler.loopopts.superword.TestMemorySegmentMainLoopAlignment
33+
*/
34+
35+
package compiler.loopopts.superword;
36+
37+
import java.lang.foreign.*;
38+
import jdk.internal.misc.Unsafe;
39+
import jdk.internal.util.Preconditions;
40+
41+
// Test driver: main repeatedly calls test1 (load through a MemorySegment) and test2 (load
// through Unsafe). Both loops sum 16 int loads that are performed via a callback interface.
public class TestMemorySegmentMainLoopAlignment {
42+
// int layout with its byte alignment set to 1; used for the load in memorySegmentGet.
static final ValueLayout.OfInt ELEMENT_LAYOUT = ValueLayout.JAVA_INT.withByteAlignment(1);
43+
// Unsafe instance used for the raw allocation in main and the load in memoryUnsafeGet.
static final Unsafe UNSAFE = Unsafe.getUnsafe();
44+
// Length argument for the checkIndex call in memoryUnsafeGet.
// NOTE(review): deliberately non-final? Presumably so the bound is not a compile-time
// constant for the JIT; confirm before changing.
static long RANGE = 6400;
45+
46+
// Type of the MemorySegment-based load callback that test1 receives as "f".
47+
interface MSOp {
48+
int apply(MemorySegment memory, long offset, int i);
49+
}
50+
51+
// Type of the Unsafe-based load callback that test2 receives as "f".
52+
interface MemoryUnsafeOp {
53+
int apply(long base, long offset, int i);
54+
}
55+
56+
// Entry point: runs test1 and then test2 10_000 times each, always with offset 0.
// Presumably the repetition is there to trigger JIT compilation under -Xbatch.
public static void main(String[] args) {
57+
// Allocate 6400 bytes of raw memory from an automatic arena.
// NOTE(review): the second argument is a byte alignment, and Integer.SIZE is the bit
// width (32), so this requests 32-byte alignment; confirm Integer.BYTES was not intended.
58+
MemorySegment ms = Arena.ofAuto().allocate(6400, Integer.SIZE);
59+
for (int i = 0; i < 10_000; i++) {
60+
test1(ms, 0, TestMemorySegmentMainLoopAlignment::memorySegmentGet);
61+
}
62+
// Allocate 6400 bytes of raw memory through Unsafe.
// NOTE(review): the size equals RANGE's initial value but is hard-coded, and this
// method never frees the allocation.
63+
long base = UNSAFE.allocateMemory(6400);
64+
for (int i = 0; i < 10_000; i++) {
65+
test2(base, 0, TestMemorySegmentMainLoopAlignment::memoryUnsafeGet);
66+
}
67+
}
68+
69+
// Loads an int from the segment at byte offset o + 4 * i, using ELEMENT_LAYOUT.
// Somehow, it is necessary to pass this as a lambda.
70+
// The checkIndex inside the "get" method produces the CastLL, which eventually pins the index
71+
// between the pre and main loop.
72+
static int memorySegmentGet(MemorySegment ms, long o, int i) {
73+
return ms.get(ELEMENT_LAYOUT, o + i * 4L);
74+
}
75+
76+
// Sums f.apply(a, offset, i) + res[i] for i in [0, 16) and returns the sum.
// res is freshly allocated and never written, so it only contributes zeros.
static int test1(MemorySegment a, long offset, MSOp f) {
77+
// A constant array size gives a known range for the array access / loop iv i.
78+
int size = 16;
79+
int[] res = new int[size];
80+
int sum = 0;
81+
for (int i = 0; i < size; i++) {
82+
// With inlining, this eventually becomes:
83+
// sum += LoadI(MemorySegment / unsafe) + LoadI(array)
84+
// and we attempt vectorization.
85+
sum += f.apply(a, offset, i) + res[i];
86+
}
87+
return sum;
88+
}
89+
90+
// Loads an int through Unsafe at address base + o + 4 * i, after checking the index
// against RANGE.
// Somehow, it is necessary to pass this as a lambda.
91+
static int memoryUnsafeGet(long base, long o, int i) {
92+
long index = o + i * 4L;
93+
// checkIndex -> CastLL: index >= 0.
94+
// Together with the info about i (known range for phi), this CastLL floats up to
95+
// the offset. Then we get adr = CastX2P(base + CastLL(offset)), where the CastLL
96+
// is pinned between the pre and main loop.
97+
Preconditions.checkIndex(index, RANGE, null);
98+
return UNSAFE.getInt(base + index);
99+
}
100+
101+
// Sums f.apply(base, offset, i) + res[i] for i in [0, 16) and returns the sum.
// res is freshly allocated and never written, so it only contributes zeros.
static int test2(long base, long offset, MemoryUnsafeOp f) {
102+
// A constant array size gives a known range for the array access / loop iv i.
103+
int size = 16;
104+
int[] res = new int[size];
105+
int sum = 0;
106+
for (int i = 0; i < size; i++) {
107+
// With inlining, this eventually becomes:
108+
// sum += LoadI(unsafe) + LoadI(array)
109+
// and we attempt vectorization.
110+
sum += f.apply(base, offset, i) + res[i];
111+
}
112+
return sum;
113+
}
114+
}

0 commit comments

Comments
 (0)