-
Notifications
You must be signed in to change notification settings - Fork 6.1k
8252679: Two windows specific FileDIalog tests may fail on some Windows_Server_2016_Standard #24
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
👋 Welcome back serb! A progress list of the required criteria for merging this PR into |
|
@mrserb The following labels will be automatically applied to this pull request: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
+1
|
@mrserb This change now passes all automated pre-integration checks. When the change also fulfills all project specific requirements, type
Since the source branch of this PR was last updated there has been 1 commit pushed to the
As there are no conflicts, your changes will automatically be rebased on top of these commits when integrating. If you prefer to avoid automatic rebasing, please merge ➡️ To integrate this PR with the above commit message to the |
|
/integrate |
|
@mrserb Since your change was applied there has been 1 commit pushed to the
Your commit was automatically rebased without conflicts. Pushed as commit 1262ae3. 💡 You may see a message that your pull request was closed with unmerged commits. This can be safely ignored. |
This patch optimizes the backend implementation of VectorMaskToLong for
AArch64, given a more efficient approach to mov value bits from
predicate register to general purpose register as x86 PMOVMSK[1] does,
by using BEXT[2] which is available in SVE2.
With this patch, the final code (input mask is byte type with
SPECIESE_512, generated on an SVE vector reg size of 512-bit QEMU
emulator) changes as below:
Before:
mov z16.b, p0/z, #1
fmov x0, d16
orr x0, x0, x0, lsr openjdk#7
orr x0, x0, x0, lsr openjdk#14
orr x0, x0, x0, lsr openjdk#28
and x0, x0, #0xff
fmov x8, v16.d[1]
orr x8, x8, x8, lsr openjdk#7
orr x8, x8, x8, lsr openjdk#14
orr x8, x8, x8, lsr openjdk#28
and x8, x8, #0xff
orr x0, x0, x8, lsl openjdk#8
orr x8, xzr, #0x2
whilele p1.d, xzr, x8
lastb x8, p1, z16.d
orr x8, x8, x8, lsr openjdk#7
orr x8, x8, x8, lsr openjdk#14
orr x8, x8, x8, lsr openjdk#28
and x8, x8, #0xff
orr x0, x0, x8, lsl openjdk#16
orr x8, xzr, #0x3
whilele p1.d, xzr, x8
lastb x8, p1, z16.d
orr x8, x8, x8, lsr openjdk#7
orr x8, x8, x8, lsr openjdk#14
orr x8, x8, x8, lsr openjdk#28
and x8, x8, #0xff
orr x0, x0, x8, lsl openjdk#24
orr x8, xzr, #0x4
whilele p1.d, xzr, x8
lastb x8, p1, z16.d
orr x8, x8, x8, lsr openjdk#7
orr x8, x8, x8, lsr openjdk#14
orr x8, x8, x8, lsr openjdk#28
and x8, x8, #0xff
orr x0, x0, x8, lsl openjdk#32
mov x8, #0x5
whilele p1.d, xzr, x8
lastb x8, p1, z16.d
orr x8, x8, x8, lsr openjdk#7
orr x8, x8, x8, lsr openjdk#14
orr x8, x8, x8, lsr openjdk#28
and x8, x8, #0xff
orr x0, x0, x8, lsl openjdk#40
orr x8, xzr, #0x6
whilele p1.d, xzr, x8
lastb x8, p1, z16.d
orr x8, x8, x8, lsr openjdk#7
orr x8, x8, x8, lsr openjdk#14
orr x8, x8, x8, lsr openjdk#28
and x8, x8, #0xff
orr x0, x0, x8, lsl openjdk#48
orr x8, xzr, #0x7
whilele p1.d, xzr, x8
lastb x8, p1, z16.d
orr x8, x8, x8, lsr openjdk#7
orr x8, x8, x8, lsr openjdk#14
orr x8, x8, x8, lsr openjdk#28
and x8, x8, #0xff
orr x0, x0, x8, lsl openjdk#56
After:
mov z16.b, p0/z, #1
mov z17.b, #1
bext z16.d, z16.d, z17.d
mov z17.d, #0
uzp1 z16.s, z16.s, z17.s
uzp1 z16.h, z16.h, z17.h
uzp1 z16.b, z16.b, z17.b
mov x0, v16.d[0]
[1] https://www.felixcloutier.com/x86/pmovmskb
[2] https://developer.arm.com/documentation/ddi0602/2020-12/SVE-Instructions/BEXT--Gather-lower-bits-from-positions-selected-by-bitmask-
Change-Id: Ia983a20c89f76403e557ac21328f2f2e05dd08e0
This patch optimizes the backend implementation of VectorMaskToLong for
AArch64, given a more efficient approach to mov value bits from
predicate register to general purpose register as x86 PMOVMSK[1] does,
by using BEXT[2] which is available in SVE2.
With this patch, the final code (input mask is byte type with
SPECIESE_512, generated on an SVE vector reg size of 512-bit QEMU
emulator) changes as below:
Before:
mov z16.b, p0/z, #1
fmov x0, d16
orr x0, x0, x0, lsr openjdk#7
orr x0, x0, x0, lsr openjdk#14
orr x0, x0, x0, lsr openjdk#28
and x0, x0, #0xff
fmov x8, v16.d[1]
orr x8, x8, x8, lsr openjdk#7
orr x8, x8, x8, lsr openjdk#14
orr x8, x8, x8, lsr openjdk#28
and x8, x8, #0xff
orr x0, x0, x8, lsl openjdk#8
orr x8, xzr, #0x2
whilele p1.d, xzr, x8
lastb x8, p1, z16.d
orr x8, x8, x8, lsr openjdk#7
orr x8, x8, x8, lsr openjdk#14
orr x8, x8, x8, lsr openjdk#28
and x8, x8, #0xff
orr x0, x0, x8, lsl openjdk#16
orr x8, xzr, #0x3
whilele p1.d, xzr, x8
lastb x8, p1, z16.d
orr x8, x8, x8, lsr openjdk#7
orr x8, x8, x8, lsr openjdk#14
orr x8, x8, x8, lsr openjdk#28
and x8, x8, #0xff
orr x0, x0, x8, lsl openjdk#24
orr x8, xzr, #0x4
whilele p1.d, xzr, x8
lastb x8, p1, z16.d
orr x8, x8, x8, lsr openjdk#7
orr x8, x8, x8, lsr openjdk#14
orr x8, x8, x8, lsr openjdk#28
and x8, x8, #0xff
orr x0, x0, x8, lsl openjdk#32
mov x8, #0x5
whilele p1.d, xzr, x8
lastb x8, p1, z16.d
orr x8, x8, x8, lsr openjdk#7
orr x8, x8, x8, lsr openjdk#14
orr x8, x8, x8, lsr openjdk#28
and x8, x8, #0xff
orr x0, x0, x8, lsl openjdk#40
orr x8, xzr, #0x6
whilele p1.d, xzr, x8
lastb x8, p1, z16.d
orr x8, x8, x8, lsr openjdk#7
orr x8, x8, x8, lsr openjdk#14
orr x8, x8, x8, lsr openjdk#28
and x8, x8, #0xff
orr x0, x0, x8, lsl openjdk#48
orr x8, xzr, #0x7
whilele p1.d, xzr, x8
lastb x8, p1, z16.d
orr x8, x8, x8, lsr openjdk#7
orr x8, x8, x8, lsr openjdk#14
orr x8, x8, x8, lsr openjdk#28
and x8, x8, #0xff
orr x0, x0, x8, lsl openjdk#56
After:
mov z16.b, p0/z, #1
mov z17.b, #1
bext z16.d, z16.d, z17.d
mov z17.d, #0
uzp1 z16.s, z16.s, z17.s
uzp1 z16.h, z16.h, z17.h
uzp1 z16.b, z16.b, z17.b
mov x0, v16.d[0]
[1] https://www.felixcloutier.com/x86/pmovmskb
[2] https://developer.arm.com/documentation/ddi0602/2020-12/SVE-Instructions/BEXT--Gather-lower-bits-from-positions-selected-by-bitmask-
Change-Id: Ia983a20c89f76403e557ac21328f2f2e05dd08e0
After JDK-8283091, the loop below can be vectorized partially.
Statement 1 can be vectorized but statement 2 can't.
```
// int[] iArr; long[] lArrFld; int i1,i2;
for (i1 = 6; i1 < 227; i1++) {
iArr[i1] += lArrFld[i1]++; // statement 1
iArr[i1 + 1] -= (i2++); // statement 2
}
```
But we got incorrect results because the vector packs of iArr are
scheduled incorrectly like:
```
...
load_vector XMM1,[R8 + openjdk#16 + R11 << openjdk#2]
movl RDI, [R8 + openjdk#20 + R11 << openjdk#2] # int
load_vector XMM2,[R9 + openjdk#8 + R11 << openjdk#3]
subl RDI, R11 # int
vpaddq XMM3,XMM2,XMM0 ! add packedL
store_vector [R9 + openjdk#8 + R11 << openjdk#3],XMM3
vector_cast_l2x XMM2,XMM2 !
vpaddd XMM1,XMM2,XMM1 ! add packedI
addl RDI, openjdk#228 # int
movl [R8 + openjdk#20 + R11 << openjdk#2], RDI # int
movl RBX, [R8 + openjdk#24 + R11 << openjdk#2] # int
subl RBX, R11 # int
addl RBX, openjdk#227 # int
movl [R8 + openjdk#24 + R11 << openjdk#2], RBX # int
...
movl RBX, [R8 + openjdk#40 + R11 << openjdk#2] # int
subl RBX, R11 # int
addl RBX, openjdk#223 # int
movl [R8 + openjdk#40 + R11 << openjdk#2], RBX # int
movl RDI, [R8 + openjdk#44 + R11 << openjdk#2] # int
subl RDI, R11 # int
addl RDI, openjdk#222 # int
movl [R8 + openjdk#44 + R11 << openjdk#2], RDI # int
store_vector [R8 + openjdk#16 + R11 << openjdk#2],XMM1
...
```
simplified as:
```
load_vector iArr in statement 1
unvectorized loads/stores in statement 2
store_vector iArr in statement 1
```
We cannot pick the memory state from the first load for LoadI pack
here, as the LoadI vector operation must load the new values in memory
after iArr writes 'iArr[i1 + 1] - (i2++)' to 'iArr[i1 + 1]'(statement 2).
We must take the memory state of the last load where we have assigned
new values ('iArr[i1 + 1] - (i2++)') to the iArr array.
In JDK-8240281, we picked the memory state of the first load. Different
from the scenario in JDK-8240281, the store, which is dependent on an
earlier load here, is in a pack to be scheduled and the LoadI pack
depends on the last_mem. As designed[2], to schedule the StoreI pack,
all memory operations in another single pack should be moved in the same
direction. We know that the store in the pack depends on one of loads in
the LoadI pack, so the LoadI pack should be scheduled before the StoreI
pack. And the LoadI pack depends on the last_mem, so the last_mem must
be scheduled before the LoadI pack and also before the store pack.
Therefore, we need to take the memory state of the last load for the
LoadI pack here.
To fix it, the pack adds additional checks while picking the memory state
of the first load. When the store locates in a pack and the load pack
relies on the last_mem, we shouldn't choose the memory state of the
first load but choose the memory state of the last load.
[1]https://github.com/openjdk/jdk/blob/0ae834105740f7cf73fe96be22e0f564ad29b18d/src/hotspot/share/opto/superword.cpp#L2380
[2]https://github.com/openjdk/jdk/blob/0ae834105740f7cf73fe96be22e0f564ad29b18d/src/hotspot/share/opto/superword.cpp#L2232
Jira: ENTLLT-5482
Change-Id: I341d10b91957b60a1b4aff8116723e54083a5fb8
CustomizedGitHooks: yes
…cv-port-22 RISC-V: loom: RVC: Fix two potential alignment issues when RVC's enabled
* JVM-1897: broken phi node when we merge 2 virtual objects * Fix release build. --------- Co-authored-by: Xin Liu <xxinliu@amazon.com>
…ng into ldp/stp on AArch64 Macro-assembler on aarch64 can merge adjacent loads or stores into ldp/stp[1]. For example, it can merge: ``` str w20, [sp, openjdk#16] str w10, [sp, openjdk#20] ``` into ``` stp w20, w10, [sp, openjdk#16] ``` But C2 may generate a sequence like: ``` str x21, [sp, openjdk#8] str w20, [sp, openjdk#16] str x19, [sp, openjdk#24] <--- str w10, [sp, openjdk#20] <--- Before sorting str x11, [sp, openjdk#40] str w13, [sp, openjdk#48] str x16, [sp, openjdk#56] ``` We can't do any merging for non-adjacent loads or stores. The patch is to sort the spilling or unspilling sequence in the order of offset during instruction scheduling and bundling phase. After that, we can get a new sequence: ``` str x21, [sp, openjdk#8] str w20, [sp, openjdk#16] str w10, [sp, openjdk#20] <--- str x19, [sp, openjdk#24] <--- After sorting str x11, [sp, openjdk#40] str w13, [sp, openjdk#48] str x16, [sp, openjdk#56] ``` Then macro-assembler can do ld/st merging: ``` str x21, [sp, openjdk#8] stp w20, w10, [sp, openjdk#16] <--- Merged str x19, [sp, openjdk#24] str x11, [sp, openjdk#40] str w13, [sp, openjdk#48] str x16, [sp, openjdk#56] ``` To justify the patch, we run `HelloWorld.java` ``` public class HelloWorld { public static void main(String [] args) { System.out.println("Hello World!"); } } ``` with `java -Xcomp -XX:-TieredCompilation HelloWorld`. Before the patch, macro-assembler can do ld/st merging for 3688 times. After the patch, the number of ld/st merging increases to 3871 times, by ~5 %. Tested tier1~3 on x86 and AArch64. [1] https://github.com/openjdk/jdk/blob/a95062b39a431b4937ab6e9e73de4d2b8ea1ac49/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp#L2079
Use System.nanoTime() instead of System.currentTimeMillis()
These seemingly simple checks are needed on Windows because on windows we allow navigating in the special folders(Libraries). To map the LIbrary such as Music/Video we call the native win32 API. If this API fails to convert the Library to the path, we will show this library to the user in the FileChooser without an ability to navigate it.
I have checked that the test fails on the systems where the native API sometimes cannot map the Library to the path, so even the native applications cannot navigate it.
We run these tests, as a service on the headless windows system, so this could be a reason.
For now, I would like to move these tests to the headful group where we know the desktop session exists. If they continue to fail we probably will need to relax the checks in the tests.
Progress
Issue
Reviewers
Download
$ git fetch https://git.openjdk.java.net/jdk pull/24/head:pull/24$ git checkout pull/24