Skip to content

Conversation

@LanceAndersen
Copy link
Contributor

/reviewer @JoeWang-Java

@bridgekeeper
Copy link

bridgekeeper bot commented Sep 2, 2020

Welcome to the OpenJDK organization on GitHub!

This repository is currently a read-only git mirror of the official Mercurial repository (located at https://hg.openjdk.java.net/). As such, we are not currently accepting pull requests here. If you would like to contribute to the OpenJDK project, please see https://openjdk.java.net/contribute/ on how to proceed.

This pull request will be automatically closed.

@bridgekeeper bridgekeeper bot closed this Sep 2, 2020
e1iu pushed a commit to e1iu/jdk that referenced this pull request Mar 29, 2022
This patch optimizes the backend implementation of VectorMaskToLong for
AArch64, giving a more efficient approach to move value bits from a
predicate register to a general-purpose register, as x86 PMOVMSK[1] does,
by using BEXT[2], which is available in SVE2.

With this patch, the final code (input mask is byte type with
SPECIES_512, generated on a QEMU emulator with a 512-bit SVE vector
register size) changes as below:

Before:

        mov     z16.b, p0/z, #1
        fmov    x0, d16
        orr     x0, x0, x0, lsr #7
        orr     x0, x0, x0, lsr #14
        orr     x0, x0, x0, lsr #28
        and     x0, x0, #0xff
        fmov    x8, v16.d[1]
        orr     x8, x8, x8, lsr #7
        orr     x8, x8, x8, lsr #14
        orr     x8, x8, x8, lsr #28
        and     x8, x8, #0xff
        orr     x0, x0, x8, lsl #8

        orr     x8, xzr, #0x2
        whilele p1.d, xzr, x8
        lastb   x8, p1, z16.d
        orr     x8, x8, x8, lsr #7
        orr     x8, x8, x8, lsr #14
        orr     x8, x8, x8, lsr #28
        and     x8, x8, #0xff
        orr     x0, x0, x8, lsl #16

        orr     x8, xzr, #0x3
        whilele p1.d, xzr, x8
        lastb   x8, p1, z16.d
        orr     x8, x8, x8, lsr #7
        orr     x8, x8, x8, lsr #14
        orr     x8, x8, x8, lsr #28
        and     x8, x8, #0xff
        orr     x0, x0, x8, lsl #24

        orr     x8, xzr, #0x4
        whilele p1.d, xzr, x8
        lastb   x8, p1, z16.d
        orr     x8, x8, x8, lsr #7
        orr     x8, x8, x8, lsr #14
        orr     x8, x8, x8, lsr #28
        and     x8, x8, #0xff
        orr     x0, x0, x8, lsl #32

        mov     x8, #0x5
        whilele p1.d, xzr, x8
        lastb   x8, p1, z16.d
        orr     x8, x8, x8, lsr #7
        orr     x8, x8, x8, lsr #14
        orr     x8, x8, x8, lsr #28
        and     x8, x8, #0xff
        orr     x0, x0, x8, lsl #40

        orr     x8, xzr, #0x6
        whilele p1.d, xzr, x8
        lastb   x8, p1, z16.d
        orr     x8, x8, x8, lsr #7
        orr     x8, x8, x8, lsr #14
        orr     x8, x8, x8, lsr #28
        and     x8, x8, #0xff
        orr     x0, x0, x8, lsl #48

        orr     x8, xzr, #0x7
        whilele p1.d, xzr, x8
        lastb   x8, p1, z16.d
        orr     x8, x8, x8, lsr #7
        orr     x8, x8, x8, lsr #14
        orr     x8, x8, x8, lsr #28
        and     x8, x8, #0xff
        orr     x0, x0, x8, lsl #56

After:

        mov     z16.b, p0/z, #1
        mov     z17.b, #1
        bext    z16.d, z16.d, z17.d
        mov     z17.d, #0
        uzp1    z16.s, z16.s, z17.s
        uzp1    z16.h, z16.h, z17.h
        uzp1    z16.b, z16.b, z17.b
        mov     x0, v16.d[0]

[1] https://www.felixcloutier.com/x86/pmovmskb
[2] https://developer.arm.com/documentation/ddi0602/2020-12/SVE-Instructions/BEXT--Gather-lower-bits-from-positions-selected-by-bitmask-

Change-Id: Ia983a20c89f76403e557ac21328f2f2e05dd08e0
e1iu pushed a commit to e1iu/jdk that referenced this pull request Apr 21, 2022
This patch optimizes the backend implementation of VectorMaskToLong for
AArch64, giving a more efficient approach to move value bits from a
predicate register to a general-purpose register, as x86 PMOVMSK[1] does,
by using BEXT[2], which is available in SVE2.

With this patch, the final code (input mask is byte type with
SPECIES_512, generated on a QEMU emulator with a 512-bit SVE vector
register size) changes as below:

Before:

        mov     z16.b, p0/z, #1
        fmov    x0, d16
        orr     x0, x0, x0, lsr #7
        orr     x0, x0, x0, lsr #14
        orr     x0, x0, x0, lsr #28
        and     x0, x0, #0xff
        fmov    x8, v16.d[1]
        orr     x8, x8, x8, lsr #7
        orr     x8, x8, x8, lsr #14
        orr     x8, x8, x8, lsr #28
        and     x8, x8, #0xff
        orr     x0, x0, x8, lsl #8

        orr     x8, xzr, #0x2
        whilele p1.d, xzr, x8
        lastb   x8, p1, z16.d
        orr     x8, x8, x8, lsr #7
        orr     x8, x8, x8, lsr #14
        orr     x8, x8, x8, lsr #28
        and     x8, x8, #0xff
        orr     x0, x0, x8, lsl #16

        orr     x8, xzr, #0x3
        whilele p1.d, xzr, x8
        lastb   x8, p1, z16.d
        orr     x8, x8, x8, lsr #7
        orr     x8, x8, x8, lsr #14
        orr     x8, x8, x8, lsr #28
        and     x8, x8, #0xff
        orr     x0, x0, x8, lsl #24

        orr     x8, xzr, #0x4
        whilele p1.d, xzr, x8
        lastb   x8, p1, z16.d
        orr     x8, x8, x8, lsr #7
        orr     x8, x8, x8, lsr #14
        orr     x8, x8, x8, lsr #28
        and     x8, x8, #0xff
        orr     x0, x0, x8, lsl #32

        mov     x8, #0x5
        whilele p1.d, xzr, x8
        lastb   x8, p1, z16.d
        orr     x8, x8, x8, lsr #7
        orr     x8, x8, x8, lsr #14
        orr     x8, x8, x8, lsr #28
        and     x8, x8, #0xff
        orr     x0, x0, x8, lsl #40

        orr     x8, xzr, #0x6
        whilele p1.d, xzr, x8
        lastb   x8, p1, z16.d
        orr     x8, x8, x8, lsr #7
        orr     x8, x8, x8, lsr #14
        orr     x8, x8, x8, lsr #28
        and     x8, x8, #0xff
        orr     x0, x0, x8, lsl #48

        orr     x8, xzr, #0x7
        whilele p1.d, xzr, x8
        lastb   x8, p1, z16.d
        orr     x8, x8, x8, lsr #7
        orr     x8, x8, x8, lsr #14
        orr     x8, x8, x8, lsr #28
        and     x8, x8, #0xff
        orr     x0, x0, x8, lsl #56

After:

        mov     z16.b, p0/z, #1
        mov     z17.b, #1
        bext    z16.d, z16.d, z17.d
        mov     z17.d, #0
        uzp1    z16.s, z16.s, z17.s
        uzp1    z16.h, z16.h, z17.h
        uzp1    z16.b, z16.b, z17.b
        mov     x0, v16.d[0]

[1] https://www.felixcloutier.com/x86/pmovmskb
[2] https://developer.arm.com/documentation/ddi0602/2020-12/SVE-Instructions/BEXT--Gather-lower-bits-from-positions-selected-by-bitmask-

Change-Id: Ia983a20c89f76403e557ac21328f2f2e05dd08e0
franferrax added a commit to franferrax/jdk that referenced this pull request Aug 11, 2022
openjdk-notifier bot pushed a commit that referenced this pull request Oct 27, 2022
franferrax added a commit to franferrax/jdk that referenced this pull request Oct 28, 2022
openjdk#13 isn't a perfect revert of 0af22dc limited to SSLContextImpl.java and
SunJSSE.java, since it doesn't remove the SharedSecrets import. This was
already the case in rh2020290-support_tls_1_3_in_fips.v1.patch; I only
realized it while doing the OpenJDK 11 backport and retrying the
same approach in OpenJDK 17:
~~~
# Revert openjdk#13
git show 0bd5ca9 | git apply -R
# Redo openjdk#13 by reverting 0af22dc in SSLContextImpl.java and SunJSSE.java
git show 0af22dc |
  git apply -R --include=src/java.base/share/classes/sun/security/ssl/*
~~~

In openjdk#14, I forgot to delete the DHKF and DHKFLock static
variables from FIPSKeyImporter, which are no longer used,
see rh-openjdk#14 (comment).
gnu-andrew pushed a commit to gnu-andrew/jdk that referenced this pull request Nov 23, 2022
openjdk#13 isn't a perfect revert of 0af22dc limited to SSLContextImpl.java and
SunJSSE.java, since it doesn't remove the SharedSecrets import. This was
already the case in rh2020290-support_tls_1_3_in_fips.v1.patch; I only
realized it while doing the OpenJDK 11 backport and retrying the
same approach in OpenJDK 17:
~~~
# Revert openjdk#13
git show 0bd5ca9 | git apply -R
# Redo openjdk#13 by reverting 0af22dc in SSLContextImpl.java and SunJSSE.java
git show 0af22dc |
  git apply -R --include=src/java.base/share/classes/sun/security/ssl/*
~~~

In openjdk#14, I forgot to delete the DHKF and DHKFLock static
variables from FIPSKeyImporter, which are no longer used,
see rh-openjdk#14 (comment).

Reviewed-by: @gnu-andrew
gnu-andrew pushed a commit to gnu-andrew/jdk that referenced this pull request Apr 4, 2023
robehn pushed a commit to robehn/jdk that referenced this pull request Aug 15, 2023
Remove duplicated files with rivos-sdk-jdk
gnu-andrew pushed a commit to gnu-andrew/jdk that referenced this pull request Aug 18, 2023
gnu-andrew pushed a commit to gnu-andrew/jdk that referenced this pull request Aug 18, 2023
openjdk#13 isn't a perfect revert of 0af22dc limited to SSLContextImpl.java and
SunJSSE.java, since it doesn't remove the SharedSecrets import. This was
already the case in rh2020290-support_tls_1_3_in_fips.v1.patch; I only
realized it while doing the OpenJDK 11 backport and retrying the
same approach in OpenJDK 17:
~~~
# Revert openjdk#13
git show 0bd5ca9 | git apply -R
# Redo openjdk#13 by reverting 0af22dc in SSLContextImpl.java and SunJSSE.java
git show 0af22dc |
  git apply -R --include=src/java.base/share/classes/sun/security/ssl/*
~~~

In openjdk#14, I forgot to delete the DHKF and DHKFLock static
variables from FIPSKeyImporter, which are no longer used,
see rh-openjdk#14 (comment).

Reviewed-by: @gnu-andrew
pf0n pushed a commit to pf0n/jdk that referenced this pull request Jul 9, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Development

Successfully merging this pull request may close these issues.

1 participant