Skip to content

Commit cb9f5c5

Browse files
wenshao authored and cl4es committed
8339290: Optimize ClassFile Utf8EntryImpl#writeTo
Reviewed-by: redestad, liach
1 parent 340e131 commit cb9f5c5

File tree

7 files changed

+268
-49
lines changed

7 files changed

+268
-49
lines changed

src/java.base/share/classes/java/lang/StringCoding.java

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
/*
2-
* Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2000, 2024, Oracle and/or its affiliates. All rights reserved.
3+
* Copyright (c) 2024, Alibaba Group Holding Limited. All Rights Reserved.
34
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
45
*
56
* This code is free software; you can redistribute it and/or modify it
@@ -34,6 +35,45 @@ class StringCoding {
3435

3536
private StringCoding() { }
3637

38+
/**
39+
* Counts the leading chars of {@code s} that are non-zero ASCII, i.e. in the
* range 0x01..0x7F. Dispatches on {@code s.isLatin1()} to the Latin1 or the
* UTF16 scan over the string's backing byte array.
*
* @param s the string to scan
* @return the number of chars before the first NUL or non-ASCII char
40+
*/
41+
public static int countNonZeroAscii(String s) {
42+
byte[] value = s.value();
43+
if (s.isLatin1()) {
// Latin1 scan is bounded by the length of the backing array.
44+
return countNonZeroAsciiLatin1(value, 0, value.length);
45+
} else {
// UTF16 scan is bounded by the char count rather than the array length.
46+
return countNonZeroAsciiUTF16(value, 0, s.length());
47+
}
48+
}
49+
50+
/**
51+
* Counts the leading non-zero ASCII bytes in {@code ba[off, off + len)}.
*
* @param ba  the bytes to scan
* @param off index of the first byte to examine
* @param len number of bytes to examine
* @return the number of bytes before the first byte that is zero or
*         negative, or {@code len} if there is none
52+
*/
53+
public static int countNonZeroAsciiLatin1(byte[] ba, int off, int len) {
54+
int limit = off + len;
55+
for (int i = off; i < limit; i++) {
// byte is signed: 0 is NUL and negative values are 0x80..0xFF, so a single
// "<= 0" test rejects everything outside 0x01..0x7F.
56+
if (ba[i] <= 0) {
57+
return i - off;
58+
}
59+
}
60+
return len;
61+
}
62+
63+
/**
64+
* Counts the leading non-zero ASCII chars among the {@code strlen} chars
* starting at index {@code off}, reading each char with
* {@code StringUTF16.charAt}.
*
* @param ba     the UTF16-encoded bytes to scan
* @param off    index of the first char to examine (passed to
*               {@code StringUTF16.charAt}, so presumably a char index,
*               not a byte index)
* @param strlen number of chars to examine
* @return the number of chars before the first char that is zero or greater
*         than 0x7F, or {@code strlen} if there is none
65+
*/
66+
public static int countNonZeroAsciiUTF16(byte[] ba, int off, int strlen) {
67+
int limit = off + strlen;
68+
for (int i = off; i < limit; i++) {
69+
char c = StringUTF16.charAt(ba, i);
// Stop at NUL or at the first char outside the 7-bit ASCII range.
70+
if (c == 0 || c > 0x7F) {
71+
return i - off;
72+
}
73+
}
74+
return strlen;
75+
}
76+
3777
// Returns true when countPositives(ba, off, len) reports a count other than len.
public static boolean hasNegatives(byte[] ba, int off, int len) {
3878
return countPositives(ba, off, len) != len;
3979
}

src/java.base/share/classes/java/lang/System.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2569,6 +2569,9 @@ public Stream<ModuleLayer> layers(ClassLoader loader) {
25692569
// Delegates to StringCoding.countPositives with the same arguments.
public int countPositives(byte[] bytes, int offset, int length) {
25702570
return StringCoding.countPositives(bytes, offset, length);
25712571
}
2572+
// Added by this commit: delegates to StringCoding.countNonZeroAscii(String).
public int countNonZeroAscii(String s) {
2573+
return StringCoding.countNonZeroAscii(s);
2574+
}
25722575
// Delegates to String.newStringNoRepl; the CharacterCodingException is propagated to the caller.
public String newStringNoRepl(byte[] bytes, Charset cs) throws CharacterCodingException {
25732576
return String.newStringNoRepl(bytes, cs);
25742577
}

src/java.base/share/classes/jdk/internal/access/JavaLangAccess.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,11 @@ public interface JavaLangAccess {
318318
*/
319319
int countPositives(byte[] ba, int off, int len);
320320

321+
/**
322+
* Count the number of leading non-zero ascii chars in the String,
* i.e. the chars in the range 0x01..0x7F that precede the first NUL
* or non-ASCII char.
*
* @param s the string to scan
* @return the number of leading non-zero ascii chars in {@code s}
323+
*/
324+
int countNonZeroAscii(String s);
325+
321326
/**
322327
* Constructs a new {@code String} by decoding the specified subarray of
323328
* bytes using the specified {@linkplain java.nio.charset.Charset charset}.

src/java.base/share/classes/jdk/internal/classfile/impl/AbstractPoolEntry.java

Lines changed: 3 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
/*
22
* Copyright (c) 2022, 2024, Oracle and/or its affiliates. All rights reserved.
3+
* Copyright (c) 2024, Alibaba Group Holding Limited. All Rights Reserved.
34
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
45
*
56
* This code is free software; you can redistribute it and/or modify it
@@ -409,60 +410,14 @@ public boolean equalsString(String s) {
409410

410411
// Rendered diff hunk: lines whose marker ends in '-' belong to the removed
// implementation, lines whose marker ends in '+' to the added one.
// After this change the tag byte is written once up front; when raw bytes are
// present they are copied behind a u2 length, otherwise the string value is
// encoded by pool.writeUTF.
@Override
411412
void writeTo(BufWriterImpl pool) {
413+
pool.writeU1(tag);
412414
if (rawBytes != null) {
413-
pool.writeU1(tag);
414415
pool.writeU2(rawLen);
415416
pool.writeBytes(rawBytes, offset, rawLen);
416417
}
417418
else {
418419
// state == STRING and no raw bytes
// Removed code below: an inline encoder that rejected strings longer than
// 65535 chars, wrote chars in 0x01..0x7F one byte at a time, and on the first
// other char computed the real byte length, patched the u2 length with
// patchInt and encoded the remaining chars in 1, 2 or 3 bytes.
419-
if (stringValue.length() > 65535) {
420-
throw new IllegalArgumentException("string too long");
421-
}
422-
pool.writeU1(tag);
423-
pool.writeU2(charLen);
424-
for (int i = 0; i < charLen; ++i) {
425-
char c = stringValue.charAt(i);
426-
if (c >= '\001' && c <= '\177') {
427-
// Optimistic writing -- hope everything is bytes
428-
// If not, we bail out, and alternate path patches the length
429-
pool.writeU1((byte) c);
430-
}
431-
else {
432-
int charLength = stringValue.length();
433-
int byteLength = i;
434-
char c1;
435-
for (int j = i; j < charLength; ++j) {
436-
c1 = (stringValue).charAt(j);
437-
if (c1 >= '\001' && c1 <= '\177') {
438-
byteLength++;
439-
} else if (c1 > '\u07FF') {
440-
byteLength += 3;
441-
} else {
442-
byteLength += 2;
443-
}
444-
}
445-
if (byteLength > 65535) {
446-
throw new IllegalArgumentException();
447-
}
448-
int byteLengthFinal = byteLength;
449-
pool.patchInt(pool.size() - i - 2, 2, byteLengthFinal);
450-
for (int j = i; j < charLength; ++j) {
451-
c1 = (stringValue).charAt(j);
452-
if (c1 >= '\001' && c1 <= '\177') {
453-
pool.writeU1((byte) c1);
454-
} else if (c1 > '\u07FF') {
455-
pool.writeU1((byte) (0xE0 | c1 >> 12 & 0xF));
456-
pool.writeU1((byte) (0x80 | c1 >> 6  & 0x3F));
457-
pool.writeU1((byte) (0x80 | c1 & 0x3F));
458-
} else {
459-
pool.writeU1((byte) (0xC0 | c1 >> 6 & 0x1F));
460-
pool.writeU1((byte) (0x80 | c1 & 0x3F));
461-
}
462-
}
463-
break;
464-
}
465-
}
// Added replacement: length computation and encoding now happen in BufWriterImpl.writeUTF.
420+
pool.writeUTF(stringValue);
466421
}
467422
}
468423
}

src/java.base/share/classes/jdk/internal/classfile/impl/BufWriterImpl.java

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
/*
22
* Copyright (c) 2022, 2024, Oracle and/or its affiliates. All rights reserved.
3+
* Copyright (c) 2024, Alibaba Group Holding Limited. All Rights Reserved.
34
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
45
*
56
* This code is free software; you can redistribute it and/or modify it
@@ -34,7 +35,11 @@
3435
import java.lang.classfile.constantpool.ConstantPoolBuilder;
3536
import java.lang.classfile.constantpool.PoolEntry;
3637

38+
import jdk.internal.access.JavaLangAccess;
39+
import jdk.internal.access.SharedSecrets;
40+
3741
public final class BufWriterImpl implements BufWriter {
42+
private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess();
3843

3944
private final ConstantPoolBuilder constantPool;
4045
private final ClassFileImpl context;
@@ -152,6 +157,52 @@ public void writeBytes(BufWriterImpl other) {
152157
writeBytes(other.elems, 0, other.offset);
153158
}
154159

160+
/**
 * Writes {@code str} at the current offset as a 2-byte big-endian byte length
 * followed by the encoded chars: one byte for chars in 0x01..0x7F, three bytes
 * for chars above 0x07FF and two bytes for everything else (including NUL).
 * This matches the modified UTF-8 form used by CONSTANT_Utf8_info.
 *
 * @param str the string to encode
 * @throws IllegalArgumentException if the encoded length exceeds 65535 bytes
 */
// "deprecation" is suppressed for the String.getBytes(int, int, byte[], int) call below.
@SuppressWarnings("deprecation")
161+
void writeUTF(String str) {
162+
int strlen = str.length();
163+
int countNonZeroAscii = JLA.countNonZeroAscii(str);
// Start from one byte per char, then add the extra bytes needed by chars
// after the all-ASCII prefix.
// NOTE(review): utflen is an int and can reach 3 * strlen; for strings longer
// than roughly Integer.MAX_VALUE / 3 chars the sum can wrap negative and slip
// past the "> 65535" check below. The removed implementation rejected
// length() > 65535 up front -- consider restoring that guard.
164+
int utflen = strlen;
165+
if (countNonZeroAscii != strlen) {
166+
for (int i = countNonZeroAscii; i < strlen; i++) {
167+
int c = str.charAt(i);
168+
if (c >= 0x80 || c == 0)
169+
utflen += (c >= 0x800) ? 2 : 1;
170+
}
171+
}
172+
if (utflen > 65535) {
173+
throw new IllegalArgumentException("string too long");
174+
}
// Reserve room for the u2 length plus the payload before writing into elems directly.
175+
reserveSpace(utflen + 2);
176+
177+
// Work on local copies of the cursor and buffer; the field is written back once at the end.
int offset = this.offset;
178+
byte[] elems = this.elems;
179+
180+
// u2 length, high byte first.
elems[offset ] = (byte) (utflen >> 8);
181+
elems[offset + 1] = (byte) utflen;
182+
offset += 2;
183+
184+
// Bulk-copy the prefix already known to be 0x01..0x7F; this getBytes overload
// keeps only the low byte of each char, which is lossless for that range.
str.getBytes(0, countNonZeroAscii, elems, offset);
185+
offset += countNonZeroAscii;
186+
187+
// Encode the remaining chars one at a time.
for (int i = countNonZeroAscii; i < strlen; ++i) {
188+
char c = str.charAt(i);
189+
if (c >= '\001' && c <= '\177') {
190+
elems[offset++] = (byte) c;
191+
} else if (c > '\u07FF') {
// Three-byte form: 1110xxxx 10xxxxxx 10xxxxxx.
192+
elems[offset ] = (byte) (0xE0 | c >> 12 & 0xF);
193+
elems[offset + 1] = (byte) (0x80 | c >> 6  & 0x3F);
194+
elems[offset + 2] = (byte) (0x80 | c & 0x3F);
195+
offset += 3;
196+
} else {
// Two-byte form: 110xxxxx 10xxxxxx (also used for NUL).
197+
elems[offset ] = (byte) (0xC0 | c >> 6 & 0x1F);
198+
elems[offset + 1] = (byte) (0x80 | c & 0x3F);
199+
offset += 2;
200+
}
201+
}
202+
203+
this.offset = offset;
204+
}
205+
155206
@Override
156207
public void writeBytes(byte[] arr, int start, int length) {
157208
reserveSpace(length);
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
/*
2+
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
3+
* Copyright (c) 2024, Alibaba Group Holding Limited. All Rights Reserved.
4+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5+
*
6+
* This code is free software; you can redistribute it and/or modify it
7+
* under the terms of the GNU General Public License version 2 only, as
8+
* published by the Free Software Foundation.
9+
*
10+
* This code is distributed in the hope that it will be useful, but WITHOUT
11+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13+
* version 2 for more details (a copy is included in the LICENSE file that
14+
* accompanied this code).
15+
*
16+
* You should have received a copy of the GNU General Public License version
17+
* 2 along with this work; if not, write to the Free Software Foundation,
18+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19+
*
20+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21+
* or visit www.oracle.com if you need additional information or have any
22+
* questions.
23+
*/
24+
25+
import jdk.internal.access.JavaLangAccess;
26+
import jdk.internal.access.SharedSecrets;
27+
28+
import java.nio.charset.StandardCharsets;
29+
import java.util.Arrays;
30+
31+
/*
32+
* @test
33+
* @modules java.base/jdk.internal.access
34+
* @summary test latin1 String countNonZeroAscii
35+
* @run main/othervm -XX:+CompactStrings CountNonZeroAscii
36+
* @run main/othervm -XX:-CompactStrings CountNonZeroAscii
37+
*/
38+
// Checks JavaLangAccess.countNonZeroAscii on 1000-char strings decoded from
// ISO_8859_1 bytes: first an all-'A' string, then the same string with exactly
// one position replaced by each byte value from Byte.MIN_VALUE to 0.
// The jtreg header runs this once with -XX:+CompactStrings and once with
// -XX:-CompactStrings (presumably to cover both the Latin1 and the UTF16 scan).
public class CountNonZeroAscii {
39+
private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess();
40+
41+
public static void main(String [] args) {
42+
byte[] bytes = new byte[1000];
43+
44+
// Baseline: every char is ASCII, so the count must equal the full length.
Arrays.fill(bytes, (byte) 'A');
45+
String s = new String(bytes, StandardCharsets.ISO_8859_1);
46+
assertEquals(bytes.length, JLA.countNonZeroAscii(s));
47+
48+
for (int i = 0; i < bytes.length; i++) {
49+
// Every non-positive byte value (0x80..0xFF and NUL) at index i must stop the count at i.
for (int j = Byte.MIN_VALUE; j <= 0; j++) {
50+
bytes[i] = (byte) j;
51+
s = new String(bytes, StandardCharsets.ISO_8859_1);
52+
assertEquals(i, JLA.countNonZeroAscii(s));
53+
}
54+
// Restore the slot so the next iteration again has a single offending byte.
bytes[i] = (byte) 'A';
55+
}
56+
}
57+
58+
// Minimal assertion helper: throws AssertionError when the two ints differ.
static void assertEquals(int expected, int actual) {
59+
if (expected != actual) {
60+
throw new AssertionError("Expected " + expected + " but got " + actual);
61+
}
62+
}
63+
}
Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
/*
2+
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
3+
* Copyright (c) 2024, Alibaba Group Holding Limited. All Rights Reserved.
4+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5+
*
6+
* This code is free software; you can redistribute it and/or modify it
7+
* under the terms of the GNU General Public License version 2 only, as
8+
* published by the Free Software Foundation.
9+
*
10+
* This code is distributed in the hope that it will be useful, but WITHOUT
11+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13+
* version 2 for more details (a copy is included in the LICENSE file that
14+
* accompanied this code).
15+
*
16+
* You should have received a copy of the GNU General Public License version
17+
* 2 along with this work; if not, write to the Free Software Foundation,
18+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19+
*
20+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21+
* or visit www.oracle.com if you need additional information or have any
22+
* questions.
23+
*/
24+
package org.openjdk.bench.java.lang.classfile;
25+
26+
import org.openjdk.jmh.annotations.Benchmark;
27+
import org.openjdk.jmh.annotations.BenchmarkMode;
28+
import org.openjdk.jmh.annotations.Fork;
29+
import org.openjdk.jmh.annotations.Measurement;
30+
import org.openjdk.jmh.annotations.Mode;
31+
import org.openjdk.jmh.annotations.OutputTimeUnit;
32+
import org.openjdk.jmh.annotations.Param;
33+
import org.openjdk.jmh.annotations.Scope;
34+
import org.openjdk.jmh.annotations.Setup;
35+
import org.openjdk.jmh.annotations.State;
36+
import org.openjdk.jmh.annotations.Warmup;
37+
import org.openjdk.jmh.infra.Blackhole;
38+
39+
import java.lang.classfile.constantpool.ConstantPoolBuilder;
40+
import java.lang.classfile.constantpool.ClassEntry;
41+
import java.lang.classfile.*;
42+
import java.lang.constant.*;
43+
import java.nio.charset.StandardCharsets;
44+
import java.util.HexFormat;
45+
import java.util.concurrent.TimeUnit;
46+
import java.util.function.Consumer;
47+
48+
import static java.lang.classfile.ClassFile.*;
49+
import static java.lang.constant.ConstantDescs.*;
50+
51+
import jdk.internal.classfile.impl.*;
52+
/**
53+
* Benchmarks building a class file whose constant pool holds 128 UTF8 entries.
* Each entry is a run of 0..127 'A' chars followed by a suffix selected by
* {@code charType}.
54+
*/
55+
@BenchmarkMode(Mode.AverageTime)
56+
@OutputTimeUnit(TimeUnit.NANOSECONDS)
57+
@Warmup(iterations = 1, time = 2)
58+
@Measurement(iterations = 3, time = 1)
59+
@Fork(jvmArgsAppend = "--enable-preview", value = 3)
60+
@State(Scope.Thread)
61+
public class Utf8EntryWriteTo {
62+
// NOTE(review): STRING_BUILDER, MTD_append and MTD_String are not referenced
// anywhere else in this class as shown.
static final ClassDesc STRING_BUILDER = ClassDesc.ofDescriptor("Ljava/lang/StringBuilder;");
63+
static final MethodTypeDesc MTD_append = MethodTypeDesc.of(STRING_BUILDER, CD_String);
64+
static final MethodTypeDesc MTD_String = MethodTypeDesc.of(CD_String);
65+
// Descriptor of the class entry created in setup() and passed to build().
static final ClassDesc CLASS_DESC = ClassDesc.ofDescriptor("Lorg/openjdk/bench/java/lang/classfile/String$$StringConcat;");
66+
67+
// Selects the suffix appended to every constant; see the switch in setup().
@Param({"ascii", "utf8_2_bytes", "utf8_3_bytes", "emoji"})
68+
public String charType;
69+
ConstantPoolBuilder poolBuilder;
70+
ClassEntry thisClass;
71+
72+
// Decodes the suffix for charType and fills the pool with the 128 constants.
@Setup
73+
public void setup() throws Exception {
74+
// Each literal is the UTF-8 encoding of the suffix, written as hex.
byte[] bytes = HexFormat.of().parseHex(
75+
switch (charType) {
76+
// 0x78 is 'x'.
case "ascii" -> "78";
77+
// C2 A9 decodes to U+00A9.
case "utf8_2_bytes" -> "c2a9";
78+
// E6 B8 A9 decodes to U+6E29.
case "utf8_3_bytes" -> "e6b8a9";
79+
// Decodes to U+2763 followed by U+FE0F: two chars of three bytes each.
case "emoji" -> "e29da3efb88f";
80+
default -> throw new IllegalArgumentException("bad charType: " + charType);
81+
}
82+
);
83+
String s = new String(bytes, 0, bytes.length, StandardCharsets.UTF_8);
84+
String[] constants = new String[128];
85+
// constants[i] is i copies of 'A' followed by the suffix.
for (int i = 0; i < constants.length; i++) {
86+
constants[i] = "A".repeat(i).concat(s);
87+
}
88+
89+
poolBuilder = ConstantPoolBuilder.of();
90+
thisClass = poolBuilder.classEntry(CLASS_DESC);
91+
for (var c : constants) {
92+
poolBuilder.utf8Entry(c);
93+
}
94+
}
95+
96+
// Builds a class with an empty body from the pre-populated pool and hands
// the result to the Blackhole.
@Benchmark
97+
public void writeTo(Blackhole bh) {
98+
bh.consume(ClassFile
99+
.of()
100+
.build(thisClass, poolBuilder, (ClassBuilder clb) -> {}));
101+
}
102+
}

0 commit comments

Comments
 (0)