Skip to content

8355177: Speed up StringBuilder::append(char[]) via UTF16::compress & Unsafe::copyMemory #24773

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2003, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2003, 2025, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2025, Alibaba Group Holding Limited. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -1767,6 +1768,9 @@ private final void appendChars(char[] s, int off, int end) {
int count = this.count;
if (isLatin1()) {
byte[] val = this.value;
int compressed = StringUTF16.compress(s, off, val, count, end - off);
count += compressed;
off += compressed;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we update this.count eagerly after compression?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the StringLatin1.canEncode(c) branch of the original code, this.count is not updated either, and the behavior is the same as before.

for (int i = off, j = count; i < end; i++) {
char c = s[i];
if (StringLatin1.canEncode(c)) {
Expand Down
19 changes: 14 additions & 5 deletions src/java.base/share/classes/java/lang/StringUTF16.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2015, 2025, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2025, Alibaba Group Holding Limited. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -1312,10 +1313,13 @@ static Stream<String> lines(byte[] value) {
return StreamSupport.stream(LinesSpliterator.spliterator(value), false);
}

private static void putChars(byte[] val, int index, char[] str, int off, int end) {
while (off < end) {
putChar(val, index++, str[off++]);
}
/**
 * Bulk-copies the chars from {@code str}, positions {@code off} (inclusive) to
 * {@code end} (exclusive), into {@code val} with a single
 * {@code Unsafe.copyMemory} call. The destination position {@code index} is
 * scaled by two, i.e. it counts 2-byte units within {@code val}.
 *
 * <p>This method performs no bounds checks of its own; callers must validate
 * both the source and destination ranges before invoking it.
 *
 * @param val   destination byte array
 * @param index destination position, in 2-byte units
 * @param str   source char array
 * @param off   first source char index to copy
 * @param end   source char index one past the last char to copy
 */
private static void putCharsUnchecked(byte[] val, int index, char[] str, int off, int end) {
Unsafe.getUnsafe().copyMemory(
str,
// source byte offset: the cast widens off to long before the shift doubles it
Unsafe.ARRAY_CHAR_BASE_OFFSET + ((long) off << 1),
val,
// destination byte offset: index is likewise widened to long, then doubled
Unsafe.ARRAY_BYTE_BASE_OFFSET + ((long) index << 1),
// number of bytes to copy: two per source char
((long) (end - off)) << 1);
}

public static String newString(byte[] val, int index, int len) {
Expand Down Expand Up @@ -1490,7 +1494,8 @@ public static void putCharSB(byte[] val, int index, int c) {

/**
 * Copies the chars of {@code ca} from {@code off} to {@code end} into
 * {@code val} starting at position {@code index}, after bounds-checking the
 * destination range against {@code val} and the source range against
 * {@code ca}.
 *
 * @param val   destination byte array
 * @param index destination start position
 * @param ca    source char array
 * @param off   first source index
 * @param end   source index one past the last char to copy
 */
public static void putCharsSB(byte[] val, int index, char[] ca, int off, int end) {
// destination range: starts at index and spans (end - off) positions
checkBoundsBeginEnd(index, index + end - off, val);
// NOTE(review): in this flattened diff view the next line looks like the removed
// pre-change call to putChars - confirm it is not part of the final method
putChars(val, index, ca, off, end);
// source range is validated as well, because the copy below performs no checks
checkBoundsBeginEnd(off, end, ca);
putCharsUnchecked(val, index, ca, off, end);
}

public static void putCharsSB(byte[] val, int index, CharSequence s, int off, int end) {
Expand Down Expand Up @@ -1666,6 +1671,10 @@ public static void checkBoundsBeginEnd(int begin, int end, byte[] val) {
String.checkBoundsBeginEnd(begin, end, length(val));
}

/**
 * Validates the {@code begin}/{@code end} pair against the length of the char
 * array {@code val} by delegating to {@code String.checkBoundsBeginEnd}.
 *
 * @param begin start index of the range
 * @param end   end index of the range
 * @param val   char array whose {@code length} bounds the range
 */
private static void checkBoundsBeginEnd(int begin, int end, char[] val) {
String.checkBoundsBeginEnd(begin, end, val.length);
}

/**
 * Validates the {@code offset}/{@code count} pair against {@code length(val)}
 * by delegating to {@code String.checkBoundsOffCount}.
 *
 * @param offset start position of the range
 * @param count  number of positions in the range
 * @param val    byte array whose length, as reported by {@code length(val)},
 *               bounds the range (presumably a count of 2-byte chars - confirm)
 */
public static void checkBoundsOffCount(int offset, int count, byte[] val) {
String.checkBoundsOffCount(offset, count, length(val));
}
Expand Down
29 changes: 28 additions & 1 deletion test/micro/org/openjdk/bench/java/lang/StringBuilders.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2014, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2025, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2025, Alibaba Group Holding Limited. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -50,6 +51,7 @@ public class StringBuilders {
private String[] str16p8p7;
private String[] str3p9p8;
private String[] str22p40p31;
private char[][] charArray22p40p31;
private StringBuilder sbLatin1;
private StringBuilder sbLatin2;
private StringBuilder sbUtf16;
Expand All @@ -63,10 +65,15 @@ public void setup() {
"advise", "you", "to", "drive", "at", "top", "speed", "it'll",
"be", "a", "god", "damn", "miracle", "if", "we", "can", "get",
"there", "before", "you", "turn", "into", "a", "wild", "animal."};

str3p4p2 = new String[]{"123", "1234", "12"};
str16p8p7 = new String[]{"1234567890123456", "12345678", "1234567"};
str3p9p8 = new String[]{"123", "123456789", "12345678"};
str22p40p31 = new String[]{"1234567890123456789012", "1234567890123456789012345678901234567890", "1234567890123456789012345678901"};
charArray22p40p31 = new char[str22p40p31.length][];
for (int i = 0; i < str22p40p31.length; i++) {
charArray22p40p31[i] = str22p40p31[i].toCharArray();
}
sbLatin1 = new StringBuilder("Latin1 string");
sbLatin2 = new StringBuilder("Latin1 string");
sbUtf16 = new StringBuilder("UTF-\uFF11\uFF16 string");
Expand Down Expand Up @@ -273,6 +280,26 @@ public int appendWithLongUtf16() {
return buf.length();
}

/**
 * Benchmark for {@code StringBuilder.append(char[])} on the shared
 * {@code sbLatin1} builder: empties the builder, then appends every array in
 * {@code charArray22p40p31}.
 *
 * @return the builder length after all appends
 */
@Benchmark
public int appendWithCharArrayLatin1() {
StringBuilder buf = sbLatin1;
// empty the shared builder so each invocation starts from length 0
buf.delete(0, buf.length());
for (char[] charArray : charArray22p40p31) {
buf.append(charArray);
}
return buf.length();
}

/**
 * Benchmark for {@code StringBuilder.append(char[])} on the shared
 * {@code sbUtf16} builder: empties the builder, then appends every array in
 * {@code charArray22p40p31}.
 *
 * @return the builder length after all appends
 */
@Benchmark
public int appendWithCharArrayUTF16() {
StringBuilder buf = sbUtf16;
// empty the shared builder so each invocation starts from length 0
// NOTE(review): presumably the builder stays in UTF-16 mode after this delete - confirm
buf.delete(0, buf.length());
for (char[] charArray : charArray22p40p31) {
buf.append(charArray);
}
return buf.length();
}

@Benchmark
public String toStringCharWithBool8() {
StringBuilder result = new StringBuilder();
Expand Down