From a4b5b1169aaf145996dc2a8248a10b81e45d57b7 Mon Sep 17 00:00:00 2001 From: Peter Shipton Date: Mon, 27 May 2024 15:11:05 -0400 Subject: [PATCH] jdk11 create compact Strings when possible from split(), substring() Issue https://github.com/eclipse-openj9/openj9/issues/19543 Signed-off-by: Peter Shipton --- .../share/classes/com/ibm/jit/JITHelpers.java | 16 +++++ .../share/classes/java/lang/String.java | 62 +++++++++++++------ .../openj9/test/java/lang/Test_String.java | 6 ++ 3 files changed, 64 insertions(+), 20 deletions(-) diff --git a/jcl/src/java.base/share/classes/com/ibm/jit/JITHelpers.java b/jcl/src/java.base/share/classes/com/ibm/jit/JITHelpers.java index 6c36e77528a..0528c3e96db 100644 --- a/jcl/src/java.base/share/classes/com/ibm/jit/JITHelpers.java +++ b/jcl/src/java.base/share/classes/com/ibm/jit/JITHelpers.java @@ -566,6 +566,22 @@ public char getCharFromArrayByIndex(Object obj, int index) { } } + /** Reports whether {@code length} two-byte units of {@code array}, beginning at unit index {@code start} (byte offset {@code start * 2}), each have a zero byte in the position inspected here: offset 0 of the pair when IS_BIG_ENDIAN, offset 1 otherwise, i.e. the high-order byte of a char stored in that byte order. Returns true when every inspected byte is zero (also when {@code length} is not positive, since the loop never runs), false at the first non-zero one. No explicit range validation of {@code start} or {@code length} is performed in this method. */ public boolean canEncodeAsLatin1(byte[] array, int start, int length) { + int count = 0; + int index = start << 1; + if (!IS_BIG_ENDIAN) { + index += 1; /* otherwise the high-order byte is the second byte of each pair */ + } + while (count < length) { + if (array[index] != 0) { /* a non-zero byte here means this char does not fit in a single byte */ + return false; + } + index += 2; + count += 1; + } + return true; + } + /** * Returns the first index of the target character array within the source character array starting from the specified * offset. 
diff --git a/jcl/src/java.base/share/classes/java/lang/String.java b/jcl/src/java.base/share/classes/java/lang/String.java index b5b04794883..c4da934a70f 100644 --- a/jcl/src/java.base/share/classes/java/lang/String.java +++ b/jcl/src/java.base/share/classes/java/lang/String.java @@ -824,19 +824,25 @@ public String(char[] data, int start, int length) { } else { char theChar = helpers.getCharFromArrayByIndex(data, start); - if (theChar <= 255) { - value = decompressedAsciiTable[theChar]; + if (COMPACT_STRINGS && (theChar <= 255)) { + value = compressedAsciiTable[theChar]; + coder = LATIN1; + hash = theChar; } else { - value = new byte[2]; - - helpers.putCharInArrayByIndex(value, 0, theChar); - } + if (theChar <= 255) { + value = decompressedAsciiTable[theChar]; + } else { + value = new byte[2]; + + helpers.putCharInArrayByIndex(value, 0, theChar); + } - coder = UTF16; - hash = theChar; + coder = UTF16; + hash = theChar; - if (COMPACT_STRINGS) { - initCompressionFlag(); + if (COMPACT_STRINGS) { + initCompressionFlag(); + } } } } else { @@ -850,6 +856,11 @@ public String(char[] data, int start, int length) { } coder = LATIN1; + } else if (COMPACT_STRINGS && ((start != 0) || (data.length != length * 2)) && helpers.canEncodeAsLatin1(data, start, length)) { + value = new byte[length]; + coder = LATIN1; + + compress(data, start, value, 0, length); } else { if (start == 0 && data.length == length * 2) { value = data; @@ -887,19 +898,25 @@ public String(char[] data, int start, int length) { } else { char theChar = helpers.getCharFromArrayByIndex(data, start); - if (theChar <= 255) { - value = decompressedAsciiTable[theChar]; + if (COMPACT_STRINGS && (theChar <= 255)) { + value = compressedAsciiTable[theChar]; + coder = LATIN1; + hash = theChar; } else { - value = new byte[2]; - - helpers.putCharInArrayByIndex(value, 0, theChar); - } + if (theChar <= 255) { + value = decompressedAsciiTable[theChar]; + } else { + value = new byte[2]; + + helpers.putCharInArrayByIndex(value, 
0, theChar); + } - coder = UTF16; - hash = theChar; + coder = UTF16; + hash = theChar; - if (COMPACT_STRINGS) { - initCompressionFlag(); + if (COMPACT_STRINGS) { + initCompressionFlag(); + } } } } else { @@ -913,6 +930,11 @@ public String(char[] data, int start, int length) { } coder = LATIN1; + } else if (COMPACT_STRINGS && ((start != 0) || (data.length != length * 2)) && helpers.canEncodeAsLatin1(data, start, length)) { + value = new byte[length]; + coder = LATIN1; + + compress(data, start, value, 0, length); } else { if (sharingIsAllowed && start == 0 && data.length == length * 2) { value = data; diff --git a/test/functional/Java8andUp/src/org/openj9/test/java/lang/Test_String.java b/test/functional/Java8andUp/src/org/openj9/test/java/lang/Test_String.java index f74132a0104..931634814c4 100644 --- a/test/functional/Java8andUp/src/org/openj9/test/java/lang/Test_String.java +++ b/test/functional/Java8andUp/src/org/openj9/test/java/lang/Test_String.java @@ -937,6 +937,12 @@ public void test_indexOf3() { String needle = "\u00b0\u00b1"; String hay = new StringBuilder("a").append(needle).toString(); AssertJUnit.assertEquals("Failed to find string 3", 1, hay.indexOf(needle)); + + String multi = "\u0100:abc"; + String[] splits = multi.split(":"); + String sub = multi.substring(2); + AssertJUnit.assertEquals("Failed to find string 4", 3, "123abc".indexOf(splits[1])); + AssertJUnit.assertEquals("Failed to find string 5", 3, "123abc".indexOf(sub)); } /**