From e00d418a2b56e2a4a783be6686d2f74c87aaf00e Mon Sep 17 00:00:00 2001
From: Peter Shipton
Date: Mon, 27 May 2024 15:11:05 -0400
Subject: [PATCH] jdk11 always create compact Strings when possible

Issue https://github.com/eclipse-openj9/openj9/issues/19543

Signed-off-by: Peter Shipton
---
Reviewer notes (text between the "---" marker and the first "diff --git"
line is neither commit message nor part of the change):

JITHelpers.canEncodeAsLatin1(byte[] array, int start, int length) is new in
this patch. It treats `array` as a sequence of two-byte units, begins at
unit `start` (byte offset start << 1) and examines `length` units. In each
unit it reads exactly one byte - the one at the odd offset when
IS_BIG_ENDIAN is false, the one at the even offset otherwise - and returns
false at the first non-zero value. It returns true when every examined byte
is zero, which includes the length == 0 case (the loop body never runs).
Presumably the inspected byte is the high-order half of a UTF-16 char, so
"true" would mean every char is <= 0xFF; confirm against the byte layout
used by getCharFromArrayByIndex.
The method performs no range validation of its own and the shifts are plain
int arithmetic; callers are assumed to pass a valid range - TODO confirm.

 .../share/classes/com/ibm/jit/JITHelpers.java | 13 +++
 .../share/classes/java/lang/String.java       | 87 ++++++++++++-------
 .../openj9/test/java/lang/Test_String.java    | 12 +++
 3 files changed, 79 insertions(+), 33 deletions(-)

diff --git a/jcl/src/java.base/share/classes/com/ibm/jit/JITHelpers.java b/jcl/src/java.base/share/classes/com/ibm/jit/JITHelpers.java
index 6c36e77528a..7ff050b6268 100644
--- a/jcl/src/java.base/share/classes/com/ibm/jit/JITHelpers.java
+++ b/jcl/src/java.base/share/classes/com/ibm/jit/JITHelpers.java
@@ -566,6 +566,19 @@ public char getCharFromArrayByIndex(Object obj, int index) {
 		}
 	}
 
+	public boolean canEncodeAsLatin1(byte[] array, int start, int length) {
+		int index = start << 1;
+		if (!IS_BIG_ENDIAN) {
+			index += 1;
+		}
+		for (int end = index + (length << 1); index < end; index += 2) {
+			if (array[index] != 0) {
+				return false;
+			}
+		}
+		return true;
+	}
+
 	/**
 	 * Returns the first index of the target character array within the source character array starting from the specified
 	 * offset.
diff --git a/jcl/src/java.base/share/classes/java/lang/String.java b/jcl/src/java.base/share/classes/java/lang/String.java
index b5b04794883..959b314937a 100644
--- a/jcl/src/java.base/share/classes/java/lang/String.java
+++ b/jcl/src/java.base/share/classes/java/lang/String.java
@@ -824,19 +824,25 @@ public String(char[] data, int start, int length) {
 			} else {
 				char theChar = helpers.getCharFromArrayByIndex(data, start);
 
-				if (theChar <= 255) {
-					value = decompressedAsciiTable[theChar];
+				if (COMPACT_STRINGS && (theChar <= 255)) {
+					value = compressedAsciiTable[theChar];
+					coder = LATIN1;
+					hash = theChar;
 				} else {
-					value = new byte[2];
+					if (theChar <= 255) {
+						value = decompressedAsciiTable[theChar];
+					} else {
+						value = new byte[2];
 
-					helpers.putCharInArrayByIndex(value, 0, theChar);
-				}
+						helpers.putCharInArrayByIndex(value, 0, theChar);
+					}
 
-				coder = UTF16;
-				hash = theChar;
+					coder = UTF16;
+					hash = theChar;
 
-				if (COMPACT_STRINGS) {
-					initCompressionFlag();
+					if (COMPACT_STRINGS) {
+						initCompressionFlag();
+					}
 				}
 			}
 		} else {
@@ -850,14 +856,15 @@ public String(char[] data, int start, int length) {
 				}
 
 				coder = LATIN1;
+			} else if (COMPACT_STRINGS && helpers.canEncodeAsLatin1(data, start, length)) {
+				value = new byte[length];
+				coder = LATIN1;
+
+				compress(data, start, value, 0, length);
 			} else {
-				if (start == 0 && data.length == length * 2) {
-					value = data;
-				} else {
-					value = StringUTF16.newBytesFor(length);
+				value = StringUTF16.newBytesFor(length);
 
-					decompressedArrayCopy(data, start, value, 0, length);
-				}
+				decompressedArrayCopy(data, start, value, 0, length);
 
 				coder = UTF16;
 
@@ -887,19 +894,25 @@ public String(char[] data, int start, int length) {
 			} else {
 				char theChar = helpers.getCharFromArrayByIndex(data, start);
 
-				if (theChar <= 255) {
-					value = decompressedAsciiTable[theChar];
+				if (COMPACT_STRINGS && (theChar <= 255)) {
+					value = compressedAsciiTable[theChar];
+					coder = LATIN1;
+					hash = theChar;
 				} else {
-					value = new byte[2];
+					if (theChar <= 255) {
+						value = decompressedAsciiTable[theChar];
+					} else {
+						value = new byte[2];
 
-					helpers.putCharInArrayByIndex(value, 0, theChar);
-				}
+						helpers.putCharInArrayByIndex(value, 0, theChar);
+					}
 
-				coder = UTF16;
-				hash = theChar;
+					coder = UTF16;
+					hash = theChar;
 
-				if (COMPACT_STRINGS) {
-					initCompressionFlag();
+					if (COMPACT_STRINGS) {
+						initCompressionFlag();
+					}
 				}
 			}
 		} else {
@@ -913,14 +926,15 @@ public String(char[] data, int start, int length) {
 				}
 
 				coder = LATIN1;
+			} else if (COMPACT_STRINGS && helpers.canEncodeAsLatin1(data, start, length)) {
+				value = new byte[length];
+				coder = LATIN1;
+
+				compress(data, start, value, 0, length);
 			} else {
-				if (sharingIsAllowed && start == 0 && data.length == length * 2) {
-					value = data;
-				} else {
-					value = StringUTF16.newBytesFor(length);
+				value = StringUTF16.newBytesFor(length);
 
-					decompressedArrayCopy(data, start, value, 0, length);
-				}
+				decompressedArrayCopy(data, start, value, 0, length);
 
 				coder = UTF16;
 
@@ -2581,7 +2595,11 @@ public String replace(char oldChar, char newChar) {
 				helpers.putCharInArrayByIndex(buffer, index++, (char) newChar);
 			} while ((index = indexOf(oldChar, index)) != -1);
 
-			return new String(buffer, UTF16);
+			if (newChar > 255) {
+				return new String(buffer, UTF16);
+			}
+
+			return new String(buffer, 0, len, false);
 		}
 	}
 
@@ -3194,7 +3212,7 @@ public String replaceAll(String regex, String substitute) {
 				if (COMPACT_STRINGS && isCompressed() && (substituteLength == 0 || substitute.isCompressed())) {
 					byte[] newChars = new byte[length];
 					byte toReplace = helpers.getByteFromArrayByIndex(regex.value, 0);
-					byte replacement = (byte)-1; // assign dummy value that will never be used
+					byte replacement = (byte)0; // assign dummy value that will never be used
 					if (substituteLength == 1) {
 						replacement = helpers.getByteFromArrayByIndex(substitute.value, 0);
 						checkLastChar((char)replacement);
@@ -3212,7 +3230,7 @@ public String replaceAll(String regex, String substitute) {
 				} else if (!COMPACT_STRINGS || !isCompressed()) {
 					byte[] newChars = StringUTF16.newBytesFor(length);
 					char toReplace = regex.charAtInternal(0);
-					char replacement = (char)-1; // assign dummy value that will never be used
+					char replacement = (char)0; // assign dummy value that must be less than 256
 					if (substituteLength == 1) {
 						replacement = substitute.charAtInternal(0);
 						checkLastChar(replacement);
@@ -3226,6 +3244,9 @@ public String replaceAll(String regex, String substitute) {
 							helpers.putCharInArrayByIndex(newChars, newCharIndex++, replacement);
 						}
 					}
+					if ((replacement > 255) && (newCharIndex == length)) {
+						return new String(newChars, UTF16); // replacement > 0xFF and no char was dropped: hand back the UTF16 result here
+					}
 					return new String(newChars, 0, newCharIndex, false);
 				}
 			}
diff --git a/test/functional/Java8andUp/src/org/openj9/test/java/lang/Test_String.java b/test/functional/Java8andUp/src/org/openj9/test/java/lang/Test_String.java
index f74132a0104..b275b56eb34 100644
--- a/test/functional/Java8andUp/src/org/openj9/test/java/lang/Test_String.java
+++ b/test/functional/Java8andUp/src/org/openj9/test/java/lang/Test_String.java
@@ -937,6 +937,18 @@ public void test_indexOf3() {
 		String needle = "\u00b0\u00b1";
 		String hay = new StringBuilder("a").append(needle).toString();
 		AssertJUnit.assertEquals("Failed to find string 3", 1, hay.indexOf(needle));
+
+		String multi = "\u0100:abc";
+		String[] splits = multi.split(":");
+		AssertJUnit.assertEquals("Failed to find string 4", 3, "123abc".indexOf(splits[1]));
+		String sub = multi.substring(2);
+		AssertJUnit.assertEquals("Failed to find string 5", 3, "123abc".indexOf(sub));
+		String r1 = multi.replace('\u0100', '1');
+		AssertJUnit.assertEquals("Failed to find string 6", 0, "1:abc".indexOf(r1));
+		String r2 = multi.replaceAll("\u0100", "1");
+		AssertJUnit.assertEquals("Failed to find string 7", 0, "1:abc".indexOf(r2));
+		String r3 = multi.replaceAll("\u0100", "");
+		AssertJUnit.assertEquals("Failed to find string 8", 0, ":abc".indexOf(r3));
 	}
 
 	/**