diff --git a/jcl/src/java.base/share/classes/com/ibm/jit/JITHelpers.java b/jcl/src/java.base/share/classes/com/ibm/jit/JITHelpers.java index 6c36e77528a..7ff050b6268 100644 --- a/jcl/src/java.base/share/classes/com/ibm/jit/JITHelpers.java +++ b/jcl/src/java.base/share/classes/com/ibm/jit/JITHelpers.java @@ -566,6 +566,19 @@ public char getCharFromArrayByIndex(Object obj, int index) { } } + public boolean canEncodeAsLatin1(byte[] array, int start, int length) { + int index = start << 1; + if (!IS_BIG_ENDIAN) { + index += 1; + } + for (int end = index + (length << 1); index < end; index += 2) { + if (array[index] != 0) { + return false; + } + } + return true; + } + /** * Returns the first index of the target character array within the source character array starting from the specified * offset. diff --git a/jcl/src/java.base/share/classes/java/lang/String.java b/jcl/src/java.base/share/classes/java/lang/String.java index b5b04794883..356d161d3a1 100644 --- a/jcl/src/java.base/share/classes/java/lang/String.java +++ b/jcl/src/java.base/share/classes/java/lang/String.java @@ -805,7 +805,7 @@ public String(char[] data, int start, int length) { } } - String(byte[] data, int start, int length, boolean compressed) { + private String(byte[] data, int start, int length, boolean compressed) { if (length == 0) { value = emptyValue; @@ -824,19 +824,25 @@ public String(char[] data, int start, int length) { } else { char theChar = helpers.getCharFromArrayByIndex(data, start); - if (theChar <= 255) { - value = decompressedAsciiTable[theChar]; + if (COMPACT_STRINGS && (theChar <= 255)) { + value = compressedAsciiTable[theChar]; + coder = LATIN1; + hash = theChar; } else { - value = new byte[2]; + if (theChar <= 255) { + value = decompressedAsciiTable[theChar]; + } else { + value = new byte[2]; - helpers.putCharInArrayByIndex(value, 0, theChar); - } + helpers.putCharInArrayByIndex(value, 0, theChar); + } - coder = UTF16; - hash = theChar; + coder = UTF16; + hash = theChar; - if (COMPACT_STRINGS) { - initCompressionFlag(); + if (COMPACT_STRINGS) { + initCompressionFlag(); + } } } } else { @@ -845,17 +851,19 @@ public String(char[] data, int start, int length) { value = data; } else { value = new byte[length]; - compressedArrayCopy(data, start, value, 0, length); } + coder = LATIN1; + } else if (COMPACT_STRINGS && helpers.canEncodeAsLatin1(data, start, length)) { + value = new byte[length]; + compress(data, start, value, 0, length); coder = LATIN1; } else { if (start == 0 && data.length == length * 2) { value = data; } else { value = StringUTF16.newBytesFor(length); - decompressedArrayCopy(data, start, value, 0, length); } @@ -868,7 +876,7 @@ public String(char[] data, int start, int length) { } } - String(byte[] data, int start, int length, boolean compressed, boolean sharingIsAllowed) { + private String(byte[] data, int start, int length, boolean compressed, boolean sharingIsAllowed) { if (length == 0) { value = emptyValue; @@ -887,41 +895,38 @@ public String(char[] data, int start, int length) { } else { char theChar = helpers.getCharFromArrayByIndex(data, start); - if (theChar <= 255) { - value = decompressedAsciiTable[theChar]; + if (COMPACT_STRINGS && (theChar <= 255)) { + value = compressedAsciiTable[theChar]; + coder = LATIN1; + hash = theChar; } else { - value = new byte[2]; - - helpers.putCharInArrayByIndex(value, 0, theChar); - } + if (theChar <= 255) { + value = decompressedAsciiTable[theChar]; + } else { + value = new byte[2]; + helpers.putCharInArrayByIndex(value, 0, theChar); + } - coder = UTF16; - hash = theChar; + coder = UTF16; + hash = theChar; - if (COMPACT_STRINGS) { - initCompressionFlag(); + if (COMPACT_STRINGS) { + initCompressionFlag(); + } } } } else { if (COMPACT_STRINGS && compressed) { - if (sharingIsAllowed && start == 0 && data.length == length) { - value = data; - } else { - value = new byte[length]; - - compressedArrayCopy(data, start, value, 0, length); - } - + value = new byte[length]; + compressedArrayCopy(data, start, value, 0, length); + coder = LATIN1; + } else if (COMPACT_STRINGS && helpers.canEncodeAsLatin1(data, start, length)) { + value = new byte[length]; + compress(data, start, value, 0, length); coder = LATIN1; } else { - if (sharingIsAllowed && start == 0 && data.length == length * 2) { - value = data; - } else { - value = StringUTF16.newBytesFor(length); - - decompressedArrayCopy(data, start, value, 0, length); - } - + value = StringUTF16.newBytesFor(length); + decompressedArrayCopy(data, start, value, 0, length); coder = UTF16; if (COMPACT_STRINGS) { @@ -2581,7 +2586,12 @@ public String replace(char oldChar, char newChar) { helpers.putCharInArrayByIndex(buffer, index++, (char) newChar); } while ((index = indexOf(oldChar, index)) != -1); - return new String(buffer, UTF16); + if (newChar > 255) { + // If the original String isn't compressed and the replacement character isn't Latin1, the result is uncompressed. + return new String(buffer, UTF16); + } + + return new String(buffer, 0, len, false); } } @@ -3194,7 +3204,7 @@ public String replaceAll(String regex, String substitute) { if (COMPACT_STRINGS && isCompressed() && (substituteLength == 0 || substitute.isCompressed())) { byte[] newChars = new byte[length]; byte toReplace = helpers.getByteFromArrayByIndex(regex.value, 0); - byte replacement = (byte)-1; // assign dummy value that will never be used + byte replacement = (byte)0; // assign dummy value that isn't used if (substituteLength == 1) { replacement = helpers.getByteFromArrayByIndex(substitute.value, 0); checkLastChar((char)replacement); @@ -3212,7 +3222,7 @@ public String replaceAll(String regex, String substitute) { } else if (!COMPACT_STRINGS || !isCompressed()) { byte[] newChars = StringUTF16.newBytesFor(length); char toReplace = regex.charAtInternal(0); - char replacement = (char)-1; // assign dummy value that will never be used + char replacement = (char)0; // assign dummy value that must be Latin1 (0 - 255) if (substituteLength == 1) { replacement = substitute.charAtInternal(0); checkLastChar(replacement); @@ -3226,6 +3236,10 @@ public String replaceAll(String regex, String substitute) { helpers.putCharInArrayByIndex(newChars, newCharIndex++, replacement); } } + if (replacement > 255) { + // If the original String isn't compressed and the replacement character isn't Latin1, the result is uncompressed. + return new String(newChars, UTF16); + } return new String(newChars, 0, newCharIndex, false); } } diff --git a/test/functional/Java8andUp/src/org/openj9/test/java/lang/Test_String.java b/test/functional/Java8andUp/src/org/openj9/test/java/lang/Test_String.java index f74132a0104..b275b56eb34 100644 --- a/test/functional/Java8andUp/src/org/openj9/test/java/lang/Test_String.java +++ b/test/functional/Java8andUp/src/org/openj9/test/java/lang/Test_String.java @@ -937,6 +937,18 @@ public void test_indexOf3() { String needle = "\u00b0\u00b1"; String hay = new StringBuilder("a").append(needle).toString(); AssertJUnit.assertEquals("Failed to find string 3", 1, hay.indexOf(needle)); + + String multi = "\u0100:abc"; + String[] splits = multi.split(":"); + AssertJUnit.assertEquals("Failed to find string 4", 3, "123abc".indexOf(splits[1])); + String sub = multi.substring(2); + AssertJUnit.assertEquals("Failed to find string 5", 3, "123abc".indexOf(sub)); + String r1 = multi.replace('\u0100', '1'); + AssertJUnit.assertEquals("Failed to find string 6", 0, "1:abc".indexOf(r1)); + String r2 = multi.replaceAll("\u0100", "1"); + AssertJUnit.assertEquals("Failed to find string 7", 0, "1:abc".indexOf(r2)); + String r3 = multi.replaceAll("\u0100", ""); + AssertJUnit.assertEquals("Failed to find string 8", 0, ":abc".indexOf(r3)); } /**