Skip to content

Commit 839cede

Browse files
committed
8357289: Break down the String constructor into smaller methods
Reviewed-by: liach, rriggs
1 parent ecd2d83 commit 839cede

File tree

1 file changed

+102
-119
lines changed

1 file changed

+102
-119
lines changed

src/java.base/share/classes/java/lang/String.java

Lines changed: 102 additions & 119 deletions
Original file line numberDiff line numberDiff line change
@@ -553,135 +553,125 @@ public String(byte[] bytes, int offset, int length, Charset charset) {
553553
* disambiguate it against other similar methods of this class.
554554
*/
555555
private String(Charset charset, byte[] bytes, int offset, int length) {
556+
String str;
556557
if (length == 0) {
557-
this.value = "".value;
558-
this.coder = "".coder;
558+
str = "";
559559
} else if (charset == UTF_8.INSTANCE) {
560-
if (COMPACT_STRINGS) {
561-
int dp = StringCoding.countPositives(bytes, offset, length);
562-
if (dp == length) {
563-
this.value = Arrays.copyOfRange(bytes, offset, offset + length);
564-
this.coder = LATIN1;
565-
return;
560+
str = utf8(bytes, offset, length);
561+
} else if (charset == ISO_8859_1.INSTANCE) {
562+
str = iso88591(bytes, offset, length);
563+
} else if (charset == US_ASCII.INSTANCE) {
564+
str = ascii(bytes, offset, length);
565+
} else {
566+
str = decode(charset, bytes, offset, length);
567+
}
568+
this(str);
569+
}
570+
571+
private static String utf8(byte[] bytes, int offset, int length) {
572+
if (COMPACT_STRINGS) {
573+
int dp = StringCoding.countPositives(bytes, offset, length);
574+
if (dp == length) {
575+
return new String(Arrays.copyOfRange(bytes, offset, offset + length), LATIN1);
576+
}
577+
// Decode with a stable copy, to be the result if the decoded length is the same
578+
byte[] latin1 = Arrays.copyOfRange(bytes, offset, offset + length);
579+
int sp = dp; // first dp bytes are already in the copy
580+
while (sp < length) {
581+
int b1 = latin1[sp++];
582+
if (b1 >= 0) {
583+
latin1[dp++] = (byte) b1;
584+
continue;
566585
}
567-
// Decode with a stable copy, to be the result if the decoded length is the same
568-
byte[] latin1 = Arrays.copyOfRange(bytes, offset, offset + length);
569-
int sp = dp; // first dp bytes are already in the copy
570-
while (sp < length) {
571-
int b1 = latin1[sp++];
572-
if (b1 >= 0) {
573-
latin1[dp++] = (byte)b1;
586+
if ((b1 & 0xfe) == 0xc2 && sp < length) { // b1 either 0xc2 or 0xc3
587+
int b2 = latin1[sp];
588+
if (b2 < -64) { // continuation bytes are always negative values in the range -128 to -65
589+
latin1[dp++] = (byte) decode2(b1, b2);
590+
sp++;
574591
continue;
575592
}
576-
if ((b1 & 0xfe) == 0xc2 && sp < length) { // b1 either 0xc2 or 0xc3
577-
int b2 = latin1[sp];
578-
if (b2 < -64) { // continuation bytes are always negative values in the range -128 to -65
579-
latin1[dp++] = (byte)decode2(b1, b2);
580-
sp++;
581-
continue;
582-
}
583-
}
584-
// anything not a latin1, including the REPL
585-
// we have to go with the utf16
586-
sp--;
587-
break;
588-
}
589-
if (sp == length) {
590-
if (dp != latin1.length) {
591-
latin1 = Arrays.copyOf(latin1, dp);
592-
}
593-
this.value = latin1;
594-
this.coder = LATIN1;
595-
return;
596-
}
597-
byte[] utf16 = StringUTF16.newBytesFor(length);
598-
StringLatin1.inflate(latin1, 0, utf16, 0, dp);
599-
dp = decodeUTF8_UTF16(latin1, sp, length, utf16, dp, true);
600-
if (dp != length) {
601-
utf16 = Arrays.copyOf(utf16, dp << 1);
602593
}
603-
this.value = utf16;
604-
this.coder = UTF16;
605-
} else { // !COMPACT_STRINGS
606-
byte[] dst = StringUTF16.newBytesFor(length);
607-
int dp = decodeUTF8_UTF16(bytes, offset, offset + length, dst, 0, true);
608-
if (dp != length) {
609-
dst = Arrays.copyOf(dst, dp << 1);
594+
// anything not a latin1, including the REPL
595+
// we have to go with the utf16
596+
sp--;
597+
break;
598+
}
599+
if (sp == length) {
600+
if (dp != latin1.length) {
601+
latin1 = Arrays.copyOf(latin1, dp);
610602
}
611-
this.value = dst;
612-
this.coder = UTF16;
603+
return new String(latin1, LATIN1);
613604
}
614-
} else if (charset == ISO_8859_1.INSTANCE) {
615-
if (COMPACT_STRINGS) {
616-
this.value = Arrays.copyOfRange(bytes, offset, offset + length);
617-
this.coder = LATIN1;
618-
} else {
619-
this.value = StringLatin1.inflate(bytes, offset, length);
620-
this.coder = UTF16;
605+
byte[] utf16 = StringUTF16.newBytesFor(length);
606+
StringLatin1.inflate(latin1, 0, utf16, 0, dp);
607+
dp = decodeUTF8_UTF16(latin1, sp, length, utf16, dp, true);
608+
if (dp != length) {
609+
utf16 = Arrays.copyOf(utf16, dp << 1);
621610
}
622-
} else if (charset == US_ASCII.INSTANCE) {
623-
if (COMPACT_STRINGS && !StringCoding.hasNegatives(bytes, offset, length)) {
624-
this.value = Arrays.copyOfRange(bytes, offset, offset + length);
625-
this.coder = LATIN1;
626-
} else {
627-
byte[] dst = StringUTF16.newBytesFor(length);
628-
int dp = 0;
629-
while (dp < length) {
630-
int b = bytes[offset++];
631-
StringUTF16.putChar(dst, dp++, (b >= 0) ? (char) b : REPL);
632-
}
633-
this.value = dst;
634-
this.coder = UTF16;
611+
return new String(utf16, UTF16);
612+
} else { // !COMPACT_STRINGS
613+
byte[] dst = StringUTF16.newBytesFor(length);
614+
int dp = decodeUTF8_UTF16(bytes, offset, offset + length, dst, 0, true);
615+
if (dp != length) {
616+
dst = Arrays.copyOf(dst, dp << 1);
635617
}
618+
return new String(dst, UTF16);
619+
}
620+
}
621+
622+
private static String iso88591(byte[] bytes, int offset, int length) {
623+
if (COMPACT_STRINGS) {
624+
return new String(Arrays.copyOfRange(bytes, offset, offset + length), LATIN1);
636625
} else {
637-
// (1)We never cache the "external" cs, the only benefit of creating
638-
// an additional StringDe/Encoder object to wrap it is to share the
639-
// de/encode() method. These SD/E objects are short-lived, the young-gen
640-
// gc should be able to take care of them well. But the best approach
641-
// is still not to generate them if not really necessary.
642-
// (2)The defensive copy of the input byte/char[] has a big performance
643-
// impact, as well as the outgoing result byte/char[]. Need to do the
644-
// optimization check of (sm==null && classLoader0==null) for both.
645-
CharsetDecoder cd = charset.newDecoder();
646-
// ArrayDecoder fastpaths
647-
if (cd instanceof ArrayDecoder ad) {
648-
// ascii
649-
if (ad.isASCIICompatible() && !StringCoding.hasNegatives(bytes, offset, length)) {
650-
if (COMPACT_STRINGS) {
651-
this.value = Arrays.copyOfRange(bytes, offset, offset + length);
652-
this.coder = LATIN1;
653-
return;
654-
}
655-
this.value = StringLatin1.inflate(bytes, offset, length);
656-
this.coder = UTF16;
657-
return;
658-
}
626+
return new String(StringLatin1.inflate(bytes, offset, length), UTF16);
627+
}
628+
}
629+
630+
private static String ascii(byte[] bytes, int offset, int length) {
631+
if (COMPACT_STRINGS && !StringCoding.hasNegatives(bytes, offset, length)) {
632+
return new String(Arrays.copyOfRange(bytes, offset, offset + length), LATIN1);
633+
} else {
634+
byte[] dst = StringUTF16.newBytesFor(length);
635+
int dp = 0;
636+
while (dp < length) {
637+
int b = bytes[offset++];
638+
StringUTF16.putChar(dst, dp++, (b >= 0) ? (char) b : REPL);
639+
}
640+
return new String(dst, UTF16);
641+
}
642+
}
659643

644+
private static String decode(Charset charset, byte[] bytes, int offset, int length) {
645+
// (1)We never cache the "external" cs, the only benefit of creating
646+
// an additional StringDe/Encoder object to wrap it is to share the
647+
// de/encode() method. These SD/E objects are short-lived, the young-gen
648+
// gc should be able to take care of them well. But the best approach
649+
// is still not to generate them if not really necessary.
650+
// (2)The defensive copy of the input byte/char[] has a big performance
651+
// impact, as well as the outgoing result byte/char[]. Need to do the
652+
// optimization check of (sm==null && classLoader0==null) for both.
653+
CharsetDecoder cd = charset.newDecoder();
654+
// ArrayDecoder fastpaths
655+
if (cd instanceof ArrayDecoder ad) {
656+
// ascii
657+
if (ad.isASCIICompatible() && !StringCoding.hasNegatives(bytes, offset, length)) {
658+
return iso88591(bytes, offset, length);
659+
} else {
660660
// fastpath for always Latin1 decodable single byte
661661
if (COMPACT_STRINGS && ad.isLatin1Decodable()) {
662662
byte[] dst = new byte[length];
663663
ad.decodeToLatin1(bytes, offset, length, dst);
664-
this.value = dst;
665-
this.coder = LATIN1;
666-
return;
667-
}
668-
669-
int en = scale(length, cd.maxCharsPerByte());
670-
cd.onMalformedInput(CodingErrorAction.REPLACE)
671-
.onUnmappableCharacter(CodingErrorAction.REPLACE);
672-
char[] ca = new char[en];
673-
int clen = ad.decode(bytes, offset, length, ca);
674-
if (COMPACT_STRINGS) {
675-
byte[] val = StringUTF16.compress(ca, 0, clen);;
676-
this.coder = StringUTF16.coderFromArrayLen(val, clen);
677-
this.value = val;
678-
return;
664+
return new String(dst, LATIN1);
665+
} else {
666+
int en = scale(length, cd.maxCharsPerByte());
667+
cd.onMalformedInput(CodingErrorAction.REPLACE)
668+
.onUnmappableCharacter(CodingErrorAction.REPLACE);
669+
char[] ca = new char[en];
670+
int clen = ad.decode(bytes, offset, length, ca);
671+
return new String(ca, 0, clen, null);
679672
}
680-
coder = UTF16;
681-
value = StringUTF16.toBytes(ca, 0, clen);
682-
return;
683673
}
684-
674+
} else {
685675
// decode using CharsetDecoder
686676
int en = scale(length, cd.maxCharsPerByte());
687677
cd.onMalformedInput(CodingErrorAction.REPLACE)
@@ -694,14 +684,7 @@ private String(Charset charset, byte[] bytes, int offset, int length) {
694684
// Substitution is enabled, so this shouldn't happen
695685
throw new Error(x);
696686
}
697-
if (COMPACT_STRINGS) {
698-
byte[] val = StringUTF16.compress(ca, 0, caLen);
699-
this.coder = StringUTF16.coderFromArrayLen(val, caLen);
700-
this.value = val;
701-
return;
702-
}
703-
coder = UTF16;
704-
value = StringUTF16.toBytes(ca, 0, caLen);
687+
return new String(ca, 0, caLen, null);
705688
}
706689
}
707690

0 commit comments

Comments
 (0)