@@ -553,135 +553,125 @@ public String(byte[] bytes, int offset, int length, Charset charset) {
553
553
* disambiguate it against other similar methods of this class.
554
554
*/
555
555
private String(Charset charset, byte[] bytes, int offset, int length) {
    // Pick the decoder once and delegate to String(String):
    //  - a zero-length request always yields the empty string, whatever the charset;
    //  - UTF-8, ISO-8859-1 and US-ASCII are matched by identity against their
    //    singleton INSTANCE and go to dedicated static helpers;
    //  - every other charset goes through the generic decode(...) helper.
    // The tests run in the order listed, so the length check wins over the charset.
    this(length == 0                          ? ""
         : charset == UTF_8.INSTANCE          ? utf8(bytes, offset, length)
         : charset == ISO_8859_1.INSTANCE     ? iso88591(bytes, offset, length)
         : charset == US_ASCII.INSTANCE       ? ascii(bytes, offset, length)
         :                                      decode(charset, bytes, offset, length));
}
570
+
571
+ private static String utf8 (byte [] bytes , int offset , int length ) {
572
+ if (COMPACT_STRINGS ) {
573
+ int dp = StringCoding .countPositives (bytes , offset , length );
574
+ if (dp == length ) {
575
+ return new String (Arrays .copyOfRange (bytes , offset , offset + length ), LATIN1 );
576
+ }
577
+ // Decode with a stable copy, to be the result if the decoded length is the same
578
+ byte [] latin1 = Arrays .copyOfRange (bytes , offset , offset + length );
579
+ int sp = dp ; // first dp bytes are already in the copy
580
+ while (sp < length ) {
581
+ int b1 = latin1 [sp ++];
582
+ if (b1 >= 0 ) {
583
+ latin1 [dp ++] = (byte ) b1 ;
584
+ continue ;
566
585
}
567
- // Decode with a stable copy, to be the result if the decoded length is the same
568
- byte [] latin1 = Arrays .copyOfRange (bytes , offset , offset + length );
569
- int sp = dp ; // first dp bytes are already in the copy
570
- while (sp < length ) {
571
- int b1 = latin1 [sp ++];
572
- if (b1 >= 0 ) {
573
- latin1 [dp ++] = (byte )b1 ;
586
+ if ((b1 & 0xfe ) == 0xc2 && sp < length ) { // b1 either 0xc2 or 0xc3
587
+ int b2 = latin1 [sp ];
588
+ if (b2 < -64 ) { // continuation bytes are always negative values in the range -128 to -65
589
+ latin1 [dp ++] = (byte ) decode2 (b1 , b2 );
590
+ sp ++;
574
591
continue ;
575
592
}
576
- if ((b1 & 0xfe ) == 0xc2 && sp < length ) { // b1 either 0xc2 or 0xc3
577
- int b2 = latin1 [sp ];
578
- if (b2 < -64 ) { // continuation bytes are always negative values in the range -128 to -65
579
- latin1 [dp ++] = (byte )decode2 (b1 , b2 );
580
- sp ++;
581
- continue ;
582
- }
583
- }
584
- // anything not a latin1, including the REPL
585
- // we have to go with the utf16
586
- sp --;
587
- break ;
588
- }
589
- if (sp == length ) {
590
- if (dp != latin1 .length ) {
591
- latin1 = Arrays .copyOf (latin1 , dp );
592
- }
593
- this .value = latin1 ;
594
- this .coder = LATIN1 ;
595
- return ;
596
- }
597
- byte [] utf16 = StringUTF16 .newBytesFor (length );
598
- StringLatin1 .inflate (latin1 , 0 , utf16 , 0 , dp );
599
- dp = decodeUTF8_UTF16 (latin1 , sp , length , utf16 , dp , true );
600
- if (dp != length ) {
601
- utf16 = Arrays .copyOf (utf16 , dp << 1 );
602
593
}
603
- this .value = utf16 ;
604
- this .coder = UTF16 ;
605
- } else { // !COMPACT_STRINGS
606
- byte [] dst = StringUTF16 .newBytesFor (length );
607
- int dp = decodeUTF8_UTF16 (bytes , offset , offset + length , dst , 0 , true );
608
- if (dp != length ) {
609
- dst = Arrays .copyOf (dst , dp << 1 );
594
+ // anything not a latin1, including the REPL
595
+ // we have to go with the utf16
596
+ sp --;
597
+ break ;
598
+ }
599
+ if (sp == length ) {
600
+ if (dp != latin1 .length ) {
601
+ latin1 = Arrays .copyOf (latin1 , dp );
610
602
}
611
- this .value = dst ;
612
- this .coder = UTF16 ;
603
+ return new String (latin1 , LATIN1 );
613
604
}
614
- } else if (charset == ISO_8859_1 .INSTANCE ) {
615
- if (COMPACT_STRINGS ) {
616
- this .value = Arrays .copyOfRange (bytes , offset , offset + length );
617
- this .coder = LATIN1 ;
618
- } else {
619
- this .value = StringLatin1 .inflate (bytes , offset , length );
620
- this .coder = UTF16 ;
605
+ byte [] utf16 = StringUTF16 .newBytesFor (length );
606
+ StringLatin1 .inflate (latin1 , 0 , utf16 , 0 , dp );
607
+ dp = decodeUTF8_UTF16 (latin1 , sp , length , utf16 , dp , true );
608
+ if (dp != length ) {
609
+ utf16 = Arrays .copyOf (utf16 , dp << 1 );
621
610
}
622
- } else if (charset == US_ASCII .INSTANCE ) {
623
- if (COMPACT_STRINGS && !StringCoding .hasNegatives (bytes , offset , length )) {
624
- this .value = Arrays .copyOfRange (bytes , offset , offset + length );
625
- this .coder = LATIN1 ;
626
- } else {
627
- byte [] dst = StringUTF16 .newBytesFor (length );
628
- int dp = 0 ;
629
- while (dp < length ) {
630
- int b = bytes [offset ++];
631
- StringUTF16 .putChar (dst , dp ++, (b >= 0 ) ? (char ) b : REPL );
632
- }
633
- this .value = dst ;
634
- this .coder = UTF16 ;
611
+ return new String (utf16 , UTF16 );
612
+ } else { // !COMPACT_STRINGS
613
+ byte [] dst = StringUTF16 .newBytesFor (length );
614
+ int dp = decodeUTF8_UTF16 (bytes , offset , offset + length , dst , 0 , true );
615
+ if (dp != length ) {
616
+ dst = Arrays .copyOf (dst , dp << 1 );
635
617
}
618
+ return new String (dst , UTF16 );
619
+ }
620
+ }
621
+
622
+ private static String iso88591 (byte [] bytes , int offset , int length ) {
623
+ if (COMPACT_STRINGS ) {
624
+ return new String (Arrays .copyOfRange (bytes , offset , offset + length ), LATIN1 );
636
625
} else {
637
- // (1)We never cache the "external" cs, the only benefit of creating
638
- // an additional StringDe/Encoder object to wrap it is to share the
639
- // de/encode() method. These SD/E objects are short-lived, the young-gen
640
- // gc should be able to take care of them well. But the best approach
641
- // is still not to generate them if not really necessary.
642
- // (2)The defensive copy of the input byte/char[] has a big performance
643
- // impact, as well as the outgoing result byte/char[]. Need to do the
644
- // optimization check of (sm==null && classLoader0==null) for both.
645
- CharsetDecoder cd = charset .newDecoder ();
646
- // ArrayDecoder fastpaths
647
- if (cd instanceof ArrayDecoder ad ) {
648
- // ascii
649
- if (ad .isASCIICompatible () && !StringCoding .hasNegatives (bytes , offset , length )) {
650
- if (COMPACT_STRINGS ) {
651
- this .value = Arrays .copyOfRange (bytes , offset , offset + length );
652
- this .coder = LATIN1 ;
653
- return ;
654
- }
655
- this .value = StringLatin1 .inflate (bytes , offset , length );
656
- this .coder = UTF16 ;
657
- return ;
658
- }
626
+ return new String (StringLatin1 .inflate (bytes , offset , length ), UTF16 );
627
+ }
628
+ }
629
+
630
+ private static String ascii (byte [] bytes , int offset , int length ) {
631
+ if (COMPACT_STRINGS && !StringCoding .hasNegatives (bytes , offset , length )) {
632
+ return new String (Arrays .copyOfRange (bytes , offset , offset + length ), LATIN1 );
633
+ } else {
634
+ byte [] dst = StringUTF16 .newBytesFor (length );
635
+ int dp = 0 ;
636
+ while (dp < length ) {
637
+ int b = bytes [offset ++];
638
+ StringUTF16 .putChar (dst , dp ++, (b >= 0 ) ? (char ) b : REPL );
639
+ }
640
+ return new String (dst , UTF16 );
641
+ }
642
+ }
659
643
644
+ private static String decode (Charset charset , byte [] bytes , int offset , int length ) {
645
+ // (1)We never cache the "external" cs, the only benefit of creating
646
+ // an additional StringDe/Encoder object to wrap it is to share the
647
+ // de/encode() method. These SD/E objects are short-lived, the young-gen
648
+ // gc should be able to take care of them well. But the best approach
649
+ // is still not to generate them if not really necessary.
650
+ // (2)The defensive copy of the input byte/char[] has a big performance
651
+ // impact, as well as the outgoing result byte/char[]. Need to do the
652
+ // optimization check of (sm==null && classLoader0==null) for both.
653
+ CharsetDecoder cd = charset .newDecoder ();
654
+ // ArrayDecoder fastpaths
655
+ if (cd instanceof ArrayDecoder ad ) {
656
+ // ascii
657
+ if (ad .isASCIICompatible () && !StringCoding .hasNegatives (bytes , offset , length )) {
658
+ return iso88591 (bytes , offset , length );
659
+ } else {
660
660
// fastpath for always Latin1 decodable single byte
661
661
if (COMPACT_STRINGS && ad .isLatin1Decodable ()) {
662
662
byte [] dst = new byte [length ];
663
663
ad .decodeToLatin1 (bytes , offset , length , dst );
664
- this .value = dst ;
665
- this .coder = LATIN1 ;
666
- return ;
667
- }
668
-
669
- int en = scale (length , cd .maxCharsPerByte ());
670
- cd .onMalformedInput (CodingErrorAction .REPLACE )
671
- .onUnmappableCharacter (CodingErrorAction .REPLACE );
672
- char [] ca = new char [en ];
673
- int clen = ad .decode (bytes , offset , length , ca );
674
- if (COMPACT_STRINGS ) {
675
- byte [] val = StringUTF16 .compress (ca , 0 , clen );;
676
- this .coder = StringUTF16 .coderFromArrayLen (val , clen );
677
- this .value = val ;
678
- return ;
664
+ return new String (dst , LATIN1 );
665
+ } else {
666
+ int en = scale (length , cd .maxCharsPerByte ());
667
+ cd .onMalformedInput (CodingErrorAction .REPLACE )
668
+ .onUnmappableCharacter (CodingErrorAction .REPLACE );
669
+ char [] ca = new char [en ];
670
+ int clen = ad .decode (bytes , offset , length , ca );
671
+ return new String (ca , 0 , clen , null );
679
672
}
680
- coder = UTF16 ;
681
- value = StringUTF16 .toBytes (ca , 0 , clen );
682
- return ;
683
673
}
684
-
674
+ } else {
685
675
// decode using CharsetDecoder
686
676
int en = scale (length , cd .maxCharsPerByte ());
687
677
cd .onMalformedInput (CodingErrorAction .REPLACE )
@@ -694,14 +684,7 @@ private String(Charset charset, byte[] bytes, int offset, int length) {
694
684
// Substitution is enabled, so this shouldn't happen
695
685
throw new Error (x );
696
686
}
697
- if (COMPACT_STRINGS ) {
698
- byte [] val = StringUTF16 .compress (ca , 0 , caLen );
699
- this .coder = StringUTF16 .coderFromArrayLen (val , caLen );
700
- this .value = val ;
701
- return ;
702
- }
703
- coder = UTF16 ;
704
- value = StringUTF16 .toBytes (ca , 0 , caLen );
687
+ return new String (ca , 0 , caLen , null );
705
688
}
706
689
}
707
690
0 commit comments