55import java .math .BigInteger ;
66import java .nio .charset .Charset ;
77import java .nio .charset .StandardCharsets ;
8- import java .util .*;
8+ import java .util .ArrayList ;
9+ import java .util .Arrays ;
10+ import java .util .Stack ;
911
1012import com .fasterxml .jackson .core .*;
1113import com .fasterxml .jackson .core .base .ParserMinimalBase ;
1214import com .fasterxml .jackson .core .io .IOContext ;
1315import com .fasterxml .jackson .core .io .NumberInput ;
1416import com .fasterxml .jackson .core .json .DupDetector ;
1517import com .fasterxml .jackson .core .sym .ByteQuadsCanonicalizer ;
16- import com .fasterxml .jackson .core .util .*;
18+ import com .fasterxml .jackson .core .util .ByteArrayBuilder ;
19+ import com .fasterxml .jackson .core .util .JacksonFeatureSet ;
20+ import com .fasterxml .jackson .core .util .TextBuffer ;
1721
1822import static com .fasterxml .jackson .dataformat .cbor .CBORConstants .*;
1923
@@ -2289,10 +2293,9 @@ protected void _finishToken() throws IOException
22892293
22902294 if ((available >= len )
22912295 // if not, could we read? NOTE: we do not require it, just attempt to read
2292- || ((_inputBuffer .length >= len )
2293- && _tryToLoadToHaveAtLeast (len ))) {
2294- _finishShortText (len );
2295- return ;
2296+ || _tryToLoadToHaveAtLeast (len )) {
2297+ _finishShortText (len );
2298+ return ;
22962299 }
22972300 // If not enough space, need handling similar to chunked
22982301 _finishLongText (len );
@@ -2331,11 +2334,9 @@ protected String _finishTextToken(int ch) throws IOException
23312334 // due to inputBuffer never being even close to that big).
23322335
23332336 final int available = _inputEnd - _inputPtr ;
2334-
23352337 if ((available >= len )
23362338 // if not, could we read? NOTE: we do not require it, just attempt to read
2337- || ((_inputBuffer .length >= len )
2338- && _tryToLoadToHaveAtLeast (len ))) {
2339+ || _tryToLoadToHaveAtLeast (len )) {
23392340 return _finishShortText (len );
23402341 }
23412342 // If not enough space, need handling similar to chunked
@@ -2364,19 +2365,22 @@ private final String _finishShortText(int len) throws IOException
23642365
23652366 // Let's actually do a tight loop for ASCII first:
23662367 final int end = _inputPtr ;
2367-
2368- int i ;
2369- while (( i = inputBuf [inPtr ]) >= 0 ) {
2368+ int i = 0 ;
2369+ while ( inPtr < end && i >= 0 ) {
2370+ i = inputBuf [inPtr ++];
23702371 outBuf [outPtr ++] = (char ) i ;
2371- if (++inPtr == end ) {
2372- String str = _textBuffer .setCurrentAndReturn (outPtr );
2373- if (stringRefs != null ) {
2374- stringRefs .stringRefs .add (str );
2375- _sharedString = str ;
2376- }
2377- return str ;
2372+ }
2373+ if (inPtr == end && i >= 0 ) {
2374+ String str = _textBuffer .setCurrentAndReturn (outPtr );
2375+ if (stringRefs != null ) {
2376+ stringRefs .stringRefs .add (str );
2377+ _sharedString = str ;
23782378 }
2379+ return str ;
23792380 }
2381+ // Correct extra increments
2382+ outPtr -= 1 ;
2383+ inPtr -= 1 ;
23802384 final int [] codes = UTF8_UNIT_CODES ;
23812385 do {
23822386 i = inputBuf [inPtr ++] & 0xFF ;
@@ -2443,10 +2447,17 @@ private final String _finishShortText(int len) throws IOException
24432447
24442448 private final String _finishLongText (int len ) throws IOException
24452449 {
2446- char [] outBuf = _textBuffer .emptyAndGetCurrentSegment ();
2447- int outPtr = 0 ;
2448- final int [] codes = UTF8_UNIT_CODES ;
2450+ StringRefList stringRefs = null ;
2451+ if (!_stringRefs .empty () &&
2452+ shouldReferenceString (_stringRefs .peek ().stringRefs .size (), len )) {
2453+ stringRefs = _stringRefs .peek ();
2454+ }
2455+ // First a tight loop for ASCII.
2456+ len = _finishLongTextAscii (len );
2457+ char [] outBuf = _textBuffer .getBufferWithoutReset ();
2458+ int outPtr = _textBuffer .getCurrentSegmentSize ();
24492459 int outEnd = outBuf .length ;
2460+ final int [] codes = UTF8_UNIT_CODES ;
24502461
24512462 while (--len >= 0 ) {
24522463 int c = _nextByte () & 0xFF ;
@@ -2500,14 +2511,51 @@ private final String _finishLongText(int len) throws IOException
25002511 outBuf [outPtr ++] = (char ) c ;
25012512 }
25022513 String str = _textBuffer .setCurrentAndReturn (outPtr );
2503- if (!_stringRefs .empty () &&
2504- shouldReferenceString (_stringRefs .peek ().stringRefs .size (), len )) {
2505- _stringRefs .peek ().stringRefs .add (str );
2514+ if (stringRefs != null ) {
2515+ stringRefs .stringRefs .add (str );
25062516 _sharedString = str ;
25072517 }
25082518 return str ;
25092519 }
25102520
2521+ /**
2522+ * Consumes as many ascii chars as possible in a tight loop. Returns the amount of bytes remaining.
2523+ */
2524+ private final int _finishLongTextAscii (int len ) throws IOException
2525+ {
2526+ char [] outBuf = _textBuffer .emptyAndGetCurrentSegment ();
2527+ final byte [] input = _inputBuffer ;
2528+ while (len > 0 ) {
2529+ // load as much input as possible
2530+ int size = Math .min (len , Math .min (outBuf .length , input .length ));
2531+ if (!_tryToLoadToHaveAtLeast (size )) {
2532+ return len ;
2533+ }
2534+ int outEnd = size ;
2535+ int outPtr = 0 ;
2536+ int inPtr = _inputPtr ;
2537+ int i = 0 ;
2538+ // Tight loop to copy into the output buffer, bail if a non-ascii char is found
2539+ while (outPtr < outEnd && i >= 0 ) {
2540+ i = input [inPtr ++];
2541+ outBuf [outPtr ++] = (char ) i ;
2542+ }
2543+ // Found a non-ascii char, correct pointers and return to the caller.
2544+ if (i < 0 ) {
2545+ --outPtr ;
2546+ _inputPtr = inPtr - 1 ;
2547+ _textBuffer .setCurrentLength (outPtr );
2548+ return len - outPtr ;
2549+ }
2550+ _inputPtr = inPtr ;
2551+ if (outPtr >= outBuf .length ) {
2552+ outBuf = _textBuffer .finishCurrentSegment ();
2553+ }
2554+ len -= size ;
2555+ }
2556+ return len ;
2557+ }
2558+
25112559 private final void _finishChunkedText () throws IOException
25122560 {
25132561 char [] outBuf = _textBuffer .emptyAndGetCurrentSegment ();
@@ -2532,7 +2580,6 @@ private final void _finishChunkedText() throws IOException
25322580 }
25332581 break ;
25342582 }
2535- _chunkLeft = len ;
25362583 int end = _inputPtr + len ;
25372584 if (end <= _inputEnd ) { // all within buffer
25382585 _chunkLeft = 0 ;
@@ -2541,19 +2588,22 @@ private final void _finishChunkedText() throws IOException
25412588 _chunkLeft = (end - _inputEnd );
25422589 _chunkEnd = _inputEnd ;
25432590 }
2544- }
2545- // besides of which just need to ensure there's content
2546- if (_inputPtr >= _inputEnd ) { // end of buffer, but not necessarily chunk
2547- loadMoreGuaranteed ();
2548- int end = _inputPtr + _chunkLeft ;
2549- if (end <= _inputEnd ) { // all within buffer
2550- _chunkLeft = 0 ;
2551- _chunkEnd = end ;
2552- } else { // stretches beyond
2553- _chunkLeft = (end - _inputEnd );
2554- _chunkEnd = _inputEnd ;
2591+ // start of a new chunk
2592+ // First a tight loop for ASCII.
2593+ _textBuffer .setCurrentLength (outPtr );
2594+ if (_finishChunkedTextAscii ()) {
2595+ // chunk fully consumed, let's get the next one
2596+ outBuf = _textBuffer .getBufferWithoutReset ();
2597+ outPtr = _textBuffer .getCurrentSegmentSize ();
2598+ outEnd = outBuf .length ;
2599+ continue ;
25552600 }
2601+ outBuf = _textBuffer .getBufferWithoutReset ();
2602+ outPtr = _textBuffer .getCurrentSegmentSize ();
2603+ outEnd = outBuf .length ;
25562604 }
2605+ // besides of which just need to ensure there's content
2606+ _loadMoreForChunkIfNeeded ();
25572607 }
25582608 int c = input [_inputPtr ++] & 0xFF ;
25592609 int code = codes [c ];
@@ -2563,9 +2613,9 @@ private final void _finishChunkedText() throws IOException
25632613 }
25642614
25652615 switch (code ) {
2566- case 0 :
2567- break ;
2568- case 1 : // 2-byte UTF
2616+ case 0 :
2617+ break ;
2618+ case 1 : // 2-byte UTF
25692619 {
25702620 int d = _nextChunkedByte ();
25712621 if ((d & 0xC0 ) != 0x080 ) {
@@ -2574,24 +2624,24 @@ private final void _finishChunkedText() throws IOException
25742624 c = ((c & 0x1F ) << 6 ) | (d & 0x3F );
25752625 }
25762626 break ;
2577- case 2 : // 3-byte UTF
2578- c = _decodeChunkedUTF8_3 (c );
2579- break ;
2580- case 3 : // 4-byte UTF
2581- c = _decodeChunkedUTF8_4 (c );
2582- // Let's add first part right away:
2583- if (outPtr >= outBuf .length ) {
2584- outBuf = _textBuffer .finishCurrentSegment ();
2585- outPtr = 0 ;
2586- outEnd = outBuf .length ;
2587- }
2588- outBuf [outPtr ++] = (char ) (0xD800 | (c >> 10 ));
2589- c = 0xDC00 | (c & 0x3FF );
2590- // And let the other char output down below
2591- break ;
2592- default :
2593- // Is this good enough error message?
2594- _reportInvalidInitial (c );
2627+ case 2 : // 3-byte UTF
2628+ c = _decodeChunkedUTF8_3 (c );
2629+ break ;
2630+ case 3 : // 4-byte UTF
2631+ c = _decodeChunkedUTF8_4 (c );
2632+ // Let's add first part right away:
2633+ if (outPtr >= outBuf .length ) {
2634+ outBuf = _textBuffer .finishCurrentSegment ();
2635+ outPtr = 0 ;
2636+ outEnd = outBuf .length ;
2637+ }
2638+ outBuf [outPtr ++] = (char ) (0xD800 | (c >> 10 ));
2639+ c = 0xDC00 | (c & 0x3FF );
2640+ // And let the other char output down below
2641+ break ;
2642+ default :
2643+ // Is this good enough error message?
2644+ _reportInvalidInitial (c );
25952645 }
25962646 // Need more room?
25972647 if (outPtr >= outEnd ) {
@@ -2602,9 +2652,75 @@ private final void _finishChunkedText() throws IOException
26022652 // Ok, let's add char to output:
26032653 outBuf [outPtr ++] = (char ) c ;
26042654 }
2655+
26052656 _textBuffer .setCurrentLength (outPtr );
26062657 }
26072658
2659+ /**
2660+ * Reads in a tight loop ASCII text until a non-ASCII char is found. If any, then it returns false to signal the
2661+ * caller that the chunk wasn't finished. The caller will keep adding to the _outBuf at the _outPtr position to
2662+ * finish the current text buffer segment
2663+ */
2664+ private final boolean _finishChunkedTextAscii () throws IOException
2665+ {
2666+ final byte [] input = _inputBuffer ;
2667+ int outPtr = _textBuffer .getCurrentSegmentSize ();
2668+ char [] outBuf = _textBuffer .getBufferWithoutReset ();
2669+ int outEnd = outBuf .length ;
2670+ while (true ) {
2671+ // besides of which just need to ensure there's content
2672+ _loadMoreForChunkIfNeeded ();
2673+
2674+ // Find the size of the loop
2675+ int inSize = _chunkEnd - _inputPtr ;
2676+ int outSize = outEnd - outPtr ;
2677+ int inputPtr = _inputPtr ;
2678+ int inputPtrEnd = _inputPtr + Math .min (inSize , outSize );
2679+ int i = 0 ;
2680+ // loop with copying what we can.
2681+ while (inputPtr < inputPtrEnd && i >= 0 ) {
2682+ i = input [inputPtr ++];
2683+ char val = (char ) i ;
2684+ outBuf [outPtr ++] = val ;
2685+ }
2686+ _inputPtr = inputPtr ;
2687+
2688+ if (i < 0 ) {
2689+ // Found a non-ascii char, correct pointers and return to the caller.
2690+ _inputPtr -= 1 ;
2691+ _textBuffer .setCurrentLength (outPtr - 1 );
2692+ // return false to signal this to the calling code to allow the multi-byte code-path to kick.
2693+ return false ;
2694+ }
2695+ // Need more room?
2696+ if (outPtr >= outEnd ) {
2697+ outBuf = _textBuffer .finishCurrentSegment ();
2698+ outPtr = 0 ;
2699+ outEnd = outBuf .length ;
2700+ }
2701+ if (_inputPtr < _chunkEnd || _chunkLeft > 0 ) {
2702+ continue ;
2703+ }
2704+ _textBuffer .setCurrentLength (outPtr );
2705+ return true ;
2706+ }
2707+ }
2708+
2709+ private final void _loadMoreForChunkIfNeeded () throws IOException
2710+ {
2711+ if (_inputPtr >= _inputEnd ) { // end of buffer, but not necessarily chunk
2712+ loadMoreGuaranteed ();
2713+ int end = _inputPtr + _chunkLeft ;
2714+ if (end <= _inputEnd ) { // all within buffer
2715+ _chunkLeft = 0 ;
2716+ _chunkEnd = end ;
2717+ } else { // stretches beyond
2718+ _chunkLeft = (end - _inputEnd );
2719+ _chunkEnd = _inputEnd ;
2720+ }
2721+ }
2722+ }
2723+
26082724 private final int _nextByte () throws IOException {
26092725 int inPtr = _inputPtr ;
26102726 if (inPtr < _inputEnd ) {
@@ -3716,6 +3832,10 @@ protected final boolean _tryToLoadToHaveAtLeast(int minAvailable) throws IOExcep
37163832 if (_inputStream == null ) {
37173833 return false ;
37183834 }
3835+ // The code below assumes this is true, so we check it here.
3836+ if (_inputBuffer .length < minAvailable ) {
3837+ return false ;
3838+ }
37193839 // Need to move remaining data in front?
37203840 int amount = _inputEnd - _inputPtr ;
37213841 if (amount > 0 && _inputPtr > 0 ) {
0 commit comments