@@ -221,6 +221,8 @@ public class Tokenizer implements Locator, Locator2 {
221
221
222
222
public static final int PROCESSING_INSTRUCTION_QUESTION_MARK = 74 ;
223
223
224
+ public static final int AMBIGUOUS_AMPERSAND = 75 ; // State id used as the transition target when named character reference lookup gives up without a match.
225
+
224
226
/**
225
227
* Magic value for UTF-16 operations.
226
228
*/
@@ -3054,6 +3056,7 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
3054
3056
case '<' :
3055
3057
case '&' :
3056
3058
case '\u0000' :
3059
+ case ';' :
3057
3060
emitOrAppendCharRefBuf (returnState );
3058
3061
if ((returnState & DATA_AND_RCDATA_MASK ) == 0 ) {
3059
3062
cstart = pos ;
@@ -3082,17 +3085,12 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
3082
3085
firstCharKey = c - 'A' ;
3083
3086
} else {
3084
3087
// No match
3085
- /*
3086
- * If no match can be made, then this is a parse
3087
- * error.
3088
- */
3089
- errNoNamedCharacterMatch ();
3090
3088
emitOrAppendCharRefBuf (returnState );
3091
3089
if ((returnState & DATA_AND_RCDATA_MASK ) == 0 ) {
3092
3090
cstart = pos ;
3093
3091
}
3094
3092
reconsume = true ;
3095
- state = transition (state , returnState , reconsume , pos );
3093
+ state = transition (state , Tokenizer . AMBIGUOUS_AMPERSAND , reconsume , pos );
3096
3094
continue stateloop ;
3097
3095
}
3098
3096
// Didn't fail yet
@@ -3153,17 +3151,12 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
3153
3151
}
3154
3152
}
3155
3153
if (hilo == 0 ) {
3156
- /*
3157
- * If no match can be made, then this is a parse
3158
- * error.
3159
- */
3160
- errNoNamedCharacterMatch ();
3161
3154
emitOrAppendCharRefBuf (returnState );
3162
3155
if ((returnState & DATA_AND_RCDATA_MASK ) == 0 ) {
3163
3156
cstart = pos ;
3164
3157
}
3165
3158
reconsume = true ;
3166
- state = transition (state , returnState , reconsume , pos );
3159
+ state = transition (state , Tokenizer . AMBIGUOUS_AMPERSAND , reconsume , pos );
3167
3160
continue stateloop ;
3168
3161
}
3169
3162
// Didn't fail yet
@@ -3246,16 +3239,12 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
3246
3239
3247
3240
if (candidate == -1 ) {
3248
3241
// reconsume deals with CR, LF or nul
3249
- /*
3250
- * If no match can be made, then this is a parse error.
3251
- */
3252
- errNoNamedCharacterMatch ();
3253
3242
emitOrAppendCharRefBuf (returnState );
3254
3243
if ((returnState & DATA_AND_RCDATA_MASK ) == 0 ) {
3255
3244
cstart = pos ;
3256
3245
}
3257
3246
reconsume = true ;
3258
- state = transition (state , returnState , reconsume , pos );
3247
+ state = transition (state , Tokenizer . AMBIGUOUS_AMPERSAND , reconsume , pos );
3259
3248
continue stateloop ;
3260
3249
} else {
3261
3250
// c can't be CR, LF or nul if we got here
@@ -3293,10 +3282,9 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
3293
3282
* after the U+0026 AMPERSAND (&) must be
3294
3283
* unconsumed, and nothing is returned.
3295
3284
*/
3296
- errNoNamedCharacterMatch ();
3297
3285
appendCharRefBufToStrBuf ();
3298
3286
reconsume = true ;
3299
- state = transition (state , returnState , reconsume , pos );
3287
+ state = transition (state , Tokenizer . AMBIGUOUS_AMPERSAND , reconsume , pos );
3300
3288
continue stateloop ;
3301
3289
}
3302
3290
}
@@ -3359,6 +3347,28 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
3359
3347
* I'm ∉ I tell you.
3360
3348
*/
3361
3349
}
3350
+ // XXX reorder point
3351
+ case AMBIGUOUS_AMPERSAND : // Target of the transitions taken when named character reference lookup fails to match.
3352
+ ampersandloop : for (;;) { // NOTE(review): no break/continue inside this case targets the ampersandloop label.
3353
+ if (reconsume ) { // Nothing in this loop clears reconsume, so once true this branch runs on every iteration.
3354
+ if (++pos == endPos ) { // Look one position ahead; leave the state loop if that is the end of the buffer.
3355
+ break stateloop ; // NOTE(review): pos stays incremented and the character at the previous pos is not examined in this pass; confirm how the caller resumes.
3356
+ }
3357
+ pos --; // Undo the look-ahead increment so c is read from the current position.
3358
+ c = checkChar (buf , pos );
3359
+ }
3360
+ if (c == ';' ) { // A ';' is the only input for which this state reports the no-match error.
3361
+ errNoNamedCharacterMatch ();
3362
+ } else if ((c >= '0' && c <= '9' )
3363
+ || (c >= 'A' && c <= 'Z' )
3364
+ || (c >= 'a' && c <= 'z' )) { // ASCII alphanumeric: append it, advance, and stay in this state.
3365
+ appendStrBuf (c );
3366
+ pos ++;
3367
+ continue ;
3368
+ }
3369
+ state = transition (state , returnState , reconsume , pos ); // ';' (after the error) or any other character: hand control back to returnState with reconsume unchanged.
3370
+ continue stateloop ;
3371
+ }
3362
3372
case CONSUME_NCR :
3363
3373
if (++pos == endPos ) {
3364
3374
break stateloop ;
@@ -6449,7 +6459,6 @@ public void eof() throws SAXException {
6449
6459
state = returnState ;
6450
6460
continue ;
6451
6461
case CHARACTER_REFERENCE_HILO_LOOKUP :
6452
- errNoNamedCharacterMatch ();
6453
6462
emitOrAppendCharRefBuf (returnState );
6454
6463
state = returnState ;
6455
6464
continue ;
@@ -6503,10 +6512,6 @@ public void eof() throws SAXException {
6503
6512
}
6504
6513
6505
6514
if (candidate == -1 ) {
6506
- /*
6507
- * If no match can be made, then this is a parse error.
6508
- */
6509
- errNoNamedCharacterMatch ();
6510
6515
emitOrAppendCharRefBuf (returnState );
6511
6516
state = returnState ;
6512
6517
continue eofloop ;
@@ -6544,7 +6549,6 @@ public void eof() throws SAXException {
6544
6549
* after the U+0026 AMPERSAND (&) must be
6545
6550
* unconsumed, and nothing is returned.
6546
6551
*/
6547
- errNoNamedCharacterMatch ();
6548
6552
appendCharRefBufToStrBuf ();
6549
6553
state = returnState ;
6550
6554
continue eofloop ;
0 commit comments