@@ -220,6 +220,8 @@ public class Tokenizer implements Locator {
220
220
221
221
public static final int PROCESSING_INSTRUCTION_QUESTION_MARK = 74 ;
222
222
223
+ public static final int AMBIGUOUS_AMPERSAND = 75 ;
224
+
223
225
/**
224
226
* Magic value for UTF-16 operations.
225
227
*/
@@ -3106,6 +3108,7 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
3106
3108
case '<' :
3107
3109
case '&' :
3108
3110
case '\u0000' :
3111
+ case ';' :
3109
3112
emitOrAppendCharRefBuf (returnState );
3110
3113
if ((returnState & DATA_AND_RCDATA_MASK ) == 0 ) {
3111
3114
cstart = pos ;
@@ -3134,17 +3137,12 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
3134
3137
firstCharKey = c - 'A' ;
3135
3138
} else {
3136
3139
// No match
3137
- /*
3138
- * If no match can be made, then this is a parse
3139
- * error.
3140
- */
3141
- errNoNamedCharacterMatch ();
3142
3140
emitOrAppendCharRefBuf (returnState );
3143
3141
if ((returnState & DATA_AND_RCDATA_MASK ) == 0 ) {
3144
3142
cstart = pos ;
3145
3143
}
3146
3144
reconsume = true ;
3147
- state = transition (state , returnState , reconsume , pos );
3145
+ state = transition (state , Tokenizer . AMBIGUOUS_AMPERSAND , reconsume , pos );
3148
3146
continue stateloop ;
3149
3147
}
3150
3148
// Didn't fail yet
@@ -3205,17 +3203,12 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
3205
3203
}
3206
3204
}
3207
3205
if (hilo == 0 ) {
3208
- /*
3209
- * If no match can be made, then this is a parse
3210
- * error.
3211
- */
3212
- errNoNamedCharacterMatch ();
3213
3206
emitOrAppendCharRefBuf (returnState );
3214
3207
if ((returnState & DATA_AND_RCDATA_MASK ) == 0 ) {
3215
3208
cstart = pos ;
3216
3209
}
3217
3210
reconsume = true ;
3218
- state = transition (state , returnState , reconsume , pos );
3211
+ state = transition (state , Tokenizer . AMBIGUOUS_AMPERSAND , reconsume , pos );
3219
3212
continue stateloop ;
3220
3213
}
3221
3214
// Didn't fail yet
@@ -3298,16 +3291,12 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
3298
3291
3299
3292
if (candidate == -1 ) {
3300
3293
// reconsume deals with CR, LF or nul
3301
- /*
3302
- * If no match can be made, then this is a parse error.
3303
- */
3304
- errNoNamedCharacterMatch ();
3305
3294
emitOrAppendCharRefBuf (returnState );
3306
3295
if ((returnState & DATA_AND_RCDATA_MASK ) == 0 ) {
3307
3296
cstart = pos ;
3308
3297
}
3309
3298
reconsume = true ;
3310
- state = transition (state , returnState , reconsume , pos );
3299
+ state = transition (state , Tokenizer . AMBIGUOUS_AMPERSAND , reconsume , pos );
3311
3300
continue stateloop ;
3312
3301
} else {
3313
3302
// c can't be CR, LF or nul if we got here
@@ -3345,10 +3334,9 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
3345
3334
* after the U+0026 AMPERSAND (&) must be
3346
3335
* unconsumed, and nothing is returned.
3347
3336
*/
3348
- errNoNamedCharacterMatch ();
3349
3337
appendCharRefBufToStrBuf ();
3350
3338
reconsume = true ;
3351
- state = transition (state , returnState , reconsume , pos );
3339
+ state = transition (state , Tokenizer . AMBIGUOUS_AMPERSAND , reconsume , pos );
3352
3340
continue stateloop ;
3353
3341
}
3354
3342
}
@@ -3411,6 +3399,28 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
3411
3399
* I'm ∉ I tell you.
3412
3400
*/
3413
3401
}
3402
+ // XXX reorder point
3403
+ case AMBIGUOUS_AMPERSAND :
3404
+ ampersandloop : for (;;) {
3405
+ if (reconsume ) {
3406
+ if (++pos == endPos ) {
3407
+ break stateloop ;
3408
+ }
3409
+ pos --;
3410
+ c = checkChar (buf , pos );
3411
+ }
3412
+ if (c == ';' ) {
3413
+ errNoNamedCharacterMatch ();
3414
+ } else if ((c >= '0' && c <= '9' )
3415
+ || (c >= 'A' && c <= 'Z' )
3416
+ || (c >= 'a' && c <= 'z' )) {
3417
+ appendStrBuf (c );
3418
+ pos ++;
3419
+ continue ;
3420
+ }
3421
+ state = transition (state , returnState , reconsume , pos );
3422
+ continue stateloop ;
3423
+ }
3414
3424
case CONSUME_NCR :
3415
3425
if (++pos == endPos ) {
3416
3426
break stateloop ;
@@ -6501,7 +6511,6 @@ public void eof() throws SAXException {
6501
6511
state = returnState ;
6502
6512
continue ;
6503
6513
case CHARACTER_REFERENCE_HILO_LOOKUP :
6504
- errNoNamedCharacterMatch ();
6505
6514
emitOrAppendCharRefBuf (returnState );
6506
6515
state = returnState ;
6507
6516
continue ;
@@ -6555,10 +6564,6 @@ public void eof() throws SAXException {
6555
6564
}
6556
6565
6557
6566
if (candidate == -1 ) {
6558
- /*
6559
- * If no match can be made, then this is a parse error.
6560
- */
6561
- errNoNamedCharacterMatch ();
6562
6567
emitOrAppendCharRefBuf (returnState );
6563
6568
state = returnState ;
6564
6569
continue eofloop ;
@@ -6596,7 +6601,6 @@ public void eof() throws SAXException {
6596
6601
* after the U+0026 AMPERSAND (&) must be
6597
6602
* unconsumed, and nothing is returned.
6598
6603
*/
6599
- errNoNamedCharacterMatch ();
6600
6604
appendCharRefBufToStrBuf ();
6601
6605
state = returnState ;
6602
6606
continue eofloop ;
0 commit comments