@@ -222,6 +222,14 @@ public class Tokenizer implements Locator {
222
222
223
223
public static final int AMBIGUOUS_AMPERSAND = 75 ;
224
224
225
// Tokenizer state: a '<' was just seen inside a comment. The comment
// states switch here on '<' after appending it to the comment data.
+ public static final int COMMENT_LESSTHAN = 76 ;
226
+
227
// Tokenizer state: "<!" seen inside a comment (entered from
// COMMENT_LESSTHAN on '!').
+ public static final int COMMENT_LESSTHAN_BANG = 77 ;
228
+
229
// Tokenizer state: "<!-" seen inside a comment (entered from
// COMMENT_LESSTHAN_BANG on '-').
+ public static final int COMMENT_LESSTHAN_BANG_DASH = 78 ;
230
+
231
// Tokenizer state: "<!--" seen inside a comment (entered from
// COMMENT_LESSTHAN_BANG_DASH on '-').
+ public static final int COMMENT_LESSTHAN_BANG_DASH_DASH = 79 ;
232
+
225
233
/**
226
234
* Magic value for UTF-16 operations.
227
235
*/
@@ -1015,9 +1023,8 @@ private void maybeAppendSpaceToBogusComment() throws SAXException {
1015
1023
1016
1024
// ]NOCPP]
1017
1025
1018
- @ Inline private void adjustDoubleHyphenAndAppendToStrBufAndErr (char c )
1026
+ @ Inline private void adjustDoubleHyphenAndAppendToStrBufAndErr (char c , boolean reportedConsecutiveHyphens )
1019
1027
throws SAXException {
1020
- errConsecutiveHyphens ();
1021
1028
// [NOCPP[
1022
1029
switch (commentPolicy ) {
1023
1030
case ALTER_INFOSET :
@@ -1028,7 +1035,9 @@ private void maybeAppendSpaceToBogusComment() throws SAXException {
1028
1035
appendStrBuf ('-' );
1029
1036
// CPPONLY: MOZ_FALLTHROUGH;
1030
1037
case ALLOW :
1031
- warn ("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment." );
1038
+ if (!reportedConsecutiveHyphens ) {
1039
+ warn ("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment." );
1040
+ }
1032
1041
// ]NOCPP]
1033
1042
appendStrBuf (c );
1034
1043
// [NOCPP[
@@ -1490,6 +1499,7 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
1490
1499
@ SuppressWarnings ("unused" ) private int stateLoop (int state , char c ,
1491
1500
int pos , @ NoLength char [] buf , boolean reconsume , int returnState ,
1492
1501
int endPos ) throws SAXException {
1502
+ boolean reportedConsecutiveHyphens = false ;
1493
1503
/*
1494
1504
* Idioms used in this code:
1495
1505
*
@@ -2577,6 +2587,7 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
2577
2587
}
2578
2588
// CPPONLY: MOZ_FALLTHROUGH;
2579
2589
case COMMENT_START :
2590
+ reportedConsecutiveHyphens = false ;
2580
2591
commentstartloop : for (;;) {
2581
2592
if (++pos == endPos ) {
2582
2593
break stateloop ;
@@ -2609,6 +2620,10 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
2609
2620
*/
2610
2621
state = transition (state , Tokenizer .DATA , reconsume , pos );
2611
2622
continue stateloop ;
2623
+ case '<' :
2624
+ appendStrBuf (c );
2625
+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
2626
+ continue stateloop ;
2612
2627
case '\r' :
2613
2628
appendStrBufCarriageReturn ();
2614
2629
state = transition (state , Tokenizer .COMMENT , reconsume , pos );
@@ -2654,6 +2669,10 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
2654
2669
state = transition (state , Tokenizer .COMMENT_END_DASH , reconsume , pos );
2655
2670
break commentloop ;
2656
2671
// continue stateloop;
2672
+ case '<' :
2673
+ appendStrBuf (c );
2674
+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
2675
+ continue stateloop ;
2657
2676
case '\r' :
2658
2677
appendStrBufCarriageReturn ();
2659
2678
break stateloop ;
@@ -2696,6 +2715,10 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
2696
2715
state = transition (state , Tokenizer .COMMENT_END , reconsume , pos );
2697
2716
break commentenddashloop ;
2698
2717
// continue stateloop;
2718
+ case '<' :
2719
+ appendStrBuf (c );
2720
+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
2721
+ continue stateloop ;
2699
2722
case '\r' :
2700
2723
appendStrBufCarriageReturn ();
2701
2724
state = transition (state , Tokenizer .COMMENT , reconsume , pos );
@@ -2750,11 +2773,16 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
2750
2773
* Append a U+002D HYPHEN-MINUS (-) character to
2751
2774
* the comment token's data.
2752
2775
*/
2753
- adjustDoubleHyphenAndAppendToStrBufAndErr (c );
2776
+ adjustDoubleHyphenAndAppendToStrBufAndErr (c , reportedConsecutiveHyphens );
2777
+ reportedConsecutiveHyphens = true ;
2754
2778
/*
2755
2779
* Stay in the comment end state.
2756
2780
*/
2757
2781
continue ;
2782
+ case '<' :
2783
+ appendStrBuf (c );
2784
+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
2785
+ continue stateloop ;
2758
2786
case '\r' :
2759
2787
adjustDoubleHyphenAndAppendToStrBufCarriageReturn ();
2760
2788
state = transition (state , Tokenizer .COMMENT , reconsume , pos );
@@ -2764,7 +2792,6 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
2764
2792
state = transition (state , Tokenizer .COMMENT , reconsume , pos );
2765
2793
continue stateloop ;
2766
2794
case '!' :
2767
- errHyphenHyphenBang ();
2768
2795
appendStrBuf (c );
2769
2796
state = transition (state , Tokenizer .COMMENT_END_BANG , reconsume , pos );
2770
2797
continue stateloop ;
@@ -2777,7 +2804,8 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
2777
2804
* and the input character to the comment
2778
2805
* token's data.
2779
2806
*/
2780
- adjustDoubleHyphenAndAppendToStrBufAndErr (c );
2807
+ adjustDoubleHyphenAndAppendToStrBufAndErr (c , reportedConsecutiveHyphens );
2808
+ reportedConsecutiveHyphens = true ;
2781
2809
/*
2782
2810
* Switch to the comment state.
2783
2811
*/
@@ -2845,6 +2873,148 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
2845
2873
continue stateloop ;
2846
2874
}
2847
2875
}
2876
case COMMENT_LESSTHAN:
    /*
     * A '<' was seen inside a comment; the state that switched here
     * has already appended it to the comment data. This state only
     * decides whether the '<' starts a "<!" sequence.
     */
    for (;;) {
        if (++pos == endPos) {
            break stateloop;
        }
        c = checkChar(buf, pos);
        switch (c) {
            case '!':
                // "<!" so far: keep tracking a possible nested "<!--".
                appendStrBuf(c);
                state = transition(state, Tokenizer.COMMENT_LESSTHAN_BANG, reconsume, pos);
                continue stateloop;
            case '<':
                // Another '<': the new one becomes the candidate start.
                appendStrBuf(c);
                state = transition(state, Tokenizer.COMMENT_LESSTHAN, reconsume, pos);
                continue stateloop;
            case '-':
                appendStrBuf(c);
                state = transition(state, Tokenizer.COMMENT_END_DASH, reconsume, pos);
                continue stateloop;
            case '\r':
                appendStrBufCarriageReturn();
                /*
                 * A line break is ordinary comment data, so the '<'
                 * run is over. Without this transition the tokenizer
                 * would resume in this state and treat "<", newline,
                 * "!--" as a nested "<!--".
                 */
                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                break stateloop;
            case '\n':
                appendStrBufLineFeed();
                // Same as '\r': go back to the plain comment state.
                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                continue stateloop;
            case '\u0000':
                c = '\uFFFD';
                // fall thru
            default:
                appendStrBuf(c);
                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                continue stateloop;
        }
    }
2910
case COMMENT_LESSTHAN_BANG:
    /*
     * "<!" was seen inside a comment and both characters are already
     * in the comment data. Only a following '-' keeps the nested
     * "<!--" detection alive.
     */
    for (;;) {
        if (++pos == endPos) {
            break stateloop;
        }
        c = checkChar(buf, pos);
        switch (c) {
            case '-':
                appendStrBuf(c);
                state = transition(state, Tokenizer.COMMENT_LESSTHAN_BANG_DASH, reconsume, pos);
                continue stateloop;
            case '<':
                // Restart detection from this new '<'.
                appendStrBuf(c);
                state = transition(state, Tokenizer.COMMENT_LESSTHAN, reconsume, pos);
                continue stateloop;
            case '\r':
                appendStrBufCarriageReturn();
                /*
                 * A line break is ordinary comment data; leave this
                 * state so that a '-' after the break is not taken as
                 * the continuation of "<!".
                 */
                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                break stateloop;
            case '\n':
                appendStrBufLineFeed();
                // Same as '\r': go back to the plain comment state.
                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                continue stateloop;
            case '\u0000':
                c = '\uFFFD';
                // fall thru
            default:
                appendStrBuf(c);
                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                continue stateloop;
        }
    }
2940
case COMMENT_LESSTHAN_BANG_DASH:
    /*
     * "<!-" was seen inside a comment and is already in the comment
     * data. A second '-' completes a nested "<!--".
     */
    for (;;) {
        if (++pos == endPos) {
            break stateloop;
        }
        c = checkChar(buf, pos);
        switch (c) {
            case '-':
                appendStrBuf(c);
                state = transition(state, Tokenizer.COMMENT_LESSTHAN_BANG_DASH_DASH, reconsume, pos);
                continue stateloop;
            case '<':
                // Restart detection from this new '<'.
                appendStrBuf(c);
                state = transition(state, Tokenizer.COMMENT_LESSTHAN, reconsume, pos);
                continue stateloop;
            case '\r':
                appendStrBufCarriageReturn();
                /*
                 * A line break is ordinary comment data; leave this
                 * state so that a '-' after the break is not taken as
                 * the second dash of "<!--".
                 */
                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                break stateloop;
            case '\n':
                appendStrBufLineFeed();
                // Same as '\r': go back to the plain comment state.
                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                continue stateloop;
            case '\u0000':
                c = '\uFFFD';
                // fall thru
            default:
                appendStrBuf(c);
                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                continue stateloop;
        }
    }
2970
case COMMENT_LESSTHAN_BANG_DASH_DASH:
    /*
     * "<!--" was seen inside a comment and all four characters are
     * already in the comment data. A '>' closes the comment here;
     * any other character reports a nested comment and is then
     * handled the way the comment end state handles a character
     * after "--".
     */
    for (;;) {
        if (++pos == endPos) {
            break stateloop;
        }
        c = checkChar(buf, pos);
        switch (c) {
            case '>':
                appendStrBuf(c);
                // Drop the trailing "-->" (3 chars) from the comment data.
                emitComment(3, pos);
                state = transition(state, Tokenizer.DATA, reconsume, pos);
                continue stateloop;
            case '-':
                errNestedComment();
                adjustDoubleHyphenAndAppendToStrBufAndErr(c, reportedConsecutiveHyphens);
                reportedConsecutiveHyphens = true;
                state = transition(state, Tokenizer.COMMENT_END, reconsume, pos);
                continue stateloop;
            case '\r':
                errNestedComment();
                /*
                 * Same steps as
                 * adjustDoubleHyphenAndAppendToStrBufCarriageReturn()
                 * (silent CR bookkeeping, then append '\n'), but
                 * passing the real reportedConsecutiveHyphens flag.
                 * Appending the raw '\r' would bypass the CR handling
                 * used by every other comment state.
                 */
                silentCarriageReturn();
                adjustDoubleHyphenAndAppendToStrBufAndErr('\n', reportedConsecutiveHyphens);
                reportedConsecutiveHyphens = true;
                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                break stateloop;
            case '\n':
                errNestedComment();
                // Mirror adjustDoubleHyphenAndAppendToStrBufLineFeed().
                silentLineFeed();
                adjustDoubleHyphenAndAppendToStrBufAndErr('\n', reportedConsecutiveHyphens);
                reportedConsecutiveHyphens = true;
                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                /*
                 * Must leave this loop: a bare "continue" would keep
                 * consuming input with this state's rules even though
                 * state was just set to COMMENT.
                 */
                continue stateloop;
            case '!':
                errNestedComment();
                // NOTE(review): the comment end state appends '!' with a
                // plain appendStrBuf(c); confirm the double-hyphen
                // adjustment is wanted before a possible "--!>".
                adjustDoubleHyphenAndAppendToStrBufAndErr(c, reportedConsecutiveHyphens);
                reportedConsecutiveHyphens = true;
                state = transition(state, Tokenizer.COMMENT_END_BANG, reconsume, pos);
                continue stateloop;
            case '\u0000':
                c = '\uFFFD';
                /*
                 * Fall through to default: a NUL is ordinary comment
                 * data once replaced, so it must not share the '!' arm
                 * and end up in COMMENT_END_BANG.
                 */
                // fall thru
            default:
                errNestedComment();
                adjustDoubleHyphenAndAppendToStrBufAndErr(c, reportedConsecutiveHyphens);
                reportedConsecutiveHyphens = true;
                /*
                 * The "--" is now followed by an ordinary character,
                 * so no comment end is pending: return to COMMENT,
                 * as the comment end state's default arm does.
                 */
                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                continue stateloop;
        }
    }
3017
+ // XXX reorder point
2848
3018
case COMMENT_START_DASH :
2849
3019
if (++pos == endPos ) {
2850
3020
break stateloop ;
@@ -2873,6 +3043,10 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
2873
3043
*/
2874
3044
state = transition (state , Tokenizer .DATA , reconsume , pos );
2875
3045
continue stateloop ;
3046
+ case '<' :
3047
+ appendStrBuf (c );
3048
+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
3049
+ continue stateloop ;
2876
3050
case '\r' :
2877
3051
appendStrBufCarriageReturn ();
2878
3052
state = transition (state , Tokenizer .COMMENT , reconsume , pos );
@@ -6005,13 +6179,13 @@ private void initDoctypeFields() {
6005
6179
/**
 * Calls silentCarriageReturn() and then passes a line feed ('\n') to
 * adjustDoubleHyphenAndAppendToStrBufAndErr.
 *
 * NOTE(review): the flag is hard-coded to false here, so the helper's
 * "not mappable to XML 1.0" warning is never suppressed on this path,
 * even if stateLoop has already reported consecutive hyphens. Confirm
 * that the repeated warning is intended.
 *
 * @throws SAXException declared by this method; which call raises it
 *         is not visible here
 */
@ Inline private void adjustDoubleHyphenAndAppendToStrBufCarriageReturn ()
6006
6180
throws SAXException {
6007
6181
silentCarriageReturn ();
6008
- adjustDoubleHyphenAndAppendToStrBufAndErr ('\n' );
6182
+ adjustDoubleHyphenAndAppendToStrBufAndErr ('\n' , false );
6009
6183
}
6010
6184
6011
6185
/**
 * Calls silentLineFeed() and then passes a line feed ('\n') to
 * adjustDoubleHyphenAndAppendToStrBufAndErr.
 *
 * NOTE(review): the flag is hard-coded to false here, so the helper's
 * "not mappable to XML 1.0" warning is never suppressed on this path,
 * even if stateLoop has already reported consecutive hyphens. Confirm
 * that the repeated warning is intended.
 *
 * @throws SAXException declared by this method; which call raises it
 *         is not visible here
 */
@ Inline private void adjustDoubleHyphenAndAppendToStrBufLineFeed ()
6012
6186
throws SAXException {
6013
6187
silentLineFeed ();
6014
- adjustDoubleHyphenAndAppendToStrBufAndErr ('\n' );
6188
+ adjustDoubleHyphenAndAppendToStrBufAndErr ('\n' , false );
6015
6189
}
6016
6190
6017
6191
@ Inline private void appendStrBufLineFeed () {
@@ -6316,6 +6490,8 @@ public void eof() throws SAXException {
6316
6490
break eofloop ;
6317
6491
case COMMENT_START :
6318
6492
case COMMENT :
6493
+ case COMMENT_LESSTHAN :
6494
+ case COMMENT_LESSTHAN_BANG :
6319
6495
/*
6320
6496
* EOF Parse error.
6321
6497
*/
@@ -6327,6 +6503,7 @@ public void eof() throws SAXException {
6327
6503
*/
6328
6504
break eofloop ;
6329
6505
case COMMENT_END :
6506
+ case COMMENT_LESSTHAN_BANG_DASH_DASH :
6330
6507
errEofInComment ();
6331
6508
/* Emit the comment token. */
6332
6509
emitComment (2 , 0 );
@@ -6336,6 +6513,7 @@ public void eof() throws SAXException {
6336
6513
break eofloop ;
6337
6514
case COMMENT_END_DASH :
6338
6515
case COMMENT_START_DASH :
6516
+ case COMMENT_LESSTHAN_BANG_DASH :
6339
6517
errEofInComment ();
6340
6518
/* Emit the comment token. */
6341
6519
emitComment (1 , 0 );
@@ -6960,7 +7138,7 @@ protected void errGtInPublicId() throws SAXException {
6960
7138
// Empty in this class. NOTE(review): presumably an overridable
// error-reporting hook; its call sites are not visible here.
protected void errNamelessDoctype () throws SAXException {
6961
7139
}
6962
7140
6963
- protected void errConsecutiveHyphens () throws SAXException {
7141
// Called from the COMMENT_LESSTHAN_BANG_DASH_DASH state for every
// character other than '>' that follows a "<!--" inside a comment.
// Empty in this class. NOTE(review): presumably meant to be overridden
// by an error-reporting subclass; confirm.
+ protected void errNestedComment () throws SAXException {
6964
7142
}
6965
7143
6966
7144
protected void errPrematureEndOfComment () throws SAXException {
@@ -7110,9 +7288,6 @@ protected void errExpectedSystemId() throws SAXException {
7110
7288
// Empty in this class. NOTE(review): presumably an overridable
// error-reporting hook; its call sites are not visible here.
protected void errMissingSpaceBeforeDoctypeName () throws SAXException {
7111
7289
}
7112
7290
7113
- protected void errHyphenHyphenBang () throws SAXException {
7114
- }
7115
-
7116
7291
// Empty in this class. NOTE(review): presumably an overridable
// error-reporting hook; its call sites are not visible here.
protected void errNcrControlChar () throws SAXException {
7117
7292
}
7118
7293
0 commit comments