@@ -222,6 +222,14 @@ public class Tokenizer implements Locator {
222
222
223
223
public static final int AMBIGUOUS_AMPERSAND = 75 ;
224
224
225
// Tokenizer state ids for tracking a "<!--" sequence that appears inside a comment.
// COMMENT_LESSTHAN: entered from the comment states when '<' is read.
+ public static final int COMMENT_LESSTHAN = 76 ;
226
+
227
// COMMENT_LESSTHAN_BANG: entered from COMMENT_LESSTHAN when '!' is read.
+ public static final int COMMENT_LESSTHAN_BANG = 77 ;
228
+
229
// COMMENT_LESSTHAN_BANG_DASH: entered from COMMENT_LESSTHAN_BANG when '-' is read.
+ public static final int COMMENT_LESSTHAN_BANG_DASH = 78 ;
230
+
231
// COMMENT_LESSTHAN_BANG_DASH_DASH: entered from COMMENT_LESSTHAN_BANG_DASH when a second '-' is read.
+ public static final int COMMENT_LESSTHAN_BANG_DASH_DASH = 79 ;
232
+
225
233
/**
226
234
* Magic value for UTF-16 operations.
227
235
*/
@@ -1034,9 +1042,8 @@ private void maybeAppendSpaceToBogusComment() throws SAXException {
1034
1042
1035
1043
// ]NOCPP]
1036
1044
1037
- @ Inline private void adjustDoubleHyphenAndAppendToStrBufAndErr (char c )
1045
+ @ Inline private void adjustDoubleHyphenAndAppendToStrBufAndErr (char c , boolean reportedConsecutiveHyphens )
1038
1046
throws SAXException {
1039
- errConsecutiveHyphens ();
1040
1047
// [NOCPP[
1041
1048
switch (commentPolicy ) {
1042
1049
case ALTER_INFOSET :
@@ -1047,7 +1054,9 @@ private void maybeAppendSpaceToBogusComment() throws SAXException {
1047
1054
appendStrBuf ('-' );
1048
1055
// CPPONLY: MOZ_FALLTHROUGH;
1049
1056
case ALLOW :
1050
- warn ("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment." );
1057
+ if (!reportedConsecutiveHyphens ) {
1058
+ warn ("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment." );
1059
+ }
1051
1060
// ]NOCPP]
1052
1061
appendStrBuf (c );
1053
1062
// [NOCPP[
@@ -1509,6 +1518,7 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
1509
1518
@ SuppressWarnings ("unused" ) private int stateLoop (int state , char c ,
1510
1519
int pos , @ NoLength char [] buf , boolean reconsume , int returnState ,
1511
1520
int endPos ) throws SAXException {
1521
+ boolean reportedConsecutiveHyphens = false ;
1512
1522
/*
1513
1523
* Idioms used in this code:
1514
1524
*
@@ -2594,6 +2604,7 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
2594
2604
}
2595
2605
// CPPONLY: MOZ_FALLTHROUGH;
2596
2606
case COMMENT_START :
2607
+ reportedConsecutiveHyphens = false ;
2597
2608
commentstartloop : for (;;) {
2598
2609
if (++pos == endPos ) {
2599
2610
break stateloop ;
@@ -2626,6 +2637,10 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
2626
2637
*/
2627
2638
state = transition (state , Tokenizer .DATA , reconsume , pos );
2628
2639
continue stateloop ;
2640
+ case '<' :
2641
+ appendStrBuf (c );
2642
+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
2643
+ continue stateloop ;
2629
2644
case '\r' :
2630
2645
appendStrBufCarriageReturn ();
2631
2646
state = transition (state , Tokenizer .COMMENT , reconsume , pos );
@@ -2671,6 +2686,10 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
2671
2686
state = transition (state , Tokenizer .COMMENT_END_DASH , reconsume , pos );
2672
2687
break commentloop ;
2673
2688
// continue stateloop;
2689
+ case '<' :
2690
+ appendStrBuf (c );
2691
+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
2692
+ continue stateloop ;
2674
2693
case '\r' :
2675
2694
appendStrBufCarriageReturn ();
2676
2695
break stateloop ;
@@ -2713,6 +2732,10 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
2713
2732
state = transition (state , Tokenizer .COMMENT_END , reconsume , pos );
2714
2733
break commentenddashloop ;
2715
2734
// continue stateloop;
2735
+ case '<' :
2736
+ appendStrBuf (c );
2737
+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
2738
+ continue stateloop ;
2716
2739
case '\r' :
2717
2740
appendStrBufCarriageReturn ();
2718
2741
state = transition (state , Tokenizer .COMMENT , reconsume , pos );
@@ -2767,11 +2790,16 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
2767
2790
* Append a U+002D HYPHEN-MINUS (-) character to
2768
2791
* the comment token's data.
2769
2792
*/
2770
- adjustDoubleHyphenAndAppendToStrBufAndErr (c );
2793
+ adjustDoubleHyphenAndAppendToStrBufAndErr (c , reportedConsecutiveHyphens );
2794
+ reportedConsecutiveHyphens = true ;
2771
2795
/*
2772
2796
* Stay in the comment end state.
2773
2797
*/
2774
2798
continue ;
2799
+ case '<' :
2800
+ appendStrBuf (c );
2801
+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
2802
+ continue stateloop ;
2775
2803
case '\r' :
2776
2804
adjustDoubleHyphenAndAppendToStrBufCarriageReturn ();
2777
2805
state = transition (state , Tokenizer .COMMENT , reconsume , pos );
@@ -2781,7 +2809,6 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
2781
2809
state = transition (state , Tokenizer .COMMENT , reconsume , pos );
2782
2810
continue stateloop ;
2783
2811
case '!' :
2784
- errHyphenHyphenBang ();
2785
2812
appendStrBuf (c );
2786
2813
state = transition (state , Tokenizer .COMMENT_END_BANG , reconsume , pos );
2787
2814
continue stateloop ;
@@ -2794,7 +2821,8 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
2794
2821
* and the input character to the comment
2795
2822
* token's data.
2796
2823
*/
2797
- adjustDoubleHyphenAndAppendToStrBufAndErr (c );
2824
+ adjustDoubleHyphenAndAppendToStrBufAndErr (c , reportedConsecutiveHyphens );
2825
+ reportedConsecutiveHyphens = true ;
2798
2826
/*
2799
2827
* Switch to the comment state.
2800
2828
*/
@@ -2864,6 +2892,148 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
2864
2892
continue stateloop ;
2865
2893
}
2866
2894
}
2895
case COMMENT_LESSTHAN:
    /*
     * A '<' has been read inside a comment and is already in the
     * buffer. Only '!' (possible start of a nested "<!--") and another
     * '<' keep the tokenizer in the less-than family of states; every
     * other character is handled the way the comment state would
     * handle it.
     */
    for (;;) {
        if (++pos == endPos) {
            break stateloop;
        }
        c = checkChar(buf, pos);
        switch (c) {
            case '!':
                appendStrBuf(c);
                state = transition(state, Tokenizer.COMMENT_LESSTHAN_BANG, reconsume, pos);
                continue stateloop;
            case '<':
                appendStrBuf(c);
                state = transition(state, Tokenizer.COMMENT_LESSTHAN, reconsume, pos);
                continue stateloop;
            case '-':
                // A hyphen here may start the comment terminator.
                appendStrBuf(c);
                state = transition(state, Tokenizer.COMMENT_END_DASH, reconsume, pos);
                continue stateloop;
            case '\r':
                appendStrBufCarriageReturn();
                // A line break is "anything else": leave the less-than
                // state so that a later "!--" is not mistaken for a
                // nested comment opener.
                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                break stateloop;
            case '\n':
                appendStrBufLineFeed();
                // Same as '\r': go back to the plain comment state.
                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                continue stateloop;
            case '\u0000':
                c = '\uFFFD';
                // fall thru
            default:
                appendStrBuf(c);
                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                continue stateloop;
        }
    }
2929
case COMMENT_LESSTHAN_BANG:
    /*
     * "<!" has been read inside a comment and is already in the
     * buffer. Only '-' continues towards a nested "<!--"; '<' restarts
     * the less-than tracking; every other character returns to the
     * comment state.
     */
    for (;;) {
        if (++pos == endPos) {
            break stateloop;
        }
        c = checkChar(buf, pos);
        switch (c) {
            case '-':
                appendStrBuf(c);
                state = transition(state, Tokenizer.COMMENT_LESSTHAN_BANG_DASH, reconsume, pos);
                continue stateloop;
            case '<':
                appendStrBuf(c);
                state = transition(state, Tokenizer.COMMENT_LESSTHAN, reconsume, pos);
                continue stateloop;
            case '\r':
                appendStrBufCarriageReturn();
                // A line break ends the "<!" prefix; without this
                // transition the tokenizer would resume in this state
                // and treat "<!\r--" like "<!--".
                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                break stateloop;
            case '\n':
                appendStrBufLineFeed();
                // Same as '\r': go back to the plain comment state.
                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                continue stateloop;
            case '\u0000':
                c = '\uFFFD';
                // fall thru
            default:
                appendStrBuf(c);
                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                continue stateloop;
        }
    }
2959
case COMMENT_LESSTHAN_BANG_DASH:
    /*
     * "<!-" has been read inside a comment and is already in the
     * buffer. A second '-' completes a nested "<!--"; '<' restarts the
     * less-than tracking; every other character returns to the comment
     * state.
     */
    for (;;) {
        if (++pos == endPos) {
            break stateloop;
        }
        c = checkChar(buf, pos);
        switch (c) {
            case '-':
                appendStrBuf(c);
                state = transition(state, Tokenizer.COMMENT_LESSTHAN_BANG_DASH_DASH, reconsume, pos);
                continue stateloop;
            case '<':
                appendStrBuf(c);
                state = transition(state, Tokenizer.COMMENT_LESSTHAN, reconsume, pos);
                continue stateloop;
            case '\r':
                appendStrBufCarriageReturn();
                // A line break ends the "<!-" prefix; without this
                // transition the tokenizer would resume in this state
                // and treat "<!-\r-" like "<!--".
                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                break stateloop;
            case '\n':
                appendStrBufLineFeed();
                // Same as '\r': go back to the plain comment state.
                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                continue stateloop;
            case '\u0000':
                c = '\uFFFD';
                // fall thru
            default:
                appendStrBuf(c);
                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                continue stateloop;
        }
    }
2989
case COMMENT_LESSTHAN_BANG_DASH_DASH:
    /*
     * "<!--" has been read inside a comment and is already in the
     * buffer, so the buffer currently ends in two hyphens. '>' closes
     * the comment; anything else is reported through errNestedComment()
     * and then handled like the same character in the comment end
     * state.
     *
     * Every arm leaves via stateloop; the for (;;) wrapper is kept only
     * to mirror the sibling states.
     */
    for (;;) {
        if (++pos == endPos) {
            break stateloop;
        }
        c = checkChar(buf, pos);
        switch (c) {
            case '>':
                appendStrBuf(c);
                emitComment(3, pos);
                state = transition(state, Tokenizer.DATA, reconsume, pos);
                continue stateloop;
            case '-':
                errNestedComment();
                adjustDoubleHyphenAndAppendToStrBufAndErr(c, reportedConsecutiveHyphens);
                reportedConsecutiveHyphens = true;
                state = transition(state, Tokenizer.COMMENT_END, reconsume, pos);
                continue stateloop;
            case '\r':
                errNestedComment();
                // Do what adjustDoubleHyphenAndAppendToStrBufCarriageReturn()
                // does (silentCarriageReturn() + append '\n'), but pass the
                // real flag so the consecutive-hyphen warning is not
                // repeated. The raw '\r' must not be appended.
                silentCarriageReturn();
                adjustDoubleHyphenAndAppendToStrBufAndErr('\n', reportedConsecutiveHyphens);
                reportedConsecutiveHyphens = true;
                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                break stateloop;
            case '\n':
                errNestedComment();
                // Mirrors adjustDoubleHyphenAndAppendToStrBufLineFeed()
                // while keeping the warning de-duplication flag.
                silentLineFeed();
                adjustDoubleHyphenAndAppendToStrBufAndErr('\n', reportedConsecutiveHyphens);
                reportedConsecutiveHyphens = true;
                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                // Must leave this arm: the state is now COMMENT, so the
                // next character may not be examined by this switch.
                continue stateloop;
            case '!':
                errNestedComment();
                // Same as the '!' arm of the comment end state: the two
                // hyphens are still a provisional terminator ("--!>"),
                // so they are not adjusted here.
                appendStrBuf(c);
                state = transition(state, Tokenizer.COMMENT_END_BANG, reconsume, pos);
                continue stateloop;
            case '\u0000':
                c = '\uFFFD';
                // fall thru
            default:
                errNestedComment();
                adjustDoubleHyphenAndAppendToStrBufAndErr(c, reportedConsecutiveHyphens);
                reportedConsecutiveHyphens = true;
                // An ordinary character after "--" returns to the comment
                // state, as in the comment end state; staying in
                // COMMENT_END would let a following '>' close the comment.
                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                continue stateloop;
        }
    }
3036
+ // XXX reorder point
2867
3037
case COMMENT_START_DASH :
2868
3038
if (++pos == endPos ) {
2869
3039
break stateloop ;
@@ -2892,6 +3062,10 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
2892
3062
*/
2893
3063
state = transition (state , Tokenizer .DATA , reconsume , pos );
2894
3064
continue stateloop ;
3065
+ case '<' :
3066
+ appendStrBuf (c );
3067
+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
3068
+ continue stateloop ;
2895
3069
case '\r' :
2896
3070
appendStrBufCarriageReturn ();
2897
3071
state = transition (state , Tokenizer .COMMENT , reconsume , pos );
@@ -6026,13 +6200,13 @@ private void initDoctypeFields() {
6026
6200
// Carriage-return variant of adjustDoubleHyphenAndAppendToStrBufAndErr:
// calls silentCarriageReturn() and then appends '\n' through that method.
@ Inline private void adjustDoubleHyphenAndAppendToStrBufCarriageReturn ()
6027
6201
throws SAXException {
6028
6202
silentCarriageReturn ();
6029
- adjustDoubleHyphenAndAppendToStrBufAndErr ('\n' );
6203
// Passes false for reportedConsecutiveHyphens, so the "already reported"
// suppression of the consecutive-hyphen warning never applies on this path.
// NOTE(review): confirm that repeating the warning here is intended.
+ adjustDoubleHyphenAndAppendToStrBufAndErr ('\n' , false );
6030
6204
}
6031
6205
6032
6206
// Line-feed variant of adjustDoubleHyphenAndAppendToStrBufAndErr:
// calls silentLineFeed() and then appends '\n' through that method.
@ Inline private void adjustDoubleHyphenAndAppendToStrBufLineFeed ()
6033
6207
throws SAXException {
6034
6208
silentLineFeed ();
6035
- adjustDoubleHyphenAndAppendToStrBufAndErr ('\n' );
6209
// Passes false for reportedConsecutiveHyphens, so the "already reported"
// suppression of the consecutive-hyphen warning never applies on this path.
// NOTE(review): confirm that repeating the warning here is intended.
+ adjustDoubleHyphenAndAppendToStrBufAndErr ('\n' , false );
6036
6210
}
6037
6211
6038
6212
@ Inline private void appendStrBufLineFeed () {
@@ -6337,6 +6511,8 @@ public void eof() throws SAXException {
6337
6511
break eofloop ;
6338
6512
case COMMENT_START :
6339
6513
case COMMENT :
6514
+ case COMMENT_LESSTHAN :
6515
+ case COMMENT_LESSTHAN_BANG :
6340
6516
/*
6341
6517
* EOF Parse error.
6342
6518
*/
@@ -6348,6 +6524,7 @@ public void eof() throws SAXException {
6348
6524
*/
6349
6525
break eofloop ;
6350
6526
case COMMENT_END :
6527
+ case COMMENT_LESSTHAN_BANG_DASH_DASH :
6351
6528
errEofInComment ();
6352
6529
/* Emit the comment token. */
6353
6530
emitComment (2 , 0 );
@@ -6357,6 +6534,7 @@ public void eof() throws SAXException {
6357
6534
break eofloop ;
6358
6535
case COMMENT_END_DASH :
6359
6536
case COMMENT_START_DASH :
6537
+ case COMMENT_LESSTHAN_BANG_DASH :
6360
6538
errEofInComment ();
6361
6539
/* Emit the comment token. */
6362
6540
emitComment (1 , 0 );
@@ -6981,7 +7159,7 @@ protected void errGtInPublicId() throws SAXException {
6981
7159
// Protected hook with an empty body in this class.
// NOTE(review): presumably overridden by an error-reporting subclass — confirm.
protected void errNamelessDoctype () throws SAXException {
6982
7160
}
6983
7161
6984
- protected void errConsecutiveHyphens () throws SAXException {
7162
// Protected hook with an empty body in this class. The state loop calls it in
// the COMMENT_LESSTHAN_BANG_DASH_DASH state when the character after a
// "<!--" inside a comment is anything other than '>'.
// NOTE(review): presumably overridden by an error-reporting subclass — confirm.
+ protected void errNestedComment () throws SAXException {
6985
7163
}
6986
7164
6987
7165
protected void errPrematureEndOfComment () throws SAXException {
@@ -7131,9 +7309,6 @@ protected void errExpectedSystemId() throws SAXException {
7131
7309
// Protected hook with an empty body in this class.
// NOTE(review): presumably overridden by an error-reporting subclass — confirm.
protected void errMissingSpaceBeforeDoctypeName () throws SAXException {
7132
7310
}
7133
7311
7134
- protected void errHyphenHyphenBang () throws SAXException {
7135
- }
7136
-
7137
7312
// Protected hook with an empty body in this class.
// NOTE(review): presumably overridden by an error-reporting subclass — confirm.
protected void errNcrControlChar () throws SAXException {
7138
7313
}
7139
7314
0 commit comments