@@ -223,6 +223,14 @@ public class Tokenizer implements Locator, Locator2 {
223
223
224
224
public static final int AMBIGUOUS_AMPERSAND = 75 ;
225
225
226
/*
 * Tokenizer states added for tracking a "<" (and a following "!--") seen
 * while already inside a comment. Presumably these mirror the HTML
 * specification's "comment less-than sign" family of states -- confirm
 * against the spec.
 */
// Entered from the comment-family states when '<' is read; the '<' is
// appended to the comment buffer before the transition.
+ public static final int COMMENT_LESSTHAN = 76 ;
227
+
228
// Entered from COMMENT_LESSTHAN when '!' is read.
+ public static final int COMMENT_LESSTHAN_BANG = 77 ;
229
+
230
// Entered from COMMENT_LESSTHAN_BANG when '-' is read.
+ public static final int COMMENT_LESSTHAN_BANG_DASH = 78 ;
231
+
232
// Entered from COMMENT_LESSTHAN_BANG_DASH when '-' is read. In this state
// '>' emits the comment; the other cases call errNestedComment().
+ public static final int COMMENT_LESSTHAN_BANG_DASH_DASH = 79 ;
233
+
226
234
/**
227
235
* Magic value for UTF-16 operations.
228
236
*/
@@ -1031,9 +1039,8 @@ private void maybeAppendSpaceToBogusComment() throws SAXException {
1031
1039
1032
1040
// ]NOCPP]
1033
1041
1034
- @ Inline private void adjustDoubleHyphenAndAppendToStrBufAndErr (char c )
1042
+ @ Inline private void adjustDoubleHyphenAndAppendToStrBufAndErr (char c , boolean reportedConsecutiveHyphens )
1035
1043
throws SAXException {
1036
- errConsecutiveHyphens ();
1037
1044
// [NOCPP[
1038
1045
switch (commentPolicy ) {
1039
1046
case ALTER_INFOSET :
@@ -1044,7 +1051,9 @@ private void maybeAppendSpaceToBogusComment() throws SAXException {
1044
1051
appendStrBuf ('-' );
1045
1052
// CPPONLY: MOZ_FALLTHROUGH;
1046
1053
case ALLOW :
1047
- warn ("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment." );
1054
+ if (!reportedConsecutiveHyphens ) {
1055
+ warn ("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment." );
1056
+ }
1048
1057
// ]NOCPP]
1049
1058
appendStrBuf (c );
1050
1059
// [NOCPP[
@@ -1466,6 +1475,7 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
1466
1475
@ SuppressWarnings ("unused" ) private int stateLoop (int state , char c ,
1467
1476
int pos , @ NoLength char [] buf , boolean reconsume , int returnState ,
1468
1477
int endPos ) throws SAXException {
1478
+ boolean reportedConsecutiveHyphens = false ;
1469
1479
/*
1470
1480
* Idioms used in this code:
1471
1481
*
@@ -2542,6 +2552,7 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
2542
2552
}
2543
2553
// CPPONLY: MOZ_FALLTHROUGH;
2544
2554
case COMMENT_START :
2555
+ reportedConsecutiveHyphens = false ;
2545
2556
commentstartloop : for (;;) {
2546
2557
if (++pos == endPos ) {
2547
2558
break stateloop ;
@@ -2574,6 +2585,10 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
2574
2585
*/
2575
2586
state = transition (state , Tokenizer .DATA , reconsume , pos );
2576
2587
continue stateloop ;
2588
+ case '<' :
2589
+ appendStrBuf (c );
2590
+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
2591
+ continue stateloop ;
2577
2592
case '\r' :
2578
2593
appendStrBufCarriageReturn ();
2579
2594
state = transition (state , Tokenizer .COMMENT , reconsume , pos );
@@ -2619,6 +2634,10 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
2619
2634
state = transition (state , Tokenizer .COMMENT_END_DASH , reconsume , pos );
2620
2635
break commentloop ;
2621
2636
// continue stateloop;
2637
+ case '<' :
2638
+ appendStrBuf (c );
2639
+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
2640
+ continue stateloop ;
2622
2641
case '\r' :
2623
2642
appendStrBufCarriageReturn ();
2624
2643
break stateloop ;
@@ -2661,6 +2680,10 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
2661
2680
state = transition (state , Tokenizer .COMMENT_END , reconsume , pos );
2662
2681
break commentenddashloop ;
2663
2682
// continue stateloop;
2683
+ case '<' :
2684
+ appendStrBuf (c );
2685
+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
2686
+ continue stateloop ;
2664
2687
case '\r' :
2665
2688
appendStrBufCarriageReturn ();
2666
2689
state = transition (state , Tokenizer .COMMENT , reconsume , pos );
@@ -2715,11 +2738,16 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
2715
2738
* Append a U+002D HYPHEN-MINUS (-) character to
2716
2739
* the comment token's data.
2717
2740
*/
2718
- adjustDoubleHyphenAndAppendToStrBufAndErr (c );
2741
+ adjustDoubleHyphenAndAppendToStrBufAndErr (c , reportedConsecutiveHyphens );
2742
+ reportedConsecutiveHyphens = true ;
2719
2743
/*
2720
2744
* Stay in the comment end state.
2721
2745
*/
2722
2746
continue ;
2747
+ case '<' :
2748
+ appendStrBuf (c );
2749
+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
2750
+ continue stateloop ;
2723
2751
case '\r' :
2724
2752
adjustDoubleHyphenAndAppendToStrBufCarriageReturn ();
2725
2753
state = transition (state , Tokenizer .COMMENT , reconsume , pos );
@@ -2729,7 +2757,6 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
2729
2757
state = transition (state , Tokenizer .COMMENT , reconsume , pos );
2730
2758
continue stateloop ;
2731
2759
case '!' :
2732
- errHyphenHyphenBang ();
2733
2760
appendStrBuf (c );
2734
2761
state = transition (state , Tokenizer .COMMENT_END_BANG , reconsume , pos );
2735
2762
continue stateloop ;
@@ -2742,7 +2769,8 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
2742
2769
* and the input character to the comment
2743
2770
* token's data.
2744
2771
*/
2745
- adjustDoubleHyphenAndAppendToStrBufAndErr (c );
2772
+ adjustDoubleHyphenAndAppendToStrBufAndErr (c , reportedConsecutiveHyphens );
2773
+ reportedConsecutiveHyphens = true ;
2746
2774
/*
2747
2775
* Switch to the comment state.
2748
2776
*/
@@ -2812,6 +2840,148 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
2812
2840
continue stateloop ;
2813
2841
}
2814
2842
}
2843
+ case COMMENT_LESSTHAN :
2844
+ for (;;) {
2845
+ if (++pos == endPos ) {
2846
+ break stateloop ;
2847
+ }
2848
+ c = checkChar (buf , pos );
2849
+ switch (c ) {
2850
+ case '!' :
2851
+ appendStrBuf (c );
2852
+ state = transition (state , Tokenizer .COMMENT_LESSTHAN_BANG , reconsume , pos );
2853
+ continue stateloop ;
2854
+ case '<' :
2855
+ appendStrBuf (c );
2856
+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
2857
+ continue stateloop ;
2858
+ case '-' :
2859
+ appendStrBuf (c );
2860
+ state = transition (state , Tokenizer .COMMENT_END_DASH , reconsume , pos );
2861
+ continue stateloop ;
2862
+ case '\r' :
2863
+ appendStrBufCarriageReturn ();
2864
+ break stateloop ;
2865
+ case '\n' :
2866
+ appendStrBufLineFeed ();
2867
+ continue ;
2868
+ case '\u0000' :
2869
+ c = '\uFFFD' ;
2870
+ // fall thru
2871
+ default :
2872
+ appendStrBuf (c );
2873
+ state = transition (state , Tokenizer .COMMENT , reconsume , pos );
2874
+ continue stateloop ;
2875
+ }
2876
+ }
2877
+ case COMMENT_LESSTHAN_BANG :
2878
+ for (;;) {
2879
+ if (++pos == endPos ) {
2880
+ break stateloop ;
2881
+ }
2882
+ c = checkChar (buf , pos );
2883
+ switch (c ) {
2884
+ case '-' :
2885
+ appendStrBuf (c );
2886
+ state = transition (state , Tokenizer .COMMENT_LESSTHAN_BANG_DASH , reconsume , pos );
2887
+ continue stateloop ;
2888
+ case '<' :
2889
+ appendStrBuf (c );
2890
+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
2891
+ continue stateloop ;
2892
+ case '\r' :
2893
+ appendStrBufCarriageReturn ();
2894
+ break stateloop ;
2895
+ case '\n' :
2896
+ appendStrBufLineFeed ();
2897
+ continue ;
2898
+ case '\u0000' :
2899
+ c = '\uFFFD' ;
2900
+ // fall thru
2901
+ default :
2902
+ appendStrBuf (c );
2903
+ state = transition (state , Tokenizer .COMMENT , reconsume , pos );
2904
+ continue stateloop ;
2905
+ }
2906
+ }
2907
+ case COMMENT_LESSTHAN_BANG_DASH :
2908
+ for (;;) {
2909
+ if (++pos == endPos ) {
2910
+ break stateloop ;
2911
+ }
2912
+ c = checkChar (buf , pos );
2913
+ switch (c ) {
2914
+ case '-' :
2915
+ appendStrBuf (c );
2916
+ state = transition (state , Tokenizer .COMMENT_LESSTHAN_BANG_DASH_DASH , reconsume , pos );
2917
+ continue stateloop ;
2918
+ case '<' :
2919
+ appendStrBuf (c );
2920
+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
2921
+ continue stateloop ;
2922
+ case '\r' :
2923
+ appendStrBufCarriageReturn ();
2924
+ break stateloop ;
2925
+ case '\n' :
2926
+ appendStrBufLineFeed ();
2927
+ continue ;
2928
+ case '\u0000' :
2929
+ c = '\uFFFD' ;
2930
+ // fall thru
2931
+ default :
2932
+ appendStrBuf (c );
2933
+ state = transition (state , Tokenizer .COMMENT , reconsume , pos );
2934
+ continue stateloop ;
2935
+ }
2936
+ }
2937
+ case COMMENT_LESSTHAN_BANG_DASH_DASH :
2938
+ for (;;) {
2939
+ if (++pos == endPos ) {
2940
+ break stateloop ;
2941
+ }
2942
+ c = checkChar (buf , pos );
2943
+ switch (c ) {
2944
+ case '>' :
2945
+ appendStrBuf (c );
2946
+ emitComment (3 , pos );
2947
+ state = transition (state , Tokenizer .DATA , reconsume , pos );
2948
+ continue stateloop ;
2949
+ case '-' :
2950
+ errNestedComment ();
2951
+ adjustDoubleHyphenAndAppendToStrBufAndErr (c , reportedConsecutiveHyphens );
2952
+ reportedConsecutiveHyphens = true ;
2953
+ state = transition (state , Tokenizer .COMMENT_END , reconsume , pos );
2954
+ continue stateloop ;
2955
+ case '\r' :
2956
+ errNestedComment ();
2957
+ adjustDoubleHyphenAndAppendToStrBufAndErr (c , reportedConsecutiveHyphens );
2958
+ reportedConsecutiveHyphens = true ;
2959
+ state = transition (state , Tokenizer .COMMENT , reconsume , pos );
2960
+ break stateloop ;
2961
+ case '\n' :
2962
+ errNestedComment ();
2963
+ adjustDoubleHyphenAndAppendToStrBufAndErr (c , reportedConsecutiveHyphens );
2964
+ reportedConsecutiveHyphens = true ;
2965
+ state = transition (state , Tokenizer .COMMENT , reconsume , pos );
2966
+ continue ;
2967
+ case '\u0000' :
2968
+ c = '\uFFFD' ;
2969
+ // fall thru
2970
+ case '!' :
2971
+ errNestedComment ();
2972
+ adjustDoubleHyphenAndAppendToStrBufAndErr (c , reportedConsecutiveHyphens );
2973
+ reportedConsecutiveHyphens = true ;
2974
+ state = transition (state , Tokenizer .COMMENT_END_BANG , reconsume , pos );
2975
+ continue stateloop ;
2976
+ default :
2977
+ errNestedComment ();
2978
+ adjustDoubleHyphenAndAppendToStrBufAndErr (c , reportedConsecutiveHyphens );
2979
+ reportedConsecutiveHyphens = true ;
2980
+ state = transition (state , Tokenizer .COMMENT_END , reconsume , pos );
2981
+ continue stateloop ;
2982
+ }
2983
+ }
2984
+ // XXX reorder point
2815
2985
case COMMENT_START_DASH :
2816
2986
if (++pos == endPos ) {
2817
2987
break stateloop ;
@@ -2840,6 +3010,10 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
2840
3010
*/
2841
3011
state = transition (state , Tokenizer .DATA , reconsume , pos );
2842
3012
continue stateloop ;
3013
+ case '<' :
3014
+ appendStrBuf (c );
3015
+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
3016
+ continue stateloop ;
2843
3017
case '\r' :
2844
3018
appendStrBufCarriageReturn ();
2845
3019
state = transition (state , Tokenizer .COMMENT , reconsume , pos );
@@ -5967,13 +6141,13 @@ private void initDoctypeFields() {
5967
6141
/**
 * Handles a carriage return read while a double hyphen is pending in a
 * comment: calls {@code silentCarriageReturn()} and then passes a line feed
 * ({@code '\n'}, not {@code '\r'}) to
 * {@code adjustDoubleHyphenAndAppendToStrBufAndErr}.
 *
 * NOTE(review): the second argument is hard-coded to {@code false}, so the
 * callee's {@code !reportedConsecutiveHyphens} guard never suppresses the
 * XML 1.0 warning on this path, even when {@code stateLoop} has already set
 * its local {@code reportedConsecutiveHyphens} to {@code true}. Confirm
 * whether the flag should be passed through instead.
 *
 * NOTE(review): the {@code '\r'} case of COMMENT_LESSTHAN_BANG_DASH_DASH in
 * this change passes the raw character to
 * {@code adjustDoubleHyphenAndAppendToStrBufAndErr} rather than calling this
 * helper -- confirm that is intended.
 *
 * @throws SAXException declared; presumably propagated from the callee
 */
@ Inline private void adjustDoubleHyphenAndAppendToStrBufCarriageReturn ()
5968
6142
throws SAXException {
5969
6143
silentCarriageReturn ();
5970
- adjustDoubleHyphenAndAppendToStrBufAndErr ('\n' );
6144
+ adjustDoubleHyphenAndAppendToStrBufAndErr ('\n' , false );
5971
6145
}
5972
6146
5973
6147
/**
 * Handles a line feed read while a double hyphen is pending in a comment:
 * calls {@code silentLineFeed()} and then passes {@code '\n'} to
 * {@code adjustDoubleHyphenAndAppendToStrBufAndErr}.
 *
 * NOTE(review): the second argument is hard-coded to {@code false}, so the
 * callee's {@code !reportedConsecutiveHyphens} guard never suppresses the
 * XML 1.0 warning on this path, even when {@code stateLoop} has already
 * reported consecutive hyphens for the current comment. Confirm whether the
 * flag should be passed through instead.
 *
 * @throws SAXException declared; presumably propagated from the callee
 */
@ Inline private void adjustDoubleHyphenAndAppendToStrBufLineFeed ()
5974
6148
throws SAXException {
5975
6149
silentLineFeed ();
5976
- adjustDoubleHyphenAndAppendToStrBufAndErr ('\n' );
6150
+ adjustDoubleHyphenAndAppendToStrBufAndErr ('\n' , false );
5977
6151
}
5978
6152
5979
6153
@ Inline private void appendStrBufLineFeed () {
@@ -6278,6 +6452,8 @@ public void eof() throws SAXException {
6278
6452
break eofloop ;
6279
6453
case COMMENT_START :
6280
6454
case COMMENT :
6455
+ case COMMENT_LESSTHAN :
6456
+ case COMMENT_LESSTHAN_BANG :
6281
6457
/*
6282
6458
* EOF Parse error.
6283
6459
*/
@@ -6289,6 +6465,7 @@ public void eof() throws SAXException {
6289
6465
*/
6290
6466
break eofloop ;
6291
6467
case COMMENT_END :
6468
+ case COMMENT_LESSTHAN_BANG_DASH_DASH :
6292
6469
errEofInComment ();
6293
6470
/* Emit the comment token. */
6294
6471
emitComment (2 , 0 );
@@ -6298,6 +6475,7 @@ public void eof() throws SAXException {
6298
6475
break eofloop ;
6299
6476
case COMMENT_END_DASH :
6300
6477
case COMMENT_START_DASH :
6478
+ case COMMENT_LESSTHAN_BANG_DASH :
6301
6479
errEofInComment ();
6302
6480
/* Emit the comment token. */
6303
6481
emitComment (1 , 0 );
@@ -6921,7 +7099,7 @@ protected void errGtInPublicId() throws SAXException {
6921
7099
/**
 * Error-reporting hook with an empty body in this class. Call sites are
 * outside this excerpt; presumably subclasses override it to report a
 * doctype without a name -- confirm.
 *
 * @throws SAXException declared; never thrown by this empty body
 */
protected void errNamelessDoctype () throws SAXException {
6922
7100
}
6923
7101
6924
/**
 * Error-reporting hook with an empty body in this class (renamed in this
 * change from {@code errConsecutiveHyphens}). The state loop calls it in
 * the COMMENT_LESSTHAN_BANG_DASH_DASH state for the {@code '-'},
 * {@code '\r'}, {@code '\n'}, {@code '!'} / NUL and default cases, i.e.
 * whenever {@code <!--} inside a comment is not immediately followed by
 * {@code '>'}. Presumably subclasses override it to report the error --
 * confirm.
 *
 * @throws SAXException declared; never thrown by this empty body
 */
- protected void errConsecutiveHyphens () throws SAXException {
7102
+ protected void errNestedComment () throws SAXException {
6925
7103
}
6926
7104
6927
7105
protected void errPrematureEndOfComment () throws SAXException {
@@ -7064,9 +7242,6 @@ protected void errExpectedSystemId() throws SAXException {
7064
7242
/**
 * Error-reporting hook with an empty body in this class. Call sites are
 * outside this excerpt; presumably subclasses override it to report a
 * missing space before the doctype name -- confirm.
 *
 * @throws SAXException declared; never thrown by this empty body
 */
protected void errMissingSpaceBeforeDoctypeName () throws SAXException {
7065
7243
}
7066
7244
7067
- protected void errHyphenHyphenBang () throws SAXException {
7068
- }
7069
-
7070
7245
/**
 * Error-reporting hook with an empty body in this class. Call sites are
 * outside this excerpt; presumably subclasses override it to report a
 * numeric character reference that expands to a control character --
 * confirm.
 *
 * @throws SAXException declared; never thrown by this empty body
 */
protected void errNcrControlChar () throws SAXException {
7071
7246
}
7072
7247
0 commit comments