@@ -221,6 +221,14 @@ public class Tokenizer implements Locator, Locator2 {
221
221
222
222
public static final int PROCESSING_INSTRUCTION_QUESTION_MARK = 74 ;
223
223
224
// Tokenizer state entered when a '<' is seen (and appended) inside a comment.
+ public static final int COMMENT_LESSTHAN = 76 ;
225
+
226
// Tokenizer state entered from COMMENT_LESSTHAN on '!', i.e. after "<!" inside a comment.
+ public static final int COMMENT_LESSTHAN_BANG = 77 ;
227
+
228
// Tokenizer state entered from COMMENT_LESSTHAN_BANG on '-', i.e. after "<!-" inside a comment.
+ public static final int COMMENT_LESSTHAN_BANG_DASH = 78 ;
229
+
230
// Tokenizer state entered from COMMENT_LESSTHAN_BANG_DASH on '-', i.e. after "<!--" inside a comment.
+ public static final int COMMENT_LESSTHAN_BANG_DASH_DASH = 79 ;
231
+
224
232
/**
225
233
* Magic value for UTF-16 operations.
226
234
*/
@@ -1029,9 +1037,8 @@ private void maybeAppendSpaceToBogusComment() throws SAXException {
1029
1037
1030
1038
// ]NOCPP]
1031
1039
1032
- @ Inline private void adjustDoubleHyphenAndAppendToStrBufAndErr (char c )
1040
+ @ Inline private void adjustDoubleHyphenAndAppendToStrBufAndErr (char c , boolean reportedConsecutiveHyphens )
1033
1041
throws SAXException {
1034
- errConsecutiveHyphens ();
1035
1042
// [NOCPP[
1036
1043
switch (commentPolicy ) {
1037
1044
case ALTER_INFOSET :
@@ -1042,7 +1049,9 @@ private void maybeAppendSpaceToBogusComment() throws SAXException {
1042
1049
appendStrBuf ('-' );
1043
1050
// CPPONLY: MOZ_FALLTHROUGH;
1044
1051
case ALLOW :
1045
- warn ("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment." );
1052
+ if (!reportedConsecutiveHyphens ) {
1053
+ warn ("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment." );
1054
+ }
1046
1055
// ]NOCPP]
1047
1056
appendStrBuf (c );
1048
1057
// [NOCPP[
@@ -1464,6 +1473,7 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
1464
1473
@ SuppressWarnings ("unused" ) private int stateLoop (int state , char c ,
1465
1474
int pos , @ NoLength char [] buf , boolean reconsume , int returnState ,
1466
1475
int endPos ) throws SAXException {
1476
+ boolean reportedConsecutiveHyphens = false ;
1467
1477
/*
1468
1478
* Idioms used in this code:
1469
1479
*
@@ -2540,6 +2550,7 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
2540
2550
}
2541
2551
// CPPONLY: MOZ_FALLTHROUGH;
2542
2552
case COMMENT_START :
2553
+ reportedConsecutiveHyphens = false ;
2543
2554
commentstartloop : for (;;) {
2544
2555
if (++pos == endPos ) {
2545
2556
break stateloop ;
@@ -2572,6 +2583,10 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
2572
2583
*/
2573
2584
state = transition (state , Tokenizer .DATA , reconsume , pos );
2574
2585
continue stateloop ;
2586
+ case '<' :
2587
+ appendStrBuf (c );
2588
+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
2589
+ continue stateloop ;
2575
2590
case '\r' :
2576
2591
appendStrBufCarriageReturn ();
2577
2592
state = transition (state , Tokenizer .COMMENT , reconsume , pos );
@@ -2617,6 +2632,10 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
2617
2632
state = transition (state , Tokenizer .COMMENT_END_DASH , reconsume , pos );
2618
2633
break commentloop ;
2619
2634
// continue stateloop;
2635
+ case '<' :
2636
+ appendStrBuf (c );
2637
+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
2638
+ continue stateloop ;
2620
2639
case '\r' :
2621
2640
appendStrBufCarriageReturn ();
2622
2641
break stateloop ;
@@ -2659,6 +2678,10 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
2659
2678
state = transition (state , Tokenizer .COMMENT_END , reconsume , pos );
2660
2679
break commentenddashloop ;
2661
2680
// continue stateloop;
2681
+ case '<' :
2682
+ appendStrBuf (c );
2683
+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
2684
+ continue stateloop ;
2662
2685
case '\r' :
2663
2686
appendStrBufCarriageReturn ();
2664
2687
state = transition (state , Tokenizer .COMMENT , reconsume , pos );
@@ -2713,11 +2736,16 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
2713
2736
* Append a U+002D HYPHEN-MINUS (-) character to
2714
2737
* the comment token's data.
2715
2738
*/
2716
- adjustDoubleHyphenAndAppendToStrBufAndErr (c );
2739
+ adjustDoubleHyphenAndAppendToStrBufAndErr (c , reportedConsecutiveHyphens );
2740
+ reportedConsecutiveHyphens = true ;
2717
2741
/*
2718
2742
* Stay in the comment end state.
2719
2743
*/
2720
2744
continue ;
2745
+ case '<' :
2746
+ appendStrBuf (c );
2747
+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
2748
+ continue stateloop ;
2721
2749
case '\r' :
2722
2750
adjustDoubleHyphenAndAppendToStrBufCarriageReturn ();
2723
2751
state = transition (state , Tokenizer .COMMENT , reconsume , pos );
@@ -2727,7 +2755,6 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
2727
2755
state = transition (state , Tokenizer .COMMENT , reconsume , pos );
2728
2756
continue stateloop ;
2729
2757
case '!' :
2730
- errHyphenHyphenBang ();
2731
2758
appendStrBuf (c );
2732
2759
state = transition (state , Tokenizer .COMMENT_END_BANG , reconsume , pos );
2733
2760
continue stateloop ;
@@ -2740,7 +2767,8 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
2740
2767
* and the input character to the comment
2741
2768
* token's data.
2742
2769
*/
2743
- adjustDoubleHyphenAndAppendToStrBufAndErr (c );
2770
+ adjustDoubleHyphenAndAppendToStrBufAndErr (c , reportedConsecutiveHyphens );
2771
+ reportedConsecutiveHyphens = true ;
2744
2772
/*
2745
2773
* Switch to the comment state.
2746
2774
*/
@@ -2810,6 +2838,148 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
2810
2838
continue stateloop ;
2811
2839
}
2812
2840
}
2841
case COMMENT_LESSTHAN:
    /*
     * A '<' has just been appended to the comment data. Only '!'
     * keeps the "<!--" detection going; every other character is
     * appended and then handled as ordinary comment content.
     */
    for (;;) {
        if (++pos == endPos) {
            break stateloop;
        }
        c = checkChar(buf, pos);
        switch (c) {
            case '!':
                appendStrBuf(c);
                state = transition(state, Tokenizer.COMMENT_LESSTHAN_BANG, reconsume, pos);
                continue stateloop;
            case '<':
                // Another '<' restarts the detection from this state.
                appendStrBuf(c);
                state = transition(state, Tokenizer.COMMENT_LESSTHAN, reconsume, pos);
                continue stateloop;
            case '-':
                // A hyphen may be the start of the comment terminator.
                appendStrBuf(c);
                state = transition(state, Tokenizer.COMMENT_END_DASH, reconsume, pos);
                continue stateloop;
            case '\r':
                appendStrBufCarriageReturn();
                // A line break is ordinary comment content: leave this
                // state so that a later '!' is not taken for "<!".
                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                // Leave the state loop after a CR, as the sibling
                // comment states do.
                break stateloop;
            case '\n':
                appendStrBufLineFeed();
                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                continue stateloop;
            case '\u0000':
                c = '\uFFFD';
                // fall thru
            default:
                appendStrBuf(c);
                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                continue stateloop;
        }
    }
2875
case COMMENT_LESSTHAN_BANG:
    /*
     * "<!" has just been appended to the comment data. Only '-'
     * keeps the "<!--" detection going; every other character is
     * appended and then handled as ordinary comment content.
     */
    for (;;) {
        if (++pos == endPos) {
            break stateloop;
        }
        c = checkChar(buf, pos);
        switch (c) {
            case '-':
                appendStrBuf(c);
                state = transition(state, Tokenizer.COMMENT_LESSTHAN_BANG_DASH, reconsume, pos);
                continue stateloop;
            case '<':
                // A new '<' restarts the detection.
                appendStrBuf(c);
                state = transition(state, Tokenizer.COMMENT_LESSTHAN, reconsume, pos);
                continue stateloop;
            case '\r':
                appendStrBufCarriageReturn();
                // A line break is ordinary comment content: leave this
                // state so that a later '-' is not taken for "<!-".
                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                // Leave the state loop after a CR, as the sibling
                // comment states do.
                break stateloop;
            case '\n':
                appendStrBufLineFeed();
                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                continue stateloop;
            case '\u0000':
                c = '\uFFFD';
                // fall thru
            default:
                appendStrBuf(c);
                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                continue stateloop;
        }
    }
2905
case COMMENT_LESSTHAN_BANG_DASH:
    /*
     * "<!-" has just been appended to the comment data. A second
     * '-' completes "<!--"; every other character is appended and
     * then handled as ordinary comment content.
     */
    for (;;) {
        if (++pos == endPos) {
            break stateloop;
        }
        c = checkChar(buf, pos);
        switch (c) {
            case '-':
                appendStrBuf(c);
                state = transition(state, Tokenizer.COMMENT_LESSTHAN_BANG_DASH_DASH, reconsume, pos);
                continue stateloop;
            case '<':
                // A new '<' restarts the detection.
                appendStrBuf(c);
                state = transition(state, Tokenizer.COMMENT_LESSTHAN, reconsume, pos);
                continue stateloop;
            case '\r':
                appendStrBufCarriageReturn();
                // A line break separates the hyphens: without leaving
                // this state, "<!-", a line break and "->" would end
                // the comment as if it were "<!-->".
                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                // Leave the state loop after a CR, as the sibling
                // comment states do.
                break stateloop;
            case '\n':
                appendStrBufLineFeed();
                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                continue stateloop;
            case '\u0000':
                c = '\uFFFD';
                // fall thru
            default:
                appendStrBuf(c);
                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                continue stateloop;
        }
    }
2935
case COMMENT_LESSTHAN_BANG_DASH_DASH:
    /*
     * "<!--" has just been appended to the comment data. A '>' here
     * closes the comment; anything else is reported through
     * errNestedComment() and then handled the way the COMMENT_END
     * state handles the same character after "--".
     */
    for (;;) {
        if (++pos == endPos) {
            break stateloop;
        }
        c = checkChar(buf, pos);
        switch (c) {
            case '>':
                // The '>' is appended and then dropped again together
                // with the two trailing hyphens (presumably the first
                // argument of emitComment is the number of trailing
                // characters to leave out -- confirm).
                appendStrBuf(c);
                emitComment(3, pos);
                state = transition(state, Tokenizer.DATA, reconsume, pos);
                continue stateloop;
            case '-':
                errNestedComment();
                adjustDoubleHyphenAndAppendToStrBufAndErr(c, reportedConsecutiveHyphens);
                reportedConsecutiveHyphens = true;
                state = transition(state, Tokenizer.COMMENT_END, reconsume, pos);
                continue stateloop;
            case '\r':
                errNestedComment();
                // Register the CR and append it as '\n', like
                // adjustDoubleHyphenAndAppendToStrBufCarriageReturn()
                // does, while still honouring the warn-once flag.
                silentCarriageReturn();
                adjustDoubleHyphenAndAppendToStrBufAndErr('\n', reportedConsecutiveHyphens);
                reportedConsecutiveHyphens = true;
                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                break stateloop;
            case '\n':
                errNestedComment();
                silentLineFeed();
                adjustDoubleHyphenAndAppendToStrBufAndErr('\n', reportedConsecutiveHyphens);
                reportedConsecutiveHyphens = true;
                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                // Leave this state's loop: the next character belongs
                // to the COMMENT state.
                continue stateloop;
            case '!':
                errNestedComment();
                // "--!" may still turn out to be the "--!>" terminator,
                // so the hyphens are left untouched, as in COMMENT_END.
                appendStrBuf(c);
                state = transition(state, Tokenizer.COMMENT_END_BANG, reconsume, pos);
                continue stateloop;
            case '\u0000':
                c = '\uFFFD';
                // fall thru
            default:
                errNestedComment();
                adjustDoubleHyphenAndAppendToStrBufAndErr(c, reportedConsecutiveHyphens);
                reportedConsecutiveHyphens = true;
                // A non-hyphen was appended, so the "--" can no longer
                // be part of a terminator: back to plain comment data.
                state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                continue stateloop;
        }
    }
2982
+ // XXX reorder point
2813
2983
case COMMENT_START_DASH :
2814
2984
if (++pos == endPos ) {
2815
2985
break stateloop ;
@@ -2838,6 +3008,10 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
2838
3008
*/
2839
3009
state = transition (state , Tokenizer .DATA , reconsume , pos );
2840
3010
continue stateloop ;
3011
+ case '<' :
3012
+ appendStrBuf (c );
3013
+ state = transition (state , Tokenizer .COMMENT_LESSTHAN , reconsume , pos );
3014
+ continue stateloop ;
2841
3015
case '\r' :
2842
3016
appendStrBufCarriageReturn ();
2843
3017
state = transition (state , Tokenizer .COMMENT , reconsume , pos );
@@ -5957,13 +6131,13 @@ private void initDoctypeFields() {
5957
6131
// Handles a carriage return that follows "--" inside a comment: calls
// silentCarriageReturn() and then appends the line break as '\n' through
// adjustDoubleHyphenAndAppendToStrBufAndErr.
@ Inline private void adjustDoubleHyphenAndAppendToStrBufCarriageReturn ()
5958
6132
throws SAXException {
5959
6133
silentCarriageReturn ();
5960
- adjustDoubleHyphenAndAppendToStrBufAndErr ('\n' );
6134
// `false` = "consecutive hyphens not reported yet", so the warning in the
// callee is never suppressed on this path.
+ adjustDoubleHyphenAndAppendToStrBufAndErr ('\n' , false );
5961
6135
}
5962
6136
5963
6137
// Handles a line feed that follows "--" inside a comment: calls
// silentLineFeed() and then appends '\n' through
// adjustDoubleHyphenAndAppendToStrBufAndErr.
@ Inline private void adjustDoubleHyphenAndAppendToStrBufLineFeed ()
5964
6138
throws SAXException {
5965
6139
silentLineFeed ();
5966
- adjustDoubleHyphenAndAppendToStrBufAndErr ('\n' );
6140
// `false` = "consecutive hyphens not reported yet", so the warning in the
// callee is never suppressed on this path.
+ adjustDoubleHyphenAndAppendToStrBufAndErr ('\n' , false );
5967
6141
}
5968
6142
5969
6143
@ Inline private void appendStrBufLineFeed () {
@@ -6268,6 +6442,8 @@ public void eof() throws SAXException {
6268
6442
break eofloop ;
6269
6443
case COMMENT_START :
6270
6444
case COMMENT :
6445
+ case COMMENT_LESSTHAN :
6446
+ case COMMENT_LESSTHAN_BANG :
6271
6447
/*
6272
6448
* EOF Parse error.
6273
6449
*/
@@ -6279,6 +6455,7 @@ public void eof() throws SAXException {
6279
6455
*/
6280
6456
break eofloop ;
6281
6457
case COMMENT_END :
6458
+ case COMMENT_LESSTHAN_BANG_DASH_DASH :
6282
6459
errEofInComment ();
6283
6460
/* Emit the comment token. */
6284
6461
emitComment (2 , 0 );
@@ -6288,6 +6465,7 @@ public void eof() throws SAXException {
6288
6465
break eofloop ;
6289
6466
case COMMENT_END_DASH :
6290
6467
case COMMENT_START_DASH :
6468
+ case COMMENT_LESSTHAN_BANG_DASH :
6291
6469
errEofInComment ();
6292
6470
/* Emit the comment token. */
6293
6471
emitComment (1 , 0 );
@@ -6917,7 +7095,7 @@ protected void errGtInPublicId() throws SAXException {
6917
7095
// Error hook with an empty body in this class.
// NOTE(review): presumably overridden by an error-reporting subclass to flag a
// doctype without a name -- call sites are not visible here, confirm.
protected void errNamelessDoctype () throws SAXException {
6918
7096
}
6919
7097
6920
- protected void errConsecutiveHyphens () throws SAXException {
7098
// Error hook called by the COMMENT_LESSTHAN_BANG_DASH_DASH state when "<!--"
// inside a comment is followed by anything other than '>'. The body is empty
// in this class.
// NOTE(review): presumably overridden by an error-reporting subclass -- confirm.
+ protected void errNestedComment () throws SAXException {
6921
7099
}
6922
7100
6923
7101
protected void errPrematureEndOfComment () throws SAXException {
@@ -7060,9 +7238,6 @@ protected void errExpectedSystemId() throws SAXException {
7060
7238
// Error hook with an empty body in this class.
// NOTE(review): presumably overridden by an error-reporting subclass to flag a
// doctype name that is not preceded by whitespace -- call sites are not
// visible here, confirm.
protected void errMissingSpaceBeforeDoctypeName () throws SAXException {
7061
7239
}
7062
7240
7063
- protected void errHyphenHyphenBang () throws SAXException {
7064
- }
7065
-
7066
7241
// Error hook with an empty body in this class.
// NOTE(review): presumably overridden by an error-reporting subclass to flag a
// numeric character reference that expands to a control character -- call
// sites are not visible here, confirm.
protected void errNcrControlChar () throws SAXException {
7067
7242
}
7068
7243
0 commit comments