Skip to content

Commit e801337

Browse files
Make consecutive hyphens in comments a non-error
Also allow `<!-->` at the end of an (IE conditional) comment. See whatwg/html#1356 and whatwg/html#1456.
1 parent 4a2c018 commit e801337

File tree

2 files changed

+190
-19
lines changed

2 files changed

+190
-19
lines changed

src/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2009-2013 Mozilla Foundation
2+
* Copyright (c) 2009-2017 Mozilla Foundation
33
*
44
* Permission is hereby granted, free of charge, to any person obtaining a
55
* copy of this software and associated documentation files (the "Software"),
@@ -413,8 +413,8 @@ private boolean isAstralPrivateUse(int c) {
413413
err("Nameless doctype.");
414414
}
415415

416-
@Override protected void errConsecutiveHyphens() throws SAXException {
417-
err("Consecutive hyphens did not terminate a comment. \u201C--\u201D is not permitted inside a comment, but e.g. \u201C- -\u201D is.");
416+
@Override protected void errNestedComment() throws SAXException {
417+
err("Saw \u201C<!--\u201D within a comment. Probable cause: Nested comment (not allowed).");
418418
}
419419

420420
@Override protected void errPrematureEndOfComment() throws SAXException {
@@ -712,10 +712,6 @@ private boolean isAstralPrivateUse(int c) {
712712
err("Missing space before doctype name.");
713713
}
714714

715-
@Override protected void errHyphenHyphenBang() throws SAXException {
716-
err("\u201C--!\u201D found in comment.");
717-
}
718-
719715
@Override protected void errNcrControlChar() throws SAXException {
720716
err("Character reference expands to a control character ("
721717
+ toUPlusString((char) value) + ").");

src/nu/validator/htmlparser/impl/Tokenizer.java

Lines changed: 187 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,14 @@ public class Tokenizer implements Locator {
222222

223223
public static final int AMBIGUOUS_AMPERSAND = 75;
224224

225+
public static final int COMMENT_LESSTHAN = 76;
226+
227+
public static final int COMMENT_LESSTHAN_BANG = 77;
228+
229+
public static final int COMMENT_LESSTHAN_BANG_DASH = 78;
230+
231+
public static final int COMMENT_LESSTHAN_BANG_DASH_DASH = 79;
232+
225233
/**
226234
* Magic value for UTF-16 operations.
227235
*/
@@ -1015,9 +1023,8 @@ private void maybeAppendSpaceToBogusComment() throws SAXException {
10151023

10161024
// ]NOCPP]
10171025

1018-
@Inline private void adjustDoubleHyphenAndAppendToStrBufAndErr(char c)
1026+
@Inline private void adjustDoubleHyphenAndAppendToStrBufAndErr(char c, boolean reportedConsecutiveHyphens)
10191027
throws SAXException {
1020-
errConsecutiveHyphens();
10211028
// [NOCPP[
10221029
switch (commentPolicy) {
10231030
case ALTER_INFOSET:
@@ -1028,7 +1035,9 @@ private void maybeAppendSpaceToBogusComment() throws SAXException {
10281035
appendStrBuf('-');
10291036
// CPPONLY: MOZ_FALLTHROUGH;
10301037
case ALLOW:
1031-
warn("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment.");
1038+
if (!reportedConsecutiveHyphens) {
1039+
warn("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment.");
1040+
}
10321041
// ]NOCPP]
10331042
appendStrBuf(c);
10341043
// [NOCPP[
@@ -1490,6 +1499,7 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
14901499
@SuppressWarnings("unused") private int stateLoop(int state, char c,
14911500
int pos, @NoLength char[] buf, boolean reconsume, int returnState,
14921501
int endPos) throws SAXException {
1502+
boolean reportedConsecutiveHyphens = false;
14931503
/*
14941504
* Idioms used in this code:
14951505
*
@@ -2577,6 +2587,7 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
25772587
}
25782588
// CPPONLY: MOZ_FALLTHROUGH;
25792589
case COMMENT_START:
2590+
reportedConsecutiveHyphens = false;
25802591
commentstartloop: for (;;) {
25812592
if (++pos == endPos) {
25822593
break stateloop;
@@ -2609,6 +2620,10 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
26092620
*/
26102621
state = transition(state, Tokenizer.DATA, reconsume, pos);
26112622
continue stateloop;
2623+
case '<':
2624+
appendStrBuf(c);
2625+
state = transition(state, Tokenizer.COMMENT_LESSTHAN, reconsume, pos);
2626+
continue stateloop;
26122627
case '\r':
26132628
appendStrBufCarriageReturn();
26142629
state = transition(state, Tokenizer.COMMENT, reconsume, pos);
@@ -2654,6 +2669,10 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
26542669
state = transition(state, Tokenizer.COMMENT_END_DASH, reconsume, pos);
26552670
break commentloop;
26562671
// continue stateloop;
2672+
case '<':
2673+
appendStrBuf(c);
2674+
state = transition(state, Tokenizer.COMMENT_LESSTHAN, reconsume, pos);
2675+
continue stateloop;
26572676
case '\r':
26582677
appendStrBufCarriageReturn();
26592678
break stateloop;
@@ -2696,6 +2715,10 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
26962715
state = transition(state, Tokenizer.COMMENT_END, reconsume, pos);
26972716
break commentenddashloop;
26982717
// continue stateloop;
2718+
case '<':
2719+
appendStrBuf(c);
2720+
state = transition(state, Tokenizer.COMMENT_LESSTHAN, reconsume, pos);
2721+
continue stateloop;
26992722
case '\r':
27002723
appendStrBufCarriageReturn();
27012724
state = transition(state, Tokenizer.COMMENT, reconsume, pos);
@@ -2750,11 +2773,16 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
27502773
* Append a U+002D HYPHEN-MINUS (-) character to
27512774
* the comment token's data.
27522775
*/
2753-
adjustDoubleHyphenAndAppendToStrBufAndErr(c);
2776+
adjustDoubleHyphenAndAppendToStrBufAndErr(c, reportedConsecutiveHyphens);
2777+
reportedConsecutiveHyphens = true;
27542778
/*
27552779
* Stay in the comment end state.
27562780
*/
27572781
continue;
2782+
case '<':
2783+
appendStrBuf(c);
2784+
state = transition(state, Tokenizer.COMMENT_LESSTHAN, reconsume, pos);
2785+
continue stateloop;
27582786
case '\r':
27592787
adjustDoubleHyphenAndAppendToStrBufCarriageReturn();
27602788
state = transition(state, Tokenizer.COMMENT, reconsume, pos);
@@ -2764,7 +2792,6 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
27642792
state = transition(state, Tokenizer.COMMENT, reconsume, pos);
27652793
continue stateloop;
27662794
case '!':
2767-
errHyphenHyphenBang();
27682795
appendStrBuf(c);
27692796
state = transition(state, Tokenizer.COMMENT_END_BANG, reconsume, pos);
27702797
continue stateloop;
@@ -2777,7 +2804,8 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
27772804
* and the input character to the comment
27782805
* token's data.
27792806
*/
2780-
adjustDoubleHyphenAndAppendToStrBufAndErr(c);
2807+
adjustDoubleHyphenAndAppendToStrBufAndErr(c, reportedConsecutiveHyphens);
2808+
reportedConsecutiveHyphens = true;
27812809
/*
27822810
* Switch to the comment state.
27832811
*/
@@ -2845,6 +2873,148 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
28452873
continue stateloop;
28462874
}
28472875
}
2876+
case COMMENT_LESSTHAN:
2877+
for (;;) {
2878+
if (++pos == endPos) {
2879+
break stateloop;
2880+
}
2881+
c = checkChar(buf, pos);
2882+
switch (c) {
2883+
case '!':
2884+
appendStrBuf(c);
2885+
state = transition(state, Tokenizer.COMMENT_LESSTHAN_BANG, reconsume, pos);
2886+
continue stateloop;
2887+
case '<':
2888+
appendStrBuf(c);
2889+
state = transition(state, Tokenizer.COMMENT_LESSTHAN, reconsume, pos);
2890+
continue stateloop;
2891+
case '-':
2892+
appendStrBuf(c);
2893+
state = transition(state, Tokenizer.COMMENT_END_DASH, reconsume, pos);
2894+
continue stateloop;
2895+
case '\r':
2896+
appendStrBufCarriageReturn();
2897+
break stateloop;
2898+
case '\n':
2899+
appendStrBufLineFeed();
2900+
continue;
2901+
case '\u0000':
2902+
c = '\uFFFD';
2903+
// fall thru
2904+
default:
2905+
appendStrBuf(c);
2906+
state = transition(state, Tokenizer.COMMENT, reconsume, pos);
2907+
continue stateloop;
2908+
}
2909+
}
2910+
case COMMENT_LESSTHAN_BANG:
2911+
for (;;) {
2912+
if (++pos == endPos) {
2913+
break stateloop;
2914+
}
2915+
c = checkChar(buf, pos);
2916+
switch (c) {
2917+
case '-':
2918+
appendStrBuf(c);
2919+
state = transition(state, Tokenizer.COMMENT_LESSTHAN_BANG_DASH, reconsume, pos);
2920+
continue stateloop;
2921+
case '<':
2922+
appendStrBuf(c);
2923+
state = transition(state, Tokenizer.COMMENT_LESSTHAN, reconsume, pos);
2924+
continue stateloop;
2925+
case '\r':
2926+
appendStrBufCarriageReturn();
2927+
break stateloop;
2928+
case '\n':
2929+
appendStrBufLineFeed();
2930+
continue;
2931+
case '\u0000':
2932+
c = '\uFFFD';
2933+
// fall thru
2934+
default:
2935+
appendStrBuf(c);
2936+
state = transition(state, Tokenizer.COMMENT, reconsume, pos);
2937+
continue stateloop;
2938+
}
2939+
}
2940+
case COMMENT_LESSTHAN_BANG_DASH:
2941+
for (;;) {
2942+
if (++pos == endPos) {
2943+
break stateloop;
2944+
}
2945+
c = checkChar(buf, pos);
2946+
switch (c) {
2947+
case '-':
2948+
appendStrBuf(c);
2949+
state = transition(state, Tokenizer.COMMENT_LESSTHAN_BANG_DASH_DASH, reconsume, pos);
2950+
continue stateloop;
2951+
case '<':
2952+
appendStrBuf(c);
2953+
state = transition(state, Tokenizer.COMMENT_LESSTHAN, reconsume, pos);
2954+
continue stateloop;
2955+
case '\r':
2956+
appendStrBufCarriageReturn();
2957+
break stateloop;
2958+
case '\n':
2959+
appendStrBufLineFeed();
2960+
continue;
2961+
case '\u0000':
2962+
c = '\uFFFD';
2963+
// fall thru
2964+
default:
2965+
appendStrBuf(c);
2966+
state = transition(state, Tokenizer.COMMENT, reconsume, pos);
2967+
continue stateloop;
2968+
}
2969+
}
2970+
case COMMENT_LESSTHAN_BANG_DASH_DASH:
2971+
for (;;) {
2972+
if (++pos == endPos) {
2973+
break stateloop;
2974+
}
2975+
c = checkChar(buf, pos);
2976+
switch (c) {
2977+
case '>':
2978+
appendStrBuf(c);
2979+
emitComment(3, pos);
2980+
state = transition(state, Tokenizer.DATA, reconsume, pos);
2981+
continue stateloop;
2982+
case '-':
2983+
errNestedComment();
2984+
adjustDoubleHyphenAndAppendToStrBufAndErr(c, reportedConsecutiveHyphens);
2985+
reportedConsecutiveHyphens = true;
2986+
state = transition(state, Tokenizer.COMMENT_END, reconsume, pos);
2987+
continue stateloop;
2988+
case '\r':
2989+
errNestedComment();
2990+
adjustDoubleHyphenAndAppendToStrBufAndErr(c, reportedConsecutiveHyphens);
2991+
reportedConsecutiveHyphens = true;
2992+
state = transition(state, Tokenizer.COMMENT, reconsume, pos);
2993+
break stateloop;
2994+
case '\n':
2995+
errNestedComment();
2996+
adjustDoubleHyphenAndAppendToStrBufAndErr(c, reportedConsecutiveHyphens);
2997+
reportedConsecutiveHyphens = true;
2998+
state = transition(state, Tokenizer.COMMENT, reconsume, pos);
2999+
continue;
3000+
case '\u0000':
3001+
c = '\uFFFD';
3002+
// fall thru
3003+
case '!':
3004+
errNestedComment();
3005+
adjustDoubleHyphenAndAppendToStrBufAndErr(c, reportedConsecutiveHyphens);
3006+
reportedConsecutiveHyphens = true;
3007+
state = transition(state, Tokenizer.COMMENT_END_BANG, reconsume, pos);
3008+
continue stateloop;
3009+
default:
3010+
errNestedComment();
3011+
adjustDoubleHyphenAndAppendToStrBufAndErr(c, reportedConsecutiveHyphens);
3012+
reportedConsecutiveHyphens = true;
3013+
state = transition(state, Tokenizer.COMMENT_END, reconsume, pos);
3014+
continue stateloop;
3015+
}
3016+
}
3017+
// XXX reorder point
28483018
case COMMENT_START_DASH:
28493019
if (++pos == endPos) {
28503020
break stateloop;
@@ -2873,6 +3043,10 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
28733043
*/
28743044
state = transition(state, Tokenizer.DATA, reconsume, pos);
28753045
continue stateloop;
3046+
case '<':
3047+
appendStrBuf(c);
3048+
state = transition(state, Tokenizer.COMMENT_LESSTHAN, reconsume, pos);
3049+
continue stateloop;
28763050
case '\r':
28773051
appendStrBufCarriageReturn();
28783052
state = transition(state, Tokenizer.COMMENT, reconsume, pos);
@@ -6005,13 +6179,13 @@ private void initDoctypeFields() {
60056179
@Inline private void adjustDoubleHyphenAndAppendToStrBufCarriageReturn()
60066180
throws SAXException {
60076181
silentCarriageReturn();
6008-
adjustDoubleHyphenAndAppendToStrBufAndErr('\n');
6182+
adjustDoubleHyphenAndAppendToStrBufAndErr('\n', false);
60096183
}
60106184

60116185
@Inline private void adjustDoubleHyphenAndAppendToStrBufLineFeed()
60126186
throws SAXException {
60136187
silentLineFeed();
6014-
adjustDoubleHyphenAndAppendToStrBufAndErr('\n');
6188+
adjustDoubleHyphenAndAppendToStrBufAndErr('\n', false);
60156189
}
60166190

60176191
@Inline private void appendStrBufLineFeed() {
@@ -6316,6 +6490,8 @@ public void eof() throws SAXException {
63166490
break eofloop;
63176491
case COMMENT_START:
63186492
case COMMENT:
6493+
case COMMENT_LESSTHAN:
6494+
case COMMENT_LESSTHAN_BANG:
63196495
/*
63206496
* EOF Parse error.
63216497
*/
@@ -6327,6 +6503,7 @@ public void eof() throws SAXException {
63276503
*/
63286504
break eofloop;
63296505
case COMMENT_END:
6506+
case COMMENT_LESSTHAN_BANG_DASH_DASH:
63306507
errEofInComment();
63316508
/* Emit the comment token. */
63326509
emitComment(2, 0);
@@ -6336,6 +6513,7 @@ public void eof() throws SAXException {
63366513
break eofloop;
63376514
case COMMENT_END_DASH:
63386515
case COMMENT_START_DASH:
6516+
case COMMENT_LESSTHAN_BANG_DASH:
63396517
errEofInComment();
63406518
/* Emit the comment token. */
63416519
emitComment(1, 0);
@@ -6966,7 +7144,7 @@ protected void errGtInPublicId() throws SAXException {
69667144
protected void errNamelessDoctype() throws SAXException {
69677145
}
69687146

6969-
protected void errConsecutiveHyphens() throws SAXException {
7147+
protected void errNestedComment() throws SAXException {
69707148
}
69717149

69727150
protected void errPrematureEndOfComment() throws SAXException {
@@ -7116,9 +7294,6 @@ protected void errExpectedSystemId() throws SAXException {
71167294
protected void errMissingSpaceBeforeDoctypeName() throws SAXException {
71177295
}
71187296

7119-
protected void errHyphenHyphenBang() throws SAXException {
7120-
}
7121-
71227297
protected void errNcrControlChar() throws SAXException {
71237298
}
71247299

0 commit comments

Comments
 (0)