Skip to content

Commit b25f655

Browse files
Make consecutive hyphens in comments a non-error
Also allow `<!-->` at (IE conditional) comment end. See whatwg/html#1356 and whatwg/html#1456.
1 parent 067faf0 commit b25f655

File tree

2 files changed

+190
-19
lines changed

2 files changed

+190
-19
lines changed

src/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2009-2013 Mozilla Foundation
2+
* Copyright (c) 2009-2017 Mozilla Foundation
33
*
44
* Permission is hereby granted, free of charge, to any person obtaining a
55
* copy of this software and associated documentation files (the "Software"),
@@ -413,8 +413,8 @@ private boolean isAstralPrivateUse(int c) {
413413
err("Nameless doctype.");
414414
}
415415

416-
@Override protected void errConsecutiveHyphens() throws SAXException {
417-
err("Consecutive hyphens did not terminate a comment. \u201C--\u201D is not permitted inside a comment, but e.g. \u201C- -\u201D is.");
416+
@Override protected void errNestedComment() throws SAXException {
417+
err("Saw \u201C<!--\u201D within a comment. Probable cause: Nested comment (not allowed).");
418418
}
419419

420420
@Override protected void errPrematureEndOfComment() throws SAXException {
@@ -712,10 +712,6 @@ private boolean isAstralPrivateUse(int c) {
712712
err("Missing space before doctype name.");
713713
}
714714

715-
@Override protected void errHyphenHyphenBang() throws SAXException {
716-
err("\u201C--!\u201D found in comment.");
717-
}
718-
719715
@Override protected void errNcrControlChar() throws SAXException {
720716
err("Character reference expands to a control character ("
721717
+ toUPlusString((char) value) + ").");

src/nu/validator/htmlparser/impl/Tokenizer.java

Lines changed: 187 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,14 @@ public class Tokenizer implements Locator {
222222

223223
public static final int AMBIGUOUS_AMPERSAND = 75;
224224

225+
public static final int COMMENT_LESSTHAN = 76;
226+
227+
public static final int COMMENT_LESSTHAN_BANG = 77;
228+
229+
public static final int COMMENT_LESSTHAN_BANG_DASH = 78;
230+
231+
public static final int COMMENT_LESSTHAN_BANG_DASH_DASH = 79;
232+
225233
/**
226234
* Magic value for UTF-16 operations.
227235
*/
@@ -1034,9 +1042,8 @@ private void maybeAppendSpaceToBogusComment() throws SAXException {
10341042

10351043
// ]NOCPP]
10361044

1037-
@Inline private void adjustDoubleHyphenAndAppendToStrBufAndErr(char c)
1045+
@Inline private void adjustDoubleHyphenAndAppendToStrBufAndErr(char c, boolean reportedConsecutiveHyphens)
10381046
throws SAXException {
1039-
errConsecutiveHyphens();
10401047
// [NOCPP[
10411048
switch (commentPolicy) {
10421049
case ALTER_INFOSET:
@@ -1047,7 +1054,9 @@ private void maybeAppendSpaceToBogusComment() throws SAXException {
10471054
appendStrBuf('-');
10481055
// CPPONLY: MOZ_FALLTHROUGH;
10491056
case ALLOW:
1050-
warn("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment.");
1057+
if (!reportedConsecutiveHyphens) {
1058+
warn("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment.");
1059+
}
10511060
// ]NOCPP]
10521061
appendStrBuf(c);
10531062
// [NOCPP[
@@ -1509,6 +1518,7 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
15091518
@SuppressWarnings("unused") private int stateLoop(int state, char c,
15101519
int pos, @NoLength char[] buf, boolean reconsume, int returnState,
15111520
int endPos) throws SAXException {
1521+
boolean reportedConsecutiveHyphens = false;
15121522
/*
15131523
* Idioms used in this code:
15141524
*
@@ -2594,6 +2604,7 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
25942604
}
25952605
// CPPONLY: MOZ_FALLTHROUGH;
25962606
case COMMENT_START:
2607+
reportedConsecutiveHyphens = false;
25972608
commentstartloop: for (;;) {
25982609
if (++pos == endPos) {
25992610
break stateloop;
@@ -2626,6 +2637,10 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
26262637
*/
26272638
state = transition(state, Tokenizer.DATA, reconsume, pos);
26282639
continue stateloop;
2640+
case '<':
2641+
appendStrBuf(c);
2642+
state = transition(state, Tokenizer.COMMENT_LESSTHAN, reconsume, pos);
2643+
continue stateloop;
26292644
case '\r':
26302645
appendStrBufCarriageReturn();
26312646
state = transition(state, Tokenizer.COMMENT, reconsume, pos);
@@ -2671,6 +2686,10 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
26712686
state = transition(state, Tokenizer.COMMENT_END_DASH, reconsume, pos);
26722687
break commentloop;
26732688
// continue stateloop;
2689+
case '<':
2690+
appendStrBuf(c);
2691+
state = transition(state, Tokenizer.COMMENT_LESSTHAN, reconsume, pos);
2692+
continue stateloop;
26742693
case '\r':
26752694
appendStrBufCarriageReturn();
26762695
break stateloop;
@@ -2713,6 +2732,10 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
27132732
state = transition(state, Tokenizer.COMMENT_END, reconsume, pos);
27142733
break commentenddashloop;
27152734
// continue stateloop;
2735+
case '<':
2736+
appendStrBuf(c);
2737+
state = transition(state, Tokenizer.COMMENT_LESSTHAN, reconsume, pos);
2738+
continue stateloop;
27162739
case '\r':
27172740
appendStrBufCarriageReturn();
27182741
state = transition(state, Tokenizer.COMMENT, reconsume, pos);
@@ -2767,11 +2790,16 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
27672790
* Append a U+002D HYPHEN-MINUS (-) character to
27682791
* the comment token's data.
27692792
*/
2770-
adjustDoubleHyphenAndAppendToStrBufAndErr(c);
2793+
adjustDoubleHyphenAndAppendToStrBufAndErr(c, reportedConsecutiveHyphens);
2794+
reportedConsecutiveHyphens = true;
27712795
/*
27722796
* Stay in the comment end state.
27732797
*/
27742798
continue;
2799+
case '<':
2800+
appendStrBuf(c);
2801+
state = transition(state, Tokenizer.COMMENT_LESSTHAN, reconsume, pos);
2802+
continue stateloop;
27752803
case '\r':
27762804
adjustDoubleHyphenAndAppendToStrBufCarriageReturn();
27772805
state = transition(state, Tokenizer.COMMENT, reconsume, pos);
@@ -2781,7 +2809,6 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
27812809
state = transition(state, Tokenizer.COMMENT, reconsume, pos);
27822810
continue stateloop;
27832811
case '!':
2784-
errHyphenHyphenBang();
27852812
appendStrBuf(c);
27862813
state = transition(state, Tokenizer.COMMENT_END_BANG, reconsume, pos);
27872814
continue stateloop;
@@ -2794,7 +2821,8 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
27942821
* and the input character to the comment
27952822
* token's data.
27962823
*/
2797-
adjustDoubleHyphenAndAppendToStrBufAndErr(c);
2824+
adjustDoubleHyphenAndAppendToStrBufAndErr(c, reportedConsecutiveHyphens);
2825+
reportedConsecutiveHyphens = true;
27982826
/*
27992827
* Switch to the comment state.
28002828
*/
@@ -2864,6 +2892,148 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
28642892
continue stateloop;
28652893
}
28662894
}
2895+
case COMMENT_LESSTHAN:
2896+
for (;;) {
2897+
if (++pos == endPos) {
2898+
break stateloop;
2899+
}
2900+
c = checkChar(buf, pos);
2901+
switch (c) {
2902+
case '!':
2903+
appendStrBuf(c);
2904+
state = transition(state, Tokenizer.COMMENT_LESSTHAN_BANG, reconsume, pos);
2905+
continue stateloop;
2906+
case '<':
2907+
appendStrBuf(c);
2908+
state = transition(state, Tokenizer.COMMENT_LESSTHAN, reconsume, pos);
2909+
continue stateloop;
2910+
case '-':
2911+
appendStrBuf(c);
2912+
state = transition(state, Tokenizer.COMMENT_END_DASH, reconsume, pos);
2913+
continue stateloop;
2914+
case '\r':
2915+
appendStrBufCarriageReturn();
2916+
break stateloop;
2917+
case '\n':
2918+
appendStrBufLineFeed();
2919+
continue;
2920+
case '\u0000':
2921+
c = '\uFFFD';
2922+
// fall thru
2923+
default:
2924+
appendStrBuf(c);
2925+
state = transition(state, Tokenizer.COMMENT, reconsume, pos);
2926+
continue stateloop;
2927+
}
2928+
}
2929+
case COMMENT_LESSTHAN_BANG:
2930+
for (;;) {
2931+
if (++pos == endPos) {
2932+
break stateloop;
2933+
}
2934+
c = checkChar(buf, pos);
2935+
switch (c) {
2936+
case '-':
2937+
appendStrBuf(c);
2938+
state = transition(state, Tokenizer.COMMENT_LESSTHAN_BANG_DASH, reconsume, pos);
2939+
continue stateloop;
2940+
case '<':
2941+
appendStrBuf(c);
2942+
state = transition(state, Tokenizer.COMMENT_LESSTHAN, reconsume, pos);
2943+
continue stateloop;
2944+
case '\r':
2945+
appendStrBufCarriageReturn();
2946+
break stateloop;
2947+
case '\n':
2948+
appendStrBufLineFeed();
2949+
continue;
2950+
case '\u0000':
2951+
c = '\uFFFD';
2952+
// fall thru
2953+
default:
2954+
appendStrBuf(c);
2955+
state = transition(state, Tokenizer.COMMENT, reconsume, pos);
2956+
continue stateloop;
2957+
}
2958+
}
2959+
case COMMENT_LESSTHAN_BANG_DASH:
2960+
for (;;) {
2961+
if (++pos == endPos) {
2962+
break stateloop;
2963+
}
2964+
c = checkChar(buf, pos);
2965+
switch (c) {
2966+
case '-':
2967+
appendStrBuf(c);
2968+
state = transition(state, Tokenizer.COMMENT_LESSTHAN_BANG_DASH_DASH, reconsume, pos);
2969+
continue stateloop;
2970+
case '<':
2971+
appendStrBuf(c);
2972+
state = transition(state, Tokenizer.COMMENT_LESSTHAN, reconsume, pos);
2973+
continue stateloop;
2974+
case '\r':
2975+
appendStrBufCarriageReturn();
2976+
break stateloop;
2977+
case '\n':
2978+
appendStrBufLineFeed();
2979+
continue;
2980+
case '\u0000':
2981+
c = '\uFFFD';
2982+
// fall thru
2983+
default:
2984+
appendStrBuf(c);
2985+
state = transition(state, Tokenizer.COMMENT, reconsume, pos);
2986+
continue stateloop;
2987+
}
2988+
}
2989+
case COMMENT_LESSTHAN_BANG_DASH_DASH:
2990+
for (;;) {
2991+
if (++pos == endPos) {
2992+
break stateloop;
2993+
}
2994+
c = checkChar(buf, pos);
2995+
switch (c) {
2996+
case '>':
2997+
appendStrBuf(c);
2998+
emitComment(3, pos);
2999+
state = transition(state, Tokenizer.DATA, reconsume, pos);
3000+
continue stateloop;
3001+
case '-':
3002+
errNestedComment();
3003+
adjustDoubleHyphenAndAppendToStrBufAndErr(c, reportedConsecutiveHyphens);
3004+
reportedConsecutiveHyphens = true;
3005+
state = transition(state, Tokenizer.COMMENT_END, reconsume, pos);
3006+
continue stateloop;
3007+
case '\r':
3008+
errNestedComment();
3009+
adjustDoubleHyphenAndAppendToStrBufAndErr(c, reportedConsecutiveHyphens);
3010+
reportedConsecutiveHyphens = true;
3011+
state = transition(state, Tokenizer.COMMENT, reconsume, pos);
3012+
break stateloop;
3013+
case '\n':
3014+
errNestedComment();
3015+
adjustDoubleHyphenAndAppendToStrBufAndErr(c, reportedConsecutiveHyphens);
3016+
reportedConsecutiveHyphens = true;
3017+
state = transition(state, Tokenizer.COMMENT, reconsume, pos);
3018+
continue;
3019+
case '\u0000':
3020+
c = '\uFFFD';
3021+
// fall thru
3022+
case '!':
3023+
errNestedComment();
3024+
adjustDoubleHyphenAndAppendToStrBufAndErr(c, reportedConsecutiveHyphens);
3025+
reportedConsecutiveHyphens = true;
3026+
state = transition(state, Tokenizer.COMMENT_END_BANG, reconsume, pos);
3027+
continue stateloop;
3028+
default:
3029+
errNestedComment();
3030+
adjustDoubleHyphenAndAppendToStrBufAndErr(c, reportedConsecutiveHyphens);
3031+
reportedConsecutiveHyphens = true;
3032+
state = transition(state, Tokenizer.COMMENT_END, reconsume, pos);
3033+
continue stateloop;
3034+
}
3035+
}
3036+
// XXX reorder point
28673037
case COMMENT_START_DASH:
28683038
if (++pos == endPos) {
28693039
break stateloop;
@@ -2892,6 +3062,10 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
28923062
*/
28933063
state = transition(state, Tokenizer.DATA, reconsume, pos);
28943064
continue stateloop;
3065+
case '<':
3066+
appendStrBuf(c);
3067+
state = transition(state, Tokenizer.COMMENT_LESSTHAN, reconsume, pos);
3068+
continue stateloop;
28953069
case '\r':
28963070
appendStrBufCarriageReturn();
28973071
state = transition(state, Tokenizer.COMMENT, reconsume, pos);
@@ -6026,13 +6200,13 @@ private void initDoctypeFields() {
60266200
@Inline private void adjustDoubleHyphenAndAppendToStrBufCarriageReturn()
60276201
throws SAXException {
60286202
silentCarriageReturn();
6029-
adjustDoubleHyphenAndAppendToStrBufAndErr('\n');
6203+
adjustDoubleHyphenAndAppendToStrBufAndErr('\n', false);
60306204
}
60316205

60326206
@Inline private void adjustDoubleHyphenAndAppendToStrBufLineFeed()
60336207
throws SAXException {
60346208
silentLineFeed();
6035-
adjustDoubleHyphenAndAppendToStrBufAndErr('\n');
6209+
adjustDoubleHyphenAndAppendToStrBufAndErr('\n', false);
60366210
}
60376211

60386212
@Inline private void appendStrBufLineFeed() {
@@ -6337,6 +6511,8 @@ public void eof() throws SAXException {
63376511
break eofloop;
63386512
case COMMENT_START:
63396513
case COMMENT:
6514+
case COMMENT_LESSTHAN:
6515+
case COMMENT_LESSTHAN_BANG:
63406516
/*
63416517
* EOF Parse error.
63426518
*/
@@ -6348,6 +6524,7 @@ public void eof() throws SAXException {
63486524
*/
63496525
break eofloop;
63506526
case COMMENT_END:
6527+
case COMMENT_LESSTHAN_BANG_DASH_DASH:
63516528
errEofInComment();
63526529
/* Emit the comment token. */
63536530
emitComment(2, 0);
@@ -6357,6 +6534,7 @@ public void eof() throws SAXException {
63576534
break eofloop;
63586535
case COMMENT_END_DASH:
63596536
case COMMENT_START_DASH:
6537+
case COMMENT_LESSTHAN_BANG_DASH:
63606538
errEofInComment();
63616539
/* Emit the comment token. */
63626540
emitComment(1, 0);
@@ -6981,7 +7159,7 @@ protected void errGtInPublicId() throws SAXException {
69817159
protected void errNamelessDoctype() throws SAXException {
69827160
}
69837161

6984-
protected void errConsecutiveHyphens() throws SAXException {
7162+
protected void errNestedComment() throws SAXException {
69857163
}
69867164

69877165
protected void errPrematureEndOfComment() throws SAXException {
@@ -7131,9 +7309,6 @@ protected void errExpectedSystemId() throws SAXException {
71317309
protected void errMissingSpaceBeforeDoctypeName() throws SAXException {
71327310
}
71337311

7134-
protected void errHyphenHyphenBang() throws SAXException {
7135-
}
7136-
71377312
protected void errNcrControlChar() throws SAXException {
71387313
}
71397314

0 commit comments

Comments
 (0)