Skip to content

Commit

Permalink
Merge pull request #1333 from Gregable/regexpmatching
Browse files Browse the repository at this point in the history
Fix some bugs regarding regular expression parsing in javascript.
  • Loading branch information
Gregable committed Jan 7, 2016
2 parents e3fd74b + 761272a commit cefd79e
Show file tree
Hide file tree
Showing 7 changed files with 155 additions and 31 deletions.
2 changes: 1 addition & 1 deletion validator/testdata/feature_tests/dog_doc_type.out
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
FAIL
feature_tests/dog_doc_type.html:23:0 DISALLOWED_ATTR 🐶
feature_tests/dog_doc_type.html:35:7 MANDATORY_TAG_MISSING html ⚡ for top-level html (see https://github.com/ampproject/amphtml/blob/master/spec/amp-html-format.md#ampd)
feature_tests/dog_doc_type.html:35:7 MANDATORY_TAG_MISSING html ⚡ for top-level html (see https://github.com/ampproject/amphtml/blob/master/spec/amp-html-format.md#ampd)
81 changes: 81 additions & 0 deletions validator/testdata/feature_tests/regexps.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
<!--
Copyright 2015 The AMP HTML Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS-IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the license.
-->
<!--
Test Description:
This tests looks at specific errors related to positive and negative
regexps used inside the validator.
-->
<!doctype html>
<html >
<head>
<meta charset="utf-8">
<link rel="canonical" href="./regular-html-version.html" />
<meta name="viewport" content="width=device-width,minimum-scale=1">
<style>invalid body {opacity: 0}</style>
<noscript><style>body {opacity: 1}</style></noscript>
<script async src="https://cdn.ampproject.org/v0.js"></script>

<!--
href value_regex: "https://fonts\\.googleapis\\.com/css\\?.*|https://fast\\.fonts\\.net/.*"
The first example is valid, the second example is invalid.
-->
<link rel="stylesheet" type="text/css"
href="https://fonts.googleapis.com/css?foobar">
<link rel="stylesheet" type="text/css"
href="http://xss.com/https://fonts.googleapis.com/css?foobar">

<!--
rel value_regex: lenghty, see protoascii
The first three examples are valid. The latter three examples are invalid.
-->
<link rel="accessibility">
<link rel="accessibility alternate">
<link rel="accessibility alternate archives">
<link rel="foo">
<link rel="accessibility foo">
<link rel="foo accessibility">

<!--
name blacklisted_value_regex: "(^|\\s)(viewport|content-disposition|revisit-after)($|\\s)"
The first two examples are valid. The latter two examples are invalid.
-->
<meta name="valid" content="">
<meta name="validcontent-disposition" content="">
<meta name="content-disposition" content="">
<meta name="invalid content-disposition" content="">

</head>
<body>

<!--
autoplay value_regex: "^$|desktop|tablet|mobile"
The first two examples are valid, the latter three examples are invalid.
-->
<amp-audio src="" layout="fixed" autoplay="">
<amp-audio src="" layout="fixed" autoplay="desktop">
<amp-audio src="" layout="fixed" autoplay="invalid">
<amp-audio src="" layout="fixed" autoplay="desktopfoo">
<amp-audio src="" layout="fixed" autoplay="foodesktop">

<!--
href blacklisted_value_regex: "^\\s*(javascript|vbscript|data):"
The first example is valid, the latter two examples are invalid.
-->
<a href="https://example.com/article/about/javascript:alert.html">
<a href="javascript:alert">
<a href=" javascript:alert">
</body>
</html>
13 changes: 13 additions & 0 deletions validator/testdata/feature_tests/regexps.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
FAIL
feature_tests/regexps.html:27:2 MANDATORY_CDATA_MISSING_OR_INCORRECT mandatory style (js enabled) opacity 0 (see https://github.com/ampproject/amphtml/blob/master/spec/amp-html-format.md#opacity)
feature_tests/regexps.html:37:2 INVALID_ATTR_VALUE rel=stylesheet (see https://github.com/ampproject/amphtml/blob/master/spec/amp-html-format.md#canon)
feature_tests/regexps.html:47:2 INVALID_ATTR_VALUE rel=foo (see https://github.com/ampproject/amphtml/blob/master/spec/amp-html-format.md#canon)
feature_tests/regexps.html:48:2 INVALID_ATTR_VALUE rel=accessibility foo (see https://github.com/ampproject/amphtml/blob/master/spec/amp-html-format.md#canon)
feature_tests/regexps.html:49:2 INVALID_ATTR_VALUE rel=foo accessibility (see https://github.com/ampproject/amphtml/blob/master/spec/amp-html-format.md#canon)
feature_tests/regexps.html:57:2 INVALID_ATTR_VALUE name=content-disposition (see https://github.com/ampproject/amphtml/blob/master/spec/amp-html-format.md#vprt)
feature_tests/regexps.html:58:2 INVALID_ATTR_VALUE name=invalid content-disposition (see https://github.com/ampproject/amphtml/blob/master/spec/amp-html-format.md#vprt)
feature_tests/regexps.html:69:2 INVALID_ATTR_VALUE autoplay=invalid (see https://github.com/ampproject/amphtml/blob/master/extensions/amp-audio/amp-audio.md)
feature_tests/regexps.html:70:2 INVALID_ATTR_VALUE autoplay=desktopfoo (see https://github.com/ampproject/amphtml/blob/master/extensions/amp-audio/amp-audio.md)
feature_tests/regexps.html:71:2 INVALID_ATTR_VALUE autoplay=foodesktop (see https://github.com/ampproject/amphtml/blob/master/extensions/amp-audio/amp-audio.md)
feature_tests/regexps.html:78:2 INVALID_ATTR_VALUE href=javascript:alert
feature_tests/regexps.html:79:2 INVALID_ATTR_VALUE href= javascript:alert
23 changes: 13 additions & 10 deletions validator/validator.js
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ goog.require('goog.asserts');
goog.require('goog.string');
goog.require('goog.structs.Map');
goog.require('goog.structs.Set');
goog.require('goog.uri.utils');
goog.require('parse_css.BlockType');
goog.require('parse_css.parseAStylesheet');
goog.require('parse_css.tokenize');
Expand Down Expand Up @@ -259,7 +260,8 @@ amp.validator.Terminal.prototype.error = function(msg) {
function errorLine(filenameOrUrl, error) {
const line = error.line || 1;
const col = error.col || 0;
let errorLine = filenameOrUrl + ':' + line + ':' + col + ' ' +
let errorLine = goog.uri.utils.removeFragment(filenameOrUrl) +
':' + line + ':' + col + ' ' +
error.code + ' ' + error.detail;
if (error.specUrl) {
errorLine += ' (see ' + error.specUrl + ')';
Expand Down Expand Up @@ -568,7 +570,7 @@ CdataMatcher.prototype.match = function(cdata, context, validationResult) {
return;
}
if (cdataSpec.cdataRegex !== null) {
const cdataRegex = new RegExp(cdataSpec.cdataRegex, 'g');
const cdataRegex = new RegExp('^(' + cdataSpec.cdataRegex + ')$');
if (!cdataRegex.test(cdata)) {
context.addError(
amp.validator.ValidationError.Code.
Expand Down Expand Up @@ -629,7 +631,7 @@ CdataMatcher.prototype.match = function(cdata, context, validationResult) {
if (context.getProgress(validationResult).complete) {
return;
}
const blacklistRegex = new RegExp(blacklist.regex, 'gi');
const blacklistRegex = new RegExp(blacklist.regex, 'i');
if (blacklistRegex.test(cdata)) {
context.addError(
amp.validator.ValidationError.Code.
Expand Down Expand Up @@ -1135,7 +1137,7 @@ amp.validator.CssLengthAndUnit = function(input, allowAuto) {
this.isValid = allowAuto;
return;
}
const re = new RegExp('^\\d+(?:\\.\\d+)?(px|em|rem|vh|vw|vmin|vmax)?$', 'g');
const re = new RegExp('^\\d+(?:\\.\\d+)?(px|em|rem|vh|vw|vmin|vmax)?$');
const match = re.exec(input);
if (match !== null) {
this.isValid = true;
Expand Down Expand Up @@ -1301,7 +1303,7 @@ ParsedTagSpec.prototype.valueHasUnescapedTemplateSyntax = function(value) {
// Mustache (https://mustache.github.io/mustache.5.html), our template
// system, supports {{{unescaped}}} or {{{&unescaped}}} and there can
// be whitespace after the 2nd '{'. We disallow these in attribute Values.
const unescapedOpenTag = new RegExp('{{\\s*[&{]', 'g');
const unescapedOpenTag = new RegExp('{{\\s*[&{]');
return unescapedOpenTag.test(value);
};

Expand All @@ -1315,7 +1317,7 @@ ParsedTagSpec.prototype.valueHasPartialsTemplateSyntax = function(value) {
// system, supports 'partials' which include other Mustache templates
// in the format of {{>partial}} and there can be whitespace after the {{.
// We disallow partials in attribute values.
const partialsTag = new RegExp('{{\\s*>', 'g');
const partialsTag = new RegExp('{{\\s*>');
return partialsTag.test(value);
};

Expand Down Expand Up @@ -1526,7 +1528,8 @@ ParsedTagSpec.prototype.validateAttributes = function(
}
}
if (parsedSpec.getSpec().valueRegex !== null) {
const valueRegex = new RegExp(parsedSpec.getSpec().valueRegex, 'g');
const valueRegex =
new RegExp('^(' + parsedSpec.getSpec().valueRegex + ')$');
if (!valueRegex.test(encounteredAttrValue)) {
context.addError(amp.validator.ValidationError.Code.INVALID_ATTR_VALUE,
encounteredAttrName + '=' + encounteredAttrValue,
Expand All @@ -1545,8 +1548,8 @@ ParsedTagSpec.prototype.validateAttributes = function(
}
// } end oneof
if (parsedSpec.getSpec().blacklistedValueRegex !== null) {
const blacklistedValueRegex =
new RegExp(parsedSpec.getSpec().blacklistedValueRegex, 'gi');
const blacklistedValueRegex = new RegExp(
parsedSpec.getSpec().blacklistedValueRegex, 'i');
if (blacklistedValueRegex.test(encounteredAttrValue)) {
context.addError(amp.validator.ValidationError.Code.INVALID_ATTR_VALUE,
encounteredAttrName + '=' + encounteredAttrValue,
Expand Down Expand Up @@ -1774,7 +1777,7 @@ ParsedValidatorRules.prototype.maybeEmitMandatoryTagValidationErrors = function(
const alternative = spec.mandatoryAlternatives;
if (!satisfied.contains(alternative)) {
missing.push(alternative);
specUrlsByMissing[missing] = spec.specUrl;
specUrlsByMissing[alternative] = spec.specUrl;
}
}
}
Expand Down
3 changes: 2 additions & 1 deletion validator/validator.proto
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ message AttrSpec {
// } end oneof

// If set, then the attribute value may not match this regex, which is
// always applied case-insensitively.
// always applied case-insensitively and as a partial match.
optional string blacklisted_value_regex = 6;
// If set, generates a DEPRECATED_ATTR error with severity WARNING.
optional string deprecation = 7;
Expand All @@ -89,6 +89,7 @@ message AttrList {
// Regex which, if matches the cdata of a tag, causes the tag validation to
// fail.
message BlackListedCDataRegex {
// Syntax is partial match, use ^ and $ if you want global match.
optional string regex = 1;
optional string error_message = 2;
}
Expand Down
34 changes: 17 additions & 17 deletions validator/validator.protoascii
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,12 @@
# in production from crashing. This id is not relevant to validator.js
# because thus far, engine (validator.js) and spec file (validator.protoascii)
# are always released together.
min_validator_revision_required: 67
min_validator_revision_required: 70
# The spec file revision allows the validator engine to distinguish
# newer versions of the spec file. This is currently a Google internal
# mechanism, validator.js does not use this facility. However, any
# change to this file requires updating this revision id.
spec_file_revision: 102
spec_file_revision: 103
# Rules for AMP HTML
# (https://github.com/google/amphtml/blob/master/spec/amp-html-format.md).
# These rules are just enough to validate the example from the spec.
Expand Down Expand Up @@ -418,7 +418,7 @@ tags: {
attrs: {
name: "name"
mandatory: true
blacklisted_value_regex: "viewport|content-disposition|revisit-after"
blacklisted_value_regex: "(^|\\s)(viewport|content-disposition|revisit-after)(\\s|$)"
}
attrs: {
name: "content"
Expand Down Expand Up @@ -479,41 +479,41 @@ cdata: {
# These regex blacklists are temporary hacks to validate essential CSS
# rules. They will be replaced later with more principalled solutions.
blacklisted_cdata_regex: {
regex: ".*\\.i?-amp-.*"
regex: "\\.i?-amp-"
error_message: "CSS -amp- class name prefix"
}
blacklisted_cdata_regex: {
regex: ".*!important.*"
regex: "!important"
error_message: "CSS !important"
}
# All known @ rules except for media and font-face are disabled.
# See http://b/24415204.
blacklisted_cdata_regex: {
regex: ".*charset.*"
regex: "charset"
error_message: "CSS @charset"
}
blacklisted_cdata_regex: {
regex: ".*@import.*"
regex: "@import"
error_message: "CSS @import"
}
blacklisted_cdata_regex: {
regex: ".*@namespace.*"
regex: "@namespace"
error_message: "CSS @namespace"
}
blacklisted_cdata_regex: {
regex: ".*@supports.*"
regex: "@supports"
error_message: "CSS @supports"
}
blacklisted_cdata_regex: {
regex: ".*@document.*"
regex: "@document"
error_message: "CSS @document"
}
blacklisted_cdata_regex: {
regex: ".*@page.*"
regex: "@page"
error_message: "CSS @page"
}
blacklisted_cdata_regex: {
regex: ".*@viewport.*"
regex: "@viewport"
error_message: "CSS @viewport"
}
}
Expand All @@ -527,7 +527,7 @@ tags: {
mandatory: true
spec_url: "https://github.com/ampproject/amphtml/blob/master/spec/amp-html-format.md#opacity"
cdata: {
cdata_regex: "^body ?{opacity: ?0}$"
cdata_regex: "body ?{opacity: ?0}"
}
}
tags: {
Expand All @@ -538,7 +538,7 @@ tags: {
unique: true
mandatory: true
cdata {
cdata_regex: "^body ?{opacity: ?1}$"
cdata_regex: "body ?{opacity: ?1}"
}
}
tags: {
Expand Down Expand Up @@ -615,7 +615,7 @@ tags: {
name: "a"
attrs: {
name: "href"
blacklisted_value_regex: "(javascript|vbscript|data):.*"
blacklisted_value_regex: "^\\s*(javascript|vbscript|data):"
}
attrs: {
name: "target"
Expand Down Expand Up @@ -1939,8 +1939,8 @@ tags: {
attrs: { name: "src" }
attrs: {
name: "autoplay"
# | prefix intended. empty string is valid.
value_regex: "|desktop|tablet|mobile"
# ^$| prefix intended. empty string is valid.
value_regex: "^$|desktop|tablet|mobile"
}
attrs: { name: "loop" value: "" }
attrs: { name: "muted" value: "" }
Expand Down
Loading

0 comments on commit cefd79e

Please sign in to comment.