Skip to content

Commit

Permalink
Revise URI template handling in WhatWgUrlParser
Browse files Browse the repository at this point in the history
Closes gh-33673
  • Loading branch information
rstoyanchev committed Oct 9, 2024
1 parent 1f4743a commit f4967f2
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 67 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -482,6 +482,7 @@ public void advanceTo(State state) {
"index=" + this.index + ", componentIndex=" + this.componentIndex);
}
this.state = state;
this.openCurlyBracketCount = 0;
}

public void advanceTo(State state, int componentIndex) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,9 @@
* <p>Comments in this class correlate to the parsing algorithm.
* The implementation differs from the spec in the following ways:
* <ul>
* <li>Support for URI templates has been added, through the
* {@link State#URL_TEMPLATE} state</li>
* <li>Consequently, the {@linkplain UrlRecord#port() URL port} has been
* changed from an integer to a string.</li>
* <li>To ensure that trailing slashes are significant, this implementation
* prepends a '/' to each segment.</li>
* <li>Supports URI template variables within URI components.
* <li>Consequently, the port is a String and not an integer.
* <li>Prepends '/' to each segment to ensure trailing slashes are significant.
* </ul>
* All of these modifications have been indicated through comments that start
* with {@code EXTRA}.
Expand Down Expand Up @@ -89,9 +86,6 @@ final class WhatWgUrlParser {
@Nullable
private State state;

@Nullable
private State previousState;

@Nullable
private State stateOverride;

Expand All @@ -101,6 +95,8 @@ final class WhatWgUrlParser {

private boolean insideBrackets;

private int openCurlyBracketCount;

private boolean stopMainLoop = false;


Expand Down Expand Up @@ -235,12 +231,22 @@ private void setState(State newState) {
else {
c = "EOF";
}
logger.trace("Changing state from " + this.state + " to " +
newState + " (cur: " + c + " prev: " + this.previousState + ")");
logger.trace("Changing state from " + this.state + " to " + newState + " (cur: " + c + ")");
}
// EXTRA: we keep the previous state, to ensure that the parser can escape from malformed URI templates
this.previousState = this.state;
this.state = newState;
this.openCurlyBracketCount = 0;
}

/**
 * EXTRA: Track URI variable delimiters for the current state and report
 * whether the given code point should be treated as part of a URI variable.
 * <p>A '{' always opens (or nests) a variable. A '}' closes a variable only
 * when one is currently open; an unmatched '}' is reported as not belonging
 * to a variable, and the open-bracket count is never driven below zero.
 * Any other code point belongs to a variable only while at least one
 * bracket is open and the code point is not EOF.
 * @param c the current code point, or {@code EOF}
 * @return {@code true} if {@code c} is part of a URI variable (including
 * its delimiters), {@code false} otherwise
 */
private boolean processCurlyBrackets(int c) {
    if (c == '{') {
        this.openCurlyBracketCount++;
        return true;
    }
    if (c == '}') {
        // Only a '}' that matches an earlier '{' is part of a URI variable.
        // Decrementing on an unmatched '}' would leave a negative count and
        // cause a later, well-formed variable to be misread.
        if (this.openCurlyBracketCount > 0) {
            this.openCurlyBracketCount--;
            return true;
        }
        return false;
    }
    return (this.openCurlyBracketCount > 0 && c != EOF);
}

private static LinkedList<String> strictSplit(String input, int delimiter) {
Expand Down Expand Up @@ -755,12 +761,11 @@ public void handle(int c, UrlRecord url, WhatWgUrlParser p) {
p.append(Character.toLowerCase((char) c));
p.setState(SCHEME);
}
// EXTRA: if c is '{', then append c to buffer, set previous state to scheme state,
// and state to url template state.
else if (p.previousState != URL_TEMPLATE && c == '{') {
// EXTRA: if c is '{', append to buffer and continue as SCHEME
else if (c == '{') {
p.openCurlyBracketCount++;
p.append(c);
p.previousState = SCHEME;
p.state = URL_TEMPLATE;
p.setState(SCHEME);
}
// Otherwise, if state override is not given,
// set state to no scheme state and decrease pointer by 1.
Expand All @@ -781,11 +786,6 @@ public void handle(int c, UrlRecord url, WhatWgUrlParser p) {
if (isAsciiAlphaNumeric(c) || (c == '+' || c == '-' || c == '.')) {
p.append(Character.toLowerCase((char) c));
}
// EXTRA: if c is '{', then append c to buffer, set state to url template state.
else if (p.previousState != URL_TEMPLATE && c == '{') {
p.append(c);
p.setState(URL_TEMPLATE);
}
// Otherwise, if c is U+003A (:), then:
else if (c == ':') {
// If state override is given, then:
Expand Down Expand Up @@ -858,6 +858,10 @@ else if (p.remaining(0) == '/') {
p.setState(OPAQUE_PATH);
}
}
// EXTRA: if c is within URI variable, keep appending
else if (p.processCurlyBrackets(c)) {
p.append(c);
}
// Otherwise, if state override is not given, set buffer to the empty string,
// state to no scheme state, and start over (from the first code point in input).
else if (p.stateOverride == null) {
Expand Down Expand Up @@ -1225,11 +1229,6 @@ public void handle(int c, UrlRecord url, WhatWgUrlParser p) {
if (isAsciiDigit(c)) {
p.append(c);
}
// EXTRA: if c is '{', then append c to buffer, set state to url template state.
else if (p.previousState != URL_TEMPLATE && c == '{') {
p.append(c);
p.setState(URL_TEMPLATE);
}
// Otherwise, if one of the following is true:
// - c is the EOF code point, U+002F (/), U+003F (?), or U+0023 (#)
// - url is special and c is U+005C (\)
Expand Down Expand Up @@ -1279,6 +1278,10 @@ else if (c == EOF || c == '/' || c == '?' || c == '#' ||
p.setState(PATH_START);
p.pointer--;
}
// EXTRA: if c is within URI variable, keep appending
else if (p.processCurlyBrackets(c)) {
p.append(c);
}
// Otherwise, port-invalid validation error, return failure.
else {
p.failure("Invalid port: \"" + Character.toString(c) + "\"");
Expand Down Expand Up @@ -1547,11 +1550,6 @@ else if (!singlePathSegment) {
p.setState(FRAGMENT);
}
}
// EXTRA: Otherwise, if c is '{', then append c to buffer, set state to url template state.
else if (p.previousState != URL_TEMPLATE && c == '{') {
p.append(c);
p.setState(URL_TEMPLATE);
}
// Otherwise, run these steps:
else {
if (p.validate()) {
Expand Down Expand Up @@ -1582,12 +1580,6 @@ else if (c == '%' &&
OPAQUE_PATH {
@Override
public void handle(int c, UrlRecord url, WhatWgUrlParser p) {
// EXTRA: if previous state is URL Template and the buffer is not empty,
// append buffer to url's path and empty the buffer
if (p.previousState == URL_TEMPLATE && !p.buffer.isEmpty()) {
url.path.append(p.buffer.toString());
p.emptyBuffer();
}
// If c is U+003F (?), then set url’s query to the empty string and state to query state.
if (c == '?') {
url.query = new StringBuilder();
Expand All @@ -1599,11 +1591,6 @@ else if (c == '#') {
url.fragment = new StringBuilder();
p.setState(FRAGMENT);
}
// EXTRA: Otherwise, if c is '{', then append c to buffer, set state to url template state.
else if (p.previousState != URL_TEMPLATE && c == '{') {
p.append(c);
p.setState(URL_TEMPLATE);
}
// Otherwise:
else {
if (p.validate()) {
Expand Down Expand Up @@ -1668,11 +1655,6 @@ public void handle(int c, UrlRecord url, WhatWgUrlParser p) {
p.setState(FRAGMENT);
}
}
// EXTRA: Otherwise, if c is '{', then append c to buffer, set state to url template state.
else if (p.previousState != URL_TEMPLATE && c == '{') {
p.append(c);
p.setState(URL_TEMPLATE);
}
// Otherwise, if c is not the EOF code point:
else if (c != EOF) {
if (p.validate()) {
Expand Down Expand Up @@ -1725,24 +1707,6 @@ else if (c == '%' &&
}
}
}
},
URL_TEMPLATE {
    /**
     * EXTRA: Accumulate the code points of a URI template variable in the
     * parser buffer until the closing '}' is seen, then hand control back
     * to the state that was active before the variable started.
     * @param c the current code point, or {@code EOF}
     * @param url the URL record being built (not touched by this state)
     * @param p the parser whose buffer, pointer and state are updated
     */
    @Override
    public void handle(int c, UrlRecord url, WhatWgUrlParser p) {
        Assert.state(p.previousState != null, "No previous state set");
        if (c == EOF) {
            // Input ended before the variable was closed: move the pointer
            // back by the buffered length plus one, drop the buffer, and
            // return to the previous state.
            p.pointer -= p.buffer.length() + 1;
            p.emptyBuffer();
            p.setState(p.previousState);
            return;
        }
        // Every non-EOF code point, including the closing '}', is buffered.
        p.append(c);
        if (c == '}') {
            p.setState(p.previousState);
        }
    }
};

public abstract void handle(int c, UrlRecord url, WhatWgUrlParser p);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -603,6 +603,18 @@ void buildAndExpandHierarchical() {
assertThat(result.toUriString()).isEqualTo("/fooValue/barValue");
}

/**
 * Parses a hierarchical URI string in which every component is a URI
 * variable, expands the variables from a map, and checks the resulting URI.
 */
@ParameterizedTest
@EnumSource(value = ParserType.class)
void parseBuildAndExpandHierarchical(ParserType parserType) {
    String template = "{scheme}://{host}:{port}/{segment}?{query}#{fragment}";
    Map<String, String> uriVariables = Map.of(
            "scheme", "ws",
            "host", "example.org",
            "port", "7777",
            "segment", "path",
            "query", "q=1",
            "fragment", "foo");

    URI uri = UriComponentsBuilder.fromUriString(template, parserType)
            .buildAndExpand(uriVariables)
            .toUri();

    String expected = "ws://example.org:7777/path?q=1#foo";
    assertThat(uri.toString()).isEqualTo(expected);
}

@ParameterizedTest
@EnumSource(value = ParserType.class)
void buildAndExpandOpaque(ParserType parserType) {
Expand Down

0 comments on commit f4967f2

Please sign in to comment.