Skip to content

[backport] Fix newline bugs #4937

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Sep 10, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

Bugfixes:
* Type Checker: Report error when using structs in events without experimental ABIEncoderV2. This used to crash or log the wrong values.
* Parser: Treat unicode line endings as terminating strings and single-line comments.
* Parser: Disallow unterminated multi-line comments at the end of input.
* Parser: Treat ``/** /`` as unterminated multi-line comment.

### 0.4.24 (2018-05-16)

Expand Down
84 changes: 53 additions & 31 deletions libsolidity/parsing/Scanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -243,22 +243,17 @@ bool Scanner::skipWhitespace()
return sourcePos() != startPosition;
}

bool Scanner::skipWhitespaceExceptLF()
void Scanner::skipWhitespaceExceptUnicodeLinebreak()
{
int const startPosition = sourcePos();
while (isWhiteSpace(m_char) && !isLineTerminator(m_char))
while (isWhiteSpace(m_char) && !isUnicodeLinebreak())
advance();
// Return whether or not we skipped any characters.
return sourcePos() != startPosition;
}

Token::Value Scanner::skipSingleLineComment()
{
// The line terminator at the end of the line is not considered
// to be part of the single-line comment; it is recognized
// separately by the lexical grammar and becomes part of the
// stream of input elements for the syntactic grammar
while (!isLineTerminator(m_char))
// Line terminator is not part of the comment. If it is a
// non-ascii line terminator, it will result in a parser error.
while (!isUnicodeLinebreak())
if (!advance()) break;

return Token::Whitespace;
Expand All @@ -268,7 +263,9 @@ Token::Value Scanner::scanSingleLineDocComment()
{
LiteralScope literal(this, LITERAL_TYPE_COMMENT);
advance(); //consume the last '/' at ///
skipWhitespaceExceptLF();

skipWhitespaceExceptUnicodeLinebreak();

while (!isSourcePastEndOfInput())
{
if (isLineTerminator(m_char))
Expand All @@ -287,6 +284,10 @@ Token::Value Scanner::scanSingleLineDocComment()
break; // next line is not a documentation comment, we are done

}
else if (isUnicodeLinebreak())
// Any line terminator that is not '\n' is considered to end the
// comment.
break;
addCommentLiteralChar(m_char);
advance();
}
Expand Down Expand Up @@ -321,6 +322,9 @@ Token::Value Scanner::scanMultiLineDocComment()
bool endFound = false;
bool charsAdded = false;

while (isWhiteSpace(m_char) && !isLineTerminator(m_char))
advance();

while (!isSourcePastEndOfInput())
{
//handle newlines in multline comments
Expand Down Expand Up @@ -372,7 +376,7 @@ Token::Value Scanner::scanSlash()
if (m_char == '/')
{
if (!advance()) /* double slash comment directly before EOS */
return Token::Whitespace;
return Token::Whitespace;
else if (m_char == '/')
{
// doxygen style /// comment
Expand All @@ -390,24 +394,27 @@ Token::Value Scanner::scanSlash()
{
// doxygen style /** natspec comment
if (!advance()) /* slash star comment before EOS */
return Token::Whitespace;
return Token::Illegal;
else if (m_char == '*')
{
advance(); //consume the last '*' at /**
skipWhitespaceExceptLF();

// special case of a closed normal multiline comment
if (!m_source.isPastEndOfInput() && m_source.get(0) == '/')
advance(); //skip the closing slash
else // we actually have a multiline documentation comment
// "/**/"
if (m_char == '/')
{
Token::Value comment;
m_nextSkippedComment.location.start = firstSlashPosition;
comment = scanMultiLineDocComment();
m_nextSkippedComment.location.end = sourcePos();
m_nextSkippedComment.token = comment;
advance(); //skip the closing slash
return Token::Whitespace;
}
return Token::Whitespace;
// we actually have a multiline documentation comment
Token::Value comment;
m_nextSkippedComment.location.start = firstSlashPosition;
comment = scanMultiLineDocComment();
m_nextSkippedComment.location.end = sourcePos();
m_nextSkippedComment.token = comment;
if (comment == Token::Illegal)
return Token::Illegal;
else
return Token::Whitespace;
}
else
return skipMultiLineComment();
Expand Down Expand Up @@ -435,11 +442,6 @@ void Scanner::scanToken()
m_nextToken.location.start = sourcePos();
switch (m_char)
{
case '\n':
case ' ':
case '\t':
token = selectToken(Token::Whitespace);
break;
case '"':
case '\'':
token = scanString();
Expand Down Expand Up @@ -675,18 +677,38 @@ bool Scanner::scanEscape()
if (!scanHexByte(c))
return false;
break;
default:
return false;
}

addLiteralChar(c);
return true;
}

bool Scanner::isUnicodeLinebreak()
{
if (0x0a <= m_char && m_char <= 0x0d)
// line feed, vertical tab, form feed, carriage return
return true;
else if (!m_source.isPastEndOfInput(1) && uint8_t(m_source.get(0)) == 0xc2 && uint8_t(m_source.get(1)) == 0x85)
// NEL - U+0085, C2 85 in utf8
return true;
else if (!m_source.isPastEndOfInput(2) && uint8_t(m_source.get(0)) == 0xe2 && uint8_t(m_source.get(1)) == 0x80 && (
uint8_t(m_source.get(2)) == 0xa8 || uint8_t(m_source.get(2)) == 0xa9
))
// LS - U+2028, E2 80 A8 in utf8
// PS - U+2029, E2 80 A9 in utf8
return true;
else
return false;
}

Token::Value Scanner::scanString()
{
char const quote = m_char;
advance(); // consume quote
LiteralScope literal(this, LITERAL_TYPE_STRING);
while (m_char != quote && !isSourcePastEndOfInput() && !isLineTerminator(m_char))
while (m_char != quote && !isSourcePastEndOfInput() && !isUnicodeLinebreak())
{
char c = m_char;
advance();
Expand All @@ -710,7 +732,7 @@ Token::Value Scanner::scanHexString()
char const quote = m_char;
advance(); // consume quote
LiteralScope literal(this, LITERAL_TYPE_STRING);
while (m_char != quote && !isSourcePastEndOfInput() && !isLineTerminator(m_char))
while (m_char != quote && !isSourcePastEndOfInput())
{
char c = m_char;
if (!scanHexByte(c))
Expand Down
7 changes: 5 additions & 2 deletions libsolidity/parsing/Scanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -197,8 +197,8 @@ class Scanner

/// Skips all whitespace and @returns true if something was skipped.
bool skipWhitespace();
/// Skips all whitespace except Line feeds and returns true if something was skipped
bool skipWhitespaceExceptLF();
/// Skips all whitespace that are neither '\r' nor '\n'.
void skipWhitespaceExceptUnicodeLinebreak();
Token::Value skipSingleLineComment();
Token::Value skipMultiLineComment();

Expand All @@ -218,6 +218,9 @@ class Scanner
/// is scanned.
bool scanEscape();

/// @returns true iff we are currently positioned at a unicode line break.
bool isUnicodeLinebreak();

/// Return the current source position.
int sourcePos() const { return m_source.position(); }
bool isSourcePastEndOfInput() const { return m_source.isPastEndOfInput(); }
Expand Down
106 changes: 106 additions & 0 deletions test/libsolidity/SolidityScanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
#include <libsolidity/parsing/Scanner.h>
#include <boost/test/unit_test.hpp>

using namespace std;

namespace dev
{
namespace solidity
Expand Down Expand Up @@ -393,6 +395,110 @@ BOOST_AUTO_TEST_CASE(invalid_hex_literal_nonhex_string)
BOOST_CHECK_EQUAL(scanner.next(), Token::Illegal);
}

BOOST_AUTO_TEST_CASE(invalid_multiline_comment_close)
{
// This used to parse as "comment", "identifier"
Scanner scanner(CharStream("/** / x"));
BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Illegal);
BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
}

BOOST_AUTO_TEST_CASE(multiline_doc_comment_at_eos)
{
// This used to parse as "whitespace"
Scanner scanner(CharStream("/**"));
BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Illegal);
BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
}

BOOST_AUTO_TEST_CASE(multiline_comment_at_eos)
{
Scanner scanner(CharStream("/*"));
BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Illegal);
BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
}

BOOST_AUTO_TEST_CASE(regular_line_break_in_single_line_comment)
{
for (auto const& nl: {"\r", "\n"})
{
Scanner scanner(CharStream("// abc " + string(nl) + " def "));
BOOST_CHECK_EQUAL(scanner.currentCommentLiteral(), "");
BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Identifier);
BOOST_CHECK_EQUAL(scanner.currentLiteral(), "def");
BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
}
}

BOOST_AUTO_TEST_CASE(irregular_line_breaks_in_single_line_comment)
{
for (auto const& nl: {"\v", "\f", "\xE2\x80\xA8", "\xE2\x80\xA9"})
{
Scanner scanner(CharStream("// abc " + string(nl) + " def "));
BOOST_CHECK_EQUAL(scanner.currentCommentLiteral(), "");
BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Illegal);
for (size_t i = 0; i < string(nl).size() - 1; i++)
BOOST_CHECK_EQUAL(scanner.next(), Token::Illegal);
BOOST_CHECK_EQUAL(scanner.next(), Token::Identifier);
BOOST_CHECK_EQUAL(scanner.currentLiteral(), "def");
BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
}
}

BOOST_AUTO_TEST_CASE(regular_line_breaks_in_single_line_doc_comment)
{
for (auto const& nl: {"\r", "\n"})
{
Scanner scanner(CharStream("/// abc " + string(nl) + " def "));
BOOST_CHECK_EQUAL(scanner.currentCommentLiteral(), "abc ");
BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Identifier);
BOOST_CHECK_EQUAL(scanner.currentLiteral(), "def");
BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
}
}

BOOST_AUTO_TEST_CASE(irregular_line_breaks_in_single_line_doc_comment)
{
for (auto const& nl: {"\v", "\f", "\xE2\x80\xA8", "\xE2\x80\xA9"})
{
Scanner scanner(CharStream("/// abc " + string(nl) + " def "));
BOOST_CHECK_EQUAL(scanner.currentCommentLiteral(), "abc ");
BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Illegal);
for (size_t i = 0; i < string(nl).size() - 1; i++)
BOOST_CHECK_EQUAL(scanner.next(), Token::Illegal);
BOOST_CHECK_EQUAL(scanner.next(), Token::Identifier);
BOOST_CHECK_EQUAL(scanner.currentLiteral(), "def");
BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
}
}

BOOST_AUTO_TEST_CASE(regular_line_breaks_in_strings)
{
for (auto const& nl: {"\n", "\r"})
{
Scanner scanner(CharStream("\"abc " + string(nl) + " def\""));
BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Illegal);
BOOST_CHECK_EQUAL(scanner.next(), Token::Identifier);
BOOST_CHECK_EQUAL(scanner.currentLiteral(), "def");
BOOST_CHECK_EQUAL(scanner.next(), Token::Illegal);
BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
}
}

BOOST_AUTO_TEST_CASE(irregular_line_breaks_in_strings)
{
for (auto const& nl: {"\v", "\f", "\xE2\x80\xA8", "\xE2\x80\xA9"})
{
Scanner scanner(CharStream("\"abc " + string(nl) + " def\""));
BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Illegal);
for (size_t i = 0; i < string(nl).size(); i++)
BOOST_CHECK_EQUAL(scanner.next(), Token::Illegal);
BOOST_CHECK_EQUAL(scanner.next(), Token::Identifier);
BOOST_CHECK_EQUAL(scanner.currentLiteral(), "def");
BOOST_CHECK_EQUAL(scanner.next(), Token::Illegal);
BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
}
}

BOOST_AUTO_TEST_SUITE_END()

Expand Down