Skip to content

liblangutil: refactor SourceReferenceFormatter (split out extraction / line-cutting part). #5535

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions libevmasm/Assembly.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ string locationFromSources(StringMap const& _sourceCodes, SourceLocation const&
if (_location.isEmpty() || _sourceCodes.empty() || _location.start >= _location.end || _location.start < 0)
return "";

auto it = _sourceCodes.find(*_location.sourceName);
auto it = _sourceCodes.find(_location.source->name());
if (it == _sourceCodes.end())
return "";

Expand Down Expand Up @@ -186,11 +186,10 @@ class Functionalizer

void printLocation()
{
if (!m_location.sourceName && m_location.isEmpty())
if (!m_location.source && m_location.isEmpty())
return;
m_out << m_prefix << " /*";
if (m_location.sourceName)
m_out << " \"" + *m_location.sourceName + "\"";
m_out << " \"" + m_location.source->name() + "\"";
if (!m_location.isEmpty())
m_out << ":" << to_string(m_location.start) + ":" + to_string(m_location.end);
m_out << " " << locationFromSources(m_sourceCodes, m_location);
Expand Down
5 changes: 4 additions & 1 deletion liblangutil/CharStream.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,8 @@ class CharStream
{
public:
CharStream(): m_position(0) {}
explicit CharStream(std::string const& _source): m_source(_source), m_position(0) {}
explicit CharStream(std::string const& _source, std::string const& name):
m_source(_source), m_name(name), m_position(0) {}

int position() const { return m_position; }
bool isPastEndOfInput(size_t _charsForward = 0) const { return (m_position + _charsForward) >= m_source.size(); }
Expand All @@ -80,6 +81,7 @@ class CharStream
void reset() { m_position = 0; }

std::string const& source() const { return m_source; }
std::string const& name() const noexcept { return m_name; }

///@{
///@name Error printing helper functions
Expand All @@ -91,6 +93,7 @@ class CharStream

private:
std::string m_source;
std::string m_name;
size_t m_position;
};

Expand Down
9 changes: 2 additions & 7 deletions liblangutil/ParserBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,6 @@
using namespace std;
using namespace langutil;

std::shared_ptr<string const> const& ParserBase::sourceName() const
{
return m_scanner->sourceName();
}

int ParserBase::position() const
{
return m_scanner->currentLocation().start;
Expand Down Expand Up @@ -105,10 +100,10 @@ void ParserBase::decreaseRecursionDepth()

void ParserBase::parserError(string const& _description)
{
m_errorReporter.parserError(SourceLocation(position(), endPosition(), sourceName()), _description);
m_errorReporter.parserError(SourceLocation(position(), endPosition(), source()), _description);
}

void ParserBase::fatalParserError(string const& _description)
{
m_errorReporter.fatalParserError(SourceLocation(position(), endPosition(), sourceName()), _description);
m_errorReporter.fatalParserError(SourceLocation(position(), endPosition(), source()), _description);
}
3 changes: 2 additions & 1 deletion liblangutil/ParserBase.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#pragma once

#include <liblangutil/Token.h>
#include <liblangutil/Scanner.h>
#include <memory>
#include <string>

Expand All @@ -37,7 +38,7 @@ class ParserBase
public:
explicit ParserBase(ErrorReporter& errorReporter): m_errorReporter(errorReporter) {}

std::shared_ptr<std::string const> const& sourceName() const;
std::shared_ptr<CharStream> source() const { return m_scanner->charStream(); }

protected:
/// Utility class that creates an error and throws an exception if the
Expand Down
53 changes: 29 additions & 24 deletions liblangutil/Scanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -167,17 +167,22 @@ class LiteralScope
}; // end of LiteralScope class


void Scanner::reset(CharStream _source, string _sourceName)
void Scanner::reset(CharStream _source)
{
m_source = std::move(_source);
m_sourceName = make_shared<string const>(std::move(_sourceName));
m_source = make_shared<CharStream>(std::move(_source));
reset();
}

/// Makes the scanner read from the shared character stream @a _source and
/// restarts scanning at the beginning of that stream.
/// @param _source stream to scan; taken by value as a sink and moved into the
///                scanner so no extra reference-count round trip is paid.
///                Must not be null, because the parameterless reset() that
///                follows dereferences it.
void Scanner::reset(std::shared_ptr<CharStream> _source)
{
	// _source is our own copy already; moving hands over ownership instead of
	// copying the shared_ptr a second time.
	m_source = std::move(_source);
	reset();
}

void Scanner::reset()
{
m_source.reset();
m_char = m_source.get();
m_source->reset();
m_char = m_source->get();
skipWhitespace();
scanToken();
next();
Expand Down Expand Up @@ -296,13 +301,13 @@ Token Scanner::scanSingleLineDocComment()
{
// check if next line is also a documentation comment
skipWhitespace();
if (!m_source.isPastEndOfInput(3) &&
m_source.get(0) == '/' &&
m_source.get(1) == '/' &&
m_source.get(2) == '/')
if (!m_source->isPastEndOfInput(3) &&
m_source->get(0) == '/' &&
m_source->get(1) == '/' &&
m_source->get(2) == '/')
{
addCommentLiteralChar('\n');
m_char = m_source.advanceAndGet(3);
m_char = m_source->advanceAndGet(3);
}
else
break; // next line is not a documentation comment, we are done
Expand Down Expand Up @@ -355,30 +360,30 @@ Token Scanner::scanMultiLineDocComment()
if (isLineTerminator(m_char))
{
skipWhitespace();
if (!m_source.isPastEndOfInput(1) && m_source.get(0) == '*' && m_source.get(1) == '*')
if (!m_source->isPastEndOfInput(1) && m_source->get(0) == '*' && m_source->get(1) == '*')
{ // it is unknown if this leads to the end of the comment
addCommentLiteralChar('*');
advance();
}
else if (!m_source.isPastEndOfInput(1) && m_source.get(0) == '*' && m_source.get(1) != '/')
else if (!m_source->isPastEndOfInput(1) && m_source->get(0) == '*' && m_source->get(1) != '/')
{ // skip first '*' in subsequent lines
if (charsAdded)
addCommentLiteralChar('\n');
m_char = m_source.advanceAndGet(2);
m_char = m_source->advanceAndGet(2);
}
else if (!m_source.isPastEndOfInput(1) && m_source.get(0) == '*' && m_source.get(1) == '/')
else if (!m_source->isPastEndOfInput(1) && m_source->get(0) == '*' && m_source->get(1) == '/')
{ // if after newline the comment ends, don't insert the newline
m_char = m_source.advanceAndGet(2);
m_char = m_source->advanceAndGet(2);
endFound = true;
break;
}
else if (charsAdded)
addCommentLiteralChar('\n');
}

if (!m_source.isPastEndOfInput(1) && m_source.get(0) == '*' && m_source.get(1) == '/')
if (!m_source->isPastEndOfInput(1) && m_source->get(0) == '*' && m_source->get(1) == '/')
{
m_char = m_source.advanceAndGet(2);
m_char = m_source->advanceAndGet(2);
endFound = true;
break;
}
Expand Down Expand Up @@ -715,11 +720,11 @@ bool Scanner::isUnicodeLinebreak()
if (0x0a <= m_char && m_char <= 0x0d)
// line feed, vertical tab, form feed, carriage return
return true;
else if (!m_source.isPastEndOfInput(1) && uint8_t(m_source.get(0)) == 0xc2 && uint8_t(m_source.get(1)) == 0x85)
else if (!m_source->isPastEndOfInput(1) && uint8_t(m_source->get(0)) == 0xc2 && uint8_t(m_source->get(1)) == 0x85)
// NEL - U+0085, C2 85 in utf8
return true;
else if (!m_source.isPastEndOfInput(2) && uint8_t(m_source.get(0)) == 0xe2 && uint8_t(m_source.get(1)) == 0x80 && (
uint8_t(m_source.get(2)) == 0xa8 || uint8_t(m_source.get(2)) == 0xa9
else if (!m_source->isPastEndOfInput(2) && uint8_t(m_source->get(0)) == 0xe2 && uint8_t(m_source->get(1)) == 0x80 && (
uint8_t(m_source->get(2)) == 0xa8 || uint8_t(m_source->get(2)) == 0xa9
))
// LS - U+2028, E2 80 A8 in utf8
// PS - U+2029, E2 80 A9 in utf8
Expand Down Expand Up @@ -783,7 +788,7 @@ void Scanner::scanDecimalDigits()

// May continue with decimal digit or underscore for grouping.
do addLiteralCharAndAdvance();
while (!m_source.isPastEndOfInput() && (isDecimalDigit(m_char) || m_char == '_'));
while (!m_source->isPastEndOfInput() && (isDecimalDigit(m_char) || m_char == '_'));

// Defer further validation of underscore to SyntaxChecker.
}
Expand Down Expand Up @@ -829,15 +834,15 @@ Token Scanner::scanNumber(char _charSeen)
scanDecimalDigits(); // optional
if (m_char == '.')
{
if (!m_source.isPastEndOfInput(1) && m_source.get(1) == '_')
if (!m_source->isPastEndOfInput(1) && m_source->get(1) == '_')
{
// Assume the input may be a floating point number with leading '_' in fraction part.
// Recover by consuming it all but returning `Illegal` right away.
addLiteralCharAndAdvance(); // '.'
addLiteralCharAndAdvance(); // '_'
scanDecimalDigits();
}
if (m_source.isPastEndOfInput() || !isDecimalDigit(m_source.get(1)))
if (m_source->isPastEndOfInput() || !isDecimalDigit(m_source->get(1)))
{
// A '.' has to be followed by a number.
literal.complete();
Expand All @@ -854,7 +859,7 @@ Token Scanner::scanNumber(char _charSeen)
solAssert(kind != HEX, "'e'/'E' must be scanned as part of the hex number");
if (kind != DECIMAL)
return setError(ScannerError::IllegalExponent);
else if (!m_source.isPastEndOfInput(1) && m_source.get(1) == '_')
else if (!m_source->isPastEndOfInput(1) && m_source->get(1) == '_')
{
// Recover from wrongly placed underscore as delimiter in literal with scientific
// notation by consuming until the end.
Expand Down
34 changes: 17 additions & 17 deletions liblangutil/Scanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,12 +90,16 @@ class Scanner
{
friend class LiteralScope;
public:
explicit Scanner(CharStream _source = CharStream(), std::string _sourceName = "") { reset(std::move(_source), std::move(_sourceName)); }
explicit Scanner(std::shared_ptr<CharStream> _source) { reset(std::move(_source)); }
explicit Scanner(CharStream _source = CharStream()) { reset(std::move(_source)); }

std::string source() const { return m_source.source(); }
std::string source() const { return m_source->source(); }

/// Resets the scanner as if newly constructed with _source and _sourceName as input.
void reset(CharStream _source, std::string _sourceName);
std::shared_ptr<CharStream> charStream() noexcept { return m_source; }

/// Resets the scanner as if newly constructed with _source as input.
void reset(CharStream _source);
void reset(std::shared_ptr<CharStream> _source);
/// Resets scanner to the start of input.
void reset();

Expand Down Expand Up @@ -146,20 +150,17 @@ class Scanner
std::string const& peekLiteral() const { return m_nextToken.literal; }
///@}

std::shared_ptr<std::string const> const& sourceName() const { return m_sourceName; }

///@{
///@name Error printing helper functions
/// Functions that help pretty-printing parse errors
/// Use these only in error cases; they are quite expensive.
std::string lineAtPosition(int _position) const { return m_source.lineAtPosition(_position); }
std::tuple<int, int> translatePositionToLineColumn(int _position) const { return m_source.translatePositionToLineColumn(_position); }
std::string lineAtPosition(int _position) const { return m_source->lineAtPosition(_position); }
std::tuple<int, int> translatePositionToLineColumn(int _position) const { return m_source->translatePositionToLineColumn(_position); }
std::string sourceAt(SourceLocation const& _location) const
{
solAssert(!_location.isEmpty(), "");
solAssert(m_sourceName && _location.sourceName, "");
solAssert(*m_sourceName == *_location.sourceName, "");
return m_source.source().substr(_location.start, _location.end - _location.start);
solAssert(m_source.get() == _location.source.get(), "CharStream memory locations must match.");
return m_source->source().substr(_location.start, _location.end - _location.start);
}
///@}

Expand Down Expand Up @@ -188,8 +189,8 @@ class Scanner
void addUnicodeAsUTF8(unsigned codepoint);
///@}

bool advance() { m_char = m_source.advanceAndGet(); return !m_source.isPastEndOfInput(); }
void rollback(int _amount) { m_char = m_source.rollback(_amount); }
bool advance() { m_char = m_source->advanceAndGet(); return !m_source->isPastEndOfInput(); }
void rollback(int _amount) { m_char = m_source->rollback(_amount); }

inline Token selectErrorToken(ScannerError _err) { advance(); return setError(_err); }
inline Token selectToken(Token _tok) { advance(); return _tok; }
Expand Down Expand Up @@ -229,17 +230,16 @@ class Scanner
bool isUnicodeLinebreak();

/// Return the current source position.
int sourcePos() const { return m_source.position(); }
bool isSourcePastEndOfInput() const { return m_source.isPastEndOfInput(); }
int sourcePos() const { return m_source->position(); }
bool isSourcePastEndOfInput() const { return m_source->isPastEndOfInput(); }

TokenDesc m_skippedComment; // desc for current skipped comment
TokenDesc m_nextSkippedComment; // desc for next skipped comment

TokenDesc m_currentToken; // desc for current token (as returned by Next())
TokenDesc m_nextToken; // desc for next token (one token look-ahead)

CharStream m_source;
std::shared_ptr<std::string const> m_sourceName;
std::shared_ptr<CharStream> m_source;

/// one character look-ahead, equals 0 at end of input
char m_char;
Expand Down
Loading