Skip to content

Commit

Permalink
[DSLX:frontend] Remove exceptional case for string scanning.
Browse files Browse the repository at this point in the history
Strings are tokens too! Pop() should yield a string token similarly to
everything else.

This eliminates discrepancies that were present between Pop() until
extinguished and PopAll(), the latter of which had special handling when it saw
an open quote.

PiperOrigin-RevId: 587845151
  • Loading branch information
cdleary authored and copybara-github committed Dec 4, 2023
1 parent 09b2800 commit 494b048
Show file tree
Hide file tree
Showing 9 changed files with 94 additions and 53 deletions.
3 changes: 3 additions & 0 deletions xls/dslx/frontend/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,7 @@ cc_library(
"@com_google_absl//absl/strings",
"@com_google_absl//absl/strings:str_format",
"@com_google_absl//absl/types:span",
"//xls/common:test_macros",
"//xls/common/logging",
"//xls/common/status:ret_check",
"//xls/common/status:status_macros",
Expand All @@ -261,8 +262,10 @@ cc_test(
name = "scanner_test",
srcs = ["scanner_test.cc"],
deps = [
":comment_data",
":pos",
":scanner",
":token",
"@com_google_absl//absl/random",
"@com_google_absl//absl/status",
"@com_google_absl//absl/status:statusor",
Expand Down
32 changes: 16 additions & 16 deletions xls/dslx/frontend/parser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -444,9 +444,14 @@ Parser::ParseAttribute(absl::flat_hash_map<std::string, Function*>* name_to_fn,
peek->span(), absl::StrCat("Invalid test type: ", peek->ToString()));
}
if (directive_name == "extern_verilog") {
Span template_span;
XLS_RETURN_IF_ERROR(DropTokenOrError(TokenKind::kOParen));
XLS_ASSIGN_OR_RETURN(std::string ffi_annotation, PopString(&template_span));
Pos template_start = GetPos();
Pos template_limit;
XLS_ASSIGN_OR_RETURN(
Token ffi_annotation_token,
PopTokenOrError(TokenKind::kString, /*start=*/nullptr,
"extern_verilog template", &template_limit));
std::string ffi_annotation = *ffi_annotation_token.GetValue();
XLS_RETURN_IF_ERROR(DropTokenOrError(TokenKind::kCParen));
XLS_RETURN_IF_ERROR(DropTokenOrError(TokenKind::kCBrack));
XLS_ASSIGN_OR_RETURN(bool dropped_pub, TryDropKeyword(Keyword::kPub));
Expand All @@ -457,8 +462,8 @@ Parser::ParseAttribute(absl::flat_hash_map<std::string, Function*>* name_to_fn,
if (!parsed_ffi_annotation.ok()) {
const int64_t error_at =
CodeTemplate::ExtractErrorColumn(parsed_ffi_annotation.status());
const Pos& start = template_span.start();
Pos error_pos{start.filename(), start.lineno(), start.colno() + error_at};
Pos error_pos{template_start.filename(), template_start.lineno(),
template_start.colno() + error_at};
dslx::Span error_span(error_pos, error_pos);
return ParseErrorStatus(error_span,
parsed_ffi_annotation.status().message());
Expand Down Expand Up @@ -1507,20 +1512,15 @@ absl::StatusOr<Expr*> Parser::ParseTermLhs(Bindings& outer_bindings,
if (peek->IsKindIn({TokenKind::kNumber, TokenKind::kCharacter}) ||
peek->IsKeywordIn({Keyword::kTrue, Keyword::kFalse})) {
XLS_ASSIGN_OR_RETURN(lhs, ParseNumber(outer_bindings));
} else if (peek->IsKindIn({TokenKind::kDoubleQuote})) {
// Eat characters until the first unescaped double quote.
//
// Make a defensive copy of the peek token span as it may/will be
// invalidated via the PopString() call.
const Span span_copy = peek->span();
XLS_ASSIGN_OR_RETURN(std::string text, PopString());
if (text.empty()) {
// TODO(https://github.com/google/xls/issues/1105): 2021-05-20 Add
// zero-length support.
return ParseErrorStatus(span_copy,
} else if (peek->kind() == TokenKind::kString) {
Token tok = PopTokenOrDie();
// TODO(https://github.com/google/xls/issues/1105): 2021-05-20 Add
// zero-length string support akin to zero-length array support.
if (tok.GetValue()->empty()) {
return ParseErrorStatus(tok.span(),
"Zero-length strings are not supported.");
}
return module_->Make<String>(Span(start_pos, GetPos()), text);
return module_->Make<String>(tok.span(), *tok.GetValue());
} else if (peek->IsKindIn({TokenKind::kBang, TokenKind::kMinus})) {
Token tok = PopTokenOrDie();
XLS_ASSIGN_OR_RETURN(Expr * arg, ParseTerm(outer_bindings, restrictions));
Expand Down
17 changes: 16 additions & 1 deletion xls/dslx/frontend/scanner.cc
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#include "xls/common/status/status_macros.h"
#include "xls/dslx/frontend/comment_data.h"
#include "xls/dslx/frontend/pos.h"
#include "xls/dslx/frontend/scanner_keywords.inc"
#include "xls/dslx/frontend/token.h"

namespace xls::dslx {
Expand Down Expand Up @@ -325,6 +326,17 @@ absl::StatusOr<std::optional<Token>> Scanner::TryPopWhitespaceOrComment() {
return std::nullopt;
}

absl::StatusOr<Token> Scanner::ScanString(const Pos& start_pos) {
DropChar();
XLS_ASSIGN_OR_RETURN(std::string value, ScanUntilDoubleQuote());
if (!TryDropChar('"')) {
return ScanErrorStatus(
Span(start_pos, GetPos()),
"Expected close quote character to terminate open quote character.");
}
return Token(TokenKind::kString, Span(start_pos, GetPos()), value);
}

absl::StatusOr<Token> Scanner::ScanNumber(char startc, const Pos& start_pos) {
bool negative = startc == '-';
if (negative) {
Expand Down Expand Up @@ -461,6 +473,10 @@ absl::StatusOr<Token> Scanner::Pop() {
const char startc = PeekChar();
std::optional<Token> result;
switch (startc) {
case '"': {
XLS_ASSIGN_OR_RETURN(result, ScanString(start_pos));
break;
}
case '\'': {
XLS_ASSIGN_OR_RETURN(result, ScanChar(start_pos));
break;
Expand Down Expand Up @@ -566,7 +582,6 @@ absl::StatusOr<Token> Scanner::Pop() {
case '%': DropChar(); result = Token(TokenKind::kPercent, mk_span()); break; // NOLINT
case '^': DropChar(); result = Token(TokenKind::kHat, mk_span()); break; // NOLINT
case '/': DropChar(); result = Token(TokenKind::kSlash, mk_span()); break; // NOLINT
case '"': DropChar(); result = Token(TokenKind::kDoubleQuote, mk_span()); break; // NOLINT
// clang-format on
default:
if (std::isalpha(startc) != 0 || startc == '_') {
Expand Down
26 changes: 20 additions & 6 deletions xls/dslx/frontend/scanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include "absl/types/span.h"
#include "xls/common/logging/logging.h"
#include "xls/common/status/status_macros.h"
#include "xls/common/test_macros.h"
#include "xls/dslx/frontend/comment_data.h"
#include "xls/dslx/frontend/pos.h"
#include "xls/dslx/frontend/token.h"
Expand Down Expand Up @@ -93,12 +94,6 @@ class Scanner {
return AtCharEof();
}

// Proceeds through the stream until an unescaped double quote is encountered.
//
// Note: since there are special character escapes in strings, they are
// specially handled by this routine when an open quote is encountered.
absl::StatusOr<std::string> ScanUntilDoubleQuote();

void EnableDoubleCAngle() { double_c_angle_enabled_ = true; }
void DisableDoubleCAngle() { double_c_angle_enabled_ = false; }

Expand All @@ -114,18 +109,37 @@ class Scanner {
}

private:
// These tests generally use the private ScanUntilDoubleQuote() helper.
XLS_FRIEND_TEST(ScannerTest, RecognizesEscapes);
XLS_FRIEND_TEST(ScannerTest, StringCharUnicodeBadStartChar);
XLS_FRIEND_TEST(ScannerTest, StringCharUnicodeBadTerminator);
XLS_FRIEND_TEST(ScannerTest, StringCharUnicodeEscapeEmpty);
XLS_FRIEND_TEST(ScannerTest, StringCharUnicodeEscapeNonHexDigit);
XLS_FRIEND_TEST(ScannerTest, StringCharUnicodeInvalidSequence);
XLS_FRIEND_TEST(ScannerTest, StringCharUnicodeMoreThanSixDigits);

// Determines whether string "s" matches a keyword -- if so, returns the
// keyword enum that it corresponds to. Otherwise, typically the caller will
// assume s is an identifier.
static std::optional<Keyword> GetKeyword(std::string_view s);

// Proceeds through the stream until an unescaped double quote is encountered.
//
// Note: since there are special character escapes in strings, they are
// specially handled by this routine when an open quote is encountered.
absl::StatusOr<std::string> ScanUntilDoubleQuote();

// Scans a number token out of the character stream. Note the number may have
// a base determined by a radix-noting prefix; e.g. "0x" or "0b".
//
// Precondition: The character stream must be positioned over either a digit
// or a minus sign.
absl::StatusOr<Token> ScanNumber(char startc, const Pos& start_pos);

// Scans a string token out of the character stream -- character cursor should
// be over the opening quote character.
absl::StatusOr<Token> ScanString(const Pos& start_pos);

// Scans a character literal from the character stream as a character token.
//
// Precondition: The character stream must be positioned at an open quote.
Expand Down
34 changes: 33 additions & 1 deletion xls/dslx/frontend/scanner_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@
#include "absl/status/statusor.h"
#include "xls/common/status/matchers.h"
#include "xls/dslx/error_test_utils.h"
#include "xls/dslx/frontend/comment_data.h"
#include "xls/dslx/frontend/pos.h"
#include "xls/dslx/frontend/token.h"

namespace xls::dslx {
namespace {
Expand All @@ -39,6 +41,8 @@ absl::StatusOr<std::vector<Token>> ToTokens(std::string text) {
return s.PopAll();
}

} // namespace

TEST(ScannerTest, SimpleTokens) {
XLS_ASSERT_OK_AND_ASSIGN(std::vector<Token> tokens, ToTokens("+ - ++ << >>"));
ASSERT_EQ(5, tokens.size());
Expand Down Expand Up @@ -323,6 +327,18 @@ TEST(ScannerTest, HexCharLiteralBadDigit) {
IsPosError("ScanError", HasSubstr("Only hex digits are allowed")));
}

TEST(ScannerTest, NoCloseQuoteOnString) {
std::string text = R"("abc)";
Scanner s("fake_file.x", text);
absl::StatusOr<std::vector<Token>> result = s.PopAll();
EXPECT_THAT(
result.status(),
IsPosError(
"ScanError",
HasSubstr(
"Reached end of file without finding a closing double quote.")));
}

TEST(ScannerTest, StringCharUnicodeEscapeNonHexDigit) {
std::string text = R"(\u{jk}")";
Scanner s("fake_file.x", text);
Expand Down Expand Up @@ -389,6 +405,23 @@ TEST(ScannerTest, StringCharUnicodeBadStartChar) {
"be followed by a character code, such as \"{...}\"")));
}

TEST(ScannerTest, SimpleString) {
std::string text = R"({"hello world!"})";
Scanner s("fake_file.x", text);

XLS_ASSERT_OK_AND_ASSIGN(Token ocurl, s.Pop());
EXPECT_EQ(ocurl.kind(), TokenKind::kOBrace);

XLS_ASSERT_OK_AND_ASSIGN(Token t, s.Pop());

XLS_ASSERT_OK_AND_ASSIGN(Token ccurl, s.Pop());
EXPECT_EQ(ccurl.kind(), TokenKind::kCBrace);

ASSERT_TRUE(s.AtEof());
EXPECT_EQ(t.kind(), TokenKind::kString);
EXPECT_EQ(*t.GetValue(), "hello world!");
}

TEST(ScannerTest, SimpleCommentData) {
std::string text = R"(// I haz comments!)";
Scanner s("fake_file.x", text);
Expand Down Expand Up @@ -448,5 +481,4 @@ bar // another thing
}
}

} // namespace
} // namespace xls::dslx
3 changes: 3 additions & 0 deletions xls/dslx/frontend/token.cc
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,9 @@ std::string Token::ToString() const {
if (kind() == TokenKind::kCharacter) {
return absl::StrCat("'", GetValue().value(), "'");
}
if (kind() == TokenKind::kString) {
return absl::StrCat("\"", GetValue().value(), "\"");
}
if (GetValue().has_value()) {
return GetValue().value();
}
Expand Down
1 change: 1 addition & 0 deletions xls/dslx/frontend/token.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ namespace xls::dslx {
X(kDoubleDot, DOUBLE_DOT, "..") \
X(kEllipsis, ELLIPSIS, "...") \
X(kHash, HASH, "#") \
X(kString, STRING, "string") \
/* When in whitespace/comment mode; e.g. for syntax highlighting. */ \
X(kWhitespace, WHITESPACE, "whitespace") \
X(kComment, COMMENT, "comment")
Expand Down
14 changes: 2 additions & 12 deletions xls/dslx/frontend/token_parser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@
#include "absl/types/span.h"
#include "xls/common/status/status_macros.h"
#include "xls/dslx/frontend/bindings.h"
#include "xls/dslx/frontend/pos.h"
#include "xls/dslx/frontend/scanner.h"
#include "xls/dslx/frontend/token.h"

namespace xls::dslx {

Expand Down Expand Up @@ -124,16 +126,4 @@ absl::Status TokenParser::DropKeywordOrError(Keyword target, Pos* limit_pos) {
return absl::OkStatus();
}

absl::StatusOr<std::string> TokenParser::PopString(Span* span) {
XLS_RETURN_IF_ERROR(DropTokenOrError(TokenKind::kDoubleQuote));
const Pos start = scanner_->GetPos();
XLS_ASSIGN_OR_RETURN(std::string text, scanner_->ScanUntilDoubleQuote());
const Pos end = scanner_->GetPos();
XLS_RETURN_IF_ERROR(DropTokenOrError(TokenKind::kDoubleQuote));
if (span != nullptr) {
*span = Span(start, end);
}
return text;
}

} // namespace xls::dslx
17 changes: 0 additions & 17 deletions xls/dslx/frontend/token_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -200,23 +200,6 @@ class TokenParser {

absl::Status DropKeywordOrError(Keyword target, Pos* limit_pos = nullptr);

// Returns a string if present at the head of the token string. This will
// match stream contents of "abcd....xyz" - including the quotation marks The
// quotation marks will not be present in the returned string.
//
// When an open quotation mark is seen, characters will be consumed until an
// unescaped quotation mark is seen. In other words, ..." will terminate the
// string, but ...\" will not.
//
// Optional "span" is filled with the span the string occupies, exlcuding the
// surrounding quotes.
//
// TODO(leary): 2023-08-19 Strings are tokens just like any other -- this
// routine should implicitly be part of the sequence given by
// Scanner::PopAll() yielding a string token instead of needing to be
// explicitly called on TokenParser via some known parsin context.
absl::StatusOr<std::string> PopString(Span* span = nullptr);

void DisableDoubleCAngle() { scanner_->DisableDoubleCAngle(); }
void EnableDoubleCAngle() { scanner_->EnableDoubleCAngle(); }

Expand Down

0 comments on commit 494b048

Please sign in to comment.