Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add digit separators #1160

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
Prev Previous commit
Next Next commit
Tests and fixes.
  • Loading branch information
seizethedave committed Jun 19, 2024
commit 8d19efd3a27c485342feb3a42d9b8f051f12664f
43 changes: 34 additions & 9 deletions core/lexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,8 @@ std::string lex_number(const char *&c, const std::string &filename, const Locati
AFTER_E,
AFTER_EXP_SIGN,
AFTER_EXP_DIGIT,
AFTER_UNDERSCORE
AFTER_UNDERSCORE,
AFTER_EXP_UNDERSCORE
} state;

std::string r;
Expand Down Expand Up @@ -330,12 +331,39 @@ std::string lex_number(const char *&c, const std::string &filename, const Locati
case '8':
case '9': state = AFTER_DIGIT; break;

//case '_': state = AFTER_UNDERSCORE; goto skip_char;
case '_': state = AFTER_UNDERSCORE; goto skip_char;

default: goto end;
}
break;

case AFTER_UNDERSCORE:
switch (*c) {
case '_': {
throw StaticError(filename, begin, "couldn't lex number, multiple consecutive _'s");
}

// The only valid transition from _ is to a digit.

case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9': state = AFTER_ONE_TO_NINE; break;

default: {
std::stringstream ss;
ss << "couldn't lex number, junk after _: " << *c;
throw StaticError(filename, begin, ss.str());
}
}
break;

case AFTER_E:
switch (*c) {
case '+':
Expand Down Expand Up @@ -394,19 +422,16 @@ std::string lex_number(const char *&c, const std::string &filename, const Locati
case '8':
case '9': state = AFTER_EXP_DIGIT; break;

case '_': state = AFTER_UNDERSCORE; goto skip_char;
case '_': state = AFTER_EXP_UNDERSCORE; goto skip_char;

default: goto end;
}
break;

case AFTER_UNDERSCORE:
case AFTER_EXP_UNDERSCORE:
switch (*c) {
case '_': {
// Can't do repeated _s.
std::stringstream ss;
ss << "couldn't lex number, multiple consecutive _'s: " << *c;
throw StaticError(filename, begin, ss.str());
throw StaticError(filename, begin, "couldn't lex number, multiple consecutive _'s");
}

// The only valid transition from _ is to a digit.
Expand All @@ -432,7 +457,7 @@ std::string lex_number(const char *&c, const std::string &filename, const Locati
}
r += *c;

skip_char:
skip_char:
c++;
}
end:
Expand Down
20 changes: 20 additions & 0 deletions core/lexer_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,27 @@ TEST(Lexer, TestNumbers)
"1e+!",
{},
"number 1e+!:1:1: couldn't lex number, junk after exponent sign: !");
}

// Exercises the lexer on numeric literals that contain '_' digit separators.
// NOTE(review): the testLex arguments appear to be (case name, input text,
// expected tokens, expected error message) -- confirm against the helper.
TEST(Lexer, TestNumbersWithSeparators)
{
// Separators in the integer or fractional part: the expected NUMBER token
// text is the input with every '_' removed.
testLex("number 123_456", "123_456", {Token(Token::Kind::NUMBER, "123456")}, "");
testLex("number 1_750_000", "1_750_000", {Token(Token::Kind::NUMBER, "1750000")}, "");
testLex("number 1_2_3", "1_2_3", {Token(Token::Kind::NUMBER, "123")}, "");
testLex("number 3.141_592", "3.141_592", {Token(Token::Kind::NUMBER, "3.141592")}, "");

// Separators combined with a decimal point and/or an exponent, including
// separators placed between the exponent digits themselves.
testLex("number 1_2.0", "1_2.0", {Token(Token::Kind::NUMBER, "12.0")}, "");
testLex("number 0e1_01", "0e1_01", {Token(Token::Kind::NUMBER, "0e101")}, "");
testLex("number 10_10e3", "10_10e3", {Token(Token::Kind::NUMBER, "1010e3")}, "");
testLex("number 2_3e1_2", "2_3e1_2", {Token(Token::Kind::NUMBER, "23e12")}, "");
testLex("number 1.1_2e100", "1.1_2e100", {Token(Token::Kind::NUMBER, "1.12e100")}, "");
testLex("number 1.1e-10_1", "1.1e-10_1", {Token(Token::Kind::NUMBER, "1.1e-101")}, "");

// Malformed literals: no tokens are expected, only an error message.
// A separator followed by a non-digit is reported as junk after '_'.
testLex("number 123456_!", "123456_!", {}, "number 123456_!:1:1: couldn't lex number, junk after _: !");
// Two separators in a row are reported as consecutive underscores.
testLex("number 123__456",
"123__456",
{},
"number 123__456:1:1: couldn't lex number, multiple consecutive _'s");
}

TEST(Lexer, TestDoubleStrings)
Expand Down Expand Up @@ -330,6 +349,7 @@ TEST(Lexer, TestIdentifier)
"foo bar123",
{Token(Token::Kind::IDENTIFIER, "foo"), Token(Token::Kind::IDENTIFIER, "bar123")},
"");
testLex("identifier _123", "_123", {Token(Token::Kind::IDENTIFIER, "_123")}, "");
}

TEST(Lexer, TestComments)
Expand Down