Add digit separators #1160

Open
wants to merge 11 commits into master
66 changes: 62 additions & 4 deletions core/lexer.cpp
@@ -217,19 +217,23 @@ std::string lex_number(const char *&c, const std::string &filename, const Locati
// https://www.json.org/img/number.png

// Note, we deviate from the json.org documentation as follows:
// * There is no reason to lex negative numbers as atomic tokens, it is better to parse them
// as a unary operator combined with a numeric literal. This avoids x-1 being tokenized as
// <identifier> <number> instead of the intended <identifier> <binop> <number>.
// * We support digit separators using the _ character for readability in
// large numeric literals.

enum State {
BEGIN,
AFTER_ZERO,
AFTER_ONE_TO_NINE,
AFTER_DOT,
AFTER_DIGIT,
AFTER_UNDERSCORE,
AFTER_E,
AFTER_EXP_SIGN,
AFTER_EXP_DIGIT,
AFTER_EXP_UNDERSCORE
} state;

std::string r;
@@ -262,6 +266,8 @@ std::string lex_number(const char *&c, const std::string &filename, const Locati
case 'e':
case 'E': state = AFTER_E; break;

case '_': state = AFTER_UNDERSCORE; goto skip_char;

default: goto end;
}
break;
@@ -284,6 +290,8 @@ std::string lex_number(const char *&c, const std::string &filename, const Locati
case '8':
case '9': state = AFTER_ONE_TO_NINE; break;

case '_': state = AFTER_UNDERSCORE; goto skip_char;

default: goto end;
}
break;
@@ -325,10 +333,34 @@ std::string lex_number(const char *&c, const std::string &filename, const Locati
case '8':
case '9': state = AFTER_DIGIT; break;

case '_': state = AFTER_UNDERSCORE; goto skip_char;

default: goto end;
}
break;

case AFTER_UNDERSCORE:
switch (*c) {
// The only valid transition from _ is to a digit.
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9': state = AFTER_ONE_TO_NINE; break;

default: {
std::stringstream ss;
ss << "couldn't lex number, junk after _: " << *c;
throw StaticError(filename, begin, ss.str());
}
}
break;

case AFTER_E:
switch (*c) {
case '+':
@@ -386,12 +418,38 @@ std::string lex_number(const char *&c, const std::string &filename, const Locati
case '7':
case '8':
case '9': state = AFTER_EXP_DIGIT; break;

case '_': state = AFTER_EXP_UNDERSCORE; goto skip_char;

default: goto end;
}
break;

case AFTER_EXP_UNDERSCORE:
switch (*c) {
// The only valid transition from _ is to a digit.
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9': state = AFTER_EXP_DIGIT; break;

default: {
std::stringstream ss;
ss << "couldn't lex number, junk after _: " << *c;
throw StaticError(filename, begin, ss.str());
}
}
break;
}
r += *c;

skip_char:
c++;
}
end:
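The diff above extends the number state machine with two new states, AFTER_UNDERSCORE and AFTER_EXP_UNDERSCORE, so an underscore is legal only between two digits and is skipped (via the skip_char label) rather than copied into the token text. The following standalone C++ sketch shows the same separator-skipping idea in isolation. It is illustrative only: the name scan_number and its error messages are invented here, it validates only separator placement, and it does not enforce the rest of the JSON number grammar the way the real lexer does.

// Standalone sketch of digit-separator handling (not the PR's lex_number).
// Underscores are accepted only between two decimal digits and are dropped
// from the returned token text.
#include <cctype>
#include <iostream>
#include <stdexcept>
#include <string>

std::string scan_number(const std::string &input)
{
    std::string out;
    bool prev_was_digit = false;
    for (size_t i = 0; i < input.size(); ++i) {
        char ch = input[i];
        if (std::isdigit(static_cast<unsigned char>(ch))) {
            out += ch;
            prev_was_digit = true;
        } else if (ch == '_') {
            // A separator must sit between two digits.
            bool next_is_digit = i + 1 < input.size() &&
                std::isdigit(static_cast<unsigned char>(input[i + 1]));
            if (!prev_was_digit || !next_is_digit)
                throw std::runtime_error("misplaced digit separator");
            prev_was_digit = false;  // drop the underscore, keep scanning
        } else if (ch == '.' || ch == 'e' || ch == 'E' || ch == '+' || ch == '-') {
            out += ch;  // the shape of the number itself is not validated here
            prev_was_digit = false;
        } else {
            throw std::runtime_error("unexpected character in number");
        }
    }
    return out;
}

int main()
{
    std::cout << scan_number("1_750_000") << "\n";         // prints 1750000
    std::cout << scan_number("9.109_383_56e-31") << "\n";  // prints 9.10938356e-31
    return 0;
}

Inputs such as 123__456 or 1_200_.0 throw in this sketch, matching the "junk after _" errors exercised below in core/lexer_test.cpp.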
50 changes: 50 additions & 0 deletions core/lexer_test.cpp
@@ -117,6 +117,55 @@ TEST(Lexer, TestNumbers)
"number 1e+!:1:1: couldn't lex number, junk after exponent sign: !");
}

TEST(Lexer, TestNumbersWithSeparators)
{
testLex("number 123_456", "123_456", {Token(Token::Kind::NUMBER, "123456")}, "");
testLex("number 1_750_000", "1_750_000", {Token(Token::Kind::NUMBER, "1750000")}, "");
testLex("number 1_2_3", "1_2_3", {Token(Token::Kind::NUMBER, "123")}, "");
testLex("number 3.141_592", "3.141_592", {Token(Token::Kind::NUMBER, "3.141592")}, "");
testLex("number 01_100", "01_100", {Token(Token::Kind::NUMBER, "0"), Token(Token::Kind::NUMBER, "1100")}, "");
testLex("number 1_200.0", "1_200.0", {Token(Token::Kind::NUMBER, "1200.0")}, "");
testLex("number 0e1_01", "0e1_01", {Token(Token::Kind::NUMBER, "0e101")}, "");
testLex("number 10_10e3", "10_10e3", {Token(Token::Kind::NUMBER, "1010e3")}, "");
testLex("number 2_3e1_2", "2_3e1_2", {Token(Token::Kind::NUMBER, "23e12")}, "");
testLex("number 1.1_2e100", "1.1_2e100", {Token(Token::Kind::NUMBER, "1.12e100")}, "");
testLex("number 1.1e-10_1", "1.1e-10_1", {Token(Token::Kind::NUMBER, "1.1e-101")}, "");
testLex("number 9.109_383_56e-31", "9.109_383_56e-31", {Token(Token::Kind::NUMBER, "9.10938356e-31")}, "");

testLex("number 123456_!",
"123456_!",
{},
"number 123456_!:1:1: couldn't lex number, junk after _: !");
testLex("number 123__456",
"123__456",
{},
"number 123__456:1:1: couldn't lex number, junk after _: _");
testLex("number 1_200_.0",
"1_200_.0",
{},
"number 1_200_.0:1:1: couldn't lex number, junk after _: .");
testLex("number 1_200._0",
"1_200._0",
{},
"number 1_200._0:1:1: couldn't lex number, junk after decimal point: _");
testLex("number 1_200_e2",
"1_200_e2",
{},
"number 1_200_e2:1:1: couldn't lex number, junk after _: e");
testLex("number 1_200e_2",
"1_200e_2",
{},
"number 1_200e_2:1:1: couldn't lex number, junk after 'E': _");
testLex("number 200e-_2",
"200e-_2",
{},
"number 200e-_2:1:1: couldn't lex number, junk after exponent sign: _");
testLex("number 200e+_2",
"200e+_2",
{},
"number 200e+_2:1:1: couldn't lex number, junk after exponent sign: _");
}

TEST(Lexer, TestDoubleStrings)
{
testLex("double string \"hi\"", "\"hi\"", {Token(Token::Kind::STRING_DOUBLE, "hi")}, "");
@@ -328,6 +377,7 @@ TEST(Lexer, TestIdentifier)
"foo bar123",
{Token(Token::Kind::IDENTIFIER, "foo"), Token(Token::Kind::IDENTIFIER, "bar123")},
"");
testLex("identifier _123", "_123", {Token(Token::Kind::IDENTIFIER, "_123")}, "");
}

TEST(Lexer, TestComments)
29 changes: 14 additions & 15 deletions doc/_includes/examples/syntax.jsonnet
@@ -3,21 +3,6 @@
{
cocktails: {
// Ingredient quantities are in fl oz.
'Tom Collins': {
ingredients: [
{ kind: "Farmer's Gin", qty: 1.5 },
{ kind: 'Lemon', qty: 1 },
{ kind: 'Simple Syrup', qty: 0.5 },
{ kind: 'Soda', qty: 2 },
{ kind: 'Angostura', qty: 'dash' },
],
garnish: 'Maraschino Cherry',
served: 'Tall',
description: |||
The Tom Collins is essentially gin and
lemonade. The bitters add complexity.
|||,
},
Manhattan: {
ingredients: [
{ kind: 'Rye', qty: 2.5 },
@@ -28,5 +13,19 @@
served: 'Straight Up',
description: @'A clear \ red drink.',
},
'Trinidad Sour': {
ingredients: [
{ kind: 'Angostura bitters', qty: 1.333_333 },
{ kind: 'Rye whiskey', qty: 0.5 },
{ kind: 'Fresh lemon juice', qty: 0.75 },
{ kind: 'Orgeat syrup', qty: 1 },
],
garnish: 'Lemon twist',
served: 'chilled Nick & Nora glass',
description: |||
Boldly balanced: 1 1/3 oz Angostura
transforms bitters into the star spirit.
|||,
},
},
}
26 changes: 11 additions & 15 deletions doc/_includes/examples/syntax.jsonnet.golden
@@ -19,32 +19,28 @@
],
"served": "Straight Up"
},
"Tom Collins": {
"description": "The Tom Collins is essentially gin and\nlemonade. The bitters add complexity.\n",
"garnish": "Maraschino Cherry",
"Trinidad Sour": {
"description": "Boldly balanced: 1 1/3 oz Angostura\ntransforms bitters into the star spirit.\n",
"garnish": "Lemon twist",
"ingredients": [
{
"kind": "Farmer's Gin",
"qty": 1.5
},
{
"kind": "Lemon",
"qty": 1
"kind": "Angostura bitters",
"qty": 1.333333
},
{
"kind": "Simple Syrup",
"kind": "Rye whiskey",
"qty": 0.5
},
{
"kind": "Soda",
"qty": 2
"kind": "Fresh lemon juice",
"qty": 0.75
},
{
"kind": "Angostura",
"qty": "dash"
"kind": "Orgeat syrup",
"qty": 1
}
],
"served": "Tall"
"served": "chilled Nick & Nora glass"
}
}
}
3 changes: 3 additions & 0 deletions doc/learning/tutorial.html
@@ -60,6 +60,9 @@ <h2 id="syntax">Syntax</h2>
<li>
Verbatim strings <code>@'foo'</code> and <code>@"foo"</code> are for single lines.
</li>
<li>
Large numeric literals can be made more readable with underscore digit separators, e.g. <code>1_000_000</code>.
</li>
</ul>
<p>
Using the interactive demo below, try modifying the strings / quantities. Try adding a "Dry
15 changes: 13 additions & 2 deletions doc/ref/spec.html
@@ -142,8 +142,19 @@ <h2 id="lexing">Lexing</h2>
</li>
<li>
<p>
<i>number</i>: As defined by <a href="https://json.org/">JSON</a>, with two exceptions:
<ul>
<li>
Numeric literals may be written with underscores (<code>_</code>) between any two adjacent decimal digits
to improve readability. The underscores are discarded by the lexer.
<br />
Examples: <code>1_000_000</code>, <code>0.000_001</code>, <code>6.022_140_76e23</code>
</li>
<li>
Negative numbers are lexed as the <code>-</code> unary operator applied to a positive number to
simplify parsing.
</li>
</ul>
</p>
</li>
<li>
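The second exception restates the reasoning from the comment in core/lexer.cpp: if the lexer consumed a leading minus as part of the literal, x-1 would tokenize as <identifier> <number> and the subtraction would be lost. A rough standalone illustration of the chosen behaviour follows; it is a toy tokenizer with hypothetical names, not jsonnet's lexer, and it skips the separator validation sketched earlier (and ignores whitespace handling entirely).

#include <cctype>
#include <iostream>
#include <string>
#include <vector>

struct Tok {
    std::string kind;
    std::string text;
};

// '-' is never folded into a number; it always becomes its own operator token,
// leaving negation to the parser as a unary operator.
std::vector<Tok> tokenize(const std::string &src)
{
    std::vector<Tok> out;
    for (size_t i = 0; i < src.size();) {
        unsigned char ch = src[i];
        if (std::isalpha(ch)) {
            size_t j = i;
            while (j < src.size() && std::isalnum(static_cast<unsigned char>(src[j])))
                ++j;
            out.push_back({"IDENTIFIER", src.substr(i, j - i)});
            i = j;
        } else if (std::isdigit(ch)) {
            size_t j = i;
            while (j < src.size() &&
                   (std::isdigit(static_cast<unsigned char>(src[j])) || src[j] == '_'))
                ++j;
            out.push_back({"NUMBER", src.substr(i, j - i)});
            i = j;
        } else {
            out.push_back({"OPERATOR", std::string(1, src[i])});
            ++i;
        }
    }
    return out;
}

int main()
{
    // Prints: IDENTIFIER(x) OPERATOR(-) NUMBER(1_000)
    for (const Tok &t : tokenize("x-1_000"))
        std::cout << t.kind << "(" << t.text << ") ";
    std::cout << "\n";
    return 0;
}

The underscore is kept in the token text here only to keep the toy short; the real lexer discards it, as the tests above verify.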
19 changes: 19 additions & 0 deletions test_suite/digitsep.jsonnet
@@ -0,0 +1,19 @@
local cases = [
[123_456, "123_456"],
[1_750_000, "1_750_000"],
[1_2_3, "1_2_3"],
[3.141_592, "3.141_592"],
[1_200.0, "1_200.0"],
[0e1_01, "0e1_01"],
[10_10e3, "10_10e3"],
[2_3e1_2, "2_3e1_2"],
[1.1_2e100, "1.1_2e100"],
[1.1e-10_1, "1.1e-10_1"],
[9.109_383_56e-31, "9.109_383_56e-31"],
];

local sepParse(s) = std.parseJson(std.strReplace(s, "_", ""));

{
test_results: [std.assertEqual(c[0], sepParse(c[1])) for c in cases],
}
15 changes: 15 additions & 0 deletions test_suite/digitsep.jsonnet.golden
@@ -0,0 +1,15 @@
{
"test_results": [
true,
true,
true,
true,
true,
true,
true,
true,
true,
true,
true
]
}
1 change: 1 addition & 0 deletions test_suite/error.std_parseJson.nodigitsep.jsonnet
@@ -0,0 +1 @@
std.parseJson("987_543")
2 changes: 2 additions & 0 deletions test_suite/error.std_parseJson.nodigitsep.jsonnet.golden
@@ -0,0 +1,2 @@
RUNTIME ERROR: [json.exception.parse_error.101] parse error at line 1, column 4: syntax error while parsing value - invalid literal; last read: '987_'; expected end of input
error.std_parseJson.nodigitsep.jsonnet:1:1-25