Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(parser) Support hexadecimal number literals #217

Merged
merged 1 commit into from
Oct 11, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Directory.Build.props
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
<PackageReference Include="StyleCop.Analyzers" Version="1.2.0-beta.321" PrivateAssets="all" />
<AdditionalFiles Include="$(SolutionDir)stylecop.json" Link="stylecop.json" />

<PackageReference Include="SonarAnalyzer.CSharp" Version="8.17.0.26580" />
<PackageReference Include="SonarAnalyzer.CSharp" Version="8.22.0.31243" />
perlun marked this conversation as resolved.
Show resolved Hide resolved
</ItemGroup>

<!--
Expand Down
68 changes: 60 additions & 8 deletions src/Perlang.Parser/Scanner.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
using System;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Globalization;
using System.Linq;
using System.Numerics;
using static Perlang.TokenType;
Expand Down Expand Up @@ -261,7 +262,9 @@ private void ScanToken()
break;

default:
if (IsDigit(c))
// Even if the literal is written in a base other than 10 (binary, hexadecimal, etc.), it always starts
// with a "normal" (decimal) digit because of the prefix characters - e.g. 0x1234.
if (IsDigit(c, Base.DECIMAL))
{
Number();
}
Expand Down Expand Up @@ -295,21 +298,36 @@ private void Identifier()
private void Number()
{
bool isFractional = false;
var numberStyles = NumberStyles.Any;
var numberBase = Base.DECIMAL;
int startOffset = 0;

while (IsDigit(Peek()))
if (Char.ToLower(Peek()) == 'x')
{
numberStyles = NumberStyles.HexNumber;
numberBase = Base.HEXADECIMAL;

// Moving the `start` pointer forward is important, since `BigInteger.Parse()` does not accept a prefix
// like 0x or 0X being present. Adding a `startOffset` here feels safer than mutating `start`,
// especially in case parsing fails somehow.
Advance();
startOffset = 2;
}

while (IsDigit(Peek(), numberBase))
{
Advance();
}

// Look for a fractional part.
if (Peek() == '.' && IsDigit(PeekNext()))
if (Peek() == '.' && IsDigit(PeekNext(), numberBase))
{
isFractional = true;

// Consume the "."
Advance();

while (IsDigit(Peek()))
while (IsDigit(Peek(), numberBase))
{
Advance();
}
Expand All @@ -325,7 +343,28 @@ private void Number()
// the number as an unsigned value. However, we still try to coerce it to the smallest signed or
// unsigned integer type in which it will fit (but never smaller than 32-bit). This coincidentally
// follows the same semantics as how C# does it, for simplicity.
BigInteger value = BigInteger.Parse(source[start..current]);
BigInteger value;

if (numberBase == Base.HEXADECIMAL)
{
string numberCharacters = source[(start + startOffset)..current];

// Quoting from
//https://docs.microsoft.com/en-us/dotnet/api/system.numerics.biginteger.parse?view=net-5.0#System_Numerics_BigInteger_Parse_System_ReadOnlySpan_System_Char__System_Globalization_NumberStyles_System_IFormatProvider_
//
// If value is a hexadecimal string, the Parse(String, NumberStyles) method interprets value as a
// negative number stored by using two's complement representation if its first two hexadecimal
// digits are greater than or equal to 0x80. In other words, the method interprets the highest-order
// bit of the first byte in value as the sign bit. To make sure that a hexadecimal string is
// correctly interpreted as a positive number, the first digit in value must have a value of zero.
//
// We presume that all hexadecimals should be treated as positive numbers for now.
value = BigInteger.Parse('0' + numberCharacters, numberStyles);
}
else
{
value = BigInteger.Parse(source[(start + startOffset)..current], numberStyles);
}

if (value < Int32.MaxValue)
{
Expand Down Expand Up @@ -436,10 +475,17 @@ private static bool IsAlpha(char c)
}

private static bool IsAlphaNumeric(char c) =>
IsAlpha(c) || IsDigit(c);
IsAlpha(c) || IsDigit(c, Base.DECIMAL);

private static bool IsDigit(char c) =>
c >= '0' && c <= '9';
/// <summary>
/// Determines whether <paramref name="c"/> is a valid digit in the given numeric base.
/// </summary>
/// <param name="c">The character to classify.</param>
/// <param name="base">The numeric base; its underlying integer value selects the accepted digit set.</param>
/// <returns><c>true</c> if the character is a digit in that base; otherwise <c>false</c>.</returns>
/// <exception cref="ArgumentException">The base is not one of 2, 8, 10 or 16.</exception>
private static bool IsDigit(char c, Base @base)
{
    // The switch works on the underlying integer so that bases without a named enum member (2 and 8) are
    // still handled when a caller passes them via a cast.
    switch ((int)@base)
    {
        case 2:
            return c == '0' || c == '1';

        case 8:
            return '0' <= c && c <= '7';

        case 10:
            return '0' <= c && c <= '9';

        case 16:
        {
            bool isDecimalDigit = '0' <= c && c <= '9';

            // Hexadecimal letters are accepted in either case.
            bool isHexLetter = ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F');

            return isDecimalDigit || isHexLetter;
        }

        default:
            throw new ArgumentException($"Base {@base} is not supported");
    }
}

private bool IsAtEnd() =>
current >= source.Length;
Expand All @@ -455,5 +501,11 @@ private void AddToken(TokenType type, object literal = null)
string text = source[start..current];
tokens.Add(new Token(type, text, literal, line));
}

/// <summary>
/// The numeric bases that the scanner distinguishes when reading number literals. Each member's underlying
/// value is the radix itself, which is what <c>IsDigit</c> switches on after casting to <c>int</c>.
/// </summary>
private enum Base
{
/// <summary>Base 10; the default for number literals without a prefix.</summary>
DECIMAL = 10,

/// <summary>Base 16; selected when a number literal carries an <c>x</c>/<c>X</c> prefix marker.</summary>
HEXADECIMAL = 16
}
}
}
12 changes: 12 additions & 0 deletions src/Perlang.Tests.Integration/Number/NumberTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -106,5 +106,17 @@ public void literal_negative_float()

Assert.Equal(-0.001, result);
}

// Verifies that a hexadecimal literal with a 0x prefix evaluates to its positive numeric value.
[Fact]
public void literal_hexadecimal()
{
string source = @"
0xC0CAC01A
";

object result = Eval(source);

// 0xC0CAC01A is 3234512922 in decimal, which is larger than Int32.MaxValue. The leading hex digit (C)
// has its high bit set, so this also checks that the literal is treated as a positive number rather
// than as a negative two's complement value.
Assert.Equal(3234512922, result);
}
}
}
13 changes: 13 additions & 0 deletions src/Perlang.Tests/Interpreter/Typing/TypeResolverTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,19 @@ public void Resolve_var_with_long_type_defines_variable_with_expected_ClrType()
Assert.Equal(typeof(Int64), ((Stmt.Var)singleStatement).TypeReference.ClrType);
}

[Fact]
public void Resolve_implicitly_typed_var_initialized_from_hexadecimal_literal_has_expected_ClrType()
{
(Stmt singleStatement, NameResolver resolver) = ScanParseResolveAndTypeResolveSingleStatement(@"
var v = 0xC0CAC01A;
");

// Assert
Assert.IsType<Stmt.Var>(singleStatement);
Assert.True(resolver.Globals.ContainsKey("v"));
Assert.Equal(typeof(UInt32), ((Stmt.Var)singleStatement).TypeReference.ClrType);
perlun marked this conversation as resolved.
Show resolved Hide resolved
}

[Fact]
public void Resolve_implicitly_typed_var_initialized_from_long_var_has_expected_ClrType()
{
Expand Down