Skip to content

Commit 7847b75

Browse files
authored
Convert Perl scripts to C# in coreclr (#104867)
* Convert Perl scripts to C# in coreclr * Delete obsolete IL script * Add license header and regenerate * Add a README * Mention generator in ilasm readme and cleanups
1 parent fcc916c commit 7847b75

File tree

9 files changed

+121
-667
lines changed

9 files changed

+121
-667
lines changed

src/coreclr/dlls/mscordac/update.pl

Lines changed: 0 additions & 34 deletions
This file was deleted.
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
2+
3+
<!-- Builds an executable (OutputType Exe) with nullable reference types enabled.
     The target framework comes from the NetCoreAppToolCurrent MSBuild property,
     which is not defined in this file. -->
<PropertyGroup>
4+
<OutputType>Exe</OutputType>
5+
<TargetFramework>$(NetCoreAppToolCurrent)</TargetFramework>
6+
<Nullable>enable</Nullable>
7+
</PropertyGroup>
8+
9+
</Project>
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
using System;
5+
using System.IO;
6+
using System.Text.RegularExpressions;
7+
using static Patterns;
8+
9+
// Entry point. Reads the file named by the single command-line argument, rewrites the
// grammar text found between its %% markers, and prints a fixed header followed by the
// rewritten grammar to standard output.
// Returns 0 on success and 1 when the arguments are wrong, the file cannot be read,
// or the %% markers are not found.
if (args.Length != 1)
{
    Console.Error.WriteLine("Usage: <file>");
    return 1;
}

string filePath = args[0];

string fileContent;
try
{
    fileContent = File.ReadAllText(filePath);
}
catch (Exception ex) when (ex is IOException or UnauthorizedAccessException or ArgumentException)
{
    // Report an unreadable input like every other failure here (message on stderr,
    // exit code 1) instead of terminating with an unhandled exception.
    Console.Error.WriteLine($"Could not read '{filePath}': {ex.Message}");
    return 1;
}

var match = GetRegexExtractMarkers().Match(fileContent);
if (!match.Success)
{
    Console.Error.WriteLine("Could not find %% markers");
    return 1;
}

// Group 2 is the text between the %% markers; group 1 (the text before them) is not used.
string grammar = match.Groups[2].Value;

// Remove any text in {}.
// The pattern only matches a brace pair whose body contains no braces, so nested
// pairs are stripped innermost-first by repeating until the text stops changing.
var regexRemoveTextInBraces = GetRegexRemoveTextInBraces();
string previousGrammar;

do
{
    previousGrammar = grammar;
    grammar = regexRemoveTextInBraces.Replace(grammar, "$1");
} while (grammar != previousGrammar);

// Change keyword identifiers (upper-case names ending in '_') into the quoted,
// lowercase string they represent.
grammar = GetRegexKeywordIdentifiers().Replace(grammar, m => $"'{m.Groups[1].Value.ToLowerInvariant()}'");

// Change assembler directives (upper-case names starting with '_') into their quoted,
// lowercase string with a leading period.
grammar = GetRegexAssemblerDirectives().Replace(grammar, m => $"'.{m.Groups[1].Value.ToLowerInvariant()}'");

// Handle special punctuation
grammar = GetRegexEllipsis().Replace(grammar, "'...'");
grammar = GetRegexDcolon().Replace(grammar, "'::'");

// Print the output header
Console.Write(@"// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

Lexical tokens
ID - C style alphaNumeric identifier (e.g. Hello_There2)
DOTTEDNAME - Sequence of dot-separated IDs (e.g. System.Object)
QSTRING - C style quoted string (e.g. ""hi\n"")
SQSTRING - C style singlely quoted string(e.g. 'hi')
INT32 - C style 32 bit integer (e.g. 235, 03423, 0x34FFF)
INT64 - C style 64 bit integer (e.g. -2353453636235234, 0x34FFFFFFFFFF)
FLOAT64 - C style floating point number (e.g. -0.2323, 354.3423, 3435.34E-5)
INSTR_* - IL instructions of a particular class (see opcode.def).
HEXBYTE - 1- or 2-digit hexadecimal number (e.g., A2, F0).
Auxiliary lexical tokens
TYPEDEF_T - Aliased class (TypeDef or TypeRef).
TYPEDEF_M - Aliased method.
TYPEDEF_F - Aliased field.
TYPEDEF_TS - Aliased type specification (TypeSpec).
TYPEDEF_MR - Aliased field/method reference (MemberRef).
TYPEDEF_CA - Aliased Custom Attribute.
----------------------------------------------------------------------------------
START : decls
;");

// Print the output
Console.Write(grammar);

return 0;
78+
79+
/// <summary>
/// Source-generated regular expressions used to rewrite the grammar text.
/// </summary>
internal static partial class Patterns
{
    // Every pattern is compiled with the same option.
    private const RegexOptions Options = RegexOptions.Singleline;

    /// <summary>
    /// Matches from the start of the input through a pair of <c>%%</c> markers.
    /// Group 1 is the text before the markers and group 2 the text between them;
    /// both groups are greedy and may span newlines.
    /// </summary>
    [GeneratedRegex(@"^(.*)%%(.*)%%", Options)]
    internal static partial Regex GetRegexExtractMarkers();

    /// <summary>
    /// Matches optional whitespace, one character other than a single quote (group 1),
    /// and a <c>{...}</c> pair whose body contains no braces.
    /// </summary>
    [GeneratedRegex(@"\s*([^'])\{[^{}]*\}", Options)]
    internal static partial Regex GetRegexRemoveTextInBraces();

    /// <summary>
    /// Matches a word made of <c>A-Z</c>, <c>0-9</c> and <c>_</c> that ends in <c>_</c>.
    /// Group 1 is the word without that trailing underscore.
    /// </summary>
    [GeneratedRegex(@"\b([A-Z0-9_]+)_\b", Options)]
    internal static partial Regex GetRegexKeywordIdentifiers();

    /// <summary>
    /// Matches a word that starts with <c>_</c> followed by <c>A-Z</c>/<c>0-9</c> characters.
    /// Group 1 is the word without the leading underscore.
    /// </summary>
    [GeneratedRegex(@"\b_([A-Z0-9]+)\b", Options)]
    internal static partial Regex GetRegexAssemblerDirectives();

    /// <summary>Matches the whole word <c>ELLIPSIS</c>.</summary>
    [GeneratedRegex(@"\bELLIPSIS\b", Options)]
    internal static partial Regex GetRegexEllipsis();

    /// <summary>Matches the whole word <c>DCOLON</c>.</summary>
    [GeneratedRegex(@"\bDCOLON\b", Options)]
    internal static partial Regex GetRegexDcolon();
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# Grammar extractor for IL tools
2+
3+
Tool that extracts the IL grammar in `Backus-Naur Form (BNF)` from a `Yet Another Compiler-Compiler (Yacc)` grammar file.
4+
5+
Usage:
6+
7+
```sh
8+
cd runtime
9+
./dotnet.sh run --project src/coreclr/ilasm/GrammarExtractor src/coreclr/ilasm/asmparse.y > src/coreclr/ilasm/prebuilt/asmparse.grammar
10+
```

src/coreclr/ilasm/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,4 @@ $ docker run --rm -v$(pwd):/runtime -w /runtime/src/coreclr/ilasm alpine \
1616
sh -c 'apk add bison && yacc asmparse.y -o prebuilt/asmparse.cpp'
1717
```
1818

19+
To regenerate the grammar file, see [GrammarExtractor README](GrammarExtractor/README.md).

src/coreclr/ilasm/extractGrammar.pl

Lines changed: 0 additions & 59 deletions
This file was deleted.

src/coreclr/ilasm/prebuilt/asmparse.grammar

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -76,11 +76,11 @@ dottedName : id
7676
| dottedName '.' dottedName
7777
;
7878

79-
int32 : INT32
79+
int32 : INT32_V
8080
;
8181

82-
int64 : INT64
83-
| INT32
82+
int64 : INT64_V
83+
| INT32_V
8484
;
8585

8686
float64 : FLOAT64

0 commit comments

Comments
 (0)