Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add toml parser #4678

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Add support for TOML parsing
  • Loading branch information
Steve Teplica committed Nov 18, 2024
commit a71cb9dcb43a625efa3ed9f5c7c09caab47a1c07
1 change: 1 addition & 0 deletions IDE.properties.tmp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ rewrite-json
rewrite-maven
rewrite-properties
rewrite-protobuf
rewrite-toml
rewrite-xml
rewrite-yaml

Expand Down
31 changes: 31 additions & 0 deletions rewrite-toml/build.gradle.kts
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// Build plugins for the rewrite-toml module: the OpenRewrite language-library
// convention plugin plus the "antlr" plugin.
// NOTE(review): the "antlr" plugin presumably supplies the `antlr` dependency
// configuration used by the generateAntlrSources task and the dependencies block
// below — confirm it does not also run its own grammar generation in parallel.
plugins {
id("org.openrewrite.build.language-library")
id("antlr")
}

// Runs the ANTLR tool (org.antlr.v4.Tool) over every *.g4 grammar under src/main/antlr,
// writing the generated lexer/parser/visitor sources into the
// org.openrewrite.toml.internal.grammar package inside src/main/java.
tasks.register<JavaExec>("generateAntlrSources") {
    // Every grammar file below src/main/antlr, passed to the tool by path.
    val grammarPaths = fileTree("src/main/antlr")
        .matching { include("**/*.g4") }
        .map { it.path }

    // Output directory, Java package of the generated code, and visitor generation.
    val toolOptions = listOf(
        "-o", "src/main/java/org/openrewrite/toml/internal/grammar",
        "-package", "org.openrewrite.toml.internal.grammar",
        "-visitor"
    )

    mainClass.set("org.antlr.v4.Tool")
    // The tool itself comes from the `antlr` configuration.
    classpath = sourceSets["main"].runtimeClasspath + configurations["antlr"]
    args = toolOptions + grammarPaths
}

// Module dependencies.
dependencies {
// ANTLR tool; the `antlr` configuration is put on the generateAntlrSources classpath.
antlr("org.antlr:antlr4:4.11.1")

// Declared as `api`, so these are part of this module's public compile classpath.
api(project(":rewrite-core"))
api("org.jetbrains:annotations:latest.release")
api("com.fasterxml.jackson.core:jackson-annotations")

// Compile-only: presumably needed because Assertions references
// org.openrewrite.test.SourceSpec / SourceSpecs — TODO confirm those live in rewrite-test.
compileOnly(project(":rewrite-test"))

// ANTLR runtime, same version (4.11.1) as the tool declared above.
implementation("org.antlr:antlr4-runtime:4.11.1")
implementation("io.micrometer:micrometer-core:1.9.+")

testImplementation(project(":rewrite-test"))
}
149 changes: 149 additions & 0 deletions rewrite-toml/src/main/antlr/TomlLexer.g4
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
/*
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
*/

// $antlr-format alignTrailingComments true, columnLimit 150, maxEmptyLinesToKeep 1, reflowComments false, useTab false
// $antlr-format allowShortRulesOnASingleLine true, allowShortBlocksOnASingleLine true, minEmptyLines 0, alignSemicolons ownLine
// $antlr-format alignColons trailing, singleLineOverrulesHangingColon true, alignLexerCommands true, alignLabels true, alignTrailers true

lexer grammar TomlLexer;

// ---------------------------------------------------------------------------
// Default mode: line structure, table headers, keys and comments.
//
// EQUALS pushes SIMPLE_VALUE_MODE so that whatever follows '=' is tokenized
// with the value rules; the value rules pop back to this mode when the value
// has been read.
// ---------------------------------------------------------------------------

// Spaces and tabs are dropped from the token stream.
WS : [ \t]+ -> skip;

// One or more consecutive LF or CRLF line breaks form a single NL token.
NL : ('\r'? '\n')+;

// A comment runs from '#' up to, but not including, the line break.
// CR is excluded as well as LF so that on a CRLF-terminated line the '\r'
// is lexed as part of NL instead of becoming the last character of the
// comment text.
COMMENT : '#' (~[\r\n])*;

// Table header delimiters. '[[' / ']]' (array-of-tables headers) are separate
// tokens from '[' / ']'.
L_BRACKET        : '[';
DOUBLE_L_BRACKET : '[[';
R_BRACKET        : ']';
DOUBLE_R_BRACKET : ']]';

// Key/value separator; switches the lexer to value tokenization.
EQUALS : '=' -> pushMode(SIMPLE_VALUE_MODE);

// Separator between the parts of a dotted key.
DOT   : '.';
COMMA : ',';

fragment DIGIT : [0-9];
fragment ALPHA : [A-Za-z];

// strings
// Backslash escapes allowed inside basic strings. HEX_DIGIT is declared with
// the integer fragments further down in this grammar.
fragment ESC        : '\\' (["\\/bfnrt] | UNICODE | EX_UNICODE);
// \uXXXX
fragment UNICODE    : 'u' HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT;
// \UXXXXXXXX
fragment EX_UNICODE :
    'U' HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT
;
// Single-line "..." string: escapes are interpreted, raw '"', '\' and LF are not allowed.
BASIC_STRING   : '"' (ESC | ~["\\\n])*? '"';
// Single-line '...' string: no escapes, may not contain ''' or LF.
LITERAL_STRING : '\'' (~['\n])*? '\'';

// keys
// Bare key: letters, digits, '-' and '_'.
UNQUOTED_KEY : (ALPHA | DIGIT | '-' | '_')+;

// ---------------------------------------------------------------------------
// SIMPLE_VALUE_MODE: entered after '='. Exactly one value is expected.
// Scalar values pop the mode once matched; '{' and '[' replace this mode with
// INLINE_TABLE_MODE / ARRAY_MODE, whose closing delimiter performs the pop.
// ---------------------------------------------------------------------------
mode SIMPLE_VALUE_MODE;

VALUE_WS : WS -> skip;

// Start of an inline table / array value. mode(...) swaps the current mode
// rather than pushing, so the matching '}' or ']' returns to the mode that was
// active before the '='.
L_BRACE     : '{' -> mode(INLINE_TABLE_MODE);
ARRAY_START : L_BRACKET -> type(L_BRACKET), mode(ARRAY_MODE);

// booleans
BOOLEAN : ('true' | 'false') -> popMode;

// strings
// Inside multi-line basic strings a backslash directly before a line break is
// allowed in addition to the regular escapes.
fragment ML_ESC      : '\\' '\r'? '\n' | ESC;
VALUE_BASIC_STRING   : BASIC_STRING -> type(BASIC_STRING), popMode;
// Multi-line basic string. The body may contain unescaped quotation marks
// (only a run of three ends the string), and TOML additionally permits one or
// two quotation marks immediately before the closing delimiter, e.g.
//     """Two quotes: "". Done."""      and      """ends with a quote""""
// The non-greedy body stops at the first '"""'; the two optional quotes that
// follow absorb a 4- or 5-quote ending so those quotes stay part of the token.
ML_BASIC_STRING      : '"""' (ML_ESC | ~[\\])*? '"""' '"'? '"'? -> popMode;
VALUE_LITERAL_STRING : LITERAL_STRING -> type(LITERAL_STRING), popMode;
// Multi-line literal string: no escapes. As with the basic variant, one or two
// apostrophes may directly precede the closing ''' and belong to the content.
ML_LITERAL_STRING    : '\'\'\'' (.)*? '\'\'\'' '\''? '\''? -> popMode;

// floating point numbers
fragment EXP                 : ('e' | 'E') [+-]? ZERO_PREFIXABLE_INT;
// Digits with optional single underscores between them; leading zeros allowed.
fragment ZERO_PREFIXABLE_INT : DIGIT (DIGIT | '_' DIGIT)*;
fragment FRAC                : '.' ZERO_PREFIXABLE_INT;
// An integer part followed by an exponent, or by a fraction with optional exponent.
FLOAT : DEC_INT ( EXP | FRAC EXP?) -> popMode;
INF   : [+-]? 'inf' -> popMode;
NAN   : [+-]? 'nan' -> popMode;

// integers
fragment HEX_DIGIT : [A-Fa-f] | DIGIT;
fragment DIGIT_1_9 : [1-9];
fragment DIGIT_0_7 : [0-7];
fragment DIGIT_0_1 : [0-1];
// Optional sign, then either a single digit or a multi-digit number that does
// not start with 0; underscores must sit between digits.
DEC_INT : [+-]? (DIGIT | (DIGIT_1_9 (DIGIT | '_' DIGIT)+)) -> popMode;
HEX_INT : '0x' HEX_DIGIT (HEX_DIGIT | '_' HEX_DIGIT)* -> popMode;
OCT_INT : '0o' DIGIT_0_7 (DIGIT_0_7 | '_' DIGIT_0_7)* -> popMode;
BIN_INT : '0b' DIGIT_0_1 (DIGIT_0_1 | '_' DIGIT_0_1)* -> popMode;

// dates
fragment YEAR         : DIGIT DIGIT DIGIT DIGIT;
fragment MONTH        : DIGIT DIGIT;
fragment DAY          : DIGIT DIGIT;
// Separator between date and time: 'T', 't' or a single space.
fragment DELIM        : 'T' | 't' | ' ';
fragment HOUR         : DIGIT DIGIT;
fragment MINUTE       : DIGIT DIGIT;
fragment SECOND       : DIGIT DIGIT;
fragment SECFRAC      : '.' DIGIT+;
fragment NUMOFFSET    : ('+' | '-') HOUR ':' MINUTE;
// UTC designator or numeric offset. Lowercase 'z' is accepted alongside 'Z',
// mirroring the case-insensitive date/time delimiter above.
fragment OFFSET       : 'Z' | 'z' | NUMOFFSET;
fragment PARTIAL_TIME : HOUR ':' MINUTE ':' SECOND SECFRAC?;
fragment FULL_DATE    : YEAR '-' MONTH '-' DAY;
fragment FULL_TIME    : PARTIAL_TIME OFFSET;
OFFSET_DATE_TIME : FULL_DATE DELIM FULL_TIME -> popMode;
LOCAL_DATE_TIME  : FULL_DATE DELIM PARTIAL_TIME -> popMode;
LOCAL_DATE       : FULL_DATE -> popMode;
LOCAL_TIME       : PARTIAL_TIME -> popMode;

// Lexer mode active between '{' and '}' of an inline table.
// Keys, dots and commas are re-emitted under the default-mode token types via
// type(...), so the parser sees the same tokens as for top-level key/value pairs.
mode INLINE_TABLE_MODE;

INLINE_TABLE_WS : WS -> skip;
INLINE_TABLE_KEY_DOT : DOT -> type(DOT);
INLINE_TABLE_COMMA : COMMA -> type(COMMA);
// Closing brace leaves the inline table and returns to the previous mode on the stack.
R_BRACE : '}' -> popMode;

// The three key forms: basic-string key, literal-string key, bare key.
INLINE_TABLE_KEY_BASIC_STRING : BASIC_STRING -> type(BASIC_STRING);
INLINE_TABLE_KEY_LITERAL_STRING : LITERAL_STRING -> type(LITERAL_STRING);
INLINE_TABLE_KEY_UNQUOTED : UNQUOTED_KEY -> type(UNQUOTED_KEY);

// '=' pushes SIMPLE_VALUE_MODE for the value; this mode stays underneath on the
// mode stack and is resumed when the value rule pops.
// NOTE(review): this relies on ANTLR ignoring the pushMode command attached to the
// referenced EQUALS rule, so only one push happens — confirm against the ANTLR docs.
INLINE_TABLE_EQUALS: EQUALS -> type(EQUALS), pushMode(SIMPLE_VALUE_MODE);

// Lexer mode active between '[' and ']' of an array value.
// Every rule here wraps a rule from another mode and re-emits it under that
// rule's token type via type(...). Value rules in this mode carry no popMode of
// their own; only ARRAY_END leaves the mode.
// NOTE(review): this relies on ANTLR ignoring the commands (popMode, mode, ...)
// attached to the referenced token rules — confirm against the ANTLR docs.
mode ARRAY_MODE;

// Unlike SIMPLE_VALUE_MODE, line breaks and comments are tokenized inside arrays.
ARRAY_WS : WS -> skip;
ARRAY_NL : NL -> type(NL);
ARRAY_COMMENT : COMMENT -> type(COMMENT);
ARRAY_COMMA : COMMA -> type(COMMA);

// Nested containers push a new mode so their closing delimiter returns here.
ARRAY_INLINE_TABLE_START : L_BRACE -> type(L_BRACE), pushMode(INLINE_TABLE_MODE);
NESTED_ARRAY_START : L_BRACKET -> type(L_BRACKET), pushMode(ARRAY_MODE);
// Closing bracket ends this array and pops back to the enclosing mode.
ARRAY_END : R_BRACKET -> type(R_BRACKET), popMode;

ARRAY_BOOLEAN: BOOLEAN -> type(BOOLEAN);

// The four string forms.
ARRAY_BASIC_STRING : BASIC_STRING -> type(BASIC_STRING);
ARRAY_ML_BASIC_STRING : ML_BASIC_STRING -> type(ML_BASIC_STRING);
ARRAY_LITERAL_STRING : LITERAL_STRING -> type(LITERAL_STRING);
ARRAY_ML_LITERAL_STRING : ML_LITERAL_STRING -> type(ML_LITERAL_STRING);

// Floating point values.
ARRAY_FLOAT : FLOAT -> type(FLOAT);
ARRAY_INF : INF -> type(INF);
ARRAY_NAN : NAN -> type(NAN);

// Integer values in decimal, hexadecimal, octal and binary notation.
ARRAY_DEC_INT : DEC_INT -> type(DEC_INT);
ARRAY_HEX_INT : HEX_INT -> type(HEX_INT);
ARRAY_OCT_INT : OCT_INT -> type(OCT_INT);
ARRAY_BIN_INT : BIN_INT -> type(BIN_INT);

// Date and time values.
ARRAY_OFFSET_DATE_TIME : OFFSET_DATE_TIME -> type(OFFSET_DATE_TIME);
ARRAY_LOCAL_DATE_TIME : LOCAL_DATE_TIME -> type(LOCAL_DATE_TIME);
ARRAY_LOCAL_DATE : LOCAL_DATE -> type(LOCAL_DATE);
ARRAY_LOCAL_TIME : LOCAL_TIME -> type(LOCAL_TIME);
151 changes: 151 additions & 0 deletions rewrite-toml/src/main/antlr/TomlParser.g4
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
/*
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
*/

// $antlr-format alignTrailingComments true, columnLimit 150, minEmptyLines 1, maxEmptyLinesToKeep 1, reflowComments false, useTab false
// $antlr-format allowShortRulesOnASingleLine false, allowShortBlocksOnASingleLine true, alignSemicolons hanging, alignColons hanging

// Parser for TOML documents, consuming the token types declared in TomlLexer.
parser grammar TomlParser;

options {
tokenVocab = TomlLexer;
}

// Entry rule: expressions separated by NL tokens, terminated by end of input.
document
: expression (NL expression)* EOF
;

// One logical line: a key/value pair or a table header, each followed by an
// optional comment, or a comment on its own. Because `comment` may match
// nothing, the last alternative also matches an empty expression.
expression
: key_value comment
| table comment
| comment
;

// An optional COMMENT token; matches the empty input when no comment is present.
comment
: COMMENT?
;

// key = value
key_value
: key EQUALS value
;

// A key is either a single key part or several parts joined by dots.
key
: simple_key
| dotted_key
;

// A single key part, quoted or bare.
simple_key
: quoted_key
| unquoted_key
;

unquoted_key
: UNQUOTED_KEY
;

// Quoted key parts reuse the single-line string tokens.
quoted_key
: BASIC_STRING
| LITERAL_STRING
;

// Two or more key parts separated by DOT.
dotted_key
: simple_key (DOT simple_key)+
;

// Any value that may appear on the right-hand side of '=' or inside an array.
value
: string
| integer
| floating_point
| bool_
| date_time
| array_
| inline_table
;

// Single-line and multi-line forms of basic and literal strings.
string
: BASIC_STRING
| ML_BASIC_STRING
| LITERAL_STRING
| ML_LITERAL_STRING
;

integer
: DEC_INT
| HEX_INT
| OCT_INT
| BIN_INT
;

floating_point
: FLOAT
| INF
| NAN
;

// NOTE(review): the trailing underscore on bool_ and array_ presumably avoids a
// clash with reserved words or existing identifiers in generated code — confirm.
bool_
: BOOLEAN
;

date_time
: OFFSET_DATE_TIME
| LOCAL_DATE_TIME
| LOCAL_DATE
| LOCAL_TIME
;

// '[' optional values, then any trailing comment/newline lines, then ']'.
array_
: L_BRACKET array_values? comment_or_nl R_BRACKET
;

// Right-recursive value list. The first alternative is "value , rest"; the
// second is the final value, which may be followed by a trailing comma.
// Comments and line breaks are accepted around each value.
array_values
: (comment_or_nl value nl_or_comment COMMA array_values comment_or_nl)
| comment_or_nl value nl_or_comment COMMA?
;

// Zero or more line breaks, each optionally preceded by a comment.
comment_or_nl
: (COMMENT? NL)*
;

// Zero or more line breaks, each optionally followed by a comment.
nl_or_comment
: (NL COMMENT?)*
;

// A table header: [key] or [[key]].
table
: standard_table
| array_table
;

standard_table
: L_BRACKET key R_BRACKET
;

// '{' optional key/value pairs '}'.
inline_table
: L_BRACE inline_table_keyvals R_BRACE
;

// Wrapper that allows the pair list to be empty.
inline_table_keyvals
: inline_table_keyvals_non_empty?
;

// One key/value pair, optionally followed by a comma and further pairs
// (right-recursive; a comma must always be followed by another pair).
inline_table_keyvals_non_empty
: key EQUALS value (COMMA inline_table_keyvals_non_empty)?
;

// Array-of-tables header: [[key]].
array_table
: DOUBLE_L_BRACKET key DOUBLE_R_BRACKET
;
53 changes: 53 additions & 0 deletions rewrite-toml/src/main/java/org/openrewrite/toml/Assertions.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
/*
* Copyright 2022 the original author or authors.
* <p>
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* <p>
* https://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.openrewrite.toml;

import org.intellij.lang.annotations.Language;
import org.jspecify.annotations.Nullable;
import org.openrewrite.test.SourceSpec;
import org.openrewrite.test.SourceSpecs;
import org.openrewrite.toml.tree.Toml;

import java.util.function.Consumer;

/**
 * Static factory methods that build {@link SourceSpecs} for TOML sources.
 * <p>
 * Every overload creates a {@link SourceSpec} typed to {@link Toml.Document} and backed by
 * {@link TomlParser#builder()}, hands it to the optional customizer, and returns it.
 */
public class Assertions {

    /** Customizer used by the overloads without a {@code spec} argument; it does nothing. */
    private static final Consumer<SourceSpec<Toml.Document>> NO_CUSTOMIZATION = ignored -> {
    };

    private Assertions() {
        // Not instantiable: static factory methods only.
    }

    /**
     * Builds a spec for a TOML source with only a {@code before} text.
     *
     * @param before the TOML source text, may be {@code null}
     * @return the created source spec
     */
    public static SourceSpecs toml(@Language("toml") @Nullable String before) {
        return toml(before, NO_CUSTOMIZATION);
    }

    /**
     * Builds a spec for a TOML source with only a {@code before} text and lets the caller
     * customize it. The spec is created with {@code null} in place of an after-function.
     *
     * @param before the TOML source text, may be {@code null}
     * @param spec   callback invoked with the freshly created spec before it is returned
     * @return the created (and customized) source spec
     */
    public static SourceSpecs toml(@Language("toml") @Nullable String before, Consumer<SourceSpec<Toml.Document>> spec) {
        SourceSpec<Toml.Document> sourceSpec =
                new SourceSpec<>(Toml.Document.class, null, TomlParser.builder(), before, null);
        spec.accept(sourceSpec);
        return sourceSpec;
    }

    /**
     * Builds a spec for a TOML source with a {@code before} and an {@code after} text.
     *
     * @param before the TOML source text, may be {@code null}
     * @param after  the text the after-function returns, may be {@code null}
     * @return the created source spec
     */
    public static SourceSpecs toml(@Language("toml") @Nullable String before, @Language("toml") @Nullable String after) {
        return toml(before, after, NO_CUSTOMIZATION);
    }

    /**
     * Builds a spec for a TOML source with a {@code before} and an {@code after} text and lets
     * the caller customize it. The after-function passed to the spec ignores its argument and
     * always returns {@code after}.
     *
     * @param before the TOML source text, may be {@code null}
     * @param after  the text the after-function returns, may be {@code null}
     * @param spec   callback invoked with the freshly created spec before it is returned
     * @return the created (and customized) source spec
     */
    public static SourceSpecs toml(@Language("toml") @Nullable String before, @Language("toml") @Nullable String after,
                                   Consumer<SourceSpec<Toml.Document>> spec) {
        SourceSpec<Toml.Document> sourceSpec =
                new SourceSpec<>(Toml.Document.class, null, TomlParser.builder(), before, unused -> after);
        spec.accept(sourceSpec);
        return sourceSpec;
    }
}
Loading