Skip to content

Commit fd65a0a

Browse files
committed
Implementing $split
1 parent 0f59a9f commit fd65a0a

File tree

8 files changed

+89
-11
lines changed

8 files changed

+89
-11
lines changed

src/main/antlr4/dev/vepo/jsonata/functions/generated/JSONataGrammar.g4

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ expression:
2929
| '(' expression ')' # contextValue
3030
| '(' expression ';' (expression ';')+ ')' # blockExpression
3131
| FV_NAME VAR_ASSIGN (expression|functionDeclaration) # variableAssignment
32-
| regex # regexValue
32+
| REGEX # regexValue
3333
| STRING # stringValue
3434
| NUMBER # numberValue
3535
| FLOAT # floatValue
@@ -46,11 +46,6 @@ expressionList: expression (',' expression)*;
4646
fieldList: expression ':' uniqueObj expOrObject (',' expression ':' uniqueObj expOrObject)*;
4747
expOrObject: expression | object;
4848

49-
regex : '/' regexPattern '/' REGEX_MODIFIER?;
50-
regexPattern : (~REGEX_BOUNDARY | '\\' REGEX_BOUNDARY)*;
51-
REGEX_BOUNDARY : '/' ;
52-
REGEX_MODIFIER: 'm' | 'i';
53-
5449
rangePredicate: ARR_OPEN ARR_OPEN NUMBER '..' NUMBER ARR_CLOSE ARR_CLOSE;
5550
BOOLEAN: 'true' | 'false';
5651
ROOT : '$$' ;
@@ -74,6 +69,10 @@ STRING:
7469
| '"' (ESC | ~["\\])* '"'
7570
;
7671
72+
// Regular-expression literal: a '/'-delimited body followed by optional flags.
// The body accepts an ESC sequence or any character other than the closing
// '/'. The apostrophe is deliberately NOT excluded: it has no special meaning
// inside a regex literal, and excluding it made patterns such as /don't/
// impossible to tokenize. Any number of trailing flags is accepted so that
// combinations like /abc/im are tokenized as a single REGEX.
REGEX:
    '/' (ESC | ~[/])* '/' ('m' | 'i' | 'g' | 'd')*
;
75+
7776
NUMBER: '-'? [0-9]+;
7877
FLOAT: '-'? [0-9]+ '.' [0-9]+;
7978
EXP_NUMBER: '-'? [0-9]+ [eE] '-'? [0-9]+;
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
package dev.vepo.jsonata.functions.buildin;
2+
3+
import java.util.List;
4+
import java.util.function.Supplier;
5+
import java.util.stream.Stream;
6+
7+
import dev.vepo.jsonata.functions.JSONataFunction;
8+
import dev.vepo.jsonata.functions.data.Data;
9+
import dev.vepo.jsonata.functions.json.JsonFactory;
10+
11+
public record SplitJSONataFunction(List<JSONataFunction> valueProviders) implements JSONataFunction {
12+
public SplitJSONataFunction {
13+
if (valueProviders.size() < 2 || valueProviders.size() > 3) {
14+
throw new IllegalArgumentException("$split function must have 2 or 3 arguments!");
15+
}
16+
}
17+
18+
@Override
19+
public Data map(Data original, Data current) {
20+
var value = valueProviders.get(0).map(original, current).toJson().asText();
21+
var patternData = valueProviders.get(1).map(original, current);
22+
Supplier<String[]> splitFunction = patternData.isRegex() ? () -> patternData.asRegex().split(value)
23+
: () -> value.split(patternData.toJson().asText());
24+
25+
if (valueProviders.size() == 3) {
26+
var limit = valueProviders.get(2).map(original, current).toJson().asInt();
27+
return JsonFactory.arrayValue(Stream.of(splitFunction.get())
28+
.limit(limit)
29+
.toArray(String[]::new));
30+
} else {
31+
return JsonFactory.arrayValue(splitFunction.get());
32+
}
33+
}
34+
35+
}

src/main/java/dev/vepo/jsonata/functions/json/JsonFactory.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,13 @@ public static Stream<Data> planify(JsonNode value) {
5252
}
5353
}
5454

55+
public static Data arrayValue(String[] values) {
56+
var array = mapper.createArrayNode();
57+
Stream.of(values)
58+
.forEach(array::add);
59+
return new ArrayData(array);
60+
}
61+
5562
public static Data numberValue(Integer value) {
5663
return new ObjectData(mapper.getNodeFactory().numberNode(value));
5764
}

src/main/java/dev/vepo/jsonata/functions/regex/RegExp.java

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
import javax.script.ScriptEngineManager;
88
import javax.script.ScriptException;
99

10+
import org.openjdk.nashorn.api.scripting.ScriptObjectMirror;
11+
1012
public class RegExp {
1113

1214
private static ScriptEngine loadEngine() {
@@ -15,6 +17,7 @@ private static ScriptEngine loadEngine() {
1517
}
1618

1719
private final Function<String, Boolean> isContainedFn;
20+
private final Function<String, String[]> splitFn;
1821
private final ScriptEngine engine;
1922

2023
public RegExp(String pattern) {
@@ -25,6 +28,9 @@ public RegExp(String pattern) {
2528
function isContained(content) {
2629
return content.match(pattern) !== null;
2730
}
31+
function split(content) {
32+
return content.split(pattern);
33+
}
2834
""", pattern));
2935
isContainedFn = content -> {
3036
try {
@@ -35,6 +41,17 @@ function isContained(content) {
3541
throw new IllegalArgumentException("Invalid pattern: " + pattern, e);
3642
}
3743
};
44+
splitFn = content -> {
45+
try {
46+
var bindings = engine.getBindings(ScriptContext.ENGINE_SCOPE);
47+
bindings.put("content", content);
48+
ScriptObjectMirror ret = (ScriptObjectMirror) engine.eval("split(content)", bindings);
49+
return ret.to(String[].class);
50+
} catch (ScriptException e) {
51+
throw new IllegalArgumentException("Invalid pattern: " + pattern, e);
52+
}
53+
};
54+
3855
} catch (ScriptException e) {
3956
throw new IllegalArgumentException("Invalid pattern: " + pattern, e);
4057
}
@@ -43,4 +60,8 @@ function isContained(content) {
4360
public Boolean isContainedIn(String content) {
4461
return isContainedFn.apply(content);
4562
}
63+
64+
public String[] split(String value) {
65+
return splitFn.apply(value);
66+
}
4667
}

src/main/java/dev/vepo/jsonata/parser/BuiltInFunction.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@ public enum BuiltInFunction {
1515
UPPERCASE("$uppercase"),
1616
TRIM("$trim"),
1717
PAD("$pad"),
18-
CONTAINS("$contains");
18+
CONTAINS("$contains"),
19+
SPLIT("$split"),;
1920

2021
public static Optional<BuiltInFunction> get(String name) {
2122
return Stream.of(values())

src/main/java/dev/vepo/jsonata/parser/JSONataGrammarListener.java

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
import dev.vepo.jsonata.functions.buildin.LowecaseJSONataFunction;
5050
import dev.vepo.jsonata.functions.buildin.PadJSONataFunction;
5151
import dev.vepo.jsonata.functions.buildin.SortJSONataFunction;
52+
import dev.vepo.jsonata.functions.buildin.SplitJSONataFunction;
5253
import dev.vepo.jsonata.functions.buildin.StringJSONataFunction;
5354
import dev.vepo.jsonata.functions.buildin.SubstringAfterJSONataFunction;
5455
import dev.vepo.jsonata.functions.buildin.SubstringBeforeJSONataFunction;
@@ -83,7 +84,7 @@
8384
import dev.vepo.jsonata.functions.generated.JSONataGrammarParser.ObjectMapperContext;
8485
import dev.vepo.jsonata.functions.generated.JSONataGrammarParser.PathContext;
8586
import dev.vepo.jsonata.functions.generated.JSONataGrammarParser.RangeQueryContext;
86-
import dev.vepo.jsonata.functions.generated.JSONataGrammarParser.RegexContext;
87+
import dev.vepo.jsonata.functions.generated.JSONataGrammarParser.RegexValueContext;
8788
import dev.vepo.jsonata.functions.generated.JSONataGrammarParser.RootPathContext;
8889
import dev.vepo.jsonata.functions.generated.JSONataGrammarParser.StringValueContext;
8990
import dev.vepo.jsonata.functions.generated.JSONataGrammarParser.ToArrayContext;
@@ -182,8 +183,11 @@ public void exitFunctionCall(FunctionCallContext ctx) {
182183
.parameterStatement()
183184
.size()));
184185
case CONTAINS -> new ContainsJSONataFunction(previous(ctx.functionStatement()
185-
.parameterStatement()
186-
.size()));
186+
.parameterStatement()
187+
.size()));
188+
case SPLIT -> new SplitJSONataFunction(previous(ctx.functionStatement()
189+
.parameterStatement()
190+
.size()));
187191
})
188192
.orElseGet(() -> Optional.ofNullable(this.blocks.peek())
189193
.flatMap(block -> block.function(fnName))
@@ -409,7 +413,7 @@ public void exitVariableUsage(VariableUsageContext ctx) {
409413
}
410414

411415
@Override
412-
public void exitRegex(RegexContext ctx) {
416+
public void exitRegexValue(RegexValueContext ctx) {
413417
logger.atInfo().setMessage("Regex! {}").addArgument(ctx::getText).log();
414418
expressions.offer((original, current) -> JsonFactory.regex(ctx.getText()));
415419
}

src/main/java/module-info.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
requires org.apache.commons.lang3;
66
requires org.slf4j;
77
requires java.scripting;
8+
requires org.openjdk.nashorn;
89

910
exports dev.vepo.jsonata;
1011
}

src/test/java/dev/vepo/jsonata/JSONataTest.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -327,6 +327,16 @@ void containsTest() {
327327
assertThat(jsonata("Phone[$contains(number, /^077/)]").evaluate(OBJECT).asText()).isEqualTo("{\"type\":\"mobile\",\"number\":\"077 7700 1234\"}");
328328
}
329329

330+
@Test
331+
void splitTest() {
332+
// $split("so many words", " ") => [ "so", "many", "words" ]
333+
assertThat(jsonata("$split(\"so many words\", \" \")").evaluate("{}").multi().asText()).containsExactly("so", "many", "words");
334+
// $split("so many words", " ", 2) => [ "so", "many" ]
335+
assertThat(jsonata("$split(\"so many words\", \" \", 2)").evaluate("{}").multi().asText()).containsExactly("so", "many");
336+
// $split("too much, punctuation. hard; to read", /[ ,.;]+/) => ["too", "much", "punctuation", "hard", "to", "read"]
337+
assertThat(jsonata("$split(\"too much, punctuation. hard; to read\", /[ ,.;]+/)").evaluate("{}").multi().asText()).containsExactly("too", "much", "punctuation", "hard", "to", "read");
338+
}
339+
330340
@Test
331341
void sortTest() {
332342
assertThat(jsonata("""

0 commit comments

Comments
 (0)