Skip to content

Commit

Permalink
Add GraalJS regular expression and factory implementation (#1058)
Browse files Browse the repository at this point in the history
  • Loading branch information
justin-tay authored Jun 13, 2024
1 parent b063972 commit 3daa5bc
Show file tree
Hide file tree
Showing 23 changed files with 954 additions and 18 deletions.
21 changes: 17 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -157,14 +157,26 @@ The following are the optional dependencies that may be required for certain opt
These are not automatically included and setting the relevant option without adding the library will result in a `ClassNotFoundException`.

```xml
<!-- This is required when setting setEcma262Validator(true) -->
<!-- Either library is required when setting setEcma262Validator(true) or explicitly via setRegularExpressionFactory() -->
<dependency>
<!-- Used to validate ECMA 262 regular expressions -->
<!-- Approximately 2 MB in dependencies -->
<!-- JoniRegularExpressionFactory -->
<groupId>org.jruby.joni</groupId>
<artifactId>joni</artifactId>
<version>${version.joni}</version>
<optional>true</optional>
</dependency>

<dependency>
<!-- Used to validate ECMA 262 regular expressions -->
<!-- Approximately 50 MB in dependencies -->
<!-- GraalJSRegularExpressionFactory -->
<groupId>org.graalvm.js</groupId>
<artifactId>js</artifactId>
<version>${version.graaljs}</version>
<optional>true</optional>
</dependency>
```

##### Excludable Dependencies
Expand Down Expand Up @@ -258,7 +270,7 @@ SchemaValidatorsConfig config = new SchemaValidatorsConfig();
// By default JSON Path is used for reporting the instance location and evaluation path
config.setPathType(PathType.JSON_POINTER);
// By default the JDK regular expression implementation which is not ECMA 262 compliant is used
// Note that setting this to true requires including the optional joni dependency
// Note that setting this to true requires including the optional joni or graaljs dependency
// config.setEcma262Validator(true);

// Due to the mapping the schema will be retrieved from the classpath at classpath:schema/example-main.json.
Expand Down Expand Up @@ -293,7 +305,7 @@ SchemaValidatorsConfig config = new SchemaValidatorsConfig();
// By default JSON Path is used for reporting the instance location and evaluation path
config.setPathType(PathType.JSON_POINTER);
// By default the JDK regular expression implementation which is not ECMA 262 compliant is used
// Note that setting this to true requires including the optional joni dependency
// Note that setting this to true requires including the optional joni or graaljs dependency
// config.setEcma262Validator(true);

// Due to the mapping the meta-schema will be retrieved from the classpath at classpath:draft/2020-12/schema.
Expand Down Expand Up @@ -517,7 +529,7 @@ The following is sample output from the Hierarchical format.
| Name | Description | Default Value
|---------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------
| `pathType` | The path type to use for reporting the instance location and evaluation path. Set to `PathType.JSON_POINTER` to use JSON Pointer. | `PathType.DEFAULT`
| `ecma262Validator` | Whether to use the ECMA 262 `joni` library to validate the `pattern` keyword. This requires the dependency to be manually added to the project or a `ClassNotFoundException` will be thrown. | `false`
| `ecma262Validator` | Whether to use the ECMA 262 `joni` or `graaljs` library to validate the `pattern` keyword. This requires the dependency to be manually added to the project or a `ClassNotFoundException` will be thrown. | `false`
| `executionContextCustomizer` | This can be used to customize the `ExecutionContext` generated by the `JsonSchema` for each validation run. | `null`
| `schemaIdValidator` | This is used to customize how the `$id` values are validated. Note that the default implementation allows non-empty fragments where no base IRI is specified and also allows non-absolute IRI `$id` values in the root schema. | `JsonSchemaIdValidator.DEFAULT`
| `messageSource` | This is used to retrieve the locale specific messages. | `DefaultMessageSource.getInstance()`
Expand All @@ -527,6 +539,7 @@ The following is sample output from the Hierarchical format.
| `locale` | The locale to use for generating messages in the `ValidationMessage`. | `Locale.getDefault()`
| `failFast` | Whether to return failure immediately when an assertion is generated. | `false`
| `formatAssertionsEnabled` | The default is to generate format assertions from Draft 4 to Draft 7 and to only generate annotations from Draft 2019-09. Setting to `true` or `false` will override the default behavior. | `null`
| `regularExpressionFactory` | The factory to use to create regular expressions, for instance `JoniRegularExpressionFactory` or `GraalJSRegularExpressionFactory`. | `JDKRegularExpressionFactory.getInstance()`

## Performance Considerations

Expand Down
22 changes: 21 additions & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,9 @@
<version.itu>1.10.2</version.itu>
<version.jackson>2.17.0</version.jackson>
<version.joni>2.2.1</version.joni>
<version.logback>1.3.14</version.logback>
<version.logback>1.3.14</version.logback> <!-- 1.4.x and above is not Java 8 compatible -->
<version.slf4j>2.0.13</version.slf4j>
<version.graaljs>21.3.10</version.graaljs> <!-- 22.x and above is not Java 8 compatible -->

<version.hamcrest>2.2</version.hamcrest>
<version.junit>5.10.2</version.junit>
Expand Down Expand Up @@ -138,12 +139,31 @@

<dependency>
<!-- Used to validate ECMA 262 regular expressions -->
<!-- Approximately 2 MB in dependencies -->
<groupId>org.jruby.joni</groupId>
<artifactId>joni</artifactId>
<version>${version.joni}</version>
<optional>true</optional>
</dependency>

<dependency>
<!-- Used to validate ECMA 262 regular expressions -->
<!-- Approximately 50 MB in dependencies -->
<groupId>org.graalvm.js</groupId>
<artifactId>js</artifactId>
<version>${version.graaljs}</version>
<optional>true</optional>
</dependency>

<dependency>
<!-- Used to validate ECMA 262 regular expressions -->
<!-- Approximately 50 MB in dependencies -->
<groupId>org.graalvm.sdk</groupId>
<artifactId>graal-sdk</artifactId>
<version>${version.graaljs}</version>
<optional>true</optional>
</dependency>

<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-engine</artifactId>
Expand Down
54 changes: 50 additions & 4 deletions src/main/java/com/networknt/schema/SchemaValidatorsConfig.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
import com.fasterxml.jackson.databind.JsonNode;
import com.networknt.schema.i18n.DefaultMessageSource;
import com.networknt.schema.i18n.MessageSource;
import com.networknt.schema.regex.ECMAScriptRegularExpressionFactory;
import com.networknt.schema.regex.JDKRegularExpressionFactory;
import com.networknt.schema.regex.RegularExpressionFactory;
import com.networknt.schema.walk.DefaultItemWalkListenerRunner;
import com.networknt.schema.walk.DefaultKeywordWalkListenerRunner;
import com.networknt.schema.walk.DefaultPropertyWalkListenerRunner;
Expand Down Expand Up @@ -59,9 +62,9 @@ public class SchemaValidatorsConfig {
private ApplyDefaultsStrategy applyDefaultsStrategy = ApplyDefaultsStrategy.EMPTY_APPLY_DEFAULTS_STRATEGY;

/**
* When set to true, use ECMA-262 compatible validator
* Used to create {@link com.networknt.schema.regex.RegularExpression}.
*/
private boolean ecma262Validator;
private RegularExpressionFactory regularExpressionFactory = JDKRegularExpressionFactory.getInstance();

/**
* When set to true, use Java-specific semantics rather than native JavaScript
Expand Down Expand Up @@ -268,12 +271,55 @@ public void setHandleNullableField(boolean handleNullableField) {
this.handleNullableField = handleNullableField;
}

/**
* Gets whether to use an ECMA-262 compliant regular expression validator.
* <p>
* This defaults to false and setting it to true requires inclusion of the optional
* org.jruby.joni:joni or org.graalvm.js:js dependencies.
*
* @return true if ECMA-262 compliant
*/
public boolean isEcma262Validator() {
return this.ecma262Validator;
return !(this.regularExpressionFactory instanceof JDKRegularExpressionFactory);
}

/**
* Sets whether to use an ECMA-262 compliant regular expression validator.
* <p>
* This defaults to false and setting it to true requires inclusion of the optional
* org.jruby.joni:joni or org.graalvm.js:js dependencies.
*
* @param ecma262Validator true if ECMA-262 compliant
*/
public void setEcma262Validator(boolean ecma262Validator) {
this.ecma262Validator = ecma262Validator;
this.regularExpressionFactory = ecma262Validator ? ECMAScriptRegularExpressionFactory.getInstance()
: JDKRegularExpressionFactory.getInstance();
}

/**
 * Returns the factory currently used to create regular expressions.
 * <p>
 * Unless changed, this is the JDKRegularExpressionFactory. The ECMAScript
 * implementations need the optional org.jruby.joni:joni or org.graalvm.js:js
 * dependency to be added to the project.
 *
 * @return the regular expression factory
 */
public RegularExpressionFactory getRegularExpressionFactory() {
    return this.regularExpressionFactory;
}

/**
 * Replaces the factory used to create regular expressions.
 * <p>
 * Unless changed, the JDKRegularExpressionFactory is used. The ECMAScript
 * implementations need the optional org.jruby.joni:joni or org.graalvm.js:js
 * dependency to be added to the project.
 *
 * @param regularExpressionFactory the factory to use from now on
 * @see JDKRegularExpressionFactory
 * @see ECMAScriptRegularExpressionFactory
 */
public void setRegularExpressionFactory(RegularExpressionFactory regularExpressionFactory) {
    this.regularExpressionFactory = regularExpressionFactory;
}

public boolean isJavaSemantics() {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
/*
* Copyright (c) 2024 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.networknt.schema.regex;

import com.networknt.schema.utils.Classes;

/**
 * {@link RegularExpressionFactory} for ECMAScript regular expressions that
 * forwards to either the GraalJS or the Joni backed factory.
 * <p>
 * The backing factory is selected once, when this class is initialized: the
 * GraalJS factory is used when GraalJS is detected, otherwise the Joni factory
 * is used.
 */
public class ECMAScriptRegularExpressionFactory implements RegularExpressionFactory {
    /** Result of the presence check for {@code org.joni.Regex}. */
    private static final boolean JONI_PRESENT = isOnClasspath("org.joni.Regex");

    /** Result of the presence check for the GraalJS evaluator class. */
    private static final boolean GRAALJS_PRESENT = isOnClasspath("com.oracle.truffle.js.parser.GraalJSEvaluator");

    /** Factory that every request is forwarded to; GraalJS takes precedence over Joni. */
    private static final RegularExpressionFactory DELEGATE;

    static {
        if (GRAALJS_PRESENT) {
            DELEGATE = GraalJSRegularExpressionFactory.getInstance();
        } else {
            DELEGATE = JoniRegularExpressionFactory.getInstance();
        }
    }

    /** The shared instance; prefer {@link #getInstance()}, which also checks the classpath. */
    public static final ECMAScriptRegularExpressionFactory INSTANCE = new ECMAScriptRegularExpressionFactory();

    /**
     * Gets the shared instance after checking that a backing library was detected.
     *
     * @return the shared instance
     * @throws IllegalArgumentException if neither Joni nor GraalJS was detected
     */
    public static ECMAScriptRegularExpressionFactory getInstance() {
        if (JONI_PRESENT || GRAALJS_PRESENT) {
            return INSTANCE;
        }
        throw new IllegalArgumentException(
                "Either org.jruby.joni:joni or org.graalvm.js:js needs to be present in the classpath");
    }

    /**
     * Creates the regular expression using the factory selected at class
     * initialization.
     *
     * @param regex the regular expression source
     * @return the regular expression produced by the backing factory
     */
    @Override
    public RegularExpression getRegularExpression(String regex) {
        return DELEGATE.getRegularExpression(regex);
    }

    /**
     * Asks {@link Classes#isPresent} whether the named class is present, using the
     * class loader of this factory.
     */
    private static boolean isOnClasspath(String className) {
        return Classes.isPresent(className, ECMAScriptRegularExpressionFactory.class.getClassLoader());
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/*
* Copyright (c) 2024 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.networknt.schema.regex;

import org.graalvm.polyglot.Context;

/**
 * Provides a single, shared js {@link Context}.
 */
public class GraalJSContextFactory {
    /**
     * Holds the shared context. The context is only built when this nested class
     * is initialized, which happens on the first call to
     * {@link GraalJSContextFactory#getInstance()}.
     */
    private static final class Holder {
        private static final Context INSTANCE = createContext();

        /**
         * Builds a context for the "js" language with the
         * {@code engine.WarnInterpreterOnly} option set to {@code false}.
         */
        private static Context createContext() {
            return Context.newBuilder("js")
                    .option("engine.WarnInterpreterOnly", "false")
                    .build();
        }
    }

    /**
     * Returns the shared {@link Context}, creating it on first use.
     * <p>
     * The context may need to be closed by the caller to release its resources
     * once it is no longer needed.
     *
     * @return the shared Context
     */
    public static Context getInstance() {
        return Holder.INSTANCE;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/*
* Copyright (c) 2024 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.networknt.schema.regex;

import org.graalvm.polyglot.Value;

/**
 * {@link RegularExpression} evaluated by GraalJS.
 * <p>
 * Requires the org.graalvm.js:js dependency, which together with its own
 * dependencies is about 50 MB.
 */
class GraalJSRegularExpression implements RegularExpression {
    /** Supplies the polyglot context to lock on and the RegExp builder function. */
    private final GraalJSRegularExpressionContext regexContext;

    /** Function obtained from the RegExp builder for this expression's pattern. */
    private final Value matcher;

    /**
     * Builds the matcher function for the given pattern.
     *
     * @param regex   the regular expression source handed to the RegExp builder
     * @param context the context that provides the builder and the lock object
     */
    GraalJSRegularExpression(String regex, GraalJSRegularExpressionContext context) {
        this.regexContext = context;
        final Object lock = context.getContext();
        // The polyglot context is used as the monitor for every call into the engine.
        synchronized (lock) {
            this.matcher = context.getRegExpBuilder().execute(regex);
        }
    }

    /**
     * Runs the matcher function on the value; any non-null result counts as a match.
     *
     * @param value the text to test
     * @return true if the matcher function returned a non-null result
     */
    @Override
    public boolean matches(String value) {
        final Object lock = this.regexContext.getContext();
        final boolean matched;
        synchronized (lock) {
            // The result is inspected while still holding the lock.
            final Value outcome = this.matcher.execute(value);
            matched = !outcome.isNull();
        }
        return matched;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
/*
* Copyright (c) 2024 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.networknt.schema.regex;

import org.graalvm.polyglot.Context;
import org.graalvm.polyglot.Value;

/**
 * Pairs a js {@link Context} with the RegExp builder function evaluated in it.
 */
public class GraalJSRegularExpressionContext {
    /**
     * JavaScript that takes a pattern, compiles it into a RegExp with the 'u'
     * flag and returns a function that calls {@code text.match} with that RegExp.
     */
    private static final String REGEXP_BUILDER_SOURCE = "pattern => {\n"
            + " const regex = new RegExp(pattern, 'u');\n"
            + " return text => text.match(regex)\n"
            + "};";

    private final Context context;
    private final Value regExpBuilder;

    /**
     * Evaluates the RegExp builder script in the supplied context.
     * <p>
     * Releasing the context once it is no longer required is left to the caller.
     *
     * @param context the context to evaluate the builder in
     */
    public GraalJSRegularExpressionContext(Context context) {
        this.context = context;
        // Evaluation is done while holding the context's monitor.
        synchronized (context) {
            this.regExpBuilder = context.eval("js", REGEXP_BUILDER_SOURCE);
        }
    }

    /**
     * Returns the underlying context.
     * <p>
     * Callers must synchronize on the returned {@link Context} for every
     * operation, because the {@link Context} and {@link #getRegExpBuilder()} can
     * only be accessed by one thread at a time.
     *
     * @return the context
     */
    public Context getContext() {
        return this.context;
    }

    /**
     * Returns the function that builds a matcher for a pattern.
     *
     * @return the regexp builder
     */
    public Value getRegExpBuilder() {
        return this.regExpBuilder;
    }
}
Loading

0 comments on commit 3daa5bc

Please sign in to comment.