-
Notifications
You must be signed in to change notification settings - Fork 168
json-valid PPL function #3230
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
json-valid PPL function #3230
Changes from all commits
70152eb
76c3995
ce2c551
ad1bde3
ccf47a2
acc76a0
519c6f2
2e319fe
ee0820d
d44fc5a
3407d4a
7ef6cc9
e5e90ac
2187a5a
5e1e488
3512b33
9fea606
fbc54bc
31ad2a4
2b2a8f3
dc96563
1913bfe
67d979d
b0d7d1a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package org.opensearch.sql.expression.json; | ||
|
||
import static org.opensearch.sql.data.type.ExprCoreType.BOOLEAN; | ||
import static org.opensearch.sql.data.type.ExprCoreType.STRING; | ||
import static org.opensearch.sql.expression.function.FunctionDSL.define; | ||
import static org.opensearch.sql.expression.function.FunctionDSL.impl; | ||
|
||
import lombok.experimental.UtilityClass; | ||
import org.opensearch.sql.expression.function.BuiltinFunctionName; | ||
import org.opensearch.sql.expression.function.BuiltinFunctionRepository; | ||
import org.opensearch.sql.expression.function.DefaultFunctionResolver; | ||
import org.opensearch.sql.utils.JsonUtils; | ||
|
||
@UtilityClass | ||
public class JsonFunctions { | ||
public void register(BuiltinFunctionRepository repository) { | ||
repository.register(jsonValid()); | ||
} | ||
|
||
private DefaultFunctionResolver jsonValid() { | ||
return define( | ||
BuiltinFunctionName.JSON_VALID.getName(), impl(JsonUtils::isValidJson, BOOLEAN, STRING)); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
package org.opensearch.sql.utils; | ||
|
||
import com.fasterxml.jackson.core.JsonProcessingException; | ||
import com.fasterxml.jackson.databind.ObjectMapper; | ||
import lombok.experimental.UtilityClass; | ||
import org.opensearch.sql.data.model.ExprValue; | ||
import org.opensearch.sql.data.model.ExprValueUtils; | ||
|
||
@UtilityClass | ||
public class JsonUtils { | ||
/** | ||
* Checks if given JSON string can be parsed as valid JSON. | ||
* | ||
* @param jsonExprValue JSON string (e.g. "{\"hello\": \"world\"}"). | ||
* @return true if the string can be parsed as valid JSON, else false. | ||
*/ | ||
public static ExprValue isValidJson(ExprValue jsonExprValue) { | ||
ObjectMapper objectMapper = new ObjectMapper(); | ||
|
||
if (jsonExprValue.isNull() || jsonExprValue.isMissing()) { | ||
return ExprValueUtils.LITERAL_FALSE; | ||
} | ||
|
||
try { | ||
objectMapper.readTree(jsonExprValue.stringValue()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we extract the ObjectMapper as a static final class member? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd rather not waste the memory unless a user is making json calls. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmmm. What about the initialization-on-demand holder idiom? Agreed that it's probably better to avoid using the memory if it's not needed, but probably also good to avoid creating the same object hundreds of times if there are hundreds of calls to any JSON function? No need to address now either way, but maybe something to consider as JSON work progresses? |
||
return ExprValueUtils.LITERAL_TRUE; | ||
} catch (JsonProcessingException e) { | ||
return ExprValueUtils.LITERAL_FALSE; | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package org.opensearch.sql.expression.json; | ||
|
||
import static org.junit.jupiter.api.Assertions.assertEquals; | ||
import static org.junit.jupiter.api.Assertions.assertThrows; | ||
import static org.opensearch.sql.data.model.ExprValueUtils.LITERAL_FALSE; | ||
import static org.opensearch.sql.data.model.ExprValueUtils.LITERAL_MISSING; | ||
import static org.opensearch.sql.data.model.ExprValueUtils.LITERAL_NULL; | ||
import static org.opensearch.sql.data.model.ExprValueUtils.LITERAL_TRUE; | ||
|
||
import org.junit.jupiter.api.Test; | ||
import org.junit.jupiter.api.extension.ExtendWith; | ||
import org.mockito.junit.jupiter.MockitoExtension; | ||
import org.opensearch.sql.data.model.ExprValue; | ||
import org.opensearch.sql.data.model.ExprValueUtils; | ||
import org.opensearch.sql.exception.ExpressionEvaluationException; | ||
import org.opensearch.sql.expression.DSL; | ||
import org.opensearch.sql.expression.FunctionExpression; | ||
|
||
@ExtendWith(MockitoExtension.class) | ||
public class JsonFunctionsTest { | ||
private static final ExprValue JsonNestedObject = | ||
ExprValueUtils.stringValue("{\"a\":\"1\",\"b\":{\"c\":\"2\",\"d\":\"3\"}}"); | ||
private static final ExprValue JsonObject = | ||
ExprValueUtils.stringValue("{\"a\":\"1\",\"b\":\"2\"}"); | ||
private static final ExprValue JsonArray = ExprValueUtils.stringValue("[1, 2, 3, 4]"); | ||
private static final ExprValue JsonScalarString = ExprValueUtils.stringValue("\"abc\""); | ||
private static final ExprValue JsonEmptyString = ExprValueUtils.stringValue(""); | ||
private static final ExprValue JsonInvalidObject = | ||
ExprValueUtils.stringValue("{\"invalid\":\"json\", \"string\"}"); | ||
private static final ExprValue JsonInvalidScalar = ExprValueUtils.stringValue("abc"); | ||
acarbonetto marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
acarbonetto marked this conversation as resolved.
Show resolved
Hide resolved
|
||
@Test | ||
public void json_valid_returns_false() { | ||
assertEquals(LITERAL_FALSE, execute(JsonInvalidObject)); | ||
assertEquals(LITERAL_FALSE, execute(JsonInvalidScalar)); | ||
assertEquals(LITERAL_FALSE, execute(LITERAL_NULL)); | ||
assertEquals(LITERAL_FALSE, execute(LITERAL_MISSING)); | ||
} | ||
|
||
@Test | ||
public void json_valid_throws_ExpressionEvaluationException() { | ||
assertThrows( | ||
ExpressionEvaluationException.class, () -> execute(ExprValueUtils.booleanValue(true))); | ||
} | ||
|
||
@Test | ||
public void json_valid_returns_true() { | ||
assertEquals(LITERAL_TRUE, execute(JsonNestedObject)); | ||
assertEquals(LITERAL_TRUE, execute(JsonObject)); | ||
assertEquals(LITERAL_TRUE, execute(JsonArray)); | ||
assertEquals(LITERAL_TRUE, execute(JsonScalarString)); | ||
assertEquals(LITERAL_TRUE, execute(JsonEmptyString)); | ||
} | ||
|
||
private ExprValue execute(ExprValue jsonString) { | ||
FunctionExpression exp = DSL.jsonValid(DSL.literal(jsonString)); | ||
return exp.valueOf(); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
==================== | ||
JSON Functions | ||
==================== | ||
|
||
.. rubric:: Table of contents | ||
|
||
.. contents:: | ||
:local: | ||
:depth: 1 | ||
|
||
JSON_VALID | ||
---------- | ||
|
||
Description | ||
>>>>>>>>>>> | ||
|
||
Usage: `json_valid(json_string)` checks if `json_string` is a valid JSON-encoded string. | ||
|
||
Argument type: STRING | ||
|
||
Return type: BOOLEAN | ||
|
||
Example:: | ||
|
||
> source=json_test | eval is_valid = json_valid(json_string) | fields test_name, json_string, is_valid | ||
fetched rows / total rows = 6/6 | ||
+---------------------+---------------------------------+----------+ | ||
| test_name | json_string | is_valid | | ||
|---------------------|---------------------------------|----------| | ||
| json nested object | {"a":"1","b":{"c":"2","d":"3"}} | True | | ||
| json object | {"a":"1","b":"2"} | True | | ||
| json array | [1, 2, 3, 4] | True | | ||
| json scalar string | "abc" | True | | ||
| json empty string | | True | | ||
| json invalid object | {"invalid":"json", "string"} | False | | ||
+---------------------+---------------------------------+----------+ | ||
acarbonetto marked this conversation as resolved.
Show resolved
Hide resolved
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
{"test_name":"json nested object", "json_string":"{\"a\":\"1\",\"b\":{\"c\":\"2\",\"d\":\"3\"}}"} | ||
{"test_name":"json object", "json_string":"{\"a\":\"1\",\"b\":\"2\"}"} | ||
{"test_name":"json array", "json_string":"[1, 2, 3, 4]"} | ||
{"test_name":"json scalar string", "json_string":"\"abc\""} | ||
{"test_name":"json empty string","json_string":""} | ||
{"test_name":"json invalid object", "json_string":"{\"invalid\":\"json\", \"string\"}"} | ||
YANG-DB marked this conversation as resolved.
Show resolved
Hide resolved
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package org.opensearch.sql.ppl; | ||
|
||
import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_JSON_TEST; | ||
import static org.opensearch.sql.util.MatcherUtils.rows; | ||
import static org.opensearch.sql.util.MatcherUtils.schema; | ||
import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; | ||
import static org.opensearch.sql.util.MatcherUtils.verifySchema; | ||
|
||
import java.io.IOException; | ||
import org.json.JSONObject; | ||
import org.junit.jupiter.api.Test; | ||
|
||
public class JsonFunctionsIT extends PPLIntegTestCase { | ||
@Override | ||
public void init() throws IOException { | ||
loadIndex(Index.JSON_TEST); | ||
} | ||
|
||
@Test | ||
public void test_json_valid() throws IOException { | ||
JSONObject result; | ||
|
||
result = | ||
executeQuery( | ||
String.format( | ||
"source=%s | where json_valid(json_string) | fields test_name", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what if the json_string is There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm assuming null should return There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
@acarbonetto yes I think it makes sense There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If indeed There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Update: you are correct that null should return valse. |
||
TEST_INDEX_JSON_TEST)); | ||
verifySchema(result, schema("test_name", null, "string")); | ||
verifyDataRows( | ||
result, | ||
rows("json nested object"), | ||
rows("json object"), | ||
rows("json array"), | ||
rows("json scalar string"), | ||
rows("json empty string")); | ||
} | ||
|
||
@Test | ||
public void test_not_json_valid() throws IOException { | ||
JSONObject result; | ||
|
||
result = | ||
executeQuery( | ||
String.format( | ||
"source=%s | where not json_valid(json_string) | fields test_name", | ||
TEST_INDEX_JSON_TEST)); | ||
verifySchema(result, schema("test_name", null, "string")); | ||
verifyDataRows(result, rows("json invalid object"), rows("json null")); | ||
} | ||
} | ||
acarbonetto marked this conversation as resolved.
Show resolved
Hide resolved
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
{ | ||
"mappings": { | ||
"properties": { | ||
"test_name": { | ||
"type": "keyword" | ||
}, | ||
"json_string": { | ||
"type": "keyword" | ||
} | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
{"index":{"_id":"0"}} | ||
{"test_name":"json nested object", "json_string":"{\"a\":\"1\",\"b\":{\"c\":\"2\",\"d\":\"3\"}}"} | ||
{"index":{"_id":"1"}} | ||
{"test_name":"json object", "json_string":"{\"a\":\"1\",\"b\":\"2\"}"} | ||
{"index":{"_id":"2"}} | ||
{"test_name":"json array", "json_string":"[1, 2, 3, 4]"} | ||
{"index":{"_id":"3"}} | ||
{"test_name":"json scalar string", "json_string":"\"abc\""} | ||
{"index":{"_id":"4"}} | ||
{"test_name":"json empty string","json_string":""} | ||
{"index":{"_id":"5"}} | ||
{"test_name":"json invalid object", "json_string":"{\"invalid\":\"json\", \"string\"}"} | ||
{"index":{"_id":"6"}} | ||
{"test_name":"json null", "json_string":null} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
missing license header
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
actually why do we need 2 separate
JsonUtils
&JsonFunctions
?would it make sense to unify into a single class ? (inside maybe the different namespaces)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Excellent question. I dislike "utility classes" (what is the responsibility of a utility class?). But it gives us a central class to put all the
json
business logic. This seems to be how we do things (date time and IP address business logic also lives in util classes).As for the
JsonFunction
class, it provides an integration layer between the language parser's function expressions and the json business logic. The casting expressions will be another class that will access the json logic.This class could very well be named
Json
orJsonMapper
. But if we do this, we should rename all the util classes in theutils
package.