Skip to content

Commit

Permalink
Dynamic Create FlatObjectFieldType for dotpath field
Browse files Browse the repository at this point in the history
Signed-off-by: Mingshi Liu <mingshl@amazon.com>
  • Loading branch information
mingshl committed Mar 21, 2023
1 parent 67a367f commit 3eb4cf5
Show file tree
Hide file tree
Showing 5 changed files with 397 additions and 327 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,6 @@
import org.opensearch.index.query.QueryBuilders;
import org.opensearch.search.SearchHits;
import org.opensearch.search.builder.SearchSourceBuilder;
import org.opensearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.opensearch.search.sort.SortOrder;

import java.io.IOException;
import java.net.URISyntaxException;
Expand All @@ -54,8 +52,8 @@

@State(Scope.Thread)
@Fork(1)
@Warmup(iterations = 10, time = 1, timeUnit = TimeUnit.SECONDS)
@Measurement(iterations = 100, time = 1, timeUnit = TimeUnit.SECONDS)
@Warmup(iterations = 1, time = 1, timeUnit = TimeUnit.SECONDS)
@Measurement(iterations = 1, time = 1, timeUnit = TimeUnit.SECONDS)

public class FlatObjectMappingBenchmark {

Expand Down Expand Up @@ -87,7 +85,7 @@ public void tearDown() throws Exception {
@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
public void CreateDynamicIndex(MyState state) throws IOException, URISyntaxException {
public void CreateDynamicIndex(MyState state) throws IOException {
GetDynamicIndex(state, "demo-dynamic-test");
DeleteIndex(state, "demo-dynamic-test");
}
Expand All @@ -99,7 +97,7 @@ public void CreateDynamicIndex(MyState state) throws IOException, URISyntaxExcep
@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
public void CreateFlatObjectIndex(MyState state) throws IOException, URISyntaxException {
public void CreateFlatObjectIndex(MyState state) throws IOException {
GetFlatObjectIndex(state, "demo-flat-object-test", "host");
DeleteIndex(state, "demo-flat-object-test");
}
Expand All @@ -112,10 +110,10 @@ public void CreateFlatObjectIndex(MyState state) throws IOException, URISyntaxEx
@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
public void indexDynamicMapping(MyState state) throws IOException, URISyntaxException {
public void indexDynamicMapping(MyState state) throws IOException {
GetDynamicIndex(state, "demo-dynamic-test1");
String doc =
"{ \"message\": \"[5592:1:0309/123054.737712:ERROR:child_process_sandbox_support_impl_linux.cc(79)] FontService unique font name matching request did not receive a response.\", \"fileset\": { \"name\": \"syslog\" }, \"process\": { \"name\": \"org.gnome.Shell.desktop\", \"pid\": 3383 }, \"@timestamp\": \"2020-03-09T18:00:54.000+05:30\", \"host\": { \"hostname\": \"bionic\", \"name\": \"bionic\" } }";
"{ \"message\": \"[1234:1:0309/123054.737712:ERROR: request did not receive a response.\", \"fileset\": { \"name\": \"syslog\" }, \"process\": { \"name\": \"org.gnome.Shell.desktop\", \"pid\": 1234 }, \"@timestamp\": \"2020-03-09T18:00:54.000+05:30\", \"host\": { \"hostname\": \"bionic\", \"name\": \"bionic\" } }";
UploadDoc(state, "demo-dynamic-test1", doc);
DeleteIndex(state, "demo-dynamic-test1");
}
Expand All @@ -130,7 +128,7 @@ public void indexDynamicMapping(MyState state) throws IOException, URISyntaxExce
public void indexFlatObjectMapping(MyState state) throws IOException, URISyntaxException {
GetFlatObjectIndex(state, "demo-flat-object-test1", "host");
String doc =
"{ \"message\": \"[5592:1:0309/123054.737712:ERROR:child_process_sandbox_support_impl_linux.cc(79)] FontService unique font name matching request did not receive a response.\", \"fileset\": { \"name\": \"syslog\" }, \"process\": { \"name\": \"org.gnome.Shell.desktop\", \"pid\": 3383 }, \"@timestamp\": \"2020-03-09T18:00:54.000+05:30\", \"host\": { \"hostname\": \"bionic\", \"name\": \"bionic\" } }";
"{ \"message\": \"[1234:1:0309/123054.737712:ERROR: request did not receive a response.\", \"fileset\": { \"name\": \"syslog\" }, \"process\": { \"name\": \"org.gnome.Shell.desktop\", \"pid\": 1234 }, \"@timestamp\": \"2020-03-09T18:00:54.000+05:30\", \"host\": { \"hostname\": \"bionic\", \"name\": \"bionic\" } }";
UploadDoc(state, "demo-flat-object-test1", doc);
DeleteIndex(state, "demo-flat-object-test1");
}
Expand All @@ -146,7 +144,7 @@ public void searchDynamicMapping(MyState state) throws IOException {
String indexName = "demo-dynamic-test2";
GetDynamicIndex(state, indexName);
String doc =
"{ \"message\": \"[5592:1:0309/123054.737712:ERROR:child_process_sandbox_support_impl_linux.cc(79)] FontService unique font name matching request did not receive a response.\", \"fileset\": { \"name\": \"syslog\" }, \"process\": { \"name\": \"org.gnome.Shell.desktop\", \"pid\": 3383 }, \"@timestamp\": \"2020-03-09T18:00:54.000+05:30\", \"host\": { \"hostname\": \"bionic\", \"name\": \"bionic\" } }";
"{ \"message\": \"[1234:1:0309/123054.737712:ERROR: request did not receive a response.\", \"fileset\": { \"name\": \"syslog\" }, \"process\": { \"name\": \"org.gnome.Shell.desktop\", \"pid\": 1234 }, \"@timestamp\": \"2020-03-09T18:00:54.000+05:30\", \"host\": { \"hostname\": \"bionic\", \"name\": \"bionic\" } }";
UploadDoc(state, indexName, doc);
SearchDoc(state, indexName, "host.hostname", "bionic", "@timestamp", "message");
DeleteIndex(state, indexName);
Expand All @@ -162,9 +160,9 @@ public void searchDynamicMapping(MyState state) throws IOException {
public void searchFlatObjectMapping(MyState state) throws IOException {
GetFlatObjectIndex(state, "demo-flat-object-test2", "host");
String doc =
"{ \"message\": \"[5592:1:0309/123054.737712:ERROR:child_process_sandbox_support_impl_linux.cc(79)] FontService unique font name matching request did not receive a response.\", \"fileset\": { \"name\": \"syslog\" }, \"process\": { \"name\": \"org.gnome.Shell.desktop\", \"pid\": 3383 }, \"@timestamp\": \"2020-03-09T18:00:54.000+05:30\", \"host\": { \"hostname\": \"bionic\", \"name\": \"bionic\" } }";
"{ \"message\": \"[1234:1:0309/123054.737712:ERROR: request did not receive a response.\", \"fileset\": { \"name\": \"syslog\" }, \"process\": { \"name\": \"org.gnome.Shell.desktop\", \"pid\": 1234 }, \"@timestamp\": \"2020-03-09T18:00:54.000+05:30\", \"host\": { \"hostname\": \"bionic\", \"name\": \"bionic\" } }";
UploadDoc(state, "demo-flat-object-test2", doc);
SearchDoc(state, "demo-flat-object-test2", "host", "name", "@timestamp", "message");
SearchDoc(state, "demo-flat-object-test2", "host.hostname", "name", "@timestamp", "message");
DeleteIndex(state, "demo-flat-object-test2");
}

Expand All @@ -174,38 +172,34 @@ public void searchFlatObjectMapping(MyState state) throws IOException {
* search for document and delete index
* Caught exceptions with the number of fields over 1000
*/
// @Benchmark
// @BenchmarkMode(Mode.AverageTime)
// @OutputTimeUnit(TimeUnit.MILLISECONDS)
// public void searchDynamicMappingWithOneHundredNestedJSON(MyState state) throws IOException {
//
// String indexName = "demo-dynamic-test3";
// GetDynamicIndex(state, indexName);
// String doc = GenerateRandomJson();
// Map<String, String> searchValueAndPath = findNestedValueAndPath(doc,99, "field0");
// String searchValue = searchValueAndPath.get("value");
// String searchFieldName = searchValueAndPath.get("path");
// UploadDoc(state, indexName, doc);
// SearchDoc(state,indexName,searchFieldName,searchValue,searchValue ,searchFieldName );
// DeleteIndex(state, indexName);
// }
@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
public void searchDynamicMappingWithOneHundredNestedJSON(MyState state) throws IOException {
String indexName = "demo-dynamic-test3";
GetDynamicIndex(state, indexName);
String doc = GenerateRandomJson(10, "nested");
Map<String, String> searchValueAndPath = findNestedValueAndPath(doc, 26, "");
String searchValue = searchValueAndPath.get("value");
String searchFieldName = searchValueAndPath.get("path");
UploadDoc(state, indexName, doc);
SearchDoc(state, indexName, searchFieldName, searchValue, searchFieldName, searchFieldName);
DeleteIndex(state, indexName);
}

/**
* FlatObjectIndex:
* create index, upload a nested document in 100 levels, and each level with 10 fields,
* search for document and delete index
* works fine and able to return document
* debug search in dotpath
*/
@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
public void searchFlatObjectMappingInValueWithOneHundredNestedJSON(MyState state) throws IOException {
String indexName = "demo-flat-object-test4";
GetFlatObjectIndex(state, indexName, "nested0");
String doc = GenerateRandomJson(100, "nested");
Map<String, String> searchValueAndPath = findNestedValueAndPath(doc, 6, "nested0");
String doc = GenerateRandomJson(10, "nested");
Map<String, String> searchValueAndPath = findNestedValueAndPath(doc, 26, "");
String SearchRandomWord = searchValueAndPath.get("value");
String SearchRandomPath = "nested0._value";
String SearchRandomPath = searchValueAndPath.get("path");
String searchFieldName = "nested0";
UploadDoc(state, indexName, doc);
SearchDoc(state, indexName, SearchRandomPath, SearchRandomWord, searchFieldName, searchFieldName);
Expand Down Expand Up @@ -277,35 +271,30 @@ private static void SearchDoc(
sourceBuilder.query(QueryBuilders.matchQuery(searchFieldName, searchText));
sourceBuilder.from(0);
sourceBuilder.size(10);
sourceBuilder.sort(sortFieldName, SortOrder.DESC);
sourceBuilder.highlighter(new HighlightBuilder().field(highlightFieldName));
SearchRequest searchRequest = new SearchRequest(indexName);
searchRequest.source(sourceBuilder);
SearchResponse SearchResponse = state.client.search(searchRequest, RequestOptions.DEFAULT);
if (!SearchResponse.status().toString().equals("OK")) {
System.out.println("the number of hit is: " + SearchResponse.getHits().getTotalHits().value);
System.out.println("SearchResponse: " + SearchResponse.toString());
}

SearchHits hits = SearchResponse.getHits();
long totalHits = hits.getTotalHits().value;
if (totalHits == 0) {
throw new IOException("No hit is found");
SearchHits hits = SearchResponse.getHits();
long totalHits = hits.getTotalHits().value;
if (totalHits == 0) {
throw new IOException("No hit is found");
}
}
}

private static String GenerateRandomJson(int numberOfNestedLevel, String subObjectName) {
JSONObject json = new JSONObject();
Random random = new Random();

// Create 100 nested levels
// Create nested levels

for (int i = 0; i < numberOfNestedLevel; i++) {
JSONObject nestedObject = new JSONObject();

// Add 10 fields to each nested level
for (int j = 0; j < 10; j++) {
String field = "field" + j;
String field = "field" + i + j;
String value = generateRandomString(random);
nestedObject.put(field, value);
}
Expand Down Expand Up @@ -337,31 +326,37 @@ private static Map<String, String> findNestedValueAndPath(String randomJsonStrin
String targetKey = "field" + levelNumber;
Map<String, String> result = new HashMap<>();
Iterator<String> keys = jsonObject.keys();
StringBuilder path = new StringBuilder();
while (keys.hasNext()) {
String key = keys.next();
if (path.length() == 0) {
path.append(currentPath);
}

Object value = jsonObject.get(key);
if (key.equals(targetKey)) {
result.put("value", value.toString());
result.put("path", key);
System.out.println("value is " + value.toString());
System.out.println("path is " + path.toString());
break;
if (currentPath.length() == 0) {
currentPath = key;
}
result.put("path", currentPath + "." + key);
return result;
}
if (value instanceof JSONObject) {

path.append("." + key);
if (value instanceof JSONObject) {
if (currentPath.length() == 0) {
currentPath = key;
} else {
if (currentPath.contains(".") && currentPath.split("\\.").length > 1) {
int pathLength = currentPath.split("\\.").length;
currentPath = "nested0." + key;
} else {
currentPath = currentPath + "." + key;
}

Map<String, String> nestedResult = findNestedValueAndPath(value.toString(), levelNumber, path.toString());
}
Map<String, String> nestedResult = findNestedValueAndPath(value.toString(), levelNumber, currentPath);
if (!nestedResult.isEmpty()) {
return nestedResult;
}
}
}

return result;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,10 @@
import java.io.IOException;
import java.nio.CharBuffer;
import java.util.ArrayList;
import java.util.logging.Logger;

/**
* JsonToStringParser is the main parser class to transform JSON into stringFields in a XContentParser
* returns XContentParser with 3 string fields
* returns XContentParser with one parent field and subfields
* fieldName, fieldName._value, fieldName._valueAndPath
* @opensearch.internal
*/
Expand All @@ -60,12 +59,11 @@ public class JsonToStringXContentParser extends AbstractXContentParser {
private NamedXContentRegistry xContentRegistry;

private DeprecationHandler deprecationHandler;
/**
* logging function
* To removed after draft PR
*/

private static final Logger logger = Logger.getLogger((JsonToStringXContentParser.class.getName()));
private static final String VALUE_AND_PATH_SUFFIX = "._valueAndPath";
private static final String VALUE_SUFFIX = "._value";
private static final String DOT_SYMBOL = ".";
private static final String EQUAL_SYMBOL = "=";

public JsonToStringXContentParser(
NamedXContentRegistry xContentRegistry,
Expand All @@ -85,11 +83,10 @@ public XContentParser parseObject() throws IOException {
builder.startObject();
parseToken();
builder.field(this.fieldTypeName, keyList);
builder.field(this.fieldTypeName + "._value", valueList);
builder.field(this.fieldTypeName + "._valueAndPath", valueAndPathList);
builder.field(this.fieldTypeName + VALUE_SUFFIX, valueList);
builder.field(this.fieldTypeName + VALUE_AND_PATH_SUFFIX, valueAndPathList);
builder.endObject();
String jString = XContentHelper.convertToJson(BytesReference.bytes(builder), false, XContentType.JSON);
logger.info("Before createParser, jString: " + jString + "\n");
return JsonXContent.jsonXContent.createParser(this.xContentRegistry, this.deprecationHandler, String.valueOf(jString));
}

Expand All @@ -98,60 +95,38 @@ private void parseToken() throws IOException {
while (this.parser.nextToken() != Token.END_OBJECT) {

currentFieldName = this.parser.currentName();

logger.info("currentFieldName: " + currentFieldName + "\n");
StringBuilder parsedFields = new StringBuilder();
StringBuilder path = new StringBuilder(fieldTypeName);
if (this.parser.nextToken() == Token.START_OBJECT) {
/**
* for nested Json, make a copy of parser, then parse the entire Json as string.
* for example:
* {"grandpa": {
* "dad": {
* "son": "me"
* } }
* the JSON object would be read as three string fields for "grandpa" would be
* grandpa: {"dad","son"} -- the parent string field contains the keys only.
* grandpa._value: { "{dad: {son: me}}} ,"{son: me}","me"} -- the _value sub string field contains the values only.
* grandpa._pathAndValue: { "grandpa={"dad: {son: me}}}","grandpa.dad={son: me}}", "grandpa.dad.son=me"}
* -- the _pathAndValue sub string field contains the "path=Value" format.
*/
// TODO: to convert the entire JsonObject as string without changing the tokenizer position.
path.append("." + currentFieldName);
path.append(DOT_SYMBOL + currentFieldName);
parsedFields.append(this.parser.toString());
this.keyList.add(currentFieldName);
this.valueList.add(parsedFields.toString());
this.valueAndPathList.add(path + "=" + parsedFields.toString());
this.valueAndPathList.add(path + EQUAL_SYMBOL + parsedFields.toString());
parseToken();
} else {
path.append("." + currentFieldName);
path.append(DOT_SYMBOL + currentFieldName);
parseValue(currentFieldName, parsedFields);
this.keyList.add(currentFieldName);
this.valueList.add(parsedFields.toString());
this.valueAndPathList.add(path + "=" + parsedFields.toString());
this.valueAndPathList.add(path + EQUAL_SYMBOL + parsedFields.toString());
}

}
}

private void parseValue(String currentFieldName, StringBuilder parsedFields) throws IOException {
logger.info("this.parser.currentToken(): " + this.parser.currentToken() + "\n");
switch (this.parser.currentToken()) {
case VALUE_STRING:
parsedFields.append(this.parser.textOrNull());
logger.info("currentFieldName and parsedFields :" + currentFieldName + " " + parsedFields.toString() + "\n");
break;
// Handle other token types as needed
// ToDo, what do we do, if encountered these fields?
// should never get to START_OBJECT
case START_OBJECT:
throw new IOException("Unsupported token type");
case FIELD_NAME:
// should never get to FIELD_NAME
logger.info("token is FIELD_NAME: " + this.parser.currentName() + "\n");
break;
case VALUE_EMBEDDED_OBJECT:
logger.info("token is VALUE_EMBEDDED_OBJECT: " + this.parser.objectText() + "\n");
break;
default:
throw new IOException("Unsupported token type [" + parser.currentToken() + "]");
Expand Down
Loading

0 comments on commit 3eb4cf5

Please sign in to comment.