Skip to content

Commit

Permalink
Fix parsing of flat object fields with dots in keys
Browse files Browse the repository at this point in the history
We have a bug where a flat object field with inner fields that contain
dots will "push" the dotted name onto the dot-path from the root, but
then would just "pop" off the last part of the dotted name.

This change adds more robust support for flat object keys and subkeys
that contain dots (i.e. it pops off the entirety of the latest key,
regardless of how many dots it contains).

Fixes opensearch-project#11402

Signed-off-by: Michael Froh <froh@amazon.com>
  • Loading branch information
msfroh committed Nov 30, 2023
1 parent 21c0597 commit 6f1e477
Show file tree
Hide file tree
Showing 4 changed files with 137 additions and 12 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),

### Fixed
- Fix failure in dissect ingest processor parsing empty brackets ([#9225](https://github.com/opensearch-project/OpenSearch/pull/9255))
- Fix class_cast_exception when passing int to _version and other metadata fields in ingest simulate API ([#10101](https://github.com/opensearch-project/OpenSearch/pull/10101))
- Fix `class_cast_exception` when passing int to `_version` and other metadata fields in ingest simulate API ([#10101](https://github.com/opensearch-project/OpenSearch/pull/10101))
- Fix Segment Replication ShardLockObtainFailedException bug during index corruption ([10370](https://github.com/opensearch-project/OpenSearch/pull/10370))
- Fix some test methods in SimulatePipelineRequestParsingTests never run and fix test failure ([#10496](https://github.com/opensearch-project/OpenSearch/pull/10496))
- Fix passing wrong parameter when calling newConfigurationException() in DotExpanderProcessor ([#10737](https://github.com/opensearch-project/OpenSearch/pull/10737))
Expand All @@ -175,6 +175,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Fix for stuck update action in a bulk with `retry_on_conflict` property ([#11152](https://github.com/opensearch-project/OpenSearch/issues/11152))
- Remove shadowJar from `lang-painless` module publication ([#11369](https://github.com/opensearch-project/OpenSearch/issues/11369))
- Fix remote shards balancer and remove unused variables ([#11167](https://github.com/opensearch-project/OpenSearch/pull/11167))
- Fix parsing of flat object fields with dots in keys ([#11425](https://github.com/opensearch-project/OpenSearch/pull/11425))

### Security

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
import org.opensearch.core.xcontent.XContentBuilder;
import org.opensearch.core.xcontent.XContentLocation;
import org.opensearch.core.xcontent.XContentParser;
import org.opensearch.index.mapper.ParseContext;

import java.io.IOException;
import java.math.BigInteger;
Expand All @@ -40,7 +39,6 @@ public class JsonToStringXContentParser extends AbstractXContentParser {
private ArrayList<String> keyList = new ArrayList<>();

private XContentBuilder builder = XContentBuilder.builder(JsonXContent.jsonXContent);
private ParseContext parseContext;

private NamedXContentRegistry xContentRegistry;

Expand All @@ -54,14 +52,13 @@ public class JsonToStringXContentParser extends AbstractXContentParser {
public JsonToStringXContentParser(
NamedXContentRegistry xContentRegistry,
DeprecationHandler deprecationHandler,
ParseContext parseContext,
XContentParser parser,
String fieldTypeName
) throws IOException {
super(xContentRegistry, deprecationHandler);
this.parseContext = parseContext;
this.deprecationHandler = deprecationHandler;
this.xContentRegistry = xContentRegistry;
this.parser = parseContext.parser();
this.parser = parser;
this.fieldTypeName = fieldTypeName;
}

Expand All @@ -86,8 +83,22 @@ private void parseToken(StringBuilder path, String currentFieldName) throws IOEx
StringBuilder parsedFields = new StringBuilder();

if (this.parser.currentToken() == Token.FIELD_NAME) {
path.append(DOT_SYMBOL + currentFieldName);
this.keyList.add(currentFieldName);
path.append(DOT_SYMBOL).append(currentFieldName);
int dotIndex = currentFieldName.indexOf(DOT_SYMBOL);
String fieldNameSuffix = currentFieldName;
// The field name may be of the form foo.bar.baz
// If that's the case, each "part" is a key.
while (dotIndex >= 0) {
String fieldNamePrefix = fieldNameSuffix.substring(0, dotIndex);
if (!fieldNamePrefix.isEmpty()) {
this.keyList.add(fieldNamePrefix);
}
fieldNameSuffix = fieldNameSuffix.substring(dotIndex + 1);
dotIndex = fieldNameSuffix.indexOf(DOT_SYMBOL);
}
if (!fieldNameSuffix.isEmpty()) {
this.keyList.add(fieldNameSuffix);
}
} else if (this.parser.currentToken() == Token.START_ARRAY) {
parseToken(path, currentFieldName);
break;
Expand All @@ -97,18 +108,18 @@ private void parseToken(StringBuilder path, String currentFieldName) throws IOEx
parseToken(path, currentFieldName);
int dotIndex = path.lastIndexOf(DOT_SYMBOL);
if (dotIndex != -1) {
path.delete(dotIndex, path.length());
path.setLength(path.length() - currentFieldName.length() - 1);
}
} else {
if (!path.toString().contains(currentFieldName)) {
path.append(DOT_SYMBOL + currentFieldName);
path.append(DOT_SYMBOL).append(currentFieldName);
}
parseValue(parsedFields);
this.valueList.add(parsedFields.toString());
this.valueAndPathList.add(path + EQUAL_SYMBOL + parsedFields);
int dotIndex = path.lastIndexOf(DOT_SYMBOL);
if (dotIndex != -1) {
path.delete(dotIndex, path.length());
path.setLength(path.length() - currentFieldName.length() - 1);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -572,7 +572,7 @@ protected void parseCreateField(ParseContext context) throws IOException {
JsonToStringXContentParser JsonToStringParser = new JsonToStringXContentParser(
NamedXContentRegistry.EMPTY,
DeprecationHandler.IGNORE_DEPRECATIONS,
context,
context.parser(),
fieldType().name()
);
/*
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.common.xcontent;

import org.opensearch.common.xcontent.json.JsonXContent;
import org.opensearch.core.xcontent.DeprecationHandler;
import org.opensearch.core.xcontent.XContentBuilder;
import org.opensearch.core.xcontent.XContentParser;
import org.opensearch.test.OpenSearchTestCase;

import java.io.IOException;

public class JsonToStringXContentParserTests extends OpenSearchTestCase {

private String flattenJsonString(String fieldName, String in) throws IOException {
String transformed;
try (
XContentParser parser = JsonXContent.jsonXContent.createParser(
xContentRegistry(),
DeprecationHandler.THROW_UNSUPPORTED_OPERATION,
in
)
) {
JsonToStringXContentParser jsonToStringXContentParser = new JsonToStringXContentParser(
xContentRegistry(),
DeprecationHandler.THROW_UNSUPPORTED_OPERATION,
parser,
fieldName
);
// Skip the START_OBJECT token:
jsonToStringXContentParser.nextToken();

XContentParser transformedParser = jsonToStringXContentParser.parseObject();
try (XContentBuilder jsonBuilder = XContentFactory.jsonBuilder()) {
jsonBuilder.copyCurrentStructure(transformedParser);
return jsonBuilder.toString();
}
}
}

public void testNestedObjects() throws IOException {
String jsonExample = "{" + "\"first\" : \"1\"," + "\"second\" : {" + " \"inner\": \"2.0\"" + "}," + "\"third\": \"three\"" + "}";

assertEquals(
"{"
+ "\"flat\":[\"first\",\"second\",\"inner\",\"third\"],"
+ "\"flat._value\":[\"1\",\"2.0\",\"three\"],"
+ "\"flat._valueAndPath\":[\"flat.first=1\",\"flat.second.inner=2.0\",\"flat.third=three\"]"
+ "}",
flattenJsonString("flat", jsonExample)
);
}

public void testChildHasDots() throws IOException {
// This should be exactly the same as testNestedObjects. We're just using the "flat" notation for the inner
// object.
String jsonExample = "{" + "\"first\" : \"1\"," + "\"second.inner\" : \"2.0\"," + "\"third\": \"three\"" + "}";

assertEquals(
"{"
+ "\"flat\":[\"first\",\"second\",\"inner\",\"third\"],"
+ "\"flat._value\":[\"1\",\"2.0\",\"three\"],"
+ "\"flat._valueAndPath\":[\"flat.first=1\",\"flat.second.inner=2.0\",\"flat.third=three\"]"
+ "}",
flattenJsonString("flat", jsonExample)
);
}

public void testNestChildObjectWithDots() throws IOException {
String jsonExample = "{"
+ "\"first\" : \"1\","
+ "\"second.inner\" : {"
+ " \"really_inner\" : \"2.0\""
+ "},"
+ "\"third\": \"three\""
+ "}";

assertEquals(
"{"
+ "\"flat\":[\"first\",\"second\",\"inner\",\"really_inner\",\"third\"],"
+ "\"flat._value\":[\"1\",\"2.0\",\"three\"],"
+ "\"flat._valueAndPath\":[\"flat.first=1\",\"flat.second.inner.really_inner=2.0\",\"flat.third=three\"]"
+ "}",
flattenJsonString("flat", jsonExample)
);
}

public void testNestChildObjectWithDotsAndFieldWithDots() throws IOException {
String jsonExample = "{"
+ "\"first\" : \"1\","
+ "\"second.inner\" : {"
+ " \"totally.absolutely.inner\" : \"2.0\""
+ "},"
+ "\"third\": \"three\""
+ "}";

assertEquals(
"{"
+ "\"flat\":[\"first\",\"second\",\"inner\",\"totally\",\"absolutely\",\"inner\",\"third\"],"
+ "\"flat._value\":[\"1\",\"2.0\",\"three\"],"
+ "\"flat._valueAndPath\":[\"flat.first=1\",\"flat.second.inner.totally.absolutely.inner=2.0\",\"flat.third=three\"]"
+ "}",
flattenJsonString("flat", jsonExample)
);
}

}

0 comments on commit 6f1e477

Please sign in to comment.