Skip to content

Commit 46057a7

Browse files
[ML] Support fields with commas in data frame analytics analyzed_fields (#91710)
This commit fixes a bug where fields containing comma (`,`) in their name would result in error when the `_start` or `_explain` APIs are called. If the field was `comma,field` for example, we would split the field into two tokens `comma` and `field` and thus the error would be that those fields could not be detected in the source index. Closes #72541
1 parent 0eef773 commit 46057a7

File tree

6 files changed

+150
-26
lines changed

6 files changed

+150
-26
lines changed

docs/changelog/91710.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 91710
2+
summary: Support fields with commas in data frame analytics `analyzed_fields`
3+
area: Machine Learning
4+
type: bug
5+
issues:
6+
- 72541

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/groups/GroupOrJobLookup.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import java.util.Collection;
1717
import java.util.Collections;
1818
import java.util.List;
19+
import java.util.Optional;
1920
import java.util.Set;
2021
import java.util.SortedMap;
2122
import java.util.TreeMap;
@@ -57,7 +58,7 @@ private void put(Job job) {
5758
}
5859

5960
public Set<String> expandJobIds(String expression, boolean allowNoMatch) {
60-
return new GroupOrJobResolver().expand(expression, allowNoMatch);
61+
return new GroupOrJobResolver().expand(expression, allowNoMatch, Optional.of(","));
6162
}
6263

6364
public boolean isGroupOrJob(String id) {

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/utils/NameResolver.java

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import java.util.Collections;
1414
import java.util.List;
1515
import java.util.Objects;
16+
import java.util.Optional;
1617
import java.util.Set;
1718
import java.util.SortedSet;
1819
import java.util.TreeSet;
@@ -34,11 +35,10 @@ protected NameResolver(Function<String, ResourceNotFoundException> notFoundExcep
3435
/**
3536
* Expands an expression into the set of matching names.
3637
* For example, given a set of names ["foo-1", "foo-2", "bar-1", bar-2"],
37-
* expressions resolve follows:
38+
* expressions resolve as follows:
3839
* <ul>
3940
* <li>"foo-1" : ["foo-1"]</li>
4041
* <li>"bar-1" : ["bar-1"]</li>
41-
* <li>"foo-1,foo-2" : ["foo-1", "foo-2"]</li>
4242
* <li>"foo-*" : ["foo-1", "foo-2"]</li>
4343
* <li>"*-1" : ["bar-1", "foo-1"]</li>
4444
* <li>"*" : ["bar-1", "bar-2", "foo-1", "foo-2"]</li>
@@ -52,11 +52,40 @@ protected NameResolver(Function<String, ResourceNotFoundException> notFoundExcep
5252
* @return the sorted set of matching names
5353
*/
5454
public SortedSet<String> expand(String expression, boolean allowNoMatch) {
55+
return expand(expression, allowNoMatch, Optional.empty());
56+
}
57+
58+
/**
59+
* Expands an expression into the set of matching names.
60+
* If a tokenization delimiter is provided, then the expression is first tokenized
61+
* and then each token is expanded.
62+
* For example, given a set of names ["foo-1", "foo-2", "bar-1", bar-2"] and comma as the delimiter
63+
* expressions resolve as follows:
64+
* <ul>
65+
* <li>"foo-1" : ["foo-1"]</li>
66+
* <li>"bar-1" : ["bar-1"]</li>
67+
* <li>"foo-1,foo-2" : ["foo-1", "foo-2"]</li>
68+
* <li>"foo-*" : ["foo-1", "foo-2"]</li>
69+
* <li>"*-1" : ["bar-1", "foo-1"]</li>
70+
* <li>"*" : ["bar-1", "bar-2", "foo-1", "foo-2"]</li>
71+
* <li>"_all" : ["bar-1", "bar-2", "foo-1", "foo-2"]</li>
72+
* </ul>
73+
*
74+
* @param expression the expression to resolve
75+
* @param allowNoMatch if {@code false}, an error is thrown when no name matches the {@code expression}.
76+
* This only applies to wild card expressions, if {@code expression} is not a
77+
* wildcard then setting this true will not suppress the exception
78+
* @param tokenizationDelimiter An optional delimiter to tokenize the expression on
79+
* @return the sorted set of matching names
80+
*/
81+
public SortedSet<String> expand(String expression, boolean allowNoMatch, Optional<String> tokenizationDelimiter) {
5582
SortedSet<String> result = new TreeSet<>();
5683
if (Strings.isAllOrWildcard(expression)) {
5784
result.addAll(nameSet());
5885
} else {
59-
String[] tokens = Strings.tokenizeToStringArray(expression, ",");
86+
String[] tokens = tokenizationDelimiter.isPresent()
87+
? Strings.tokenizeToStringArray(expression, tokenizationDelimiter.get())
88+
: new String[] { expression };
6089
for (String token : tokens) {
6190
if (Regex.isSimpleMatchPattern(token)) {
6291
List<String> expanded = keys().stream()

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetector.java

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -327,16 +327,15 @@ private void includeAndExcludeFields(Set<String> fields, Set<FieldSelection> fie
327327
try {
328328
// If the inclusion set does not match anything, that means the user's desired fields cannot be found in
329329
// the collection of supported field types. We should let the user know.
330-
Set<String> includedSet = NameResolver.newUnaliased(
330+
Set<String> includedSet = expandFields(
331+
analyzedFields.includes().length == 0 ? new String[] { "*" } : analyzedFields.includes(),
331332
fields,
332-
(ex) -> new ResourceNotFoundException(Messages.getMessage(Messages.DATA_FRAME_ANALYTICS_BAD_FIELD_FILTER, ex))
333-
).expand(includes, false);
333+
false
334+
);
335+
334336
// If the exclusion set does not match anything, that means the fields are already not present
335337
// no need to raise if nothing matched
336-
Set<String> excludedSet = NameResolver.newUnaliased(
337-
fieldCapabilitiesResponse.get().keySet(),
338-
(ex) -> new ResourceNotFoundException(Messages.getMessage(Messages.DATA_FRAME_ANALYTICS_BAD_FIELD_FILTER, ex))
339-
).expand(excludes, true);
338+
Set<String> excludedSet = expandFields(analyzedFields.excludes(), fieldCapabilitiesResponse.get().keySet(), true);
340339

341340
applyIncludesExcludes(fields, includedSet, excludedSet, fieldSelection);
342341
} catch (ResourceNotFoundException ex) {
@@ -345,6 +344,19 @@ private void includeAndExcludeFields(Set<String> fields, Set<FieldSelection> fie
345344
}
346345
}
347346

347+
private Set<String> expandFields(String[] fields, Set<String> nameset, boolean allowNoMatch) {
348+
NameResolver nameResolver = NameResolver.newUnaliased(
349+
nameset,
350+
(ex) -> new ResourceNotFoundException(Messages.getMessage(Messages.DATA_FRAME_ANALYTICS_BAD_FIELD_FILTER, ex))
351+
);
352+
353+
Set<String> expanded = new HashSet<>();
354+
for (String field : fields) {
355+
expanded.addAll(nameResolver.expand(field, allowNoMatch));
356+
}
357+
return expanded;
358+
}
359+
348360
private void checkIncludesExcludesAreNotObjects(FetchSourceContext analyzedFields) {
349361
List<String> objectFields = Stream.concat(Arrays.stream(analyzedFields.includes()), Arrays.stream(analyzedFields.excludes()))
350362
.filter(field -> isObject(getMappingTypes(field)) || isNested(getMappingTypes(field)))

x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetectorTests.java

Lines changed: 57 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1332,7 +1332,63 @@ public void testDetect_GivenAnalyzedFieldExcludesNestedField() {
13321332
assertThat(e.getMessage(), equalTo("analyzed_fields must not include or exclude object or nested fields: [nested_field]"));
13331333
}
13341334

1335-
public void testDetect_givenFeatureProcessorsFailures_ResultsField() {
1335+
public void testDetect_GivenAnalyzedFieldIncludesFieldWithCommaCharacter() {
1336+
FieldCapabilitiesResponse fieldCapabilities = new MockFieldCapsResponseBuilder().addAggregatableField("comma,field", "float")
1337+
.addAggregatableField("some_other_field", "float")
1338+
.build();
1339+
1340+
analyzedFields = FetchSourceContext.of(true, new String[] { "comma,field" }, null);
1341+
1342+
ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
1343+
buildOutlierDetectionConfig(),
1344+
100,
1345+
fieldCapabilities,
1346+
Collections.emptyMap()
1347+
);
1348+
1349+
Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
1350+
1351+
List<ExtractedField> allFields = fieldExtraction.v1().getAllFields();
1352+
assertThat(allFields, hasSize(1));
1353+
assertThat(allFields.get(0).getName(), equalTo("comma,field"));
1354+
assertThat(allFields.get(0).getMethod(), equalTo(ExtractedField.Method.DOC_VALUE));
1355+
1356+
assertFieldSelectionContains(
1357+
fieldExtraction.v2(),
1358+
FieldSelection.included("comma,field", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL),
1359+
FieldSelection.excluded("some_other_field", Collections.singleton("float"), "field not in includes list")
1360+
);
1361+
}
1362+
1363+
public void testDetect_GivenAnalyzedFieldExcludesFieldWithCommaCharacter() {
1364+
FieldCapabilitiesResponse fieldCapabilities = new MockFieldCapsResponseBuilder().addAggregatableField("comma,field", "float")
1365+
.addAggregatableField("some_other_field", "float")
1366+
.build();
1367+
1368+
analyzedFields = FetchSourceContext.of(true, null, new String[] { "comma,field" });
1369+
1370+
ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
1371+
buildOutlierDetectionConfig(),
1372+
100,
1373+
fieldCapabilities,
1374+
Collections.emptyMap()
1375+
);
1376+
1377+
Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
1378+
1379+
List<ExtractedField> allFields = fieldExtraction.v1().getAllFields();
1380+
assertThat(allFields, hasSize(1));
1381+
assertThat(allFields.get(0).getName(), equalTo("some_other_field"));
1382+
assertThat(allFields.get(0).getMethod(), equalTo(ExtractedField.Method.DOC_VALUE));
1383+
1384+
assertFieldSelectionContains(
1385+
fieldExtraction.v2(),
1386+
FieldSelection.excluded("comma,field", Collections.singleton("float"), "field in excludes list"),
1387+
FieldSelection.included("some_other_field", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL)
1388+
);
1389+
}
1390+
1391+
public void tesstDetect_givenFeatureProcessorsFailures_ResultsField() {
13361392
FieldCapabilitiesResponse fieldCapabilities = simpleFieldResponse();
13371393
ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
13381394
buildRegressionConfig("field_31", Arrays.asList(buildPreProcessor("ml.result", "foo"))),

x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/utils/NameResolverTests.java

Lines changed: 34 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import java.util.HashSet;
1717
import java.util.List;
1818
import java.util.Map;
19+
import java.util.Optional;
1920
import java.util.Set;
2021
import java.util.SortedSet;
2122
import java.util.TreeSet;
@@ -44,7 +45,7 @@ public void testNoMatchingNames_GivenPatternAndNotAllowNoMatch() {
4445
public void testNoMatchingNames_GivenMatchingNameAndNonMatchingPatternAndNotAllowNoMatch() {
4546
ResourceNotFoundException e = expectThrows(
4647
ResourceNotFoundException.class,
47-
() -> newUnaliasedResolver("foo").expand("foo, bar*", false)
48+
() -> newUnaliasedResolver("foo").expand("foo, bar*", false, Optional.of(","))
4849
);
4950
assertThat(e.getMessage(), equalTo("bar*"));
5051
}
@@ -56,17 +57,17 @@ public void testUnaliased() {
5657
assertThat(nameResolver.expand("foo-2", false), equalTo(newSortedSet("foo-2")));
5758
assertThat(nameResolver.expand("bar-1", false), equalTo(newSortedSet("bar-1")));
5859
assertThat(nameResolver.expand("bar-2", false), equalTo(newSortedSet("bar-2")));
59-
assertThat(nameResolver.expand("foo-1,foo-2", false), equalTo(newSortedSet("foo-1", "foo-2")));
60+
assertThat(nameResolver.expand("foo-1,foo-2", false, Optional.of(",")), equalTo(newSortedSet("foo-1", "foo-2")));
6061
assertThat(nameResolver.expand("foo-*", false), equalTo(newSortedSet("foo-1", "foo-2")));
6162
assertThat(nameResolver.expand("bar-*", false), equalTo(newSortedSet("bar-1", "bar-2")));
6263
assertThat(nameResolver.expand("*oo-*", false), equalTo(newSortedSet("foo-1", "foo-2")));
6364
assertThat(nameResolver.expand("*-1", false), equalTo(newSortedSet("foo-1", "bar-1")));
6465
assertThat(nameResolver.expand("*-2", false), equalTo(newSortedSet("foo-2", "bar-2")));
6566
assertThat(nameResolver.expand("*", false), equalTo(newSortedSet("foo-1", "foo-2", "bar-1", "bar-2")));
6667
assertThat(nameResolver.expand("_all", false), equalTo(newSortedSet("foo-1", "foo-2", "bar-1", "bar-2")));
67-
assertThat(nameResolver.expand("foo-1,foo-2", false), equalTo(newSortedSet("foo-1", "foo-2")));
68-
assertThat(nameResolver.expand("foo-1,bar-1", false), equalTo(newSortedSet("bar-1", "foo-1")));
69-
assertThat(nameResolver.expand("foo-*,bar-1", false), equalTo(newSortedSet("bar-1", "foo-1", "foo-2")));
68+
assertThat(nameResolver.expand("foo-1,foo-2", false, Optional.of(",")), equalTo(newSortedSet("foo-1", "foo-2")));
69+
assertThat(nameResolver.expand("foo-1,bar-1", false, Optional.of(",")), equalTo(newSortedSet("bar-1", "foo-1")));
70+
assertThat(nameResolver.expand("foo-*,bar-1", false, Optional.of(",")), equalTo(newSortedSet("bar-1", "foo-1", "foo-2")));
7071
}
7172

7273
public void testAliased() {
@@ -84,25 +85,44 @@ public void testAliased() {
8485
assertThat(nameResolver.expand("foo-2", false), equalTo(newSortedSet("foo-2")));
8586
assertThat(nameResolver.expand("bar-1", false), equalTo(newSortedSet("bar-1")));
8687
assertThat(nameResolver.expand("bar-2", false), equalTo(newSortedSet("bar-2")));
87-
assertThat(nameResolver.expand("foo-1,foo-2", false), equalTo(newSortedSet("foo-1", "foo-2")));
88+
assertThat(nameResolver.expand("foo-1,foo-2", false, Optional.of(",")), equalTo(newSortedSet("foo-1", "foo-2")));
8889
assertThat(nameResolver.expand("foo-*", false), equalTo(newSortedSet("foo-1", "foo-2")));
8990
assertThat(nameResolver.expand("bar-*", false), equalTo(newSortedSet("bar-1", "bar-2")));
9091
assertThat(nameResolver.expand("*oo-*", false), equalTo(newSortedSet("foo-1", "foo-2")));
9192
assertThat(nameResolver.expand("*-1", false), equalTo(newSortedSet("foo-1", "bar-1")));
9293
assertThat(nameResolver.expand("*-2", false), equalTo(newSortedSet("foo-2", "bar-2")));
9394
assertThat(nameResolver.expand("*", false), equalTo(newSortedSet("foo-1", "foo-2", "bar-1", "bar-2")));
9495
assertThat(nameResolver.expand("_all", false), equalTo(newSortedSet("foo-1", "foo-2", "bar-1", "bar-2")));
95-
assertThat(nameResolver.expand("foo-1,foo-2", false), equalTo(newSortedSet("foo-1", "foo-2")));
96-
assertThat(nameResolver.expand("foo-1,bar-1", false), equalTo(newSortedSet("bar-1", "foo-1")));
97-
assertThat(nameResolver.expand("foo-*,bar-1", false), equalTo(newSortedSet("bar-1", "foo-1", "foo-2")));
96+
assertThat(nameResolver.expand("foo-1,foo-2", false, Optional.of(",")), equalTo(newSortedSet("foo-1", "foo-2")));
97+
assertThat(nameResolver.expand("foo-1,bar-1", false, Optional.of(",")), equalTo(newSortedSet("bar-1", "foo-1")));
98+
assertThat(nameResolver.expand("foo-*,bar-1", false, Optional.of(",")), equalTo(newSortedSet("bar-1", "foo-1", "foo-2")));
9899

99-
// No let's test the aliases
100+
// Now let's test the aliases
100101
assertThat(nameResolver.expand("foo-group", false), equalTo(newSortedSet("foo-1", "foo-2")));
101102
assertThat(nameResolver.expand("bar-group", false), equalTo(newSortedSet("bar-1", "bar-2")));
102-
assertThat(nameResolver.expand("foo-group,bar-group", false), equalTo(newSortedSet("bar-1", "bar-2", "foo-1", "foo-2")));
103-
assertThat(nameResolver.expand("foo-group,foo-1", false), equalTo(newSortedSet("foo-1", "foo-2")));
104-
assertThat(nameResolver.expand("foo-group,bar-1", false), equalTo(newSortedSet("bar-1", "foo-1", "foo-2")));
105-
assertThat(nameResolver.expand("foo-group,bar-*", false), equalTo(newSortedSet("bar-1", "bar-2", "foo-1", "foo-2")));
103+
assertThat(
104+
nameResolver.expand("foo-group,bar-group", false, Optional.of(",")),
105+
equalTo(newSortedSet("bar-1", "bar-2", "foo-1", "foo-2"))
106+
);
107+
assertThat(nameResolver.expand("foo-group,foo-1", false, Optional.of(",")), equalTo(newSortedSet("foo-1", "foo-2")));
108+
assertThat(nameResolver.expand("foo-group,bar-1", false, Optional.of(",")), equalTo(newSortedSet("bar-1", "foo-1", "foo-2")));
109+
assertThat(
110+
nameResolver.expand("foo-group,bar-*", false, Optional.of(",")),
111+
equalTo(newSortedSet("bar-1", "bar-2", "foo-1", "foo-2"))
112+
);
113+
}
114+
115+
public void testUnaliased_NoDelimiter() {
116+
NameResolver nameResolver = newUnaliasedResolver("foo,1", "foo,2", "bar,1", "bar,2");
117+
118+
assertThat(nameResolver.expand("foo,1", false), equalTo(newSortedSet("foo,1")));
119+
assertThat(nameResolver.expand("foo,2", false), equalTo(newSortedSet("foo,2")));
120+
assertThat(nameResolver.expand("bar,1", false), equalTo(newSortedSet("bar,1")));
121+
assertThat(nameResolver.expand("bar,2", false), equalTo(newSortedSet("bar,2")));
122+
assertThat(nameResolver.expand("foo,*", false), equalTo(newSortedSet("foo,1", "foo,2")));
123+
assertThat(nameResolver.expand("bar,*", false), equalTo(newSortedSet("bar,1", "bar,2")));
124+
125+
expectThrows(ResourceNotFoundException.class, () -> nameResolver.expand("foo,*,bar,*", false));
106126
}
107127

108128
private static NameResolver newUnaliasedResolver(String... names) {

0 commit comments

Comments
 (0)