Skip to content

Commit 969a1c5

Browse files
author
David Roberts
committed
[ML] Include the "properties" layer in find_file_structure mappings (#62158)
Previously the "mappings" field of the response from the find_file_structure endpoint was not a drop-in replacement for the mappings format of the create index endpoint - the "properties" layer was missing. It was omitted initially on the assumption that the find_file_structure endpoint would only ever return very simple mappings without any nested objects. However, this will not be true in the future, as we will improve mappings detection for complex JSON objects. As a first step it makes sense to move the returned mappings closer to the standard format. This is a small building block towards fixing #55616.
1 parent 038f7a8 commit 969a1c5

File tree

11 files changed

+160
-132
lines changed

11 files changed

+160
-132
lines changed

docs/reference/ml/anomaly-detection/apis/find-file-structure.asciidoc

Lines changed: 100 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -299,21 +299,23 @@ If the request does not encounter errors, you receive the following result:
299299
],
300300
"need_client_timezone" : true, <10>
301301
"mappings" : { <11>
302-
"@timestamp" : {
303-
"type" : "date"
304-
},
305-
"author" : {
306-
"type" : "keyword"
307-
},
308-
"name" : {
309-
"type" : "keyword"
310-
},
311-
"page_count" : {
312-
"type" : "long"
313-
},
314-
"release_date" : {
315-
"type" : "date",
316-
"format" : "iso8601"
302+
"properties" : {
303+
"@timestamp" : {
304+
"type" : "date"
305+
},
306+
"author" : {
307+
"type" : "keyword"
308+
},
309+
"name" : {
310+
"type" : "keyword"
311+
},
312+
"page_count" : {
313+
"type" : "long"
314+
},
315+
"release_date" : {
316+
"type" : "date",
317+
"format" : "iso8601"
318+
}
317319
}
318320
},
319321
"ingest_pipeline" : {
@@ -622,61 +624,63 @@ If the request does not encounter errors, you receive the following result:
622624
],
623625
"need_client_timezone" : true, <10>
624626
"mappings" : {
625-
"@timestamp" : {
626-
"type" : "date"
627-
},
628-
"DOLocationID" : {
629-
"type" : "long"
630-
},
631-
"PULocationID" : {
632-
"type" : "long"
633-
},
634-
"RatecodeID" : {
635-
"type" : "long"
636-
},
637-
"VendorID" : {
638-
"type" : "long"
639-
},
640-
"extra" : {
641-
"type" : "double"
642-
},
643-
"fare_amount" : {
644-
"type" : "double"
645-
},
646-
"improvement_surcharge" : {
647-
"type" : "double"
648-
},
649-
"mta_tax" : {
650-
"type" : "double"
651-
},
652-
"passenger_count" : {
653-
"type" : "long"
654-
},
655-
"payment_type" : {
656-
"type" : "long"
657-
},
658-
"store_and_fwd_flag" : {
659-
"type" : "keyword"
660-
},
661-
"tip_amount" : {
662-
"type" : "double"
663-
},
664-
"tolls_amount" : {
665-
"type" : "double"
666-
},
667-
"total_amount" : {
668-
"type" : "double"
669-
},
670-
"tpep_dropoff_datetime" : {
671-
"type" : "date",
672-
"format" : "yyyy-MM-dd HH:mm:ss"
673-
},
674-
"tpep_pickup_datetime" : {
675-
"type" : "date",
676-
"format" : "yyyy-MM-dd HH:mm:ss"
677-
},
678-
"trip_distance" : {
679-
"type" : "double"
627+
"properties" : {
628+
"@timestamp" : {
629+
"type" : "date"
630+
},
631+
"DOLocationID" : {
632+
"type" : "long"
633+
},
634+
"PULocationID" : {
635+
"type" : "long"
636+
},
637+
"RatecodeID" : {
638+
"type" : "long"
639+
},
640+
"VendorID" : {
641+
"type" : "long"
642+
},
643+
"extra" : {
644+
"type" : "double"
645+
},
646+
"fare_amount" : {
647+
"type" : "double"
648+
},
649+
"improvement_surcharge" : {
650+
"type" : "double"
651+
},
652+
"mta_tax" : {
653+
"type" : "double"
654+
},
655+
"passenger_count" : {
656+
"type" : "long"
657+
},
658+
"payment_type" : {
659+
"type" : "long"
660+
},
661+
"store_and_fwd_flag" : {
662+
"type" : "keyword"
663+
},
664+
"tip_amount" : {
665+
"type" : "double"
666+
},
667+
"tolls_amount" : {
668+
"type" : "double"
669+
},
670+
"total_amount" : {
671+
"type" : "double"
672+
},
673+
"tpep_dropoff_datetime" : {
674+
"type" : "date",
675+
"format" : "yyyy-MM-dd HH:mm:ss"
676+
},
677+
"tpep_pickup_datetime" : {
678+
"type" : "date",
679+
"format" : "yyyy-MM-dd HH:mm:ss"
680+
},
681+
"trip_distance" : {
682+
"type" : "double"
683+
}
680684
}
681685
},
682686
"ingest_pipeline" : {
@@ -1560,14 +1564,16 @@ this:
15601564
],
15611565
"need_client_timezone" : true,
15621566
"mappings" : {
1563-
"@timestamp" : {
1564-
"type" : "date"
1565-
},
1566-
"loglevel" : {
1567-
"type" : "keyword"
1568-
},
1569-
"message" : {
1570-
"type" : "text"
1567+
"properties" : {
1568+
"@timestamp" : {
1569+
"type" : "date"
1570+
},
1571+
"loglevel" : {
1572+
"type" : "keyword"
1573+
},
1574+
"message" : {
1575+
"type" : "text"
1576+
}
15711577
}
15721578
},
15731579
"ingest_pipeline" : {
@@ -1720,20 +1726,22 @@ this:
17201726
],
17211727
"need_client_timezone" : true,
17221728
"mappings" : {
1723-
"@timestamp" : {
1724-
"type" : "date"
1725-
},
1726-
"class" : {
1727-
"type" : "keyword"
1728-
},
1729-
"loglevel" : {
1730-
"type" : "keyword"
1731-
},
1732-
"message" : {
1733-
"type" : "text"
1734-
},
1735-
"node" : {
1736-
"type" : "keyword"
1729+
"properties" : {
1730+
"@timestamp" : {
1731+
"type" : "date"
1732+
},
1733+
"class" : {
1734+
"type" : "keyword"
1735+
},
1736+
"loglevel" : {
1737+
"type" : "keyword"
1738+
},
1739+
"message" : {
1740+
"type" : "text"
1741+
},
1742+
"node" : {
1743+
"type" : "keyword"
1744+
}
17371745
}
17381746
},
17391747
"ingest_pipeline" : {

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/DelimitedFileStructureFinder.java

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ static DelimitedFileStructureFinder makeDelimitedFileStructureFinder(List<String
102102
Tuple<SortedMap<String, Object>, SortedMap<String, FieldStats>> mappingsAndFieldStats =
103103
FileStructureUtils.guessMappingsAndCalculateFieldStats(explanation, sampleRecords, timeoutChecker);
104104

105-
SortedMap<String, Object> mappings = mappingsAndFieldStats.v1();
105+
SortedMap<String, Object> fieldMappings = mappingsAndFieldStats.v1();
106106

107107
List<String> columnNamesList = Arrays.asList(columnNames);
108108
char delimiter = (char) csvPreference.getDelimiterChar();
@@ -149,25 +149,25 @@ static DelimitedFileStructureFinder makeDelimitedFileStructureFinder(List<String
149149
.setJavaTimestampFormats(timeField.v2().getJavaTimestampFormats())
150150
.setNeedClientTimezone(needClientTimeZone)
151151
.setIngestPipeline(FileStructureUtils.makeIngestPipelineDefinition(null, Collections.emptyMap(), csvProcessorSettings,
152-
mappings, timeField.v1(), timeField.v2().getJavaTimestampFormats(), needClientTimeZone,
152+
fieldMappings, timeField.v1(), timeField.v2().getJavaTimestampFormats(), needClientTimeZone,
153153
timeField.v2().needNanosecondPrecision()))
154154
.setMultilineStartPattern(makeMultilineStartPattern(explanation, columnNamesList, maxLinesPerMessage, delimiterPattern,
155-
quotePattern, mappings, timeField.v1(), timeField.v2()));
155+
quotePattern, fieldMappings, timeField.v1(), timeField.v2()));
156156

157-
mappings.put(FileStructureUtils.DEFAULT_TIMESTAMP_FIELD, timeField.v2().getEsDateMappingTypeWithoutFormat());
157+
fieldMappings.put(FileStructureUtils.DEFAULT_TIMESTAMP_FIELD, timeField.v2().getEsDateMappingTypeWithoutFormat());
158158
} else {
159159
structureBuilder.setIngestPipeline(FileStructureUtils.makeIngestPipelineDefinition(null, Collections.emptyMap(),
160-
csvProcessorSettings, mappings, null, null, false, false));
160+
csvProcessorSettings, fieldMappings, null, null, false, false));
161161
structureBuilder.setMultilineStartPattern(makeMultilineStartPattern(explanation, columnNamesList, maxLinesPerMessage,
162-
delimiterPattern, quotePattern, mappings, null, null));
162+
delimiterPattern, quotePattern, fieldMappings, null, null));
163163
}
164164

165165
if (mappingsAndFieldStats.v2() != null) {
166166
structureBuilder.setFieldStats(mappingsAndFieldStats.v2());
167167
}
168168

169169
FileStructure structure = structureBuilder
170-
.setMappings(mappings)
170+
.setMappings(Collections.singletonMap(FileStructureUtils.MAPPING_PROPERTIES_SETTING, fieldMappings))
171171
.setExplanation(explanation)
172172
.build();
173173

@@ -628,7 +628,7 @@ static Map<String, Object> makeCsvProcessorSettings(String field, List<String> t
628628
* records.
629629
*/
630630
static String makeMultilineStartPattern(List<String> explanation, List<String> columnNames, int maxLinesPerMessage,
631-
String delimiterPattern, String quotePattern, Map<String, Object> mappings,
631+
String delimiterPattern, String quotePattern, Map<String, Object> fieldMappings,
632632
String timeFieldName, TimestampFormatFinder timeFieldFormat) {
633633

634634
assert columnNames.isEmpty() == false;
@@ -653,7 +653,7 @@ static String makeMultilineStartPattern(List<String> explanation, List<String> c
653653
explanation.add("Created a multi-line start pattern based on timestamp column [" + columnName + "]");
654654
return builder.toString();
655655
}
656-
Object columnMapping = mappings.get(columnName);
656+
Object columnMapping = fieldMappings.get(columnName);
657657
if (columnMapping instanceof Map) {
658658
String type = (String) ((Map<?, ?>) columnMapping).get(FileStructureUtils.MAPPING_TYPE_SETTING);
659659
if (type != null) {

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/FileStructureUtils.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -393,6 +393,7 @@ static boolean isMoreLikelyTextThanKeyword(String str) {
393393
* @param csvProcessorSettings The CSV processor settings for delimited formats. <code>null</code> for
394394
* non-delimited formats.
395395
* @param mappingsForConversions Mappings (or partial mappings) that will be considered for field type conversions.
396+
* The keys in the map are the top level field names - there is no properties layer.
396397
* @param timestampField The input field containing the timestamp to be parsed into <code>@timestamp</code>.
397398
* <code>null</code> if there is no timestamp.
398399
* @param timestampFormats Timestamp formats to be used for parsing {@code timestampField}.

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/NdJsonFileStructureFinder.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -71,17 +71,17 @@ static NdJsonFileStructureFinder makeNdJsonFileStructureFinder(List<String> expl
7171
Tuple<SortedMap<String, Object>, SortedMap<String, FieldStats>> mappingsAndFieldStats =
7272
FileStructureUtils.guessMappingsAndCalculateFieldStats(explanation, sampleRecords, timeoutChecker);
7373

74-
Map<String, Object> mappings = mappingsAndFieldStats.v1();
74+
Map<String, Object> fieldMappings = mappingsAndFieldStats.v1();
7575
if (timeField != null) {
76-
mappings.put(FileStructureUtils.DEFAULT_TIMESTAMP_FIELD, timeField.v2().getEsDateMappingTypeWithoutFormat());
76+
fieldMappings.put(FileStructureUtils.DEFAULT_TIMESTAMP_FIELD, timeField.v2().getEsDateMappingTypeWithoutFormat());
7777
}
7878

7979
if (mappingsAndFieldStats.v2() != null) {
8080
structureBuilder.setFieldStats(mappingsAndFieldStats.v2());
8181
}
8282

8383
FileStructure structure = structureBuilder
84-
.setMappings(mappings)
84+
.setMappings(Collections.singletonMap(FileStructureUtils.MAPPING_PROPERTIES_SETTING, fieldMappings))
8585
.setExplanation(explanation)
8686
.build();
8787

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/TextLogFileStructureFinder.java

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -109,15 +109,15 @@ static TextLogFileStructureFinder makeTextLogFileStructureFinder(List<String> ex
109109
.setMultilineStartPattern(multiLineRegex);
110110

111111
Map<String, String> messageMapping = Collections.singletonMap(FileStructureUtils.MAPPING_TYPE_SETTING, "text");
112-
SortedMap<String, Object> mappings = new TreeMap<>();
113-
mappings.put("message", messageMapping);
114-
mappings.put(FileStructureUtils.DEFAULT_TIMESTAMP_FIELD, timestampFormatFinder.getEsDateMappingTypeWithoutFormat());
112+
SortedMap<String, Object> fieldMappings = new TreeMap<>();
113+
fieldMappings.put("message", messageMapping);
114+
fieldMappings.put(FileStructureUtils.DEFAULT_TIMESTAMP_FIELD, timestampFormatFinder.getEsDateMappingTypeWithoutFormat());
115115

116116
SortedMap<String, FieldStats> fieldStats = new TreeMap<>();
117117
fieldStats.put("message", FileStructureUtils.calculateFieldStats(messageMapping, sampleMessages, timeoutChecker));
118118

119119
Map<String, String> customGrokPatternDefinitions = timestampFormatFinder.getCustomGrokPatternDefinitions();
120-
GrokPatternCreator grokPatternCreator = new GrokPatternCreator(explanation, sampleMessages, mappings, fieldStats,
120+
GrokPatternCreator grokPatternCreator = new GrokPatternCreator(explanation, sampleMessages, fieldMappings, fieldStats,
121121
customGrokPatternDefinitions, timeoutChecker);
122122
// We can't parse directly into @timestamp using Grok, so parse to some other time field, which the date filter will then remove
123123
String interimTimestampField = overrides.getTimestampField();
@@ -150,10 +150,10 @@ static TextLogFileStructureFinder makeTextLogFileStructureFinder(List<String> ex
150150
.setJavaTimestampFormats(timestampFormatFinder.getJavaTimestampFormats())
151151
.setNeedClientTimezone(needClientTimeZone)
152152
.setGrokPattern(grokPattern)
153-
.setIngestPipeline(FileStructureUtils.makeIngestPipelineDefinition(grokPattern, customGrokPatternDefinitions, null, mappings,
154-
interimTimestampField, timestampFormatFinder.getJavaTimestampFormats(), needClientTimeZone,
153+
.setIngestPipeline(FileStructureUtils.makeIngestPipelineDefinition(grokPattern, customGrokPatternDefinitions, null,
154+
fieldMappings, interimTimestampField, timestampFormatFinder.getJavaTimestampFormats(), needClientTimeZone,
155155
timestampFormatFinder.needNanosecondPrecision()))
156-
.setMappings(mappings)
156+
.setMappings(Collections.singletonMap(FileStructureUtils.MAPPING_PROPERTIES_SETTING, fieldMappings))
157157
.setFieldStats(fieldStats)
158158
.setExplanation(explanation)
159159
.build();

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/XmlFileStructureFinder.java

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -114,18 +114,18 @@ static XmlFileStructureFinder makeXmlFileStructureFinder(List<String> explanatio
114114
structureBuilder.setFieldStats(mappingsAndFieldStats.v2());
115115
}
116116

117-
Map<String, Object> innerMappings = mappingsAndFieldStats.v1();
117+
Map<String, Object> innerFieldMappings = mappingsAndFieldStats.v1();
118118
Map<String, Object> secondLevelProperties = new LinkedHashMap<>();
119119
secondLevelProperties.put(FileStructureUtils.MAPPING_TYPE_SETTING, "object");
120-
secondLevelProperties.put(FileStructureUtils.MAPPING_PROPERTIES_SETTING, innerMappings);
121-
SortedMap<String, Object> outerMappings = new TreeMap<>();
122-
outerMappings.put(topLevelTag, secondLevelProperties);
120+
secondLevelProperties.put(FileStructureUtils.MAPPING_PROPERTIES_SETTING, innerFieldMappings);
121+
SortedMap<String, Object> outerFieldMappings = new TreeMap<>();
122+
outerFieldMappings.put(topLevelTag, secondLevelProperties);
123123
if (timeField != null) {
124-
outerMappings.put(FileStructureUtils.DEFAULT_TIMESTAMP_FIELD, timeField.v2().getEsDateMappingTypeWithoutFormat());
124+
outerFieldMappings.put(FileStructureUtils.DEFAULT_TIMESTAMP_FIELD, timeField.v2().getEsDateMappingTypeWithoutFormat());
125125
}
126126

127127
FileStructure structure = structureBuilder
128-
.setMappings(outerMappings)
128+
.setMappings(Collections.singletonMap(FileStructureUtils.MAPPING_PROPERTIES_SETTING, outerFieldMappings))
129129
.setExplanation(explanation)
130130
.build();
131131

0 commit comments

Comments
 (0)