elastic · benwtrent · Mar 23, 2020 · Mar 23, 2020
diff --git a/...core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/Classification.java b/...core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/Classification.java
@@ -288,9 +288,11 @@ public List<FieldCardinalityConstraint> getFieldCardinalityConstraints() {
     @SuppressWarnings("unchecked")
     @Override
     public Map<String, Object> getExplicitlyMappedFields(Map<String, Object> mappingsProperties, String resultsFieldName) {
+        Map<String, Object> additionalProperties = new HashMap<>();
+        additionalProperties.put(resultsFieldName + ".feature_importance", MapUtils.featureImportanceMapping());
         Object dependentVariableMapping = extractMapping(dependentVariable, mappingsProperties);
         if ((dependentVariableMapping instanceof Map) == false) {
-            return Collections.emptyMap();
+            return additionalProperties;
         }
         Map<String, Object> dependentVariableMappingAsMap = (Map) dependentVariableMapping;
         // If the source field is an alias, fetch the concrete field that the alias points to.
@@ -301,9 +303,8 @@ public Map<String, Object> getExplicitlyMappedFields(Map<String, Object> mapping
         // We may have updated the value of {@code dependentVariableMapping} in the "if" block above.
         // Hence, we need to check the "instanceof" condition again.
         if ((dependentVariableMapping instanceof Map) == false) {
-            return Collections.emptyMap();
+            return additionalProperties;
         }
-        Map<String, Object> additionalProperties = new HashMap<>();
         additionalProperties.put(resultsFieldName + "." + predictionFieldName, dependentVariableMapping);
         additionalProperties.put(resultsFieldName + ".top_classes.class_name", dependentVariableMapping);
         return additionalProperties;

diff --git a/...lugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/MapUtils.java b/...lugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/MapUtils.java
@@ -0,0 +1,51 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+package org.elasticsearch.xpack.core.ml.dataframe.analyses;
+
+import org.elasticsearch.index.mapper.KeywordFieldMapper;
+import org.elasticsearch.index.mapper.NumberFieldMapper;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Package-private holder for the mapping fragment that the analyses in this package add for the
+ * {@code feature_importance} results field.
+ */
+final class MapUtils {
+
+    /**
+     * Shared mapping fragment: a {@code properties} object declaring {@code feature_name}
+     * ({@link KeywordFieldMapper#CONTENT_TYPE}) and {@code importance} ({@code NumberType.DOUBLE}). Both levels are unmodifiable.
+     */
+    private static final Map<String, Object> FEATURE_IMPORTANCE_MAPPING;
+    static {
+        Map<String, Object> featureImportanceMappingProperties = new HashMap<>();
+        featureImportanceMappingProperties.put("feature_name", Collections.singletonMap("type", KeywordFieldMapper.CONTENT_TYPE));
+        featureImportanceMappingProperties.put("importance",
+            Collections.singletonMap("type", NumberFieldMapper.NumberType.DOUBLE.typeName()));
+        Map<String, Object> featureImportanceMapping = new HashMap<>();
+        // TODO sorted indices don't support nested types
+        //featureImportanceMapping.put("dynamic", true);
+        //featureImportanceMapping.put("type", ObjectMapper.NESTED_CONTENT_TYPE);
+        // The constant is handed out to every caller, so the nested map is wrapped too; otherwise a caller
+        // could change the shared mapping through the "properties" entry.
+        featureImportanceMapping.put("properties", Collections.unmodifiableMap(featureImportanceMappingProperties));
+        FEATURE_IMPORTANCE_MAPPING = Collections.unmodifiableMap(featureImportanceMapping);
+    }
+
+    /**
+     * Returns the shared mapping for the {@code feature_importance} results field.
+     *
+     * @return an unmodifiable map; the same instance is returned on every call
+     */
+    static Map<String, Object> featureImportanceMapping() {
+        return FEATURE_IMPORTANCE_MAPPING;
+    }
+
+    private MapUtils() {}
+}
diff --git a/...gin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/Regression.java b/...gin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/Regression.java
@@ -14,6 +14,7 @@
 import org.elasticsearch.common.xcontent.ConstructingObjectParser;
 import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.index.mapper.NumberFieldMapper;
 import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper;
 
 import java.io.IOException;
@@ -187,9 +188,13 @@ public List<FieldCardinalityConstraint> getFieldCardinalityConstraints() {
 
     @Override
     public Map<String, Object> getExplicitlyMappedFields(Map<String, Object> mappingsProperties, String resultsFieldName) {
+        Map<String, Object> explicitMappings = new HashMap<>();
+        explicitMappings.put(resultsFieldName + ".feature_importance", MapUtils.featureImportanceMapping());
         // Prediction field should be always mapped as "double" rather than "float" in order to increase precision in case of
         // high (over 10M) values of dependent variable.
-        return Collections.singletonMap(resultsFieldName + "." + predictionFieldName, Collections.singletonMap("type", "double"));
+        Map<String, Object> doubleMapping = Collections.singletonMap("type", NumberFieldMapper.NumberType.DOUBLE.typeName());
+        explicitMappings.put(resultsFieldName + "." + predictionFieldName, doubleMapping);
+        return explicitMappings;
     }
 
     @Override

diff --git a/...src/test/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/ClassificationTests.java b/...src/test/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/ClassificationTests.java
@@ -27,7 +27,6 @@
 import java.util.Set;
 
 import static org.hamcrest.Matchers.allOf;
-import static org.hamcrest.Matchers.anEmptyMap;
 import static org.hamcrest.Matchers.containsString;
 import static org.hamcrest.Matchers.empty;
 import static org.hamcrest.Matchers.equalTo;
@@ -244,39 +243,47 @@ public void testFieldCardinalityLimitsIsNonEmpty() {
     }
 
     public void testGetExplicitlyMappedFields() {
-        assertThat(new Classification("foo").getExplicitlyMappedFields(null, "results"), is(anEmptyMap()));
-        assertThat(new Classification("foo").getExplicitlyMappedFields(Collections.emptyMap(), "results"), is(anEmptyMap()));
+        // Without a usable mapping for the dependent variable, only the feature importance mapping is expected.
+        Map<String, Object> featureImportanceOnly =
+            Collections.singletonMap("results.feature_importance", MapUtils.featureImportanceMapping());
+        assertThat(new Classification("foo").getExplicitlyMappedFields(null, "results"), equalTo(featureImportanceOnly));
+        assertThat(new Classification("foo").getExplicitlyMappedFields(Collections.emptyMap(), "results"),
+            equalTo(featureImportanceOnly));
         assertThat(
             new Classification("foo").getExplicitlyMappedFields(Collections.singletonMap("foo", "not_a_map"), "results"),
-            is(anEmptyMap()));
-        assertThat(
-            new Classification("foo").getExplicitlyMappedFields(
-                Collections.singletonMap("foo", Collections.singletonMap("bar", "baz")),
-                "results"),
+            equalTo(featureImportanceOnly));
+        Map<String, Object> explicitlyMappedFields = new Classification("foo").getExplicitlyMappedFields(
+            Collections.singletonMap("foo", Collections.singletonMap("bar", "baz")),
+            "results");
+        assertThat(explicitlyMappedFields,
             allOf(
                 hasEntry("results.foo_prediction", Collections.singletonMap("bar", "baz")),
                 hasEntry("results.top_classes.class_name", Collections.singletonMap("bar", "baz"))));
-        assertThat(
-            new Classification("foo").getExplicitlyMappedFields(
-                new HashMap<String, Object>() {{
-                    put("foo", new HashMap<String, String>() {{
-                        put("type", "alias");
-                        put("path", "bar");
-                    }});
-                    put("bar", Collections.singletonMap("type", "long"));
-                }},
-                "results"),
+        assertThat(explicitlyMappedFields, hasEntry("results.feature_importance", MapUtils.featureImportanceMapping()));
+
+        explicitlyMappedFields = new Classification("foo").getExplicitlyMappedFields(
+            new HashMap<String, Object>() {{
+                put("foo", new HashMap<String, String>() {{
+                    put("type", "alias");
+                    put("path", "bar");
+                }});
+                put("bar", Collections.singletonMap("type", "long"));
+            }},
+            "results");
+        assertThat(explicitlyMappedFields,
             allOf(
                 hasEntry("results.foo_prediction", Collections.singletonMap("type", "long")),
                 hasEntry("results.top_classes.class_name", Collections.singletonMap("type", "long"))));
+        assertThat(explicitlyMappedFields, hasEntry("results.feature_importance", MapUtils.featureImportanceMapping()));
+
         assertThat(
             new Classification("foo").getExplicitlyMappedFields(
                 Collections.singletonMap("foo", new HashMap<String, String>() {{
                     put("type", "alias");
                     put("path", "missing");
                 }}),
                 "results"),
-            is(anEmptyMap()));
+            equalTo(featureImportanceOnly));
     }
 
     public void testToXContent_GivenVersionBeforeRandomizeSeedWasIntroduced() throws IOException {

diff --git a/...ore/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/RegressionTests.java b/...ore/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/RegressionTests.java
@@ -16,6 +16,7 @@
 import org.elasticsearch.xpack.core.ml.AbstractBWCSerializationTestCase;
 
 import java.io.IOException;
+import java.util.Map;
 import java.util.Collections;
 
 import static org.hamcrest.Matchers.allOf;
@@ -143,9 +144,9 @@ public void testFieldCardinalityLimitsIsEmpty() {
     }
 
     public void testGetExplicitlyMappedFields() {
-        assertThat(
-            new Regression("foo").getExplicitlyMappedFields(null, "results"),
-            hasEntry("results.foo_prediction", Collections.singletonMap("type", "double")));
+        Map<String, Object> mappedFields = new Regression("foo").getExplicitlyMappedFields(null, "results");
+        assertThat(mappedFields, hasEntry("results.foo_prediction", Collections.singletonMap("type", "double")));
+        assertThat(mappedFields, hasEntry("results.feature_importance", MapUtils.featureImportanceMapping()));
     }
 
     public void testGetStateDocId() {

diff --git a/...lti-node-tests/src/test/java/org/elasticsearch/xpack/ml/integration/ClassificationIT.java b/...lti-node-tests/src/test/java/org/elasticsearch/xpack/ml/integration/ClassificationIT.java
@@ -77,7 +77,6 @@ public void cleanup() {
         cleanUp();
     }
 
-    @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/53236")
     public void testSingleNumericFeatureAndMixedTrainingAndNonTrainingRows() throws Exception {
         initialize("classification_single_numeric_feature_and_mixed_data_set");
         String predictedClassField = KEYWORD_FIELD + "_prediction";
@@ -109,7 +108,9 @@ public void testSingleNumericFeatureAndMixedTrainingAndNonTrainingRows() throws
             assertThat(getFieldValue(resultsObject, predictedClassField), is(in(KEYWORD_FIELD_VALUES)));
             assertThat(getFieldValue(resultsObject, "is_training"), is(destDoc.containsKey(KEYWORD_FIELD)));
             assertTopClasses(resultsObject, 2, KEYWORD_FIELD, KEYWORD_FIELD_VALUES);
-            assertThat(resultsObject.keySet().stream().filter(k -> k.startsWith("feature_importance.")).findAny().isPresent(), is(true));
+            @SuppressWarnings("unchecked")
+            List<Map<String, Object>> importanceArray = (List<Map<String, Object>>)resultsObject.get("feature_importance");
+            assertThat(importanceArray, hasSize(greaterThan(0)));
         }
 
         assertProgress(jobId, 100, 100, 100, 100);

diff --git a/...e-multi-node-tests/src/test/java/org/elasticsearch/xpack/ml/integration/RegressionIT.java b/...e-multi-node-tests/src/test/java/org/elasticsearch/xpack/ml/integration/RegressionIT.java
@@ -27,9 +27,11 @@
 import java.util.Map;
 import java.util.Set;
 
+import static org.elasticsearch.test.hamcrest.OptionalMatchers.isPresent;
 import static org.hamcrest.Matchers.anyOf;
 import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.greaterThan;
+import static org.hamcrest.Matchers.hasSize;
 import static org.hamcrest.Matchers.is;
 
 public class RegressionIT extends MlNativeDataFrameAnalyticsIntegTestCase {
@@ -50,7 +52,6 @@ public void cleanup() {
         cleanUp();
     }
 
-    @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/53236")
     public void testSingleNumericFeatureAndMixedTrainingAndNonTrainingRows() throws Exception {
         initialize("regression_single_numeric_feature_and_mixed_data_set");
         String predictedClassField = DEPENDENT_VARIABLE_FIELD + "_prediction";
@@ -88,11 +89,13 @@ public void testSingleNumericFeatureAndMixedTrainingAndNonTrainingRows() throws
             assertThat(resultsObject.containsKey(predictedClassField), is(true));
             assertThat(resultsObject.containsKey("is_training"), is(true));
             assertThat(resultsObject.get("is_training"), is(destDoc.containsKey(DEPENDENT_VARIABLE_FIELD)));
+            @SuppressWarnings("unchecked")
+            List<Map<String, Object>> importanceArray = (List<Map<String, Object>>)resultsObject.get("feature_importance");
+            assertThat(importanceArray, hasSize(greaterThan(0)));
             assertThat(
-                resultsObject.toString(),
-                resultsObject.containsKey("feature_importance." + NUMERICAL_FEATURE_FIELD)
-                    || resultsObject.containsKey("feature_importance." + DISCRETE_NUMERICAL_FEATURE_FIELD),
-                is(true));
+                importanceArray.stream().filter(m -> NUMERICAL_FEATURE_FIELD.equals(m.get("feature_name"))
+                    || DISCRETE_NUMERICAL_FEATURE_FIELD.equals(m.get("feature_name"))).findAny(),
+                isPresent());
         }
 
         assertProgress(jobId, 100, 100, 100, 100);