Skip to content

[ML] Activate model metadata output #1456

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Aug 14, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion docs/CHANGELOG.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@

== {es} version 7.10.0

=== Enhancements

* Calculate total feature importance to store with model metadata. (See {ml-pull}1387[#1387].)

=== Bug Fixes

* Fix progress on resume after final training has completed for classification and regression.
Expand Down Expand Up @@ -75,7 +79,6 @@
regression. (See {ml-pull}1340[#1340].)
* Improvement in handling large inference model definitions. (See {ml-pull}1349[#1349].)
* Add a peak_model_bytes field to model_size_stats. (See {ml-pull}1389[#1389].)
* Calculate total feature importance as a new result type. (See {ml-pull}1387[#1387].)

=== Bug Fixes

Expand Down
3 changes: 1 addition & 2 deletions lib/api/CDataFrameAnalyzer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -144,8 +144,7 @@ void CDataFrameAnalyzer::run() {
analysisRunner->waitToFinish();
this->writeInferenceModel(*analysisRunner, outputWriter);
this->writeResultsOf(*analysisRunner, outputWriter);
// TODO reactivate once Java parsing is ready
// this->writeInferenceModelMetadata(*analysisRunner, outputWriter);
this->writeInferenceModelMetadata(*analysisRunner, outputWriter);
}
}

Expand Down
107 changes: 51 additions & 56 deletions lib/api/unittest/CDataFrameAnalyzerFeatureImportanceTest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -537,16 +537,15 @@ BOOST_FIXTURE_TEST_CASE(testRegressionFeatureImportanceAllShap, SFixture) {
BOOST_REQUIRE_CLOSE(c3Sum, c4Sum, 5.0); // c3 and c4 within 5% of each other
// Make sure the local approximation always differs from the prediction by the same bias (up to numerical error).
BOOST_REQUIRE_SMALL(maths::CBasicStatistics::variance(bias), 1e-6);
// TODO reactivate once Java parsing is ready
// BOOST_TEST_REQUIRE(hasTotalFeatureImportance);
// BOOST_REQUIRE_CLOSE(c1TotalShapActual,
// maths::CBasicStatistics::mean(c1TotalShapExpected), 1.0);
// BOOST_REQUIRE_CLOSE(c2TotalShapActual,
// maths::CBasicStatistics::mean(c2TotalShapExpected), 1.0);
// BOOST_REQUIRE_CLOSE(c3TotalShapActual,
// maths::CBasicStatistics::mean(c3TotalShapExpected), 1.0);
// BOOST_REQUIRE_CLOSE(c4TotalShapActual,
// maths::CBasicStatistics::mean(c4TotalShapExpected), 1.0);
BOOST_TEST_REQUIRE(hasTotalFeatureImportance);
BOOST_REQUIRE_CLOSE(c1TotalShapActual,
maths::CBasicStatistics::mean(c1TotalShapExpected), 1.0);
BOOST_REQUIRE_CLOSE(c2TotalShapActual,
maths::CBasicStatistics::mean(c2TotalShapExpected), 1.0);
BOOST_REQUIRE_CLOSE(c3TotalShapActual,
maths::CBasicStatistics::mean(c3TotalShapExpected), 1.0);
BOOST_REQUIRE_CLOSE(c4TotalShapActual,
maths::CBasicStatistics::mean(c4TotalShapExpected), 1.0);
}

BOOST_FIXTURE_TEST_CASE(testRegressionFeatureImportanceNoImportance, SFixture) {
Expand Down Expand Up @@ -629,7 +628,6 @@ BOOST_FIXTURE_TEST_CASE(testClassificationFeatureImportanceAllShap, SFixture) {
if (result["model_metadata"].HasMember("total_feature_importance")) {
hasTotalFeatureImportance = true;
}
// TODO reactivate once Java parsing is ready
c1FooTotalShapActual = readTotalShapValue(result, "c1", "foo");
c2FooTotalShapActual = readTotalShapValue(result, "c2", "foo");
c3FooTotalShapActual = readTotalShapValue(result, "c3", "foo");
Expand All @@ -650,24 +648,23 @@ BOOST_FIXTURE_TEST_CASE(testClassificationFeatureImportanceAllShap, SFixture) {
BOOST_REQUIRE_CLOSE(c3Sum, c4Sum, 40.0); // c3 and c4 within 40% of each other
// Make sure the local approximation always differs from the prediction by the same bias (up to numerical error).
BOOST_REQUIRE_SMALL(maths::CBasicStatistics::variance(bias), 1e-6);
// TODO reactivate once Java parsing is ready
// BOOST_TEST_REQUIRE(hasTotalFeatureImportance);
// BOOST_REQUIRE_CLOSE(c1FooTotalShapActual,
// maths::CBasicStatistics::mean(c1TotalShapExpected), 1.0);
// BOOST_REQUIRE_CLOSE(c2FooTotalShapActual,
// maths::CBasicStatistics::mean(c2TotalShapExpected), 1.0);
// BOOST_REQUIRE_CLOSE(c3FooTotalShapActual,
// maths::CBasicStatistics::mean(c3TotalShapExpected), 1.0);
// BOOST_REQUIRE_CLOSE(c4FooTotalShapActual,
// maths::CBasicStatistics::mean(c4TotalShapExpected), 1.0);
// BOOST_REQUIRE_CLOSE(c1BarTotalShapActual,
// maths::CBasicStatistics::mean(c1TotalShapExpected), 1.0);
// BOOST_REQUIRE_CLOSE(c2BarTotalShapActual,
// maths::CBasicStatistics::mean(c2TotalShapExpected), 1.0);
// BOOST_REQUIRE_CLOSE(c3BarTotalShapActual,
// maths::CBasicStatistics::mean(c3TotalShapExpected), 1.0);
// BOOST_REQUIRE_CLOSE(c4BarTotalShapActual,
// maths::CBasicStatistics::mean(c4TotalShapExpected), 1.0);
BOOST_TEST_REQUIRE(hasTotalFeatureImportance);
BOOST_REQUIRE_CLOSE(c1FooTotalShapActual,
maths::CBasicStatistics::mean(c1TotalShapExpected), 1.0);
BOOST_REQUIRE_CLOSE(c2FooTotalShapActual,
maths::CBasicStatistics::mean(c2TotalShapExpected), 1.0);
BOOST_REQUIRE_CLOSE(c3FooTotalShapActual,
maths::CBasicStatistics::mean(c3TotalShapExpected), 1.0);
BOOST_REQUIRE_CLOSE(c4FooTotalShapActual,
maths::CBasicStatistics::mean(c4TotalShapExpected), 1.0);
BOOST_REQUIRE_CLOSE(c1BarTotalShapActual,
maths::CBasicStatistics::mean(c1TotalShapExpected), 1.0);
BOOST_REQUIRE_CLOSE(c2BarTotalShapActual,
maths::CBasicStatistics::mean(c2TotalShapExpected), 1.0);
BOOST_REQUIRE_CLOSE(c3BarTotalShapActual,
maths::CBasicStatistics::mean(c3TotalShapExpected), 1.0);
BOOST_REQUIRE_CLOSE(c4BarTotalShapActual,
maths::CBasicStatistics::mean(c4TotalShapExpected), 1.0);
}

BOOST_FIXTURE_TEST_CASE(testMultiClassClassificationFeatureImportanceAllShap, SFixture) {
Expand Down Expand Up @@ -734,7 +731,6 @@ BOOST_FIXTURE_TEST_CASE(testMultiClassClassificationFeatureImportanceAllShap, SF
if (result["model_metadata"].HasMember("total_feature_importance")) {
hasTotalFeatureImportance = true;
}
// TODO reactivate once Java parsing is ready
c1FooTotalShapActual = readTotalShapValue(result, "c1", "foo");
c2FooTotalShapActual = readTotalShapValue(result, "c2", "foo");
c3FooTotalShapActual = readTotalShapValue(result, "c3", "foo");
Expand All @@ -749,32 +745,31 @@ BOOST_FIXTURE_TEST_CASE(testMultiClassClassificationFeatureImportanceAllShap, SF
c4BazTotalShapActual = readTotalShapValue(result, "c4", "baz");
}
}
// TODO reactivate once Java parsing is ready
// BOOST_TEST_REQUIRE(hasTotalFeatureImportance);
// BOOST_REQUIRE_CLOSE(c1FooTotalShapActual,
// maths::CBasicStatistics::mean(c1FooTotalShapExpected), 1.0);
// BOOST_REQUIRE_CLOSE(c2FooTotalShapActual,
// maths::CBasicStatistics::mean(c2FooTotalShapExpected), 1.0);
// BOOST_REQUIRE_CLOSE(c3FooTotalShapActual,
// maths::CBasicStatistics::mean(c3FooTotalShapExpected), 1.0);
// BOOST_REQUIRE_CLOSE(c4FooTotalShapActual,
// maths::CBasicStatistics::mean(c4FooTotalShapExpected), 1.0);
// BOOST_REQUIRE_CLOSE(c1BarTotalShapActual,
// maths::CBasicStatistics::mean(c1BarTotalShapExpected), 1.0);
// BOOST_REQUIRE_CLOSE(c2BarTotalShapActual,
// maths::CBasicStatistics::mean(c2BarTotalShapExpected), 1.0);
// BOOST_REQUIRE_CLOSE(c3BarTotalShapActual,
// maths::CBasicStatistics::mean(c3BarTotalShapExpected), 1.0);
// BOOST_REQUIRE_CLOSE(c4BarTotalShapActual,
// maths::CBasicStatistics::mean(c4BarTotalShapExpected), 1.0);
// BOOST_REQUIRE_CLOSE(c1BazTotalShapActual,
// maths::CBasicStatistics::mean(c1BazTotalShapExpected), 1.0);
// BOOST_REQUIRE_CLOSE(c2BazTotalShapActual,
// maths::CBasicStatistics::mean(c2BazTotalShapExpected), 1.0);
// BOOST_REQUIRE_CLOSE(c3BazTotalShapActual,
// maths::CBasicStatistics::mean(c3BazTotalShapExpected), 1.0);
// BOOST_REQUIRE_CLOSE(c4BazTotalShapActual,
// maths::CBasicStatistics::mean(c4BazTotalShapExpected), 1.0);
BOOST_TEST_REQUIRE(hasTotalFeatureImportance);
BOOST_REQUIRE_CLOSE(c1FooTotalShapActual,
maths::CBasicStatistics::mean(c1FooTotalShapExpected), 1.0);
BOOST_REQUIRE_CLOSE(c2FooTotalShapActual,
maths::CBasicStatistics::mean(c2FooTotalShapExpected), 1.0);
BOOST_REQUIRE_CLOSE(c3FooTotalShapActual,
maths::CBasicStatistics::mean(c3FooTotalShapExpected), 1.0);
BOOST_REQUIRE_CLOSE(c4FooTotalShapActual,
maths::CBasicStatistics::mean(c4FooTotalShapExpected), 1.0);
BOOST_REQUIRE_CLOSE(c1BarTotalShapActual,
maths::CBasicStatistics::mean(c1BarTotalShapExpected), 1.0);
BOOST_REQUIRE_CLOSE(c2BarTotalShapActual,
maths::CBasicStatistics::mean(c2BarTotalShapExpected), 1.0);
BOOST_REQUIRE_CLOSE(c3BarTotalShapActual,
maths::CBasicStatistics::mean(c3BarTotalShapExpected), 1.0);
BOOST_REQUIRE_CLOSE(c4BarTotalShapActual,
maths::CBasicStatistics::mean(c4BarTotalShapExpected), 1.0);
BOOST_REQUIRE_CLOSE(c1BazTotalShapActual,
maths::CBasicStatistics::mean(c1BazTotalShapExpected), 1.0);
BOOST_REQUIRE_CLOSE(c2BazTotalShapActual,
maths::CBasicStatistics::mean(c2BazTotalShapExpected), 1.0);
BOOST_REQUIRE_CLOSE(c3BazTotalShapActual,
maths::CBasicStatistics::mean(c3BazTotalShapExpected), 1.0);
BOOST_REQUIRE_CLOSE(c4BazTotalShapActual,
maths::CBasicStatistics::mean(c4BazTotalShapExpected), 1.0);
}

BOOST_FIXTURE_TEST_CASE(testRegressionFeatureImportanceNoShap, SFixture) {
Expand Down