Skip to content

Commit

Permalink
fix model feature importances (microsoft#755)
Browse files Browse the repository at this point in the history
* fix model feature importance

* fix appveyor test

* Revert "fix appveyor test"

This reverts commit 3a10a17.

* fix warning & boost_from_average_

* fix bug

* alter num_used_model
  • Loading branch information
wxchan authored and guolinke committed Aug 1, 2017
1 parent 04e0db0 commit 2e83a1c
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 6 deletions.
6 changes: 3 additions & 3 deletions src/boosting/gbdt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -964,7 +964,7 @@ std::string GBDT::SaveModelToString(int num_iteration) const {
ss << models_[i]->ToString() << std::endl;
}

std::vector<std::pair<size_t, std::string>> pairs = FeatureImportance();
std::vector<std::pair<size_t, std::string>> pairs = FeatureImportance(num_used_model);
ss << std::endl << "feature importances:" << std::endl;
for (size_t i = 0; i < pairs.size(); ++i) {
ss << pairs[i].second << "=" << std::to_string(pairs[i].first) << std::endl;
Expand Down Expand Up @@ -1088,10 +1088,10 @@ bool GBDT::LoadModelFromString(const std::string& model_str) {
return true;
}

std::vector<std::pair<size_t, std::string>> GBDT::FeatureImportance() const {
std::vector<std::pair<size_t, std::string>> GBDT::FeatureImportance(int num_used_model) const {

std::vector<size_t> feature_importances(max_feature_idx_ + 1, 0);
for (size_t iter = 0; iter < models_.size(); ++iter) {
for (int iter = 0; iter < num_used_model; ++iter) {
for (int split_idx = 0; split_idx < models_[iter]->num_leaves() - 1; ++split_idx) {
if (models_[iter]->split_gain(split_idx) > 0) {
++feature_importances[models_[iter]->split_feature(split_idx)];
Expand Down
8 changes: 5 additions & 3 deletions src/boosting/gbdt.h
Original file line number Diff line number Diff line change
Expand Up @@ -175,15 +175,15 @@ class GBDT: public Boosting {

/*!
* \brief Save model to file
* \param num_used_model Number of model that want to save, -1 means save all
* \param num_iterations Number of models to save, -1 means save all
* \param filename Filename to save to
* \return is_finish Is training finished or not
*/
virtual bool SaveModelToFile(int num_iterations, const char* filename) const override;

/*!
* \brief Save model to string
* \param num_used_model Number of model that want to save, -1 means save all
* \param num_iterations Number of models to save, -1 means save all
* \return Non-empty string if succeeded
*/
virtual std::string SaveModelToString(int num_iterations) const override;
Expand Down Expand Up @@ -296,8 +296,10 @@ class GBDT: public Boosting {
std::string OutputMetric(int iter);
/*!
* \brief Calculate feature importances
* \param num_used_model Number of models to use for feature importance, -1 means use all
* \return sorted pairs of (feature_importance, feature_name)
*/
std::vector<std::pair<size_t, std::string>> FeatureImportance() const;
std::vector<std::pair<size_t, std::string>> FeatureImportance(int num_used_model) const;

/*! \brief current iteration */
int iter_;
Expand Down

0 comments on commit 2e83a1c

Please sign in to comment.