-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
3fbbc63
commit b084fd1
Showing
10 changed files
with
214 additions
and
2,842 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,7 @@ | ||
*.class | ||
*.log | ||
logs | ||
.~* | ||
|
||
#VIM specific | ||
tags | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,3 +4,4 @@ | |
/mass_kg.sqlite | ||
/gestation_days.sqlite | ||
/lifespan_2.sqlite | ||
/temperature.sqlite |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
outs: | ||
- md5: 2fecb538190048e43e4718a7d19a6020 | ||
path: temperature.sqlite |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
from functools import cached_property | ||
|
||
from sklearn.pipeline import Pipeline | ||
|
||
from yspecies.config import * | ||
from yspecies.partition import DataPartitioner | ||
from yspecies.partition import PartitionParameters | ||
from yspecies.preprocess import DataExtractor | ||
from yspecies.results import FeatureSummary | ||
from yspecies.selection import CrossValidator, ShapSelector | ||
from yspecies.tuning import MultiObjectiveResults | ||
from yspecies.workflow import TupleWith, Repeat, Collect | ||
|
||
|
||
@dataclass
class PipelineFactory:
    """Factory that assembles the SHAP feature-selection pipelines for a trait.

    Every pipeline is a scikit-learn ``Pipeline`` built from the project's
    transformers (``DataExtractor``, ``DataPartitioner``, ``CrossValidator``,
    ``ShapSelector``). Model parameters are taken from an Optuna
    multi-objective study stored in a per-trait SQLite file when that study
    already has Pareto-front trials; otherwise a fixed fallback set is used.
    """

    # Project locations object; only ``locations.interim.optimization`` is read here.
    locations: Locations
    # Number of repetitions used by ``make_repeated_shap_pipeline``.
    repeats: int = 10
    # Forwarded as the first two arguments of ``PartitionParameters``.
    n_folds: int = 5
    n_hold_out: int = 1

    @cached_property
    def partition_parameters(self):
        """Return the ``PartitionParameters`` shared by all pipelines (computed once)."""
        # NOTE(review): the two trailing positional values are hard-coded;
        # 42 is presumably the seed (the repeated pipeline overrides ``seed``)
        # - confirm both against the PartitionParameters definition.
        parameters = PartitionParameters(self.n_folds, self.n_hold_out, 2, 42)
        return parameters

    def load_study_by_trait(self, trait: str, study_name: str = None):
        """Load (or create) the study stored in ``<trait>.sqlite``.

        :param trait: trait name; also the stem of the SQLite file inside
            ``locations.interim.optimization``.
        :param study_name: name of the study; defaults to
            ``"<trait>_r2_huber_kendall"`` when ``None``.
        :return: whatever ``load_study`` returns for that file and name.
        """
        database = self.locations.interim.optimization / (trait + ".sqlite")
        if study_name is None:
            study_name = f"{trait}_r2_huber_kendall"
        return self.load_study(database, study_name)

    def load_study(self, path: Path, name: str):
        """Open the SQLite-backed multi-objective study ``name`` stored at ``path``.

        The study is created with ``load_if_exists=True``, so an existing study
        with the same name is reused instead of raising.

        :param path: location of the SQLite database file.
        :param name: study name inside that storage.
        :return: the Optuna multi-objective study.
        """
        url = f"sqlite:///{path.absolute()}"
        print(f"loading (if exists) study from {url}")
        # The original author left engine_kwargs={'check_same_thread': False} disabled.
        storage = optuna.storages.RDBStorage(url=url)
        return optuna.multi_objective.study.create_study(
            directions=["maximize", "minimize", "maximize"],
            storage=storage,
            study_name=name,
            load_if_exists=True,
        )

    @staticmethod
    def _fallback_params():
        """Return the hard-coded model parameters used when a study has no Pareto-front trials."""
        return {
            "bagging_fraction": 0.9522534844058304,
            "boosting_type": "dart",
            "objective": "regression",
            "feature_fraction": 0.42236910941558053,
            "lambda_l1": 0.020847266580277746,
            "lambda_l2": 2.8448564854773326,
            "learning_rate": 0.11484015430016059,
            "max_depth": 3,
            "max_leaves": 35,
            "min_data_in_leaf": 9,
            "num_iterations": 250,
            "metrics": ["l1", "l2", "huber"],
        }

    def _extraction_steps(self):
        """Return fresh leading steps shared by the pipelines that start from the raw dataset."""
        return [
            ("extractor", DataExtractor()),
            # to extract the data required for ML from the dataset
            ("prepare_for_partitioning", TupleWith(self.partition_parameters)),
        ]

    def make_partitioning_shap_pipeline(self, trait: str, study_name: str = None):
        """Build the partition -> cross-validation -> SHAP selection pipeline.

        :param trait: trait whose study supplies the model parameters.
        :param study_name: optional study name; ``load_study_by_trait`` applies
            the ``"<trait>_r2_huber_kendall"`` default when it is ``None``.
        :return: a ``Pipeline`` with the steps ``partitioner``,
            ``prepare_for_selection``, ``cross_validation`` and
            ``shap_computation``.
        """
        study = self.load_study_by_trait(trait, study_name)
        if len(study.get_pareto_front_trials()) == 0:
            # Nothing has been tuned yet: use the fixed fallback parameters untouched.
            params = self._fallback_params()
        else:
            results = MultiObjectiveResults.from_study(study)
            metrics, params = results.best_metrics_params_r2()
            params["verbose"] = -1
            # Only supply a default; a value stored in the study takes precedence.
            params.setdefault("early_stopping_round", 10)
        steps = [
            ("partitioner", DataPartitioner()),
            ("prepare_for_selection", TupleWith(params)),
            ("cross_validation", CrossValidator()),
            ("shap_computation", ShapSelector()),
        ]
        return Pipeline(steps)

    def make_shap_pipeline(self, trait: str, study_name: str = None):
        """Build the single-run pipeline: extraction followed by partitioning and SHAP selection.

        :param trait: trait whose study supplies the model parameters.
        :param study_name: optional study name (see ``load_study_by_trait``).
        :return: a ``Pipeline`` ending with the ``partition_shap`` step.
        """
        inner = self.make_partitioning_shap_pipeline(trait, study_name)
        steps = self._extraction_steps()
        steps.append(("partition_shap", inner))
        return Pipeline(steps)

    def make_repeated_shap_pipeline(self, trait: str, study_name: str = None):
        """Build the pipeline that repeats partitioning and SHAP selection, then summarizes.

        The inner pipeline is wrapped in ``Repeat`` with ``self.repeats``; the
        callback replaces ``seed`` on the second tuple element with the value
        ``Repeat`` passes as its second argument. The collected results are
        folded into a ``FeatureSummary``.

        :param trait: trait whose study supplies the model parameters.
        :param study_name: optional study name (see ``load_study_by_trait``).
        :return: a ``Pipeline`` ending with the ``summarize`` step.
        """
        inner = self.make_partitioning_shap_pipeline(trait, study_name)

        def with_seed(x, i):
            # Keep the first element, swap the seed on the second one.
            return (x[0], replace(x[1], seed=i))

        repeated_cv = Repeat(inner, self.repeats, with_seed)
        steps = self._extraction_steps()
        steps.append(("repeated_partition_shap", repeated_cv))
        steps.append(("summarize", Collect(fold=lambda results: FeatureSummary(results))))
        return Pipeline(steps)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters