forked from microsoft/autogen
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest_performance.py
107 lines (89 loc) · 3.66 KB
/
test_performance.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import sys
from openml.exceptions import OpenMLServerException
from requests.exceptions import ChunkedEncodingError, SSLError
from minio.error import ServerError
from flaml.tune.spark.utils import check_spark
import os
import pytest
spark_available, _ = check_spark()
skip_spark = not spark_available
pytestmark = pytest.mark.skipif(skip_spark, reason="Spark is not installed. Skip all spark tests.")
os.environ["FLAML_MAX_CONCURRENT"] = "2"
def run_automl(budget=3, dataset_format="dataframe", hpo_method=None):
from flaml.automl.data import load_openml_dataset
import urllib3
performance_check_budget = 3600
if sys.platform == "darwin" or "nt" in os.name or "3.10" not in sys.version:
budget = 3 # revise the buget if the platform is not linux + python 3.10
if budget >= performance_check_budget:
max_iter = 60
performance_check_budget = None
else:
max_iter = None
try:
X_train, X_test, y_train, y_test = load_openml_dataset(
dataset_id=1169, data_dir="test/", dataset_format=dataset_format
)
except (
OpenMLServerException,
ChunkedEncodingError,
urllib3.exceptions.ReadTimeoutError,
SSLError,
ServerError,
Exception,
) as e:
print(e)
return
""" import AutoML class from flaml package """
from flaml import AutoML
automl = AutoML()
settings = {
"time_budget": budget, # total running time in seconds
"max_iter": max_iter, # maximum number of iterations
"metric": "accuracy", # primary metrics can be chosen from: ['accuracy','roc_auc','roc_auc_ovr','roc_auc_ovo','f1','log_loss','mae','mse','r2']
"task": "classification", # task type
"log_file_name": "airlines_experiment.log", # flaml log file
"seed": 7654321, # random seed
"hpo_method": hpo_method,
"log_type": "all",
"estimator_list": [
"lgbm",
"xgboost",
"xgb_limitdepth",
"rf",
"extra_tree",
], # list of ML learners
"eval_method": "holdout",
"n_concurrent_trials": 2,
"use_spark": True,
}
"""The main flaml automl API"""
automl.fit(X_train=X_train, y_train=y_train, **settings)
""" retrieve best config and best learner """
print("Best ML leaner:", automl.best_estimator)
print("Best hyperparmeter config:", automl.best_config)
print("Best accuracy on validation data: {0:.4g}".format(1 - automl.best_loss))
print("Training duration of best run: {0:.4g} s".format(automl.best_config_train_time))
print(automl.model.estimator)
print(automl.best_config_per_estimator)
print("time taken to find best model:", automl.time_to_find_best_model)
""" compute predictions of testing dataset """
y_pred = automl.predict(X_test)
print("Predicted labels", y_pred)
print("True labels", y_test)
y_pred_proba = automl.predict_proba(X_test)[:, 1]
""" compute different metric values on testing dataset """
from flaml.automl.ml import sklearn_metric_loss_score
accuracy = 1 - sklearn_metric_loss_score("accuracy", y_pred, y_test)
print("accuracy", "=", accuracy)
print("roc_auc", "=", 1 - sklearn_metric_loss_score("roc_auc", y_pred_proba, y_test))
print("log_loss", "=", sklearn_metric_loss_score("log_loss", y_pred_proba, y_test))
if performance_check_budget is None:
assert accuracy >= 0.669, "the accuracy of flaml should be larger than 0.67"
def test_automl_array():
run_automl(3, "array", "bs")
def test_automl_performance():
run_automl(3600)
if __name__ == "__main__":
test_automl_array()
test_automl_performance()