forked from scikit-learn/scikit-learn
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbench_isotonic.py
91 lines (72 loc) · 2.98 KB
/
bench_isotonic.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
"""
Benchmarks of isotonic regression performance.
We generate a synthetic dataset of size 10^n, for n in [min, max], and
examine the time taken to run isotonic regression over the dataset.
The timings are then output to stdout, or visualized on a log-log scale
with matplotlib.
This allows the scaling of the algorithm with the problem size to be
visualized and understood.
"""
from __future__ import print_function
import numpy as np
import gc
from datetime import datetime
from sklearn.isotonic import isotonic_regression
from sklearn.utils.bench import total_seconds
import matplotlib.pyplot as plt
import argparse
def generate_perturbed_logarithm_dataset(size):
return np.random.randint(-50, 50, size=n) \
+ 50. * np.log(1 + np.arange(n))
def generate_logistic_dataset(size):
X = np.sort(np.random.normal(size=size))
return np.random.random(size=size) < 1.0 / (1.0 + np.exp(-X))
DATASET_GENERATORS = {
'perturbed_logarithm': generate_perturbed_logarithm_dataset,
'logistic': generate_logistic_dataset
}
def bench_isotonic_regression(Y):
"""
Runs a single iteration of isotonic regression on the input data,
and reports the total time taken (in seconds).
"""
gc.collect()
tstart = datetime.now()
isotonic_regression(Y)
delta = datetime.now() - tstart
return total_seconds(delta)
if __name__ == '__main__':
parser = argparse.ArgumentParser(
description="Isotonic Regression benchmark tool")
parser.add_argument('--iterations', type=int, required=True,
help="Number of iterations to average timings over "
"for each problem size")
parser.add_argument('--log_min_problem_size', type=int, required=True,
help="Base 10 logarithm of the minimum problem size")
parser.add_argument('--log_max_problem_size', type=int, required=True,
help="Base 10 logarithm of the maximum problem size")
parser.add_argument('--show_plot', action='store_true',
help="Plot timing output with matplotlib")
parser.add_argument('--dataset', choices=DATASET_GENERATORS.keys(),
required=True)
args = parser.parse_args()
timings = []
for exponent in range(args.log_min_problem_size,
args.log_max_problem_size):
n = 10 ** exponent
Y = DATASET_GENERATORS[args.dataset](n)
time_per_iteration = \
[bench_isotonic_regression(Y) for i in range(args.iterations)]
timing = (n, np.mean(time_per_iteration))
timings.append(timing)
# If we're not plotting, dump the timing to stdout
if not args.show_plot:
print(n, np.mean(time_per_iteration))
if args.show_plot:
plt.plot(*zip(*timings))
plt.title("Average time taken running isotonic regression")
plt.xlabel('Number of observations')
plt.ylabel('Time (s)')
plt.axis('tight')
plt.loglog()
plt.show()