-
Notifications
You must be signed in to change notification settings - Fork 14
/
lamorgia_classifier.py
54 lines (42 loc) · 1.94 KB
/
lamorgia_classifier.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import datetime
import pandas as pd
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.metrics import f1_score, recall_score, precision_score
from sklearn.model_selection import StratifiedKFold, cross_val_predict, train_test_split
from sklearn.tree import DecisionTreeClassifier
def classifier(time_freq):
    """Train a random forest on one feature file and print hold-out scores.

    Loads ``labeled_features/features_<time_freq>.csv.gz``, fits a
    ``RandomForestClassifier`` on the first 80% of the rows and evaluates it
    on the remaining 20%, printing recall, precision and F1 for the
    positive class.

    Args:
        time_freq: Suffix identifying the feature file to load (for example
            ``'25S'``); it is interpolated into the file name as-is.

    Returns:
        None. The scores are only written to standard output.
    """
    dataset_path = f'labeled_features/features_{time_freq}.csv.gz'
    computed_data = pd.read_csv(dataset_path, parse_dates=['date'])

    features = [
        'std_rush_order',
        'avg_rush_order',
        'std_trades',
        'std_volume',
        'avg_volume',
        'std_price',
        'avg_price',
        'avg_price_max',
        'hour_sin',
        'hour_cos',
        'minute_sin',
        'minute_cos',
    ]
    X = computed_data[features]
    # A Series' ``.values`` is already one-dimensional, so no ravel is needed.
    Y = computed_data['gt'].astype(int).values

    # Fixed seed keeps the fitted forest reproducible between runs.
    clf = RandomForestClassifier(n_estimators=200, max_depth=5, random_state=1)

    print(f'Fitting model for {time_freq}.')
    # shuffle=False keeps the file's row order: the leading 80% of rows are
    # used for training and the trailing 20% for testing.
    # NOTE(review): this is only a temporal hold-out if the CSV rows are
    # already sorted by 'date' -- confirm against the feature-generation step.
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, shuffle=False, train_size=0.8
    )
    clf.fit(X_train, Y_train)
    y_pred = clf.predict(X_test)

    print(f'Recall: {recall_score(Y_test, y_pred)}')
    print(f'Precision: {precision_score(Y_test, y_pred)}')
    print(f'F1 score: {f1_score(Y_test, y_pred)}')
if __name__ == '__main__':
    # Evaluate every sampling frequency in turn and report the total
    # wall-clock time spent across all of them.
    started_at = datetime.datetime.now()
    for freq in ('25S', '15S', '5S'):
        classifier(time_freq=freq)
    elapsed = datetime.datetime.now() - started_at
    print(elapsed)