# Params.yaml
# Parameters for stage 1.

# Directory where results are written.
RESULT_PATH: "./Results/"
# CSV file used when USE_DATATYPE is 'Custom'.
CUSTDATA_PATH: './Datasets/Custom/tripadvisor_hotel_reviews.csv'
# USE_DATATYPE options: ['Amazon', 'Hotel_review', 'Yelp', 'Custom']
USE_DATATYPE: 'Custom'

# The number of reviews to use; -1 means the whole set of reviews.
n_reviews: -1

# LDA tuning range (start, end, step).
# NOTE(review): presumably the candidate numbers of topics - confirm against
# the stage-1 script.
startNum: 3
endNum: 12
step: 3

# The number of CPUs to use for LDA topic modeling.
n_jobs: 8
n_wordsinTopic: 30

# Candidate LDA hyper-parameters. Each list entry is one grid: every key
# inside it maps to the list of values to try for that hyper-parameter.
lda_hyperparams:
  - eta: [0.01]
    alpha: [0.1]
    filter_no_below: [10, 30]
    filter_no_above: [0.5, 0.7, 0.9]
    passes: [40, 100, 200]
    iters: [500, 1000, 3000]
---
# Parameters for stage 2.

# Directory where results are written.
RESULT_PATH: "./Results/"
# Please change the index segment (here `0`) after you choose which LDA
# result to use.
LDA_RESULT_IDX: "[1]LDA/0/"

# Enter the topic names after running stage 1 and interpreting the word list
# of each topic.
# TopicList: ["Location", "View", "Breakfast", "Sleep Quality", "Bathroom", "Service", "Check", "Value", "Internet"]
TopicList: ["Location", "View", "Breakfast"]

# Random state.
random_state: 0
# The number of CPUs to use for training the ML models.
n_jobs: -1
# The number of folds (K) for cross validation.
n_splits: 5

# Candidate model hyper-parameters. Each entry names a model and gives, under
# `param`, a list of grids; every key in a grid maps to the list of values to
# try for that hyper-parameter.
model_hyperparams:
  - model: 'LogisticRegression'
    param:
      # NOTE(review): if this maps to scikit-learn's LogisticRegression, the
      # string 'none' was deprecated in 1.2 and removed in 1.4 in favour of
      # None - confirm against the installed version before changing.
      - penalty: ['none']
  - model: 'MLPClassifier'
    param:
      # Layer sizes are kept as strings exactly as in the original file;
      # presumably the consumer converts them to tuples - confirm.
      - hidden_layer_sizes: ['(128,64)', '(32,64,128,64,32)', '(256,)']
        activation: ['relu']
        learning_rate_init: [0.001, 0.005]
        shuffle: [true]
        solver: ['adam']
        batch_size: [64]
        learning_rate: ['adaptive']
        max_iter: [1000, 3000]
  - model: 'RandomForestClassifier'
    param:
      - n_estimators: [10, 100]
        max_depth: [6, 8, 10, 12]
        min_samples_leaf: [8, 12, 18]
        min_samples_split: [8, 16, 20]
  - model: 'LGBMClassifier'
    param:
      - num_leaves: [10, 40]
        max_depth: [6, 8, 10, 12]
        objective: ['binary']
        metric: ['auc']
        verbosity: [-1]

# The number of samples for SAGE.
n_samples: 512