# Params.yaml

# Stage 1 settings.
RESULT_PATH: './Results/'
CUSTDATA_PATH: './Datasets/Custom/tripadvisor_hotel_reviews.csv'
# Dataset selector. Options listed for USE_DATATYPE:
# 'Amazon', 'Hotel_review', 'Yelp', 'Custom'
USE_DATATYPE: 'Custom'
# Number of reviews to use; -1 selects the whole set of reviews.
n_reviews: -1
# LDA tuning.
# NOTE(review): startNum/endNum/step presumably describe the range of topic
# counts to sweep -- confirm against the stage 1 script.
startNum: 3
endNum: 12
step: 3
# Number of CPUs used for LDA topic modeling.
n_jobs: 8
# NOTE(review): presumably the number of words reported per topic -- confirm.
n_wordsinTopic: 30

# Candidate values for the LDA hyper-parameters.
# Each key maps to the list of values to try for that parameter.
lda_hyperparams:
  - eta: [0.01]
    alpha: [0.1]
    filter_no_below: [10, 30]
    filter_no_above: [0.5, 0.7, 0.9]
    passes: [40, 100, 200]
    iters: [500, 1000, 3000]


---

# Stage 2 settings.
RESULT_PATH: './Results/'
# Please change the index in this path after you choose an LDA result.
LDA_RESULT_IDX: '[1]LDA/0/'

# Enter the topic names after running stage 1 and interpreting the word list
# of each topic. Longer example:
# TopicList: ["Location", "View", "Breakfast", "Sleep Quality", "Bathroom",
#             "Service", "Check", "Value", "Internet"]
TopicList: ['Location', 'View', 'Breakfast']

# Random state.
random_state: 0
# Number of CPUs used for training the ML models.
n_jobs: -1
# Number of folds (K) for cross validation.
n_splits: 5
# Candidate hyperparameter values for each model.
# Every entry names a model and gives, under `param`, a list of grids whose
# keys map to the list of values to try.
model_hyperparams:
  - model: 'LogisticRegression'
    param:
      # NOTE(review): the string 'none' is kept exactly as-is; whether the
      # installed scikit-learn still accepts it should be confirmed.
      - penalty: ['none']

  - model: 'MLPClassifier'
    param:
      # Layer sizes are written as tuple-like strings.
      # NOTE(review): presumably converted to tuples by the consumer -- confirm.
      - hidden_layer_sizes: ['(128,64)', '(32,64,128,64,32)', '(256,)']
        activation: ['relu']
        learning_rate_init: [0.001, 0.005]
        shuffle: [true]
        solver: ['adam']
        batch_size: [64]
        learning_rate: ['adaptive']
        max_iter: [1000, 3000]

  - model: 'RandomForestClassifier'
    param:
      - n_estimators: [10, 100]
        max_depth: [6, 8, 10, 12]
        min_samples_leaf: [8, 12, 18]
        min_samples_split: [8, 16, 20]

  - model: 'LGBMClassifier'
    param:
      - num_leaves: [10, 40]
        max_depth: [6, 8, 10, 12]
        objective: ['binary']
        metric: ['auc']
        verbosity: [-1]

# Number of samples used for SAGE.
n_samples: 512