system.config

### use # to comment out the configure item

################ Status ################
mode=interactive_predict
# string: train/interactive_predict/test/save_pb_model

################ Datasets(Input/Output) ################
# 此处展示的是demo数据集所在的文件夹，训练自己数据前请自己设置一个文件夹
datasets_fold=data/example_datasets
train_file=train.csv
dev_file=dev.csv
test_file=None

# 设置词表存放的文件夹
vocabs_dir=data/example_datasets/vocabs
# 设置训练日志存放的文件夹
log_dir=data/example_datasets/logs

delimiter=b
# string: (t: "\t";"table")|(b: "backspace";" ")|(other, e.g., '|||', ...)

# 在此处设置模型保存位置，代码支持在原始模型上继续训练，新数据或从头训练一定要改！
checkpoints_dir=checkpoints/bilstm-crf
# 模型的名字
checkpoint_name=model

################ Labeling Scheme ################
label_scheme=BIO
# string: BIO/BIESO

label_level=2
# int, 1:BIO/BIESO; 2:BIO/BIESO + suffix
# max to 2

hyphen=-
# string: -|_, for connecting the prefix and suffix: `B_PER', `I_LOC'

suffix=[ORG,PER,LOC]
# unnecessary if label_level=1

measuring_metrics=[precision,recall,f1,accuracy]
# string: accuracy|precision|recall|f1
# f1 is compulsory

################ Model Configuration ################

use_pretrained_model=False
pretrained_model=Bert
# Bert/AlBert
huggingface_tag=bert-base-chinese
# huggingface上模型的名字
finetune=False

use_middle_model=True
middle_model=bilstm
# bilstm/idcnn

# 不使用预训练模型的时候词表的维度
embedding_dim=300
# int

# 选择lstm时，隐藏层大小
hidden_dim=200

# 选择idcnn时filter的个数
filter_nums=64

idcnn_nums=2

max_sequence_length=300
# int, cautions! set as a LARGE number as possible,
# this will be kept during training and inferring, text having length larger than this will be truncated.

CUDA_VISIBLE_DEVICES=0
# int, -1:CPU, [0,]:GPU
# coincides with tf.CUDA_VISIBLE_DEVICES

seed=42

################ Training Settings ################
epoch=300
batch_size=32

dropout=0.5
# 微调预训练模型时，建议设置为5e-5
learning_rate=0.001

optimizer=Adam
# string: SGD/Adagrad/AdaDelta/RMSprop/Adam/AdamW

use_gan=False
gan_method=fgm
# fgm/pgd

checkpoints_max_to_keep=3
print_per_batch=20

is_early_stop=True
patient=5
# unnecessary if is_early_stop=False