-
Notifications
You must be signed in to change notification settings - Fork 103
/
Copy pathtagging.ini
82 lines (65 loc) · 1.66 KB
/
tagging.ini
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
; This is an example configuration for training a POS tagger. For a more
; detailed description of an example INI file, please refer to translation.ini.
; Top-level experiment settings. Every <name> value below matches a [name]
; section defined elsewhere in this file.
[main]
name="pos tagging"
; The s_tags_out path in [val_data] starts with this same directory name.
output="out-example-tagging"
tf_manager=<tf_manager>
train_dataset=<train_data>
val_dataset=<val_data>
; In this example the validation dataset is also the only test dataset.
test_datasets=[<val_data>]
runners=[<runner>]
trainer=<trainer>
; Pairs the "tags" series (the output_series of [runner]) with the accuracy
; evaluator.
evaluation=[("tags", evaluators.accuracy.Accuracy)]
batch_size=50
runners_batch_size=50
epochs=100
; NOTE(review): the unit of the two periods below (e.g. training steps vs.
; batches) is not visible in this file - confirm against the consuming code.
validation_period=1500
logging_period=40
; Object referenced as tf_manager from [main].
[tf_manager]
class=tf_manager.TensorFlowManager
; NOTE(review): presumably the number of TensorFlow sessions and threads to
; use - confirm against the TensorFlowManager constructor.
num_sessions=1
num_threads=4
; Training dataset: referenced as train_dataset in [main] and listed in the
; datasets of both vocabulary sections.
[train_data]
class=dataset.load_dataset_from_files
; The key suffixes "source" and "tags" match the series_ids / data_id values
; used by the vocabulary, encoder and decoder sections.
; NOTE(review): presumably the s_ prefix declares a data series - confirm.
s_source="examples/data/tagging/train.forms-cs"
s_tags="examples/data/tagging/train.tags-cs.subpos"
; Validation dataset: referenced as val_dataset and inside test_datasets
; in [main].
[val_data]
class=dataset.load_dataset_from_files
s_source="examples/data/tagging/val.forms-cs"
s_tags="examples/data/tagging/val.tags-cs.subpos"
; Path located under the directory named by output in [main].
; NOTE(review): presumably where the predicted "tags" series is written -
; confirm against the dataset loader.
s_tags_out="out-example-tagging/val.tags-cs.subpos.out"
; Vocabulary referenced by [encoder]; built from the "source" series of the
; training dataset only.
[source_vocabulary]
class=vocabulary.from_dataset
datasets=[<train_data>]
series_ids=["source"]
; NOTE(review): presumably an upper bound on the number of vocabulary
; entries - confirm.
max_size=50000
; Vocabulary referenced by [decoder]; built from the "tags" series of the
; training dataset only.
[tags_vocabulary]
class=vocabulary.from_dataset
datasets=[<train_data>]
series_ids=["tags"]
; Much smaller limit than the 50000 used for [source_vocabulary].
; NOTE(review): presumably an upper bound on the number of distinct tags -
; confirm.
max_size=50
; Sentence encoder referenced by [decoder]; reads the "source" series and uses
; the vocabulary from [source_vocabulary].
[encoder]
class=encoders.recurrent.SentenceEncoder
name="sentence_encoder"
rnn_size=100
; NOTE(review): presumably the maximum input length in tokens - confirm.
max_input_len=50
embedding_size=300
; Same dropout keep probability as in [decoder].
dropout_keep_prob=0.5
data_id="source"
vocabulary=<source_vocabulary>
; Sequence labeler referenced by [trainer] and [runner]; takes [encoder] as
; its encoder and uses the "tags" series with the [tags_vocabulary] vocabulary.
[decoder]
class=decoders.sequence_labeler.SequenceLabeler
name="tagger"
encoder=<encoder>
data_id="tags"
; Same dropout keep probability as in [encoder].
dropout_keep_prob=0.5
vocabulary=<tags_vocabulary>
; Cross-entropy trainer referenced as trainer from [main]; its only decoder is
; the one defined in [decoder].
[trainer]
class=trainers.cross_entropy_trainer.CrossEntropyTrainer
decoders=[<decoder>]
; NOTE(review): presumably the weight of an L2 regularization term and the
; gradient-norm clipping threshold - confirm against the trainer class.
l2_weight=1.0e-8
clip_norm=1.0
; Label runner listed in runners in [main]; runs the decoder defined in
; [decoder].
[runner]
class=runners.label_runner.LabelRunner
decoder=<decoder>
; Same series name that the evaluation entry in [main] refers to.
output_series="tags"