forked from openvenues/libpostal
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhe.yaml
269 lines (246 loc) · 7.09 KB
/
he.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
# he.yaml
# -------
# Hebrew language specification

# Letters available when an alphabetic identifier is generated. The string
# holds the 22 Hebrew letters plus the five final forms (ך ם ן ף ץ), i.e. 27
# characters in total.
# NOTE(review): confirm that the final forms should be sampled as
# stand-alone labels.
alphabet: "אבגדהוזחטיכךלמםנןסעפףצץקרשת"
# NOTE(review): presumably the chance of drawing a letter from `alphabet`
# instead of a fallback alphabet; confirm against the consumer.
alphabet_probability: 0.8
# Sampling weights for the optional address components, followed by the
# rules for fusing several components into a single house-number string.
components:
  # For each component below, null_probability + alphanumeric_probability
  # adds up to 1.0.
  level:
    null_probability: 0.95
    alphanumeric_probability: 0.05
  entrance:
    null_probability: 0.9
    alphanumeric_probability: 0.1
  unit:
    null_probability: 0.6
    alphanumeric_probability: 0.4
  # Every entry lists the components it joins, the label the joined value is
  # emitted under, and the candidate separators (whose probabilities sum to
  # 1.0 within the entry).
  # NOTE(review): the entry-level probabilities (0.7, 0.7, 0.1) do not sum to
  # 1.0, so they are presumably evaluated independently; confirm.
  combinations:
    # house number + entrance + unit
    - components:
        - house_number
        - entrance
        - unit
      label: house_number
      separators:
        - separator: "/"
          probability: 0.95
        - separator: "-"
          probability: 0.05
      probability: 0.7
    # house number + entrance
    - components:
        - house_number
        - entrance
      label: house_number
      separators:
        - separator: " "
          probability: 0.5
        # Empty separator: the two values are written back to back.
        - separator: ""
          probability: 0.2
        - separator: "/"
          probability: 0.1
        - separator: "-"
          probability: 0.1
        - separator: " - "
          probability: 0.1
      probability: 0.7
    # house number + unit
    - components:
        - house_number
        - unit
      label: house_number
      separators:
        - separator: "/"
          probability: 0.95
        - separator: "-"
          probability: 0.05
      probability: 0.1
# Floor ("level") phrases. Each phrase is anchored so that the `aliases` and
# `alphanumeric` sections further down can refer to it by alias.
levels:
  # "koma" = floor, in Hebrew script.
  koma: &koma
    canonical: קומה
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
    ordinal:
      direction: left
    # numeric_probability + ordinal_probability = 1.0
    numeric_probability: 0.4
    ordinal_probability: 0.6
  # Latin transliteration of "koma".
  koma_latin: &koma_latin
    canonical: koma
    sample: true
    canonical_probability: 0.7
    sample_probability: 0.3
    numeric:
      direction: left
    ordinal:
      direction: left
    numeric_probability: 0.4
    ordinal_probability: 0.6
  # "komat karka" = ground floor; no numeric/ordinal settings are given.
  komat_karka: &komat_karka
    canonical: קומת קרקע
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
  # Latin transliteration of "komat karka".
  # NOTE(review): this is the only transliteration in the file written with
  # accents; confirm that is intended.
  komat_karka_latin: &komat_karka_latin
    canonical: komát karká
    sample: true
    canonical_probability: 0.6
    sample_probability: 0.4
  # "martef" = basement, in Hebrew script.
  martef: &martef
    canonical: מרתף
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
    ordinal:
      direction: left
    # NOTE(review): presumably the (negative) floor number is rendered as
    # its absolute value, starting from 1; confirm against the consumer.
    number_abs_value: true
    number_min_abs_value: 1
    # standalone + numeric + ordinal = 1.0
    standalone_probability: 0.985
    numeric_probability: 0.01
    ordinal_probability: 0.005
  # Latin transliteration of "martef".
  martef_latin: &martef_latin
    canonical: martef
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
    ordinal:
      direction: left
    number_abs_value: true
    number_min_abs_value: 1
    standalone_probability: 0.985
    numeric_probability: 0.01
    ordinal_probability: 0.005
  # Phrases keyed by floor number. The keys are quoted so they stay strings.
  # In every entry the default and its single alternative sum to 1.0.
  aliases:
    "<-1":
      default: *martef
      probability: 0.9
      alternatives:
        - alternative: *martef_latin
          probability: 0.1
    "-1":
      default: *martef
      probability: 0.9
      alternatives:
        - alternative: *martef_latin
          probability: 0.1
    "0":
      default: *komat_karka
      probability: 0.9
      alternatives:
        - alternative: *komat_karka_latin
          probability: 0.1
  numbering_starts_at: 0
  # Phrase selection for ordinary floors; the four *_probability values
  # below sum to 1.0.
  alphanumeric:
    default: *koma
    probability: 0.9
    alternatives:
      - alternative: *koma_latin
        probability: 0.1
    numeric_probability: 0.99  # With this probability, pick an integer
    alpha_probability: 0.0098  # With this probability, pick a letter e.g. A
    numeric_plus_alpha_probability: 0.0001  # e.g. 2A
    alpha_plus_numeric_probability: 0.0001  # e.g. A2
# Building-entrance phrases and how their identifiers are drawn.
entrances:
  # "knisa" = entrance, in Hebrew script.
  knisa: &knisa
    canonical: כניסה
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
  # Latin transliteration of "knisa".
  knisa_latin: &knisa_latin
    canonical: knisa
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
  # knisa 1, knisa A, etc.
  # The Hebrew phrase is the default (0.99) with the transliteration as the
  # sole alternative (0.01). Identifiers are mostly letters:
  # numeric 0.1 + alpha 0.9 = 1.0.
  alphanumeric:
    default: *knisa
    probability: 0.99
    alternatives:
      - alternative: *knisa_latin
        probability: 0.01
    numeric_probability: 0.1
    alpha_probability: 0.9
# Post-office-box phrases, identifier shapes and digit-length distribution.
po_boxes:
  # Hebrew phrase for a post-office box, with its dotted abbreviation.
  # canonical + abbreviated + sample = 1.0
  ta_doar: &ta_doar
    canonical: תיבת דואר
    abbreviated: ת.ד.
    sample: true
    canonical_probability: 0.4
    abbreviated_probability: 0.4
    sample_probability: 0.2
    numeric:
      direction: left
  # Latin transliteration. The previous value was a transliterated Russian
  # phrase left over from another language's file; "ta doar" matches this
  # key's name and the examples in the comments below.
  ta_doar_latin: &ta_doar_latin
    canonical: ta doar
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
  # The four *_probability values below sum to 1.0.
  alphanumeric:
    default: *ta_doar
    probability: 0.8
    alternatives:
      - alternative: *ta_doar_latin
        probability: 0.2
    numeric_probability: 0.9  # ta doar 123
    alpha_probability: 0.05  # ta doar A
    numeric_plus_alpha_probability: 0.04  # ta doar 123A
    alpha_plus_numeric_probability: 0.01  # ta doar A123
    # NOTE(review): presumably the chance of a space between the letter and
    # the number in mixed identifiers; confirm against the consumer.
    alpha_plus_numeric:
      whitespace_probability: 0.1
    numeric_plus_alpha:
      whitespace_probability: 0.1
  # Distribution of box-number lengths in digits; probabilities sum to 1.0.
  digits:
    - length: 1
      probability: 0.05
    - length: 2
      probability: 0.1
    - length: 3
      probability: 0.2
    - length: 4
      probability: 0.5
    - length: 5
      probability: 0.1
    - length: 6
      probability: 0.05
# Apartment ("unit") phrases and how unit identifiers are drawn.
units:
  # "dira" = apartment, in Hebrew script.
  dira: &dira
    canonical: דירה
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
  # Latin transliteration of "dira".
  dira_latin: &dira_latin
    canonical: dira
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
  # The anchor &unit_alphanumeric is kept even though nothing in this
  # section references it; YAML anchors are document-scoped.
  # The four *_probability values below sum to 1.0.
  alphanumeric: &unit_alphanumeric
    default: *dira
    probability: 0.9
    alternatives:
      - alternative: *dira_latin
        probability: 0.1
    numeric_probability: 0.9  # e.g. dira 1
    numeric_plus_alpha_probability: 0.03  # e.g. 1A
    alpha_plus_numeric_probability: 0.03  # e.g. A1
    alpha_probability: 0.04  # e.g. dira A
    # NOTE(review): presumably the chance of a space between the letter and
    # the number in mixed identifiers; confirm against the consumer.
    alpha_plus_numeric:
      whitespace_probability: 0.1
    numeric_plus_alpha:
      whitespace_probability: 0.1
    # If there are 10 floors, create unit numbers like #301 or #1032
    use_floor_probability: 0.1