-
Notifications
You must be signed in to change notification settings - Fork 372
/
InterIndic-Arabic.xml
143 lines (142 loc) · 3.7 KB
/
InterIndic-Arabic.xml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
<!--
Copyright © 1991-2016 Unicode, Inc.
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
For terms of use, see http://www.unicode.org/copyright.html
-->
<supplementalData>
<version number="$Revision$"/>
<transforms>
<transform source="InterIndic" target="Arabic" direction="forward" visibility="internal">
<tRule><![CDATA[
# Context classes shared by the rules below.
# $nonword: anything outside \uE000-\uE0FF, the range all source code points
# matched in this rule set are drawn from. Used as "nothing more of the word follows".
$nonword = [^\uE000-\uE0FF];
# $wordBoundary: anything that is not a letter, a mark or a number.
$wordBoundary = [^[:L:][:M:][:N:]];

# Whole-word and word-final spellings. They are listed ahead of the
# per-character rules so that the longer sequences are tried first.
\uE015\uE03F\uE02F\uE03E } $nonword → كيا; # किया, only when followed by $nonword
\uE026\uE03F\uE02F\uE03E } $nonword → ديا; # दिया, only when followed by $nonword
\uE015\uE03F } $nonword → كي; # कि at the end of a word
\uE039\uE048 → هي; # है in any position
# Nasalization signs and visarga.
# Chandrabindu and anusvara each have a word-final rule and a general rule;
# at present both rules of each pair produce the same letter (noon).
\uE001 } $nonword → ن; # chandrabindu at the end of a word
\uE001 → ن; # chandrabindu anywhere else
\uE002 } $nonword → ن; # anusvara ं at the end of a word
\uE002 → ن; # anusvara ं anywhere else
\uE003 → ه ا; # visarga ः becomes ha followed by alif
# Independent vowels.
\uE004 → ا; # ऄ short a, written as alif
\uE005 → ا; # अ a
\uE006 → ا ٓ; # आ aa, alif carrying a madda mark
# i and ii: the first rule of each pair applies right after a letter or mark,
# the second rule is the fallback for every other position.
[[:L:][:M:]] { \uE007 } → ي; # इ preceded by a letter or mark
\uE007 → إ; # इ otherwise (e.g. start of word)
[[:L:][:M:]] { \uE008 } → ي; # ई preceded by a letter or mark
\uE008 → إ; # ई otherwise (e.g. start of word)
\uE009 → و; # उ u
\uE00A → و; # ऊ uu
\uE00B → ر; # ऋ vocalic r
\uE00C → ل; # ऌ vocalic l
\uE00D → ا ي; # ऍ candra e
\uE00E → ي; # ऎ short e
# e: a dedicated word-initial form, then a word-final rule and a general rule
# that currently share the same output.
$wordBoundary { \uE00F } → إي; # ए right after a word boundary
\uE00F } $nonword → ي; # ए at the end of a word
\uE00F → ي; # ए anywhere else
# ai: word-final and general rules, currently with the same output.
\uE010 } $nonword → ا ي; # ऐ at the end of a word
\uE010 → ا ي; # ऐ anywhere else
\uE011 → ا و; # ऑ candra o
\uE012 → ا و; # ऒ short o
\uE013 → ا و; # ओ o
\uE014 → ا و; # औ au
# Consonants. Each rule maps one source code point with no context;
# aspirated consonants are written as the plain letter followed by ha.
\uE015 → ك; # क ka
\uE016 → كه; # ख kha
\uE017 → ج; # ग ga
\uE018 → جه; # घ gha
\uE019 → نج; # ङ nga
\uE01A → تش; # च ca
\uE01B → تشه; # छ cha
\uE01C → ج; # ज ja
\uE01D → جه; # झ jha
\uE01E → ن; # ञ nya
\uE01F → ط; # ट tta
\uE020 → طه; # ठ ttha
\uE021 → د; # ड dda
\uE022 → ده; # ढ ddha
\uE023 → ن; # ण nna
\uE024 → ت; # त ta
\uE025 → ته; # थ tha
\uE026 → د; # द da
\uE027 → ده; # ध dha
\uE028 → ن; # न na
\uE029 → ن; # ऩ nnna
\uE02A → ب; # प pa
\uE02B → به; # फ pha
\uE02C → ب; # ब ba
\uE02D → به; # भ bha
\uE02E → م; # म ma
\uE02F → ي; # य ya
\uE030 → ر; # र ra
\uE031 → ر; # ऱ rra
\uE032 → ل; # ल la
\uE033 → ر; # ळ lla
\uE034 → ر; # ऴ llla
\uE035 → و; # व va
\uE036 → ش; # श sha
\uE037 → ش; # ष ssa
\uE038 → س; # स sa
\uE039 → ه; # ह ha
# Signs and dependent vowels. A rule with an empty right-hand side
# removes the matched character from the output.
\uE03C → ; # ़ nukta, removed
\uE03D → ; # ऽ avagraha, removed
\uE03E → ا; # ा vowel sign aa
\uE03F → ي; # ि vowel sign i
\uE040 → ي; # ी vowel sign ii
\uE041 → و; # ु vowel sign u
\uE042 → و; # ू vowel sign uu
\uE043 → ر; # ृ vowel sign vocalic r
\uE044 → ر; # ॄ vowel sign vocalic rr
\uE045 → ن; # ॅ vowel sign candra e
\uE046 → ي; # ॆ vowel sign short e
# Vowel signs e and ai: a word-final rule followed by a general rule;
# both rules of each pair currently give the same output.
\uE047 } $nonword → ي; # े at the end of a word
\uE047 → ي; # े anywhere else
\uE048 } $nonword → ا ي; # ै at the end of a word
\uE048 → ا ي; # ै anywhere else
\uE049 → و; # ॉ vowel sign candra o
\uE04A → ا و; # ॊ vowel sign short o
\uE04B → و; # ो vowel sign o
\uE04C → ا و; # ौ vowel sign au
\uE04D → ; # ् virama, removed
\uE050 → ا و; # ॐ om
\uE051 → ; # ॑ stress sign udatta, removed
\uE052 → ; # ॒ stress sign anudatta, removed
\uE053 → ; # ॓ grave accent, removed
\uE054 → ; # ॔ acute accent, removed
# Consonants written with a nukta, followed by the long vocalic letters
# and the vocalic l / ll vowel signs.
\uE058 → ق; # क़ qa
\uE059 → خ; # ख़ khha
\uE05A → غ; # ग़ ghha
\uE05B → ز; # ज़ za
\uE05C → ر; # ड़ dddha
\uE05D → ره; # ढ़ rha
\uE05E → ف; # फ़ fa
\uE05F → ي; # य़ yya
\uE060 → ر; # ॠ vocalic rr
\uE061 → ل; # ॡ vocalic ll
\uE062 → ل; # ॢ vowel sign vocalic l
\uE063 → ل; # ॣ vowel sign vocalic ll
# Punctuation and digits.
# Danda and double danda are both written with the full-stop character ۔.
\uE064→۔; # ।
\uE065→۔; # ॥
# Digits 0-9 map to the Arabic-Indic digits U+0660..U+0669.
# Zero must be the digit ٠ (U+0660), not a period: with a period, a number
# such as १० would be rendered as a one followed by a full stop.
\uE066→٠; # ० (U+0660 ARABIC-INDIC DIGIT ZERO)
\uE067→١; # १
\uE068→٢; # २
\uE069→٣; # ३
\uE06A→٤; # ४
\uE06B→٥; # ५
\uE06C→٦; # ६
\uE06D→٧; # ७
\uE06E→٨; # ८
\uE06F→٩; # ९
\uE070→\.; # ॰ abbreviation sign becomes a period
\uE082→; # ॽ glottal stop is removed
# Second pass: collapse runs of alif characters produced by the rules above.
# Example: पाओला → بااولا → باولا.
# "::null;" closes the preceding rule set, so the rule below is applied to
# the output of that rule set rather than to the original input.
::null;
# One alif unit: any alif form together with the marks attached to it.
$alifUnit = [أإآا] [:M:]*;
# Keep the first unit of a run (captured as $1) and drop the units after it.
($alifUnit) $alifUnit+ → $1;
]]></tRule>
</transform>
</transforms>
</supplementalData>