-
Notifications
You must be signed in to change notification settings - Fork 0
/
ipa2tipa.py
125 lines (103 loc) · 4.3 KB
/
ipa2tipa.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
from unicodedata import decomposition
import csv
from pathlib import Path
def _load_uni2tipa_table(filename: str) -> dict[str, str]:
    """Read one two-column mapping table from the ``uni2tipa`` directory.

    The directory is resolved relative to this file.  Every non-blank CSV
    row contributes ``row[0] -> row[1]``.  Quote processing is disabled
    (``csv.QUOTE_NONE``), so quote characters inside a field are kept
    literally.

    Raises:
        OSError: if the table file cannot be opened.
        IndexError: if a non-blank row has fewer than two fields.
    """
    path = Path(__file__).parent / "uni2tipa" / filename
    with open(path, 'r', encoding="utf-8") as f:
        # Blank lines produce empty rows; skip them instead of failing.
        return {row[0]: row[1] for row in csv.reader(f, quoting=csv.QUOTE_NONE) if row}


class IPA(str):
    """A ``str`` subclass that carries a TIPA rendering of its own text.

    On construction the text is broken into code tokens (``xords``), the
    tokens are grouped into units (``charlist``) and the units are
    converted into a TIPA string (``tipa``).
    """

    # Directory containing this file (kept as a class attribute for
    # backward compatibility with code that may read it).
    script_dir = Path(__file__).parent

    # Lookup tables, loaded once when the class is defined.  Keys are the
    # tokens produced by ``_decompose``.
    #   UNI2TIPA[0]: base symbols
    #   UNI2TIPA[1]: modifiers that wrap a single base
    #   UNI2TIPA[2]: modifiers that wrap the two neighbouring units
    UNI2TIPA: list[dict[str, str]] = [
        _load_uni2tipa_table(f"uni2tipa{index}.csv") for index in range(3)
    ]
    # Tone letters; consecutive ones are merged into a single \tone{...}.
    UNI2TIPA_TONE: dict[str, str] = _load_uni2tipa_table("uni2tipa-tone.csv")
    # Superscript/subscript markers (tokens that precede their base).
    UNI2TIPA_SUPSUB: dict[str, str] = _load_uni2tipa_table("uni2tipa-supsub.csv")

    def __new__(cls, content):
        return super().__new__(cls, content)

    def __init__(self, content):
        super().__init__()
        self.xords = self._decompose()
        self.charlist = self._parse()
        self.tipa = self._ipa2tipa()

    def __add__(self, other):
        """Concatenate with another string and return a new ``IPA``.

        The result is built through the constructor, so its TIPA rendering
        is computed from the concatenated text.
        """
        # An IPA instance is itself a str, so one check covers both cases.
        if not isinstance(other, str):
            return NotImplemented
        return IPA(super().__add__(other))

    def __radd__(self, other):
        """Handle ``str + IPA`` and return a new ``IPA``."""
        if not isinstance(other, str):
            return NotImplemented
        # Do NOT write ``other + self`` here: because IPA is a str subclass
        # that defines __radd__, that expression dispatches straight back
        # to this method and recurses forever.  Call the plain str
        # concatenation explicitly instead.
        return IPA(str.__add__(other, self))

    @staticmethod
    def _expand(char: str) -> list[str]:
        """Return the fully decomposed code tokens for one character.

        A character without a Unicode decomposition yields its own code
        point as lowercase hex (at least four digits).  Otherwise each
        code point of the decomposition is expanded recursively, and
        compatibility tags such as ``<super>`` are kept as tokens.
        """
        decom = decomposition(char)
        if not decom:
            return [f"{ord(char):04x}"]
        tokens: list[str] = []
        for token in decom.split():
            if token.startswith("<"):
                # Formatting tag, not a code point.
                tokens.append(token.lower())
            else:
                # The decomposition mapping is only one level deep, so the
                # pieces may themselves still be decomposable.
                tokens.extend(IPA._expand(chr(int(token, 16))))
        return tokens

    def _decompose(self):
        """Convert string to list of lowercase hex codes."""
        xords = []
        for c in self:
            xords.extend(self._expand(c))
        return xords

    def _parse(self):
        """Group hex codes into characters with modifiers.

        The token list is scanned from right to left.  A run of tone
        letters forms one group.  Otherwise a group is: one base token,
        followed by the modifier tokens (``UNI2TIPA[1]``) that came after
        it, followed by the sup/sub token that directly preceded the base,
        if any.  Groups are returned in left-to-right order.
        """
        xords = self.xords
        tones = self.UNI2TIPA_TONE
        modifiers = self.UNI2TIPA[1]
        supsub = self.UNI2TIPA_SUPSUB

        groups = []  # collected right-to-left, reversed once at the end
        i = len(xords) - 1
        while i >= 0:
            end = i + 1
            if xords[i] in tones:
                while i >= 0 and xords[i] in tones:
                    i -= 1
                group = xords[i + 1:end]
            else:
                # Trailing modifiers first ...
                while i >= 0 and xords[i] in modifiers:
                    i -= 1
                # ... then the base they attach to (if any token is left).
                if i >= 0:
                    i -= 1
                group = xords[i + 1:end]
                # A sup/sub marker sits before its base; store it last.
                if i >= 0 and xords[i] in supsub:
                    group.append(xords[i])
                    i -= 1
            groups.append(group)
        groups.reverse()
        return groups

    def _ipa2tipa(self):
        """Render ``self.charlist`` as a single TIPA string."""
        bases = self.UNI2TIPA[0]
        modifiers = self.UNI2TIPA[1]
        binary = self.UNI2TIPA[2]
        tones = self.UNI2TIPA_TONE
        supsub = self.UNI2TIPA_SUPSUB

        result = []
        for char in self.charlist:
            if char[0] in tones:
                # Render the leading run of tone letters as one \tone{...}.
                # Anything after the run (possible when modifiers follow a
                # tone letter) is applied as ordinary modifiers instead of
                # being looked up in the tone table, which would fail.
                n_tones = 1
                while n_tones < len(char) and char[n_tones] in tones:
                    n_tones += 1
                tone = "".join(tones[code] for code in char[:n_tones])
                base = rf"\tone{{{tone}}}"
                rest = char[n_tones:]
            else:
                # Tokens missing from the base table are passed through
                # unchanged; the 2-ary pass below relies on this to
                # recognise binary modifiers by their token.
                base = bases.get(char[0], char[0])
                rest = char[1:]
            # handle 1-ary modifiers
            for modifier in rest:
                if modifier in modifiers:
                    base = f"{modifiers[modifier]}{{{base}}}"
                if modifier in supsub:
                    base = f"{supsub[modifier]}{{{base}}}"
            result.append(base)

        # handle 2-ary modifiers: merge "left, modifier, right" into one
        # unit.  Start at 1 so that result[i - 1] can never wrap around to
        # the last element when the text begins with a binary modifier.
        i = 1
        while i < len(result) - 1:
            if result[i] in binary:
                result[i - 1] = f"{binary[result[i]]}{{{result[i - 1]}{result[i + 1]}}}"
                del result[i:i + 2]
            else:
                i += 1
        return ''.join(result)

    def to_tipa(self):
        """Convert IPA to TIPA."""
        return self.tipa
if __name__ == "__main__":
    # Demo run: build a few IPA objects and show what they contain.
    ipa = IPA("ʲ")

    # Concatenating IPA objects yields a new IPA; dump its instance
    # attributes, one (name, value) pair per line.
    ipa = IPA("ko̞ko̞ ɲ̟i ") + IPA("ɲ̟ɯ̟ᵝːɾʲo̞kɯ̟ᵝ ɕi̥te̞ ") + IPA("kɯ̟ᵝda̠sa̠i")
    attribute_lines = [str(pair) for pair in vars(ipa).items()]
    print("\n".join(attribute_lines))

    # Show a sample string next to its TIPA conversion.
    ipa = IPA("ˈtʰiː ˌnãɪ̃ɾ̃iˈtʰu̟ː ˈd͡ʒeɪ ˈpʰiː")
    original_text = ipa
    tipa_text = ipa.to_tipa()
    print("Original:", original_text)
    print("TIPA:", tipa_text)