Skip to content

Commit e75e2d4

Browse files
authored
Merge pull request #566 from veer66/dev
Refactor Royin Transliterate: avoid nested if blocks and simplify consonant-replacement operations
2 parents cbf9360 + 2f3f85a commit e75e2d4

File tree

1 file changed

+31
-58
lines changed

1 file changed

+31
-58
lines changed

pythainlp/transliterate/royin.py

Lines changed: 31 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -146,68 +146,42 @@ def _replace_vowels(word: str) -> str:
146146
def _replace_consonants(word: str, consonants: str) -> str:
147147
_HO_HIP = "\u0e2b" # ห
148148
_RO_RUA = "\u0e23" # ร
149+
_DOUBLE_RO_RUA = _RO_RUA + _RO_RUA
149150

150151
if not consonants:
151152
return word
152153

153-
if len(consonants) == 1:
154-
return word.replace(consonants[0], _CONSONANTS[consonants[0]][0])
155-
len_cons = len(consonants)
156-
157-
i = 0
158-
while i < len_cons:
159-
len_word = len(word)
160-
if i == 0:
161-
if consonants[0] == _HO_HIP:
162-
word = word.replace(consonants[0], "")
163-
del consonants[0]
164-
len_cons -= 1
165-
else:
166-
word = word.replace(
167-
consonants[0], _CONSONANTS[consonants[0]][0]
168-
)
169-
i += 1
154+
skip = False
155+
mod_chars = []
156+
j = 0 # j is the index of consonants
157+
for i in range(len(word)):
158+
if skip:
159+
skip = False
160+
j += 1
161+
elif word[i] not in _CONSONANTS: # word[i] is not a Thai consonant.
162+
mod_chars.append(word[i])
170163
elif (
171-
i == len_word
172-
and consonants[i] == _RO_RUA
173-
and word[i - 1] == _RO_RUA
174-
):
175-
word = word.replace(consonants[i], _CONSONANTS[consonants[i]][1])
176-
elif i < len_word and consonants[i] == _RO_RUA:
177-
if i + 1 == len_word and word[i] == _RO_RUA:
178-
word = word.replace(
179-
consonants[i], _CONSONANTS[consonants[i]][1]
180-
)
181-
elif i + 1 < len_word and word[i] == _RO_RUA:
182-
if word[i + 1] == _RO_RUA:
183-
word = list(word)
184-
del word[i + 1]
185-
if i + 2 == len_cons:
186-
word[i] = "an"
187-
else:
188-
word[i] = "a"
189-
word = "".join(word)
190-
i += 1
191-
elif word[i] == _RO_RUA:
192-
word = word.replace(
193-
consonants[i], _CONSONANTS[consonants[i]][1]
194-
)
195-
i += 1
196-
else:
197-
word = word.replace(
198-
consonants[i], _CONSONANTS[consonants[i]][1]
199-
)
200-
i += 1
201-
else:
202-
word = word.replace(
203-
consonants[i], _CONSONANTS[consonants[i]][1]
204-
)
205-
i += 1
206-
else:
207-
word = word.replace(consonants[i], _CONSONANTS[consonants[i]][1])
208-
i += 1
209-
210-
return word
164+
len(mod_chars) == 0 and word[i] == _HO_HIP and len(consonants) != 1
165+
): # Skip an initial HO HIP unless it is the only consonant
166+
j += 1
167+
elif (
168+
len(mod_chars) == 0
169+
): # The first character must be an initial consonant.
170+
mod_chars.append(_CONSONANTS[consonants[j]][0])
171+
j += 1
172+
elif word[i:] == _DOUBLE_RO_RUA: # Double RO RUA at the end of the word
173+
skip = True
174+
mod_chars.append("a")
175+
mod_chars.append("n")
176+
j += 1
177+
elif word[i : i + 2] == _DOUBLE_RO_RUA:
178+
skip = True
179+
mod_chars.append("a")
180+
j += 1
181+
else: # Assume that the rest are final consonants.
182+
mod_chars.append(_CONSONANTS[consonants[j]][1])
183+
j += 1
184+
return "".join(mod_chars)
211185

212186

213187
# support function for romanize()
@@ -222,7 +196,6 @@ def _romanize(word: str) -> str:
222196
word = "".join(word)
223197

224198
word = _replace_consonants(word, consonants)
225-
226199
return word
227200

228201

0 commit comments

Comments
 (0)