Skip to content

Commit

Permalink
Passing the correct sigmas in reversible romanizer.
Browse files: browse the repository at this point in the history
PiperOrigin-RevId: 673390799
  • Loading branch information
agutkin authored and copybara-github committed Sep 11, 2024
1 parent 7c7c458 commit 96a643a
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 4 deletions.
1 change: 1 addition & 0 deletions nisaba/scripts/brahmic/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ nisaba_compile_multi_grm_py(
data = [
":nfc.far",
":nfc_utf8.far",
":sigma_utf8.far",
] + [
"//nisaba/scripts/brahmic/data/{}:{}.tsv".format(script, file_name)
for script in SCRIPTS
Expand Down
14 changes: 10 additions & 4 deletions nisaba/scripts/brahmic/iso.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,16 +178,22 @@ def generator_main(exporter_map: multi_grm.ExporterMapping):
with p.default_token_type(token_type):
exporter = exporter_map[token_type]
from_script_fsts = []
sigma_fsts = []
for script in u.SCRIPTS:
sigma = u.OpenSigma(script, token_type)
sigma_fsts += [sigma]
from_script, to_script = _script_fsts(script, token_type)
from_script_fsts += [from_script]
script = script.upper()
exporter[f'FROM_{script}'] = from_script
exporter[f'TO_{script}'] = to_script
# TODO: Following rewrite assumes 'byte' token type. It should be
# made available to 'utf8' as well. The corresponding 'utf8_test' is
# missing as well.
exporter['FROM_BRAHMIC'] = rw.Rewrite(p.union(*from_script_fsts))
# TODO: The utf8 version of the `FROM_BRAHMIC` transducer fails to
# rewrite any native script inputs. The corresponding 'iso_utf8_test' is
# missing as well.
exporter['FROM_BRAHMIC'] = rw.Rewrite(
p.union(*from_script_fsts).optimize(),
sigma=p.union(*sigma_fsts).optimize()
)


if __name__ == '__main__':
Expand Down

0 comments on commit 96a643a

Please sign in to comment.