diff --git a/nisaba/scripts/natural_translit/brahmic/romanizer.py b/nisaba/scripts/natural_translit/brahmic/romanizer.py index be02328f..c2d396b6 100644 --- a/nisaba/scripts/natural_translit/brahmic/romanizer.py +++ b/nisaba/scripts/natural_translit/brahmic/romanizer.py @@ -108,6 +108,8 @@ def _transliterate_vocalic( TT_TR = rw.reassign(gr.RR + gr.RR, ph.T + ph.T, tr.T + tr.T + tr.R) RD_R = rw.reassign(gr.RD, ph.RD, tr.R) +RDH_DH = rw.reassign(gr.RDH, ph.RD + ph.ASP, tr.D + tr.H) +RDH_RH = rw.reassign(gr.RDH, ph.RD + ph.ASP, tr.R + tr.H) NY_N = rw.rewrite(ph.NY, tr.N, following=ph.CONSONANT) NY_GN = rw.reassign(gr.NY, ph.NY, tr.G + tr.N, ph.VOWEL, ph.VOWEL) @@ -157,7 +159,7 @@ def _transliterate_vocalic( # Compose common rules for romanization -TXN_TO_PSA_COMMON = (DIPHTHONG_GR @ NON_LABIAL_ANUSVARA) +TXN_TO_PSA_COMMON = (DIPHTHONG_GR @ NON_LABIAL_ANUSVARA @ RDH_DH) # Convert txn to PSAF and outputs only translit strings. TXN_TO_PSAF = ( diff --git a/nisaba/scripts/natural_translit/language_params/hi.py b/nisaba/scripts/natural_translit/language_params/hi.py index 7d06d066..7e5cac31 100644 --- a/nisaba/scripts/natural_translit/language_params/hi.py +++ b/nisaba/scripts/natural_translit/language_params/hi.py @@ -110,6 +110,7 @@ def iso_to_nat() -> fl.FstList: _TXN_OPS, romanizer.SIBV_TO_SIBW, romanizer.AA_WI, + romanizer.RDH_RH, romanizer.TXN_TO_PSA_COMMON, romanizer.IGNORE_LONG, romanizer.TRANSLIT_BY_PSA, diff --git a/nisaba/scripts/natural_translit/romanization/testdata/hi_iso_nat.textproto b/nisaba/scripts/natural_translit/romanization/testdata/hi_iso_nat.textproto index 76fd3f96..ccead084 100644 --- a/nisaba/scripts/natural_translit/romanization/testdata/hi_iso_nat.textproto +++ b/nisaba/scripts/natural_translit/romanization/testdata/hi_iso_nat.textproto @@ -75,3 +75,8 @@ rewrite { input: "akailā" output: "akaila" } +rewrite { + rule: "ISO_TO_NAT" + input: "bāṛʰa" + output: "barh" +} diff --git a/nisaba/scripts/natural_translit/romanization/testdata/hi_iso_psac.textproto b/nisaba/scripts/natural_translit/romanization/testdata/hi_iso_psac.textproto index f183a1e8..7fb5deea 100644 --- a/nisaba/scripts/natural_translit/romanization/testdata/hi_iso_psac.textproto +++ b/nisaba/scripts/natural_translit/romanization/testdata/hi_iso_psac.textproto @@ -74,3 +74,8 @@ rewrite { input: "akailā" output: "akaila" } +rewrite { + rule: "ISO_TO_PSAC" + input: "bāṛʰa" + output: "badh" +} diff --git a/nisaba/scripts/natural_translit/romanization/testdata/mr_iso_nat.textproto b/nisaba/scripts/natural_translit/romanization/testdata/mr_iso_nat.textproto index 2592a11d..72ed8cdb 100644 --- a/nisaba/scripts/natural_translit/romanization/testdata/mr_iso_nat.textproto +++ b/nisaba/scripts/natural_translit/romanization/testdata/mr_iso_nat.textproto @@ -60,3 +60,8 @@ rewrite { input: "siddʰēgavhāṇa" output: "siddhegavhan" } +rewrite { + rule: "ISO_TO_NAT" + input: "māṛʰā" + output: "madha" +} diff --git a/nisaba/scripts/natural_translit/romanization/testdata/mr_iso_psac.textproto b/nisaba/scripts/natural_translit/romanization/testdata/mr_iso_psac.textproto index fdbec5dd..515397ab 100644 --- a/nisaba/scripts/natural_translit/romanization/testdata/mr_iso_psac.textproto +++ b/nisaba/scripts/natural_translit/romanization/testdata/mr_iso_psac.textproto @@ -60,3 +60,8 @@ rewrite { input: "siddʰēgavhāṇa" output: "sidhegavhan" } +rewrite { + rule: "ISO_TO_PSAC" + input: "māṛʰā" + output: "madha" +}