iTlajtol is a mobile application that can translate from Nahuatl to Spanish and vice-versa.
This work is based on FastAI and uses SentencePiece. To prepare an environment to run these models, please install the requirements.
There are two main corpora: the first is Axolotl, from this website, and the second is the result of web scraping JW, using Nahuatl de la Huasteca.
This work also uses two NN architectures: an RNN that implements attention, and a Transformer. These implementations come from FastAI and were fine-tuned to accommodate a low-resource language such as Nahuatl.
There are several models to carry out a comparative analysis. These models can be found in the models folder with a pkl extension, and their weights as pth files.
So far, only the best Transformer and the first RNN with attention are available.
Architecture | Corpus | Tokenization | From | To | %BLEU |
---|---|---|---|---|---|
Transformer | JW | SentencePiece | NCH | SPA | 62.5145 |
RNN+Att | Axolotl | Spacy | SPA | NAH | 32.2177 |
Transformer and RNN: this data contains the vocabulary and segmented pieces.
For SentencePiece, the vocabulary can be loaded from the files in vocab; these files have to be loaded with SentencePiece.
There are two clients, one written in Objective-C and the other in Swift using SwiftUI.
% Conference paper in the CONISOFT 2020 proceedings (pages 235--244).
% Author names use the unambiguous "Last, First" form with LaTeX-escaped accents.
@inproceedings{9307780,
  author     = {Bello Garc{\'\i}a, Sergio Khalil and S{\'a}nchez Lucero, Eduardo and Pedroza M{\'e}ndez, Blanca Estela and Hern{\'a}ndez Hern{\'a}ndez, Jos{\'e} Crisp{\'\i}n and Bonilla Huerta, Edmundo and Ram{\'\i}rez Cruz, Jos{\'e} Federico},
  title      = {Towards the implementation of an Attention-based Neural Machine Translation with artificial pronunciation for {Nahuatl} as a mobile application},
  booktitle  = {2020 8th International Conference in Software Engineering Research and Innovation ({CONISOFT})},
  pages      = {235--244},
  month      = nov,
  year       = {2020},
  doi        = {10.1109/CONISOFT50191.2020.00041},
  keywords   = {Spectrogram;Decoding;Tools;Mobile handsets;Internet;Engines;Speech recognition;Nahuatl;NMT;mobile;translation;attention;machine learning;CoreML;neural network;Mel spectrogram},
  bdsk-url-1 = {https://doi.org/10.1109/CONISOFT50191.2020.00041}
}
% Journal article in Programming and Computer Software, volume 47, number 8 (2021).
% No page range is recorded for this entry.
@article{springer-jcr,
  author        = {Bello Garc{\'\i}a, Sergio Khalil and S{\'a}nchez Lucero, Eduardo and Bonilla Huerta, Edmundo and Hern{\'a}ndez Hern{\'a}ndez, Jos{\'e} Crisp{\'\i}n and Ram{\'\i}rez Cruz, Jos{\'e} Federico and Pedroza M{\'e}ndez, Blanca Estela},
  title         = {Implementation of Neural Machine Translation for {Nahuatl} as a Web Platform: A focus on text translation},
  journal       = {Programming and Computer Software},
  volume        = {47},
  number        = {8},
  month         = dec,
  year          = {2021},
  keywords      = {machine translation; nahuatl; attention; application; transformers},
  date-added    = {2021-08-22 17:05:11 -0500},
  date-modified = {2021-08-22 17:07:42 -0500}
}
% Paper in the edited volume "Advances in Soft Computing" (Springer, 2021), pages 120--139.
% NOTE(review): the doi field mirrors the citation key, which has the form of a DOI -- confirm it resolves.
@inproceedings{10.1007/978-3-030-89820-5_10,
  author    = {Bello Garc{\'\i}a, Sergio Khalil and S{\'a}nchez Lucero, Eduardo and Bonilla Huerta, Edmundo and Hern{\'a}ndez Hern{\'a}ndez, Jos{\'e} Crisp{\'\i}n and Ram{\'\i}rez Cruz, Jos{\'e} Federico and Pedroza M{\'e}ndez, Blanca Estela},
  editor    = {Batyrshin, Ildar and Gelbukh, Alexander and Sidorov, Grigori},
  title     = {{Nahuatl} Neural Machine Translation Using Attention Based Architectures: A Comparative Analysis for {RNNs} and Transformers as a Mobile Application Service},
  booktitle = {Advances in Soft Computing},
  publisher = {Springer International Publishing},
  address   = {Cham},
  pages     = {120--139},
  year      = {2021},
  isbn      = {978-3-030-89820-5},
  doi       = {10.1007/978-3-030-89820-5_10}
}
% Journal article in Proceedings of the Institute for System Programming of the RAS (2021).
% Volume, number and pages are not recorded for this entry.
@article{ISP-RAS,
  author        = {Bello Garc{\'\i}a, Sergio Khalil and S{\'a}nchez Lucero, Eduardo and Bonilla Huerta, Edmundo and Ram{\'\i}rez Cruz, Jos{\'e} Federico and Hern{\'a}ndez Hern{\'a}ndez, Jos{\'e} Crisp{\'\i}n and Pedroza M{\'e}ndez, Blanca Estela},
  title         = {Pushing the Text {Nahuatl} Neural Machine Translation Boundaries as a Mobile Application},
  journal       = {Proceedings of the Institute for System Programming of the RAS},
  year          = {2021},
  date-added    = {2021-08-22 17:29:22 -0500},
  date-modified = {2021-08-22 17:57:28 -0500}
}