# Provenance: reconstructed from a two-commit patch series by ZhymabekRoman
# against translatepy/translators/promt.py:
#   477783ade0010be974a508f3c3d6ef04ea4b8761  "Basic promt implementation"  (Wed, 14 Feb 2024)
#   a47086c73cff10ae3a3e9718be2bdc90f4f30f73  "Prepare for release Promt"   (Thu, 7 Mar 2024)
"""
Promt Translate

This implementation, based on a reverse-engineered Android client, was created by 'Zhymabek Roman' specifically for translatepy.
"""
import re
import uuid  # noqa: F401 -- unused here; kept from the original import block
import hashlib
import xml.etree.ElementTree as ET
from typing import Dict, Optional, Set, Tuple
from warnings import warn
from xml.sax.saxutils import escape, unescape

from translatepy.language import Language
from translatepy.translators.base import BaseTranslateException, BaseTranslator
from translatepy.utils.request import Request

PROMT_ACC_ID = "PMTAndroid"
PROMT_ACC_KEY = "C3FC7A3D-FE52-4063-B11C-1565C627B7AD"

# Quote entities handled in addition to the "&", "<", ">" that
# xml.sax.saxutils.escape/unescape always process.
_XML_QUOTE_ENTITIES = {'"': "&quot;", "'": "&apos;"}
_XML_QUOTE_CHARACTERS = {"&quot;": '"', "&apos;": "'"}

# Matches direction codes such as "[fi]e", "r[kz]" and "[zh-cn]e": each side is
# either a single letter or a bracketed multi-character prefix.  The bracketed
# class also accepts "_" and capitals so that registered prefixes such as
# "en_us" and "tlh_Qaak" can be parsed.
_LANGUAGE_PAIR_RE = re.compile(
    r"(?:\[([A-Za-z_-]+)\]|([a-z]))(?:\[([A-Za-z_-]+)\]|([a-z]))"
)


# Source reverse engineered file: Slid.smali
class Slid:
    """A language descriptor that registers itself in class-level lookup tables.

    Each instance is indexed by numeric id, by service prefix and by RFC-style
    prefix.  Prefix keys are stored lowercased because the lookup helpers
    lowercase their argument before searching.
    """

    _index_prefix = {}
    _index_id = {}
    _index_rfc = {}
    _lang_name = {}

    # Ids of the two sentinel descriptors registered by PromtHelper.
    _AUTO_ID = 0
    _UNKNOWN_ID = -1

    def __init__(self, id, prefix, rfc_prefix, py_locale, beta=False, ocr_prefix=None):
        self.id = id
        self.prefix = prefix
        self.rfc_prefix = rfc_prefix
        self.py_locale = py_locale
        self.beta = beta
        self.ocr_prefix = ocr_prefix or py_locale

        # Populate indexes (lowercased keys: see from_prefix / from_rfc_prefix).
        Slid._index_prefix[prefix.lower()] = self
        Slid._index_id[id] = self
        Slid._index_rfc[rfc_prefix.lower()] = self

    @classmethod
    def _sentinel(cls, sentinel_id, legacy_default):
        """Return the sentinel descriptor, or the legacy string if it is not registered."""
        return cls._index_id.get(sentinel_id, legacy_default)

    @classmethod
    def from_id(cls, id):
        """Return the descriptor registered under ``id``, else the Unknown sentinel."""
        return cls._index_id.get(id, cls._sentinel(cls._UNKNOWN_ID, "Unknown"))

    @classmethod
    def from_prefix(cls, prefix):
        """Return the descriptor for a service prefix (case-insensitive), else the Auto sentinel."""
        return cls._index_prefix.get(prefix.lower(), cls._sentinel(cls._AUTO_ID, "Auto"))

    @classmethod
    def from_rfc_prefix(cls, rfc_prefix):
        """Return the descriptor for an RFC-style prefix (case-insensitive), else the Unknown sentinel."""
        return cls._index_rfc.get(rfc_prefix.lower(), cls._sentinel(cls._UNKNOWN_ID, "Unknown"))

    @classmethod
    def get_lang_name(cls, id):
        """Return the display name stored for ``id``, or "Unknown Language"."""
        return cls._lang_name.get(id, "Unknown Language")


class PromtHelper:
    """Registers every known language and converts service direction codes."""

    # register languages
    Auto = Slid(0, "a", "Auto", "")
    Unknown = Slid(-1, "", "", "")
    English = Slid(1, "e", "en", "en_US")
    Russian = Slid(2, "r", "ru", "ru_RU")
    German = Slid(4, "g", "de", "de_DE")
    French = Slid(8, "f", "fr", "fr_FR")
    Italian = Slid(16, "i", "it", "it_IT")
    Spanish = Slid(32, "s", "es", "es_ES")
    Portuguese = Slid(64, "p", "pt", "pt_PT")
    Ukrainian = Slid(128, "u", "uk", "uk_UA")
    Lithuanian = Slid(256, "l", "lt", "lt_LT")
    Chinese = Slid(512, "zh-cn", "zh-cn", "zh_CN")
    TChinese = Slid(1024, "zh-tw", "zh-tw", "zh_TW")
    EnglishUS = Slid(1025, "en_us", "en_us", "en_US")
    EnglishGB = Slid(1026, "en_gb", "en_gb", "en_GB")
    Latvian = Slid(1027, "lv", "lv", "lv_LV")
    Polish = Slid(1028, "pl", "pl", "pl_PL")
    Kazakh = Slid(1029, "kz", "kk", "kk_KZ")
    Japanese = Slid(1030, "ja", "ja", "ja_JP")
    Dutch = Slid(1031, "nl", "nl", "nl_NL")
    Turkish = Slid(1032, "t", "tr", "tr_TR")
    Swedish = Slid(1033, "sv", "sv", "sv_SE")
    Norwegian = Slid(1034, "no", "no", "no_NO")
    Danish = Slid(1035, "da", "da", "da_DK")
    EnglishCA = Slid(1036, "en_ca", "en_ca", "en_CA")
    Bulgarian = Slid(1037, "bg", "bg", "bg_BG")
    Finnish = Slid(1038, "fi", "fi", "fi_FI")
    Arabic = Slid(1039, "ar", "ar", "ar_AR")
    Korean = Slid(1040, "ko", "ko", "ko_KR")
    SpanishMX = Slid(1041, "es_mx", "es_mx", "es_MX")
    FrenchCA = Slid(1042, "fr_ca", "fr_ca", "fr_CA")
    EnglishAU = Slid(1043, "en_au", "en_au", "en_AU")
    Greek = Slid(1047, "el", "el", "el_GR")
    Estonian = Slid(1048, "et", "et", "et_EE")
    Hungarian = Slid(1049, "hu", "hu", "hu_HU")
    Armenian = Slid(1050, "hy", "hy", "hy_AM")
    Georgian = Slid(1051, "ka", "ka", "ka_GE")
    Romanian = Slid(1052, "ro", "ro", "ro_RO")
    Slovak = Slid(1053, "sk", "sk", "sk_SK")
    Uzbek = Slid(1054, "uz", "uz", "uz_UZ")
    Vietnamese = Slid(1055, "vi", "vi", "vi_VN")
    Azeri = Slid(1056, "az", "az", "az_AZ")
    Hebrew = Slid(1057, "he", "he", "he_IL")
    Catalan = Slid(1058, "ca", "ca", "ca_ES")
    Czech = Slid(1059, "cs", "cs", "cs_CZ")
    HaitianCreole = Slid(1060, "ht", "ht", "ht_HT")
    Hindi = Slid(1061, "hi", "hi", "hi_IN")
    HmongDaw = Slid(1062, "mww", "mww", "mww_MWW")
    Indonesian = Slid(1063, "id", "id", "id_ID")
    Klingon = Slid(1064, "tlh", "tlh", "tlh_TLH")
    Klingon_pIqaD = Slid(1065, "tlh_Qaak", "tlh_Qaak", "tlh_Qaak")
    Malay = Slid(1066, "ms", "ms", "ms_MY")
    Maltese = Slid(1067, "mt", "mt", "mt_MT")
    Persian = Slid(1068, "fa", "fa", "fa_IR")
    Slovenian = Slid(1069, "sl", "sl", "sl_SI")
    Thai = Slid(1070, "th", "th", "th_TH")
    Urdu = Slid(1071, "ur", "ur", "ur_PK")
    Welsh = Slid(1072, "cy", "cy", "cy_CY")

    def _get_supported_languages(self) -> Set[str]:
        """Return the RFC-style prefix of every registered descriptor, plus "auto"."""
        languages = {slid.rfc_prefix for slid in Slid._index_rfc.values()}
        languages.add("auto")
        return languages

    @staticmethod
    def _parse_language_pair(language_pair: str) -> Tuple[str, str]:
        """Split a direction code into ``(source, destination)`` RFC-style prefixes.

        A two-character code is read as two single-letter prefixes; anything
        else must use the bracketed form, e.g. "[fi]e", "r[kz]", "[zh-cn]e".

        Raises:
            ValueError: the code is malformed or names an unregistered prefix.
        """
        if len(language_pair) == 2:
            source_code, destination_code = language_pair[0], language_pair[1]
        else:
            match = _LANGUAGE_PAIR_RE.match(language_pair)
            if match is None:
                raise ValueError(f"Invalid language pair: {language_pair}")
            source_code = match.group(1) or match.group(2)
            destination_code = match.group(3) or match.group(4)

        resolved = []
        for code in (source_code, destination_code):
            # Look the prefix up directly so that an unregistered prefix is
            # reported instead of being silently mapped to a sentinel.
            slid = Slid._index_prefix.get(code.lower())
            if slid is None:
                raise ValueError(f"Invalid language pair: {language_pair}")
            resolved.append(slid.rfc_prefix)

        return resolved[0], resolved[1]


def _format_direction_prefix(prefix: str) -> str:
    """Render one side of a direction code: bare if one letter, else bracketed.

    Mirrors the shapes accepted by ``PromtHelper._parse_language_pair``
    ("r[kz]", "[zh-cn]e").
    """
    return prefix if len(prefix) == 1 else f"[{prefix}]"


class PromtTranslate(BaseTranslator):
    """
    Promt Translate Implementation.

    !!!Attention!!! Do not modify the order of the headers, as it may result in a "Bad request" error from the server. This is due to certain legacy server constraints.
    """

    _helper = PromtHelper()
    _soap_endpoint_url = "https://www.translate.ru/services/9.0/Translator.asmx"
    _supported_languages = _helper._get_supported_languages()
    # Class-level cache: "<source>-<destination>" -> direction code, shared by all instances.
    _language_pair = {}

    def __init__(self, request: Optional[Request] = None):
        """Open a session with the service and load the direction table once.

        Args:
            request: session object used for HTTP calls; a fresh ``Request()``
                is created when omitted, so instances do not share one
                default object built at import time.
        """
        self.session = Request() if request is None else request
        self.session_req_id = None

        self._initialize()
        if not self._language_pair:
            self._generate_lang_pair()

    def _build_headers(self, action: str, authenticated: bool = True) -> Dict[str, str]:
        """Build request headers for ``action``.

        Insertion order is significant (see the class docstring): the PROMT-*
        headers come first, then Host, Connection, Content-Type, SOAPAction.
        """
        headers = self._generate_auth_headers() if authenticated else {}
        headers['Host'] = 'www.translate.ru'
        headers['Connection'] = 'Keep-Alive'
        headers['Content-Type'] = 'text/xml; charset=utf-8'
        headers['SOAPAction'] = f'"http://tempuri.org/{action}"'
        return headers

    def _initialize(self) -> None:
        """Call ``Initialize`` and store the request id it returns.

        Raises:
            BaseTranslateException: the response carried no request id.
        """
        # NOTE(review): this payload contains no SOAP envelope markup; it may
        # have been stripped before this file reached review -- confirm against
        # the upstream repository.
        payload = "\t\ten\t"

        response = self.session.post(
            self._soap_endpoint_url,
            headers=self._build_headers("Initialize", authenticated=False),
            data=payload,
        )
        if not self._set_req_id(response.content):
            # Without an id, _generate_auth_headers would fail on str + None.
            raise BaseTranslateException("Unable to obtain a request id from the service")

    def _set_req_id(self, content) -> Optional[str]:
        """Update ``session_req_id`` from a response body and return the current id.

        The stored id is left untouched when the body is not well-formed XML,
        has no ``reqId`` element, or the element is empty.
        """
        try:
            root = ET.fromstring(content)
        except ET.ParseError:
            return self.session_req_id

        req_id_element = root.find('.//reqId')
        if req_id_element is not None and req_id_element.text:
            self.session_req_id = req_id_element.text
        return self.session_req_id

    def _generate_auth_headers(self) -> Dict[str, str]:
        """Return the PROMT-* headers for the current request id.

        PROMT-CODE is the MD5 hex digest of account id + request id + account
        key.  MD5 is presumably what the server expects -- not a choice to
        "upgrade" locally.
        """
        signature_source = PROMT_ACC_ID + self.session_req_id + PROMT_ACC_KEY
        return {
            "PROMT-REQID": self.session_req_id,
            "PROMT-CODE": hashlib.md5(signature_source.encode('utf-8')).hexdigest(),
            "PROMT-ACCID": PROMT_ACC_ID,
        }

    def _generate_lang_pair(self) -> None:
        """Fill the shared direction table from the ``GetServices`` response.

        Directions with a missing id, a malformed code or an unregistered
        prefix are skipped so that one bad entry cannot abort construction.
        """
        root = ET.fromstring(self._get_service())
        for direction in root.findall('.//direction'):
            direction_code = direction.findtext('id')
            if not direction_code:
                continue
            try:
                source, destination = PromtHelper._parse_language_pair(direction_code)
            except ValueError:
                continue
            self._language_pair[f"{source}-{destination}"] = direction_code

    def _get_service(self):
        """Call ``GetServices``, refresh the request id and return the raw response body."""
        # NOTE(review): same apparently markup-less payload as in _initialize -- confirm upstream.
        payload = "\t\ten\t"

        response = self.session.post(
            self._soap_endpoint_url,
            headers=self._build_headers("GetServices"),
            data=payload,
        )
        self._set_req_id(response.content)
        return response.content

    def _translate(self, text: str, destination_language: str, source_language: str) -> Tuple[str, Optional[str]]:
        """Translate ``text`` and return ``(source_language, translation)``.

        ``translation`` is ``None`` (after a warning) when the response cannot
        be parsed.

        Raises:
            BaseTranslateException: no direction exists for the language pair.
        """
        if source_language == "auto":
            source_language = "a"

            destination = Slid.from_rfc_prefix(destination_language)
            destination_prefix = getattr(destination, "prefix", "")
            # An empty prefix means the destination resolved to the Unknown sentinel.
            language_pair = (
                f"{source_language}{_format_direction_prefix(destination_prefix)}"
                if destination_prefix else None
            )
        else:
            language_pair = self._language_pair.get(f"{source_language}-{destination_language}")

        if not language_pair:
            raise BaseTranslateException("Language pair not supported")

        headers = self._build_headers("Translate")

        # Escape &, <, > and both quote characters before embedding the text.
        text = escape(text, _XML_QUOTE_ENTITIES)

        # NOTE(review): as with the other payloads, the surrounding SOAP markup
        # appears to be missing here -- confirm against the upstream repository.
        payload = f"\t\t{language_pair}\tGeneral\t{text}\tword<params><param><id>1</id><name>Roaming</name><value>off</value></param><param><id>2</id><name>MinimizeTrafficInRoaming</name><value>off</value></param><param><id>3</id><name>TextSource</name><value>TEXT</value></param></params>\ten\t"
        response = self.session.post(self._soap_endpoint_url, headers=headers, data=payload)
        self._set_req_id(response.content)

        try:
            root = ET.fromstring(response.text)
            raw_result = root.find('.//strResult').text

            try:
                # strResult normally holds an embedded XML document...
                result_document = ET.fromstring(raw_result)
                translation = result_document.findall(".//translation")[0].find("result").text
            except (ET.ParseError, ValueError, TypeError, IndexError, AttributeError):
                # ...but fall back to the raw string when it does not.
                translation = raw_result

            translation = unescape(translation, _XML_QUOTE_CHARACTERS)
        except (ET.ParseError, ValueError, TypeError, AttributeError) as ex:
            warn(f"Can't parse result. Exception: {ex}. Response: {response.text}. Try to set source language manually instead of automatic")
            translation = None

        if source_language == "a":
            source_language = "auto"

        return source_language, translation

    def _language_normalize(self, language):
        """Return the ``alpha2`` attribute of ``language``."""
        return language.alpha2

    def _language_denormalize(self, language_code):
        """Wrap ``language_code`` in a ``Language`` object."""
        return Language(language_code)

    def __str__(self) -> str:
        return "Promt"