# resources/localization/pom_merger.py

import re
from datetime import date

try:
	from Levenshtein import distance as levenshtein_distance
except ImportError:
	print("you need to do 'python -m pip install python-Levenshtein'");
	exit(0);


# Translation memory shared by the functions of this script.
datastore = {}  # msgid -> TranslationLine
datastore_trim = {}  # trim(msgid) -> TranslationLine

# Matches every character that is not an ASCII letter.
regex_only_letters = re.compile(r"[^a-zA-Z]")

# Options and their default values.
allow_msgctxt = True
ignore_case = False
remove_comment = False
percent_error_similar = 0
language_code = "??"
language = ""
max_similar = 3
database_out = ""

def trim(str):
	"""Return str without surrounding whitespace or ':', '.', ',', '!' characters.

	Punctuation and whitespace are removed in alternating passes until a
	whitespace pass no longer changes the text, so mixed runs such as
	" . : text ! " are fully removed from both ends. Characters inside the
	text are never touched.
	"""
	punctuation = ":.,!"
	while True:
		without_punctuation = str.strip(punctuation)
		cleaned = without_punctuation.strip()
		if cleaned == without_punctuation:
			return cleaned
		str = cleaned

class TranslationFiles:
	"""File paths of one translation job; every path defaults to ""."""

	file_in: str = ""  # file whose strings have to be translated
	file_out: str = ""  # translated file to write, skipped when empty
	file_todo: str = ""  # file listing the untranslated strings, skipped when empty
	database: str = ""  # not assigned anywhere in this file

class TranslationLine:
	"""One msgid/msgstr entry, with both its raw lines and its bare text."""

	header_comment: str = ""  # comment, msgctxt and blank lines found before the msgid
	raw_msgid: str = ""  # the msgid line(s) as they appear in the file
	msgid: str = ""  # text of the msgid, without the surrounding quotes
	raw_msgstr: str = ""  # the msgstr line(s) as they appear in the file
	msgstr: str = ""  # text of the msgstr, without the surrounding quotes
	multivalue: bool = False  # set when extra msgid lines or msgstr[n] lines are seen

def main():
	"""Merge translations as described by ./settings.ini.

	The settings file is read as "key = value" lines; the key is matched by
	prefix and lines without '=' are ignored:
	  data...                 .po file loaded into the translation memory
	  database_out            file receiving a dump of the translation memory
	  input / output / todo   one translation job (output and todo attach to
	                          the most recent input)
	  ui_dir                  directory holding the .ui files to scan
	  allow_msgctxt, ignore_case, remove_comment   booleans, "true" enables
	  percent_error_similar, max_similar           numbers
	  language, language_code                      language name and code

	Raises whatever open() raises when ./settings.ini cannot be read, and
	ValueError when a numeric setting cannot be converted.
	"""
	global allow_msgctxt, ignore_case, remove_comment
	global percent_error_similar, language, language_code, max_similar, database_out
	data_files = []  # list of file paths
	ui_dir = ""
	operations = []  # list of TranslationFiles
	with open("./settings.ini", mode="r", encoding="utf-8") as settings_stream:
		lines = settings_stream.read().splitlines()

	for line in lines:
		# A line without '=' has no value to read (it used to raise ValueError
		# when it happened to start with a known key).
		if "=" not in line:
			continue
		value = line[line.index("=") + 1:].strip()

		# Longer keys are tested before the shorter key they start with.
		if line.startswith("database_out"):
			database_out = value
		elif line.startswith("data"):
			data_files.append(value)
		elif line.startswith("input"):
			operations.append(TranslationFiles())
			operations[-1].file_in = value
		elif line.startswith("output"):
			if operations:
				operations[-1].file_out = value
		elif line.startswith("todo"):
			if operations:
				operations[-1].file_todo = value
		elif line.startswith("ui_dir"):
			ui_dir = value
		elif line.startswith("allow_msgctxt"):
			allow_msgctxt = (value.lower() == "true")
			print("Don't comment msgctxt" if allow_msgctxt else "Commenting msgctxt")
		elif line.startswith("ignore_case"):
			# This setting was never read: the allow_msgctxt test was duplicated here.
			ignore_case = (value.lower() == "true")
			if ignore_case:
				print("Will ignore the case when searching for a translation")
		elif line.startswith("remove_comment"):
			remove_comment = (value.lower() == "true")
			if remove_comment:
				print("Will not output the comments")
		elif line.startswith("percent_error_similar"):
			percent_error_similar = float(value)
			print("percent_error_similar set to " + str(percent_error_similar))
		elif line.startswith("max_similar"):
			max_similar = int(value)
			print("max_similar set to " + str(max_similar))
		elif line.startswith("language_code"):
			language_code = value
			print("language_code set to " + language_code)
		elif line.startswith("language"):
			language = value
			print("language set to " + language)

	# Fill the translation memory from the data files.
	for data_file in data_files:
		new_data = createKnowledge(data_file)
		for dataline in new_data:
			if len(dataline.msgstr) == 0:
				continue
			if dataline.msgid not in datastore:
				datastore[dataline.msgid] = dataline
				datastore_trim[trim(dataline.msgid)] = dataline
				# Trace for the " Layers," entry: tells whether its trimmed key is registered.
				if dataline.msgid == " Layers,":
					print(trim(dataline.msgid)+" is inside? "+("oui" if "Layers" in datastore_trim else "non"))
			else:
				known = datastore[dataline.msgid]
				str_old_val = known.msgstr
				str_test_val = dataline.msgstr
				length_old = len(regex_only_letters.sub("", str_old_val))
				length_new = len(regex_only_letters.sub("", str_test_val))
				# if already exist, only change it if the previous was lower than 3 char
				if length_new > length_old and length_old < 3:
					print(str_old_val.replace('\n', ' ')+" replaced by "+str_test_val.replace('\n', ' '))
					known.msgstr = str_test_val
					# Exact matches and the database dump use raw_msgstr, so it
					# has to follow msgstr or the old translation stays in use.
					known.raw_msgstr = dataline.raw_msgstr
					known.multivalue = dataline.multivalue
					datastore_trim[trim(dataline.msgid)].msgstr = str_test_val
		print("finish reading" + data_file + " of size "+ str(len(new_data)) + ", now we had "+ str(len(datastore)) + " items")

	if ignore_case:
		# Register a lower-case alias for every key that has none yet.
		for store in (datastore, datastore_trim):
			missing = [msgid for msgid in store if msgid.lower() not in store]
			for msgid in missing:
				store[msgid.lower()] = store[msgid]

	# Each .ui file is listed once (extruder.ui used to be parsed twice,
	# which inflated the count printed below).
	ui_files = (
		"extruder.ui",
		"filament.ui",
		"milling.ui",
		"print.ui",
		"printer_fff.ui",
		"printer_sla.ui",
		"sla_material.ui",
		"sla_print.ui",
	)

	for operation in operations:
		print("Translating " + operation.file_in)
		dict_ope = {}
		ope_file_in = []
		lst_temp = createKnowledge(operation.file_in)
		print("String from source files: " + str(len(lst_temp)))
		nbTrans = 0
		# remove duplicate
		for line in lst_temp:
			if line.msgid not in dict_ope:
				dict_ope[line.msgid] = line
				ope_file_in.append(line)
				if line.msgstr:
					nbTrans += 1
					print(line.header_comment)
					print(line.raw_msgid)
					print(line.msgid)
					print(line.raw_msgstr)
					print(line.msgstr)
		# add def from conf files
		if ui_dir:
			new_data = []
			for ui_file in ui_files:
				new_data.extend(parse_ui_file(ui_dir + "/" + ui_file))
			print("String from ui files: " + str(len(new_data)))
			for dataline in new_data:
				if dataline.msgid not in dict_ope:
					dict_ope[dataline.msgid] = dataline
					ope_file_in.append(dataline)
		print("String to translate: " + str(len(ope_file_in) - nbTrans)+" and already translated: "+str(nbTrans))

		# create database
		if database_out:
			outputDatabase(database_out)

		# create TODO file
		if operation.file_todo:
			outputUntranslated(ope_file_in, operation.file_todo)

		# create .po file
		if operation.file_out:
			translate(ope_file_in, operation.file_out)

	print("End of merge")

def createKnowledge(file_path_in):
	"""Parse a .po-style file and return its entries as a list of TranslationLine.

	For every "msgid" line followed by a "msgstr" line, one TranslationLine is
	built: header_comment collects the comment, msgctxt and blank lines found
	before the msgid (msgctxt lines are prefixed with "#, " when allow_msgctxt
	is false), raw_msgid / raw_msgstr keep the lines as written, msgid / msgstr
	hold the text with continuation lines joined and quotes removed. Entries
	whose msgid text is shorter than two characters, or that are not followed
	by a msgstr line, are dropped. Entries with an empty msgstr are kept.

	Any error while reading or parsing is reported on stdout and the entries
	collected so far (possibly none) are returned.
	"""
	read_data_lines = []
	try:
		# The context manager closes the file as soon as it has been read.
		with open(file_path_in, mode="r", encoding="utf-8") as file_in_stream:
			lines = file_in_stream.read().splitlines()
		# Sentinel: an empty last line ends any multi-line entry still open.
		lines.append("")
		line_idx = 0
		current_line = TranslationLine()
		while line_idx < len(lines):
			if not lines[line_idx].startswith("msgid") or len(lines[line_idx]) <= 7:
				if (lines[line_idx].startswith("#")
					or lines[line_idx].startswith("msgctxt")
					or len(lines[line_idx].strip()) == 0
					):
					if not allow_msgctxt and lines[line_idx].startswith("msgctxt"):
						current_line.header_comment += "\n#, " + lines[line_idx]
					else:
						current_line.header_comment += "\n" + lines[line_idx]
				line_idx += 1
				continue

			# get the msgid line; [7:] skips 'msgid "' and keeps the closing quote
			current_line.raw_msgid = lines[line_idx]
			current_line.msgid = lines[line_idx][7:]

			# get the next line (can be whatever)
			line_idx += 1
			if line_idx >= len(lines):
				return read_data_lines

			# populate the full current_line.msgid string
			while lines[line_idx].startswith("\"") or lines[line_idx].startswith("msgid"):
				current_line.raw_msgid += "\n" + lines[line_idx]
				if lines[line_idx].startswith("msgid"):
					current_line.multivalue = True
				if lines[line_idx].startswith("\""):
					# drop the closing quote of the previous piece, then append
					# this piece without its opening quote
					current_line.msgid = current_line.msgid[0:-1]
					current_line.msgid += lines[line_idx][1:]
				else:
					current_line.msgid += "\n" + lines[line_idx]
				# todo: do something for msgid_plural. Not needed right now...

				# get the next line (can be whatever)
				line_idx += 1
				if line_idx >= len(lines):
					return read_data_lines

			# check validity of the id (the closing quote is still counted here)
			if len(current_line.msgid) < 3:
				current_line = TranslationLine()
				continue
			# remove the closing quote
			current_line.msgid = current_line.msgid[0:-1]

			# there should be a msgstr just after
			if not lines[line_idx].startswith("msgstr") or len(lines[line_idx]) <= 8:
				current_line = TranslationLine()
				continue

			current_line.raw_msgstr = lines[line_idx]
			if lines[line_idx][7] == "\"":
				# 'msgstr "text"'
				current_line.msgstr = lines[line_idx][8:]
			elif lines[line_idx][6] == "[":
				# 'msgstr[n] "text"'
				current_line.msgstr = lines[line_idx][11:]
			else:
				# can't parse
				print("error, can't parse msgstr: '"+lines[line_idx]+"'")
				current_line.msgstr = ""

			line_idx += 1
			if line_idx >= len(lines):
				return read_data_lines

			while lines[line_idx].startswith("\"") or lines[line_idx].startswith("msgstr"):
				current_line.raw_msgstr += "\n" + lines[line_idx]
				if lines[line_idx].startswith("\""):
					current_line.msgstr = current_line.msgstr[0:-1]
					current_line.msgstr += lines[line_idx][1:]
				elif lines[line_idx].startswith("msgstr["):
					# a later msgstr[n] line replaces the text read so far
					current_line.msgstr = lines[line_idx][11:]
					current_line.multivalue = True
				else:
					current_line.msgstr += "\n" + lines[line_idx]

				# get the next line (can be whatever)
				line_idx += 1
				if line_idx >= len(lines):
					return read_data_lines

			if current_line.msgstr:
				# remove the closing quote
				current_line.msgstr = current_line.msgstr[0:-1]
			read_data_lines.append(current_line)
			current_line = TranslationLine()
	except Exception as error:
		# Best effort: an unreadable file only produces a warning.
		print("Warning, cannot read file " + file_path_in)
		print(error)
	return read_data_lines

def getTranslation(item):
	"""Return the msgstr line to use for item, or "" when none is known.

	Lookups are tried in this order:
	  1. item.msgid is a key of datastore: the stored raw_msgstr is returned.
	  2. item.msgid is a key of datastore_trim: the trimmed translation is
	     returned, unless the stored entry is multivalue.
	  3. otherwise trim(item.msgid) is looked up in datastore, then in
	     datastore_trim; on a hit the matched part of item.msgid is replaced
	     by the translation and the surrounding characters are kept.
	  4. when ignore_case is set and item.msgid is not already lower case,
	     the whole search is repeated with the lower-cased msgid.
	"""
	source = item.msgid
	if not source:
		return ""

	if source in datastore:
		return datastore[source].raw_msgstr

	if source in datastore_trim:
		match = datastore_trim[source]
		if not match.multivalue:
			return "msgstr \"" + trim(match.msgstr) + "\""
	else:
		core = trim(source)
		if core in datastore:
			match = datastore[core]
			if not match.multivalue and match.msgid in source:
				begin = source.index(match.msgid)
				end = begin + len(match.msgid)
				return "msgstr \"" + source[0:begin] + match.msgstr + source[end:] + "\""
		elif core in datastore_trim:
			match = datastore_trim[core]
			if not match.multivalue:
				match_core = trim(match.msgid)
				if match_core in source:
					begin = source.index(match_core)
					end = begin + len(match_core)
					return "msgstr \"" + source[0:begin] + trim(match.msgstr) + source[end:] + "\""

	if ignore_case:
		lowered = source.lower()
		if lowered != source:
			# Search again with a lower-case copy of the item.
			clone = TranslationLine()
			clone.msgid = lowered
			clone.header_comment = item.header_comment
			clone.raw_msgid = item.raw_msgid
			clone.raw_msgstr = item.raw_msgstr
			clone.msgstr = item.msgstr
			clone.multivalue = item.multivalue
			return getTranslation(clone)
	return ""

def getTranslationNear(msgid_to_search, percent):
	"""Return the datastore entries whose key is close to msgid_to_search.

	A key is close when its Levenshtein distance to msgid_to_search is lower
	than 1 + int(percent * len(msgid_to_search)). The result is a list of
	(distance, TranslationLine) tuples sorted by increasing distance.
	"""
	limit = 1 + int(percent * len(msgid_to_search))
	scored = (
		(levenshtein_distance(known_id, msgid_to_search), entry)
		for known_id, entry in datastore.items()
	)
	nearest = [pair for pair in scored if pair[0] < limit]
	nearest.sort(key=lambda pair: pair[0])
	return nearest

def outputUntranslated(data_to_translate, file_path_out):
	"""Write to file_path_out the entries that still have no translation.

	An entry is listed when its msgstr is blank, its msgid is not empty and
	getTranslation() finds nothing for it. Entries are sorted by lower-cased
	msgid. Each one is written as its header comment, then up to max_similar
	commented suggestions taken from getTranslationNear(msgid, 0.4), then its
	raw msgid and msgstr lines.

	One dot is printed per entry as a progress indicator (100 per row). Any
	error is reported on stdout instead of being raised.
	"""
	try:
		# The context manager closes (and flushes) the file in every case.
		with open(file_path_out, mode="w", encoding="utf-8") as file_out_stream:
			# sort to have an easier time translating.
			# ideally, they should be grouped by proximity, but it's a bit more complicated to code
			sorted_lines = [
				dataline for dataline in data_to_translate
				if not dataline.msgstr.strip()
				and dataline.msgid
				and len(getTranslation(dataline).strip()) == 0
			]
			sorted_lines.sort(key=lambda x: x.msgid.lower())

			# A negative max_similar means "no suggestion", as it did with range().
			nb_suggestions = max(max_similar, 0)

			# output bits that are empty
			for nb_iter, dataline in enumerate(sorted_lines):
				print('.', end="\n" if nb_iter % 100 == 99 else "")
				file_out_stream.write(dataline.header_comment)
				file_out_stream.write("\n")
				# get translation that are near enough to be copy-pasted by humans.
				good_enough = getTranslationNear(dataline.msgid, 0.4)
				if len(good_enough) > 0:
					file_out_stream.write("#Similar to me: "+dataline.msgid+"\n")
					for distance, similar in good_enough[:nb_suggestions]:
						# the distance is padded to three characters to align the columns
						file_out_stream.write("# " + str(distance).ljust(3)
							+ "  changes: " + similar.msgid + "\n")
						file_out_stream.write("#  translation: " + similar.msgstr + "\n")
				file_out_stream.write(dataline.raw_msgid)
				file_out_stream.write("\n")
				file_out_stream.write(dataline.raw_msgstr)
				file_out_stream.write("\n")

		print("There is " + str(len(sorted_lines)) +" string untranslated")
	except Exception as error:
		print("error, cannot write file " + file_path_out)
		print(error)

def translate(data_to_translate, file_path_out):
	"""Write the translated .po file to file_path_out.

	The file starts with a gettext header, followed by the entries of
	data_to_translate sorted by lower-cased, stripped msgid (the list is
	sorted in place). An entry that already has a msgstr is written as is.
	An entry without one is written only when getTranslation() returns a
	usable translation. Header comments are omitted when remove_comment is
	set. A warning is printed when the number of '%' differs between the
	msgid and the msgstr written.

	Raises whatever open() or write() raise; errors are not caught here.
	"""
	# Local import: the module only imports `date`, which has no time of day,
	# so the %H:%M%z fields of the header were always "00:00" with no offset.
	from datetime import datetime
	timestamp = datetime.now().astimezone().strftime('%Y-%m-%d %H:%M%z')

	def write_entry(stream, dataline, msgstr_line):
		# One entry: blank line, optional comment, msgid line(s), msgstr line(s).
		stream.write("\n")
		if not remove_comment:
			stream.write(dataline.header_comment.strip())
			stream.write("\n")
		stream.write(dataline.raw_msgid)
		stream.write("\n")
		stream.write(msgstr_line)
		stream.write("\n")

	nb_lines = 0
	# The context manager closes (and flushes) the file in every case.
	with open(file_path_out, mode="w", encoding="utf-8") as file_out_stream:
		file_out_stream.write("# Translation file for "+(language if len(language)>0 else language_code)+"\n")
		file_out_stream.write("# Copyright (C) 2021\n")
		file_out_stream.write("# This file is distributed under the same license as Slic3r.\n")
		file_out_stream.write("#\n")
		file_out_stream.write("msgid \"\"\n")
		file_out_stream.write("msgstr \"\"\n")
		file_out_stream.write("\"Project-Id-Version: Slic3r\\n\"\n")
		file_out_stream.write("\"POT-Creation-Date: "+timestamp+"\\n\"\n")
		file_out_stream.write("\"PO-Revision-Date: "+timestamp+"\\n\"\n")
		file_out_stream.write("\"Last-Translator:\\n\"\n")
		file_out_stream.write("\"Language-Team:\\n\"\n")
		file_out_stream.write("\"MIME-Version: 1.0\\n\"\n")
		file_out_stream.write("\"Content-Type: text/plain; charset=UTF-8\\n\"\n")
		file_out_stream.write("\"Content-Transfer-Encoding: 8bit\\n\"\n")
		file_out_stream.write("\"Language:"+language_code+"\\n\"\n")

		data_to_translate.sort(key=lambda x: x.msgid.lower().strip())
		for dataline in data_to_translate:
			if dataline.msgstr.strip():
				# already translated in the input: copy it
				written = dataline.raw_msgstr
				write_entry(file_out_stream, dataline, written)
				if dataline.raw_msgid.count('%') != written.count('%'):
					print("WARNING: not same number of '%'( "+ str(dataline.raw_msgid.count('%')) + " => " + str(written.count('%')) + ")"
						+"\n  for string:'" + dataline.msgid + "  '\n=>'"+dataline.msgstr)
				nb_lines += 1
				continue

			# translate bits that are empty
			transl = getTranslation(dataline)
			# 'msgstr ""' is 9 characters long: anything longer carries text
			if len(transl) > 9 or (len(transl) > 3 and not transl.startswith('msgstr "')):
				write_entry(file_out_stream, dataline, transl)
				nb_lines += 1
				if dataline.raw_msgid.count('%') != transl.count('%'):
					print("WARNING: not same number of '%' ( "+ str(dataline.raw_msgid.count('%')) + " => " + str(transl.count('%')) + ")"
						+"\n  for string:'" + dataline.msgid + "  '\n=>'"+transl[8:])
	print("There is " + str(nb_lines) +" string translated in the .po")

def outputDatabase(file_path_out):
	"""Dump every entry of datastore to file_path_out.

	Each entry is written as an empty line followed by its raw msgid and raw
	msgstr lines. Header comments are not written. Any error is reported on
	stdout instead of being raised.
	"""
	try:
		# The context manager closes (and flushes) the file in every case.
		with open(file_path_out, mode="w", encoding="utf-8") as file_out_stream:
			for dataline in datastore.values():
				# Comments are not stored anymore: the blank line below takes
				# the place of header_comment, which starts with a newline.
				file_out_stream.write("\n")
				file_out_stream.write(dataline.raw_msgid)
				file_out_stream.write("\n")
				file_out_stream.write(dataline.raw_msgstr)
				file_out_stream.write("\n")

		print("There is " + str(len(datastore)) +" in your database file")
	except Exception as error:
		print("error, cannot write file " + file_path_out)
		print(error)

def parse_ui_file(file_path):
	"""Extract the translatable strings of a .ui file as untranslated entries.

	Each line is split on ':' and handled according to its first field:
	  page           the second field is a string
	  group, line    the last field is a string
	  setting        every field starting with label$, full_label$, sidetext$
	                 or tooltip$ gives the text after its last '$', unless
	                 that text is empty or "_"
	Returns a list of TranslationLine with an empty msgstr and a "#: <path>"
	header comment; for settings the comment also holds the 0-based line index.

	Raises whatever open() raises when the file cannot be read.
	"""
	def new_entry(header_comment, msgid):
		# Build an untranslated entry for msgid.
		entry = TranslationLine()
		entry.header_comment = header_comment
		entry.raw_msgid = "msgid \"" + msgid + "\""
		entry.msgid = msgid
		entry.raw_msgstr = "msgstr \"\""
		entry.msgstr = ""
		return entry

	# The context manager closes the file as soon as it has been read.
	with open(file_path, mode="r", encoding="utf-8") as file_in_stream:
		lines = file_in_stream.read().splitlines()

	setting_prefixes = ("label$", "full_label$", "sidetext$", "tooltip$")
	read_data_lines = []
	for line_idx, line in enumerate(lines):
		items = line.strip().split(":")
		if len(items) < 2:
			continue
		kind = items[0]
		if kind == "page":
			read_data_lines.append(new_entry("\n#: " + file_path, items[1].strip()))
		elif kind == "group" or kind == "line":
			read_data_lines.append(new_entry("\n#: " + file_path, items[-1].strip()))
		elif kind == "setting":
			for item in items:
				if not item.startswith(setting_prefixes):
					continue
				text = item.split("$")[-1]
				# "_" and an empty text carry nothing to translate
				if text != '_' and len(text) > 0:
					read_data_lines.append(
						new_entry("\n#: " + file_path + " : l" + str(line_idx), text.strip()))

	return read_data_lines


# Run the merge only when the file is executed as a script, not on import.
if __name__ == "__main__":
	main()