-
Notifications
You must be signed in to change notification settings - Fork 0
/
step02GettingIPAnotation.py
33 lines (31 loc) · 1.6 KB
/
step02GettingIPAnotation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
#!/usr/bin/env python3
# Second step of the pipeline. This script takes as input CSV files with three columns: a row number, the English word,
# and the same word translated into another language. The output is a set of TSV files with four columns: the three
# above plus the IPA transcription of the word in the translated language.
import os
import csv
#import pandas as pd
from csv import writer
from csv import reader
from epitran.backoff import Backoff
# Directory holding the translated word lists (the input CSV files).
directory = "/home/alonso/PalabrasWiktionaryTraducidasGoogleAPI/"
# Directory where the files with the added IPA column are written.
new_directory = "/home/alonso/PalabrasWiktionaryTraducidasTranscritasIPA/"

# List the inputs first so that a missing input directory fails before anything is created.
filenames = sorted(os.listdir(directory))
# Make sure the output directory exists instead of failing on the first open() for writing.
os.makedirs(new_directory, exist_ok=True)

for filename in filenames:
    source_path = os.path.join(directory, filename)
    # Only regular files can be read as CSV; skip sub-directories and similar entries.
    if not os.path.isfile(source_path):
        continue

    # The first 8 characters of the file name are handed to Epitran as the language code
    # (presumably something like "spa-Latn" -- confirm against the naming used by the previous step).
    code = filename[:8]
    backoff = Backoff([code], cedict_file='/media/alonso/3361-6630/neologisms/cedict_ts.u8')
    newfile = os.path.join(new_directory, "IPA_" + filename)

    # Explicit UTF-8 keeps non-ASCII words and IPA symbols independent of the system locale;
    # newline='' lets the csv module handle line endings itself, as its documentation asks.
    with open(source_path, 'r', encoding='utf-8', newline='') as f, \
            open(newfile, 'w', encoding='utf-8', newline='') as w:
        csv_reader = reader(f)
        # Output is tab-separated with plain "\n" line endings.
        csv_writer = writer(w, delimiter='\t', lineterminator='\n')
        for row in csv_reader:
            # csv.reader yields an empty list for a blank line; there is no word to transcribe.
            if not row:
                continue
            # Column index 2 holds the translated word (see the header comment of this script).
            IPA = backoff.trans_list(row[2])
            # NOTE(review): trans_list appears to return a list of segments, and csv.writer
            # stringifies non-string values with str(), so this column would contain the list's
            # repr rather than a joined string -- confirm that the next step expects that.
            row.append(IPA)
            csv_writer.writerow(row)