Skip to content

Commit 2d005e7

Browse files
committed
removing numbers in between texts
final version
1 parent f02f65d commit 2d005e7

File tree

1 file changed

+15
-10
lines changed

1 file changed

+15
-10
lines changed

CleaningDataCode/CleanAndSplitData.py

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -20,27 +20,32 @@
2020

2121
# Rows of `final` where Category == 0. The .copy() makes this an independent
# frame, so later column assignments do not write into a slice of `final`
# (which triggers SettingWithCopyWarning).
Slovak = final[final['Category'] == 0].copy()
2222
# DataFrame.apply returns a new frame rather than modifying in place; bind the
# result, otherwise the lower-casing is silently discarded.
Slovak = Slovak.apply(lambda col: col.astype(str).str.lower())
23+
# Strip every run of digits from the Text column. The pattern is a raw string
# (plain '\d' is an invalid escape) and regex=True is explicit: pandas >= 2.0
# treats the pattern as a literal by default, which would leave digits in place.
Slovak.Text = Slovak.Text.str.replace(r'\d+', '', regex=True)
2324
# Rows of `final` where Category == 1. The .copy() makes this an independent
# frame, so later column assignments do not write into a slice of `final`
# (which triggers SettingWithCopyWarning).
French = final[final['Category'] == 1].copy()
2425
# DataFrame.apply returns a new frame rather than modifying in place; bind the
# result, otherwise the lower-casing is silently discarded.
French = French.apply(lambda col: col.astype(str).str.lower())
26+
# Strip every run of digits from the Text column. The pattern is a raw string
# (plain '\d' is an invalid escape) and regex=True is explicit: pandas >= 2.0
# treats the pattern as a literal by default, which would leave digits in place.
French.Text = French.Text.str.replace(r'\d+', '', regex=True)
2527
# Rows of `final` where Category == 2. The .copy() makes this an independent
# frame, so later column assignments do not write into a slice of `final`
# (which triggers SettingWithCopyWarning).
Spanish = final[final['Category'] == 2].copy()
2628
# DataFrame.apply returns a new frame rather than modifying in place; bind the
# result, otherwise the lower-casing is silently discarded.
Spanish = Spanish.apply(lambda col: col.astype(str).str.lower())
29+
# Strip every run of digits from the Text column. The pattern is a raw string
# (plain '\d' is an invalid escape) and regex=True is explicit: pandas >= 2.0
# treats the pattern as a literal by default, which would leave digits in place.
Spanish.Text = Spanish.Text.str.replace(r'\d+', '', regex=True)
2730
# Rows of `final` where Category == 3. The .copy() makes this an independent
# frame, so later column assignments do not write into a slice of `final`
# (which triggers SettingWithCopyWarning).
German = final[final['Category'] == 3].copy()
2831
# DataFrame.apply returns a new frame rather than modifying in place; bind the
# result, otherwise the lower-casing is silently discarded.
German = German.apply(lambda col: col.astype(str).str.lower())
32+
# Strip every run of digits from the Text column. The pattern is a raw string
# (plain '\d' is an invalid escape) and regex=True is explicit: pandas >= 2.0
# treats the pattern as a literal by default, which would leave digits in place.
German.Text = German.Text.str.replace(r'\d+', '', regex=True)
2933
# Rows of `final` where Category == 4. The .copy() makes this an independent
# frame, so later column assignments do not write into a slice of `final`
# (which triggers SettingWithCopyWarning).
Polish = final[final['Category'] == 4].copy()
3034
# DataFrame.apply returns a new frame rather than modifying in place; bind the
# result, otherwise the lower-casing is silently discarded.
Polish = Polish.apply(lambda col: col.astype(str).str.lower())
35+
# Strip every run of digits from the Text column. The pattern is a raw string
# (plain '\d' is an invalid escape) and regex=True is explicit: pandas >= 2.0
# treats the pattern as a literal by default, which would leave digits in place.
Polish.Text = Polish.Text.str.replace(r'\d+', '', regex=True)
3136

32-
Slovak.to_csv('Slovak.csv', index=False, sep=',',encoding='utf-8')
33-
French.to_csv('French.csv', index=False,sep=',', encoding='utf-8')
34-
Spanish.to_csv('Spanish.csv', index=False,sep=',', encoding='utf-8')
35-
German.to_csv('German.csv', index=False, sep=',',encoding='utf-8')
36-
Polish.to_csv('Polish.csv', index=False,sep=',', encoding='utf-8')
37+
# Full-frame CSV export; the row index is left out.
Slovak.to_csv(
    r'./Slovak1.csv',
    encoding='utf-8',
    index=False,
)
38+
# Full-frame CSV export; the row index is left out.
French.to_csv(
    './French1.csv',
    encoding='utf-8',
    index=False,
)
39+
# Full-frame CSV export; the row index is left out.
Spanish.to_csv(
    'Spanish1.csv',
    encoding='utf-8',
    index=False,
)
40+
# Full-frame CSV export; the row index is left out.
German.to_csv(
    'German1.csv',
    encoding='utf-8',
    index=False,
)
41+
# Full-frame CSV export; the row index is left out.
Polish.to_csv(
    'Polish1.csv',
    encoding='utf-8',
    index=False,
)
3742

3843
# Column selection passed as `columns=` to the .txt exports.
header = ['Text']
39-
Slovak.to_csv('Slovak.txt', index=False, sep=',',encoding='utf-8', columns = header)
40-
French.to_csv('French.txt', index=False,sep=',', encoding='utf-8', columns = header)
41-
Spanish.to_csv('Spanish.txt', index=False,sep=',', encoding='utf-8', columns = header)
42-
German.to_csv('German.txt', index=False, sep=',',encoding='utf-8', columns = header)
43-
Polish.to_csv('Polish.txt', index=False,sep=',', encoding='utf-8', columns = header)
44+
# Text-only export: just the columns listed in `header`, no row index.
Slovak.to_csv(
    'Slovak_v1.txt',
    columns=header,
    encoding='utf-8',
    index=False,
)
45+
# Text-only export: just the columns listed in `header`, no row index.
French.to_csv(
    'French_v1.txt',
    columns=header,
    encoding='utf-8',
    index=False,
)
46+
# Text-only export: just the columns listed in `header`, no row index.
Spanish.to_csv(
    'Spanish_v1.txt',
    columns=header,
    encoding='utf-8',
    index=False,
)
47+
# Text-only export: just the columns listed in `header`, no row index.
German.to_csv(
    'German_v1.txt',
    columns=header,
    encoding='utf-8',
    index=False,
)
48+
# Text-only export: just the columns listed in `header`, no row index.
Polish.to_csv(
    'Polish_v1.txt',
    columns=header,
    encoding='utf-8',
    index=False,
)
4449

4550

4651

0 commit comments

Comments
 (0)