-
Notifications
You must be signed in to change notification settings - Fork 4
/
sortDictionariesAndCleanup.sh
executable file
·43 lines (36 loc) · 1.55 KB
/
sortDictionariesAndCleanup.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
#!/bin/bash
#
# Normalizes the dictionary files found in the current working directory:
#   dictionary_all.txt (English) and dictionary_de.txt (German).
#
# For each file the script
#   1. strips trailing blanks from every line,
#   2. lower-cases the letters A-Z (English dictionary only),
#   3. deletes identity mappings such as "abc->abc",
#   4. sorts the lines with the language's locale and drops duplicate lines,
#   5. prints every remaining line that lacks the "->" separator.
#
# Step 5 only reports; it does not change the exit status of the script.
# Requires GNU sed (in-place editing with -i, \L case conversion).
set -exuo pipefail

WORKINGDIR=$(pwd)

# Private scratch file for the syntax check. mktemp avoids the predictable
# /tmp name, and the EXIT trap removes the file even when the script aborts.
TMP_FILE_PATH=$(mktemp)
trap 'rm -f -- "${TMP_FILE_PATH}"' EXIT

#######################################
# Cleans, sorts and de-duplicates one dictionary file in place.
# Globals:   WORKINGDIR (read)
# Arguments: $1 - path of the dictionary file
#            $2 - locale used by sort for collation (e.g. en_US)
#            $3 - "yes" to lower-case A-Z, anything else to keep case
# Returns:   non-zero (aborting the script under set -e) if a step fails
#######################################
normalize_dictionary() {
  local dict_path=$1
  local sort_locale=$2
  local fold_case=$3
  local sorted_path="${WORKINGDIR}/dictionary_tmp.txt"

  # All edits are line-local, so one sed pass yields the same result as
  # running the expressions as separate in-place passes.
  local -a sed_script=(-e 's/[[:blank:]]*$//')
  if [[ "${fold_case}" == "yes" ]]; then
    sed_script+=(-e 's/([A-Z])/\L\1/g')
  fi
  # Identity mappings (left side equals right side) carry no information.
  sed_script+=(-e '/^(.*)->\1$/d')
  sed -E -i "${sed_script[@]}" "${dict_path}"

  # Deliberately "sort | uniq" rather than "sort -u": -u would also drop
  # lines that merely collate as equal in the locale without being identical.
  LC_ALL="${sort_locale}" sort < "${dict_path}" | uniq > "${sorted_path}"
  mv "${sorted_path}" "${dict_path}"
}

#######################################
# Prints all lines of a dictionary that do not contain "->".
# Globals:   TMP_FILE_PATH (read; the file is overwritten)
# Arguments: $1 - path of the dictionary file
#            $2 - dictionary name shown in the message
# Outputs:   a red header plus the offending lines on stdout, if any
# Returns:   0 normally; grep's status if grep itself failed (status > 1)
#######################################
report_syntax_errors() {
  local dict_path=$1
  local label=$2
  local grep_status=0

  # grep exits 1 when it selects no line (here: no offending lines), which is
  # the good case; only a status above 1 signals a real failure.
  grep -Fv -e '->' "${dict_path}" > "${TMP_FILE_PATH}" || grep_status=$?
  if (( grep_status > 1 )); then
    return "${grep_status}"
  fi

  if [[ -s "${TMP_FILE_PATH}" ]]; then
    printf '\e[91mSyntax error in %s\e[39m\n' "${label}"
    cat "${TMP_FILE_PATH}"
  fi
}

# ENG-dictionary:
normalize_dictionary "${WORKINGDIR}/dictionary_all.txt" en_US yes
report_syntax_errors "${WORKINGDIR}/dictionary_all.txt" dictionary_all

# GER-dictionary (case is preserved):
normalize_dictionary "${WORKINGDIR}/dictionary_de.txt" de_DE no
report_syntax_errors "${WORKINGDIR}/dictionary_de.txt" dictionary_de