forked from tb0hdan/domains
-
Notifications
You must be signed in to change notification settings - Fork 0
/
unpack.sh
executable file
·59 lines (51 loc) · 1.2 KB
/
unpack.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#!/bin/bash
#######################################
# Verify that the xz utility is available on PATH.
# Uses the `command -v` builtin rather than the external `which`, so the
# check still works on minimal systems where `which` is not installed.
# Arguments: none
# Outputs:   an installation hint on stdout when xz is missing
# Returns:   0 when xz is found; exits the script with status 1 otherwise
#######################################
function test_xz() {
  if ! command -v xz >/dev/null 2>&1; then
    echo "No XZ found. Get yours at https://tukaani.org/xz/"
    exit 1
  fi
}
#######################################
# Verify that the git-lfs executable is available on PATH.
# Uses the `command -v` builtin rather than the external `which`, so the
# check still works on minimal systems where `which` is not installed.
# Arguments: none
# Outputs:   an installation hint on stdout when git-lfs is missing
# Returns:   0 when git-lfs is found; exits the script with status 3 otherwise
#######################################
function test_lfs() {
  if ! command -v git-lfs >/dev/null 2>&1; then
    echo "No git-lfs found. Get yours at https://git-lfs.github.com/"
    exit 3
  fi
}
#######################################
# Run `git lfs pull` in the current working directory.
# Arguments: none
# Returns:   the exit status of `git lfs pull`
#######################################
function lfs_pull() {
  git lfs pull
  return $?
}
#######################################
# Decompress every regular file under ./data whose name matches *.xz
# (case-insensitively). xz is invoked once per file with -d -k, so the
# compressed originals are kept next to the decompressed output.
# Arguments: none
# Outputs:   a progress line on stdout
# Returns:   the exit status of find
#######################################
function unpack() {
  local -a xz_cmd=(xz -d -k)

  printf '%s\n' "Unpacking data/ ..."
  find ./data -type f -iname '*.xz' -exec "${xz_cmd[@]}" {} \;
}
#######################################
# Remove unwanted lines from a file, rewriting it in place.
# Every line that starts with '.', '-' or '%' is dropped; all other lines
# are kept unchanged. The result is written to "<file>.1" and then moved
# over the original.
# Arguments: $1 - path of the file to filter (may contain whitespace)
# Outputs:   progress / error messages on stdout
# Returns:   the exit status of the final mv; exits the script with
#            status 3 when the file does not exist or grep reports an error
#######################################
function filter() {
  local fname=$1
  local tmp_fname="${fname}.1"
  local rc

  if [[ ! -f "${fname}" ]]; then
    echo "Cannot filter non-existent file ..."
    exit 3
  fi
  echo "Filtering ${fname} ..."

  # Bracket expression is equivalent to the alternation '^(\.|\-|\%)' but
  # avoids backslash escapes of ordinary characters.
  grep -Ev '^[.%-]' -- "${fname}" > "${tmp_fname}"
  rc=$?
  # grep exits 1 when no line survived the filter (a valid, empty result)
  # and with a status above 1 on a real error; only the latter must stop us
  # from replacing the original file with incomplete output.
  if (( rc > 1 )); then
    echo "Cannot filter ${fname} ..."
    rm -f -- "${tmp_fname}"
    exit 3
  fi
  mv -- "${tmp_fname}" "${fname}"
}
#######################################
# Merge the numbered *.txt parts in every directory below ./data.
# For each directory, the files matching *[0-9].txt are concatenated (in
# glob order) into one file whose name is the first part's name with every
# run of digits directly before a '.' removed. The parts are deleted and
# the merged file is passed to filter.
# Directories that contain no numbered parts are left untouched, so an
# already-combined file is never deleted and the run is not aborted.
# Arguments: none
# Outputs:   progress / error messages on stdout
# Returns:   exits the script with status 3 when a directory cannot be
#            entered or the starting directory cannot be restored
#######################################
function combine() {
  local olddir=${PWD}
  local datadir part big_fname
  local -a datadirs=()
  local -a parts=()

  # Collect directories NUL-delimited so names containing whitespace survive.
  while IFS= read -r -d '' datadir; do
    datadirs+=("${datadir}")
  done < <(find ./data -mindepth 1 -type d -print0)

  for datadir in "${datadirs[@]}"; do
    # Never continue in the wrong directory: the steps below delete files.
    cd "${datadir}" || { echo "Cannot enter ${datadir} ..."; exit 3; }
    echo "Working on ${datadir} ..."

    parts=()
    for part in *[0-9].txt; do
      # An unmatched glob stays literal; -f drops it (and any directory).
      [[ -f "${part}" ]] && parts+=("${part}")
    done

    if (( ${#parts[@]} > 0 )); then
      big_fname=$(printf '%s\n' "${parts[0]}" | sed -E 's/[0-9]+\./\./g')
      # Start from scratch; the merged name never equals a part name because
      # all digits before '.' have been stripped from it.
      rm -f -- "${big_fname}"
      for part in "${parts[@]}"; do
        # Only delete a part once it has been appended successfully.
        cat -- "${part}" >> "${big_fname}" && rm -- "${part}"
      done
      filter "${big_fname}"
    fi

    cd "${olddir}" || exit 3
  done
}
# MAIN
#######################################
# Entry point: check the required tools, fetch the LFS objects, then
# decompress and merge the data files, in that order.
# Arguments: none used
# Returns:   the exit status of combine
#######################################
function main() {
  test_xz
  test_lfs
  lfs_pull
  unpack
  combine
}

main "$@"