#!/bin/bash

# This script receives a data directory and randomly splits its subdirectories
# into two data directories - train_dir and test_dir.
# Arguments:
#   data_dir         is the path to the data directory.
#   split_percentage is the proportional size of train_dir, given as a
#                    percentage between 0 and 100.

+ # calculate the dir's size in M
9
+ function dirSize {
10
+ echo ` du -h -s -m $1 | cut -f1`
11
+ }
12
+
13
+ data_dir=$1
14
+ split_percentage=$2
15
+
16
+ data_dir_size=` dirSize $data_dir `
17
+ let data_dir_size-=` du -h -s -m --separate-dirs $data_dir | cut -f1`
18
+ if (( data_dir_size == 0 )) ; then
19
+ echo " Error: $data_dir is empty!"
20
+ exit
21
+ fi
22
+
23
+ if (( split_percentage < 0 || split_percentage > 100 )) ; then
24
+ echo " Error: Invalid parameter: split percentage should be a value between 0 and 100"
25
+ exit
26
+ fi
27
+
28
+ train_dir=${data_dir} train_dir
29
+ test_dir=${data_dir} test_dir
30
+ mkdir -p ${train_dir} ${test_dir}
31
+
32
+ split_rate=` echo " scale=2;
33
+ $split_percentage / 100" \
34
+ | bc`
35
+
36
+ sub_dirs=` find $data_dir -maxdepth 1 -type d | tail --lines=+2`
37
+ sub_dirs=` shuf -e $( echo ${sub_dirs[*]} ) `
38
+
39
+ target_dir=$train_dir
40
+ i=0
41
+ for sub_dir in ${sub_dirs[*]}
42
+ do
43
+ let i++
44
+ if [[ $sub_dir == $train_dir || $sub_dir == $test_dir ]]; then
45
+ continue
46
+ fi
47
+ mv $sub_dir $target_dir
48
+ if [[ $target_dir == $train_dir ]]; then
49
+ train_dir_size=` dirSize $train_dir `
50
+ proportion=` echo " scale=2; $train_dir_size / $data_dir_size " | bc`
51
+ printf " \rtrain dir size: ${train_dir_size} M ; split rate: %.2f" $proportion
52
+ if (( $(echo "$proportion >= $split_rate " | bc - l) )) ; then
53
+ target_dir=$test_dir
54
+ printf ' \n'
55
+ fi
56
+ else
57
+ test_dir_size=` dirSize $test_dir `
58
+ printf " \rtest dir size: ${test_dir_size} M"
59
+ fi
60
+ done
61
+ printf " \n"
62
+
63
+ echo " data dir: $data_dir "
64
+ echo " train dir: $train_dir "
65
+ echo " test dir: $test_dir "
66
+ echo " data dir size: ${data_dir_size} M"
67
+ echo " train dir size: ${train_dir_size} M"
68
+ echo " test dir size: ${test_dir_size} M"
69
+ printf " final split rate = %.2f\n" $proportion
0 commit comments