Commit 4b3d3f1

Rewrite 7.sh dgsh script
1 parent bc71b6c · commit 4b3d3f1

File tree

  • evaluation/benchmarks/dgsh/sequential

1 file changed: 62 additions, 0 deletions

@@ -0,0 +1,62 @@
#!/bin/bash

# Consistent sorting across machines
export LC_ALL=C

# Convert input into a ranked frequency list
ranked_frequency()
{
	awk '{count[$1]++} END {for (i in count) print count[i], i}' |
	# We want the standard sort here
	sort -rn
}
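
# Illustration (editor's note, not part of the committed script): piping
#   printf 'a\nb\na\n' | ranked_frequency
# produces "2 a" followed by "1 b": one "count item" pair per line,
# highest count first.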

# Convert standard input to a ranked frequency list of specified n-grams
ngram()
{
	local N=$1

	perl -ne 'for ($i = 0; $i < length($_) - '$N'; $i++) {
		print substr($_, $i, '$N'), "\n";
	}' |
	ranked_frequency
}
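
# Illustration (editor's note, not part of the committed script):
#   echo abcd | ngram 2
# emits the digrams "ab", "bc" and "cd", each with a count of 1.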

# Temporary files
file1=$(mktemp)
file2=$(mktemp)
file3=$(mktemp)

cat > "$file1"

# Split input one word per line
tr -cs a-zA-Z '\n' < "$file1" > "$file2"

# Digram frequency
echo "Digram frequency"
ngram 2 < "$file2"

# Trigram frequency
echo "Trigram frequency"
ngram 3 < "$file2"

# Word frequency
echo "Word frequency"
ranked_frequency < "$file2"

# Store number of characters to use in awk below
nchars=$(wc -c < "$file1")

# Character frequency
echo "Character frequency"
sed 's/./&\
/g' < "$file1" |
# Print absolute
ranked_frequency | tee "$file3"

# Print relative
echo "Relative character frequency"
awk -v NCHARS=$nchars 'BEGIN {
	OFMT = "%.2g%%"}
	{print $1, $2, $1 / NCHARS * 100}' "$file3"
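
A minimal way to exercise the script (editor's sketch; the filename 7.sh comes from the commit message, and the sample input file is an assumption):

  # Feed any text file on standard input; the script prints ranked
  # "count item" tables for digrams, trigrams, words and characters,
  # followed by character frequencies as percentages of the byte count.
  bash 7.sh < input.txt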
