-
Notifications
You must be signed in to change notification settings - Fork 0
/
entropy_tree.cc
100 lines (84 loc) · 2.63 KB
/
entropy_tree.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#include <fstream>
#include <iostream>
#include <iomanip>
#include <vector>
#include <map>
#include <deque>
#include <cmath>
// rep(i, a, b): loop an int counter i over the half-open range [a, b).
// b is converted to int so container sizes can be passed without a
// signed/unsigned comparison.
#define rep(i, a, b) for(int i = (a); i < int(b); ++i)
// trav(it, v): loop iterator `it` over every element of container v.
// __typeof__ is the spelling of the GNU typeof extension that stays available
// when compiling in a strict ISO mode (-std=c++NN); plain `typeof` is disabled
// there and makes every use of this macro fail to compile.
#define trav(it, v) for(__typeof__((v).begin()) it = (v).begin(); \
	it != (v).end(); ++it)
using namespace std;
//double log2(double n){
// return log(n)/log(2);
//}
// Maps a sequence of bytes to an integer count.
// NOTE(review): no use of this alias is visible in this part of the file.
typedef map<deque<unsigned char>, int> State_map;
// A list of State_map tables.
// NOTE(review): no use of this alias is visible in this part of the file.
typedef vector<State_map> Freq_vector;
// A list of byte sequences.
// NOTE(review): no use of this alias is visible in this part of the file.
typedef vector<deque<unsigned char> > State_vector;
// Result of entropy(): (length read from the file, vector of values filled in by calc_H).
typedef pair<int, vector<double> > Entropy_pair;
// Upper bound on the node counter kept by entropy(); once a counter exceeds
// it, entropy() reduces its depth.
unsigned int const max_size = 1000000; // ~70 MB apparently
// One node of the symbol trie built by entropy().
struct Node{
	// How often the path ending in this node has been seen.
	int freq;
	// Children, keyed by the next symbol on the path.
	map<int, Node> childs;

	// A node is created the first time its path is seen, so it starts at 1.
	Node() : freq(1), childs() {}
};
int calc_H(map<int, Node>::iterator &it, vector<double> &H, int max_depth, int depth = 0){
if(depth == max_depth) return it->freq;
double h = 0;
trav(iter, it->childs){
// Reads `infile` byte by byte, records every byte sequence seen in a sliding
// window into a trie of Node objects, and then hands the trie to calc_H
// (defined elsewhere in this file) to fill in the result vector.
//
// infile     - stream to read; consumed until get() reports EOF. A stream
//              that is not readable simply yields a length of 0.
// max_memory - requested memory; max_memory+1 trie levels are analysed unless
//              the max_size cap lowers that number while reading.
// Returns (number of bytes read, vector H with one slot per analysed level as
// filled in by calc_H).
Entropy_pair entropy(fstream &infile, int max_memory){
	// A negative request degrades to "no levels" instead of a negative vector size.
	int depth = max_memory < 0 ? 0 : max_memory+1;
	// The window is flushed once it holds depth+2 symbols, so a single path can
	// touch that many trie levels; keep one node counter per level.
	vector<unsigned int> size(depth+2);
	unsigned int length = 0;
	map<int, Node> m;
	m[0] = Node(); // the only entry of m serves as the trie root
	deque<int> d; // sliding window over the most recent symbols
	bool at_eof = false;
	while(!at_eof){
		//while(length < 1000000){ // for testing with /dev/urandom.
		int c = infile.get();
		if(c == EOF) at_eof = true;
		else{
			d.push_back(c);
			// Only real bytes count towards the length; the EOF read does not.
			++length;
			if(length % 100000 == 0) cerr << "Length: " << length << '\n';
		}
		// Record the window whenever it is over-full. At EOF keep recording the
		// ever shorter suffixes until the window is drained; the emptiness test
		// ends that flush and keeps pop_front() away from an empty deque.
		while(!d.empty() && (at_eof || d.size() > (unsigned int)(depth+1))){
			map<int, Node>::iterator last_iter = m.begin();
			unsigned int i = 0; // trie level of the symbol being recorded
			for(deque<int>::const_iterator it = d.begin(); it != d.end(); ++it, ++i){
				map<int, Node> &childs = last_iter->second.childs;
				// lower_bound is both the lookup and the hint for the insertion.
				map<int, Node>::iterator iter = childs.lower_bound(*it);
				if(iter == childs.end() || iter->first != *it){
					iter = childs.insert(iter, make_pair(*it, Node()));
					++size[i];
					// Too many nodes on this level: analyse one level less from
					// now on, but always keep at least one level.
					if(size[i] > max_size && depth > 1) --depth;
				}
				else ++iter->second.freq;
				last_iter = iter;
			}
			d.pop_front();
		}
	}
	vector<double> H(depth);
	// calc_H takes its iterator by non-const reference, so it needs a named one.
	map<int, Node>::iterator root = m.begin();
	calc_H(root, H, depth);
	return Entropy_pair(static_cast<int>(length), H);
}
int main(int argc, char *argv[]){
if(argc < 2){
cout << "Wrong input argument, use " << argv[0] << " infile1 ... \n";
return 0;
}
fstream infile;
rep(i, 1, argc){
infile.open(argv[i], fstream::in | fstream::binary);
int memory = 10;
cerr << "File: " << argv[i] << "\n";
Entropy_pair res = entropy(infile, memory);
cout << "File: " << argv[i] << "\n Length: " << res.first << "\n";
cout << " " << left << setw(7) << "Memory" << setw(10) << "Entropy" << setw(15) << "Max compression\n";
rep(j, 0, res.second.size()){
cout << " " << setw(7) << j << setw(10) << res.second[j] << setw(15) << res.second[j]/8 << "\n";
}
cout << '\n';
infile.close();
}
return 0;
}