-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathintegration_test_hutucker.cpp
More file actions
97 lines (83 loc) · 2.67 KB
/
integration_test_hutucker.cpp
File metadata and controls
97 lines (83 loc) · 2.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#include <cstddef>
#include <fstream>
#include <istream>
#include <sstream>
#include <string>
#include <vector>
#include <boost/filesystem.hpp>
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include <irkit/bitstream.hpp>
#include <irkit/coding/prefix_codec.hpp>
#include <irkit/prefixmap.hpp>
namespace {

namespace fs = boost::filesystem;
using map_type = irk::prefix_map<int, std::vector<char>>;
using block_builder = irk::prefix_map<int, std::vector<char>>::block_builder;
using block_ptr = irk::prefix_map<int, std::vector<char>>::block_ptr;

/// Tallies how often each byte value occurs in the lines read from `in`.
///
/// @param in  stream consumed line by line until std::getline fails
/// @return    256 counters indexed by the byte value (as unsigned char);
///            the line delimiters consumed by std::getline are not counted
std::vector<std::size_t> count_byte_frequencies(std::istream& in)
{
    std::vector<std::size_t> frequencies(256, 0);
    std::string line;
    while (std::getline(in, line)) {
        for (const char ch : line) {
            // char may be signed; go through unsigned char to get a valid index.
            ++frequencies[static_cast<unsigned char>(ch)];
        }
    }
    return frequencies;
}

/// Round-trips every line of terms.txt through a Hu-Tucker codec, one term at
/// a time: encode, copy the bits into a byte buffer, decode, compare.
///
/// The test fails (instead of silently passing) when terms.txt cannot be
/// opened or yields no lines.
TEST(hutucker, individual_coding)
{
    const std::string terms_file("terms.txt");

    // First pass: gather the byte frequencies the codec is built from.
    std::ifstream frequency_input(terms_file);
    ASSERT_TRUE(frequency_input.is_open()) << "cannot open " << terms_file;
    std::vector<std::size_t> frequencies =
        count_byte_frequencies(frequency_input);
    frequency_input.close();

    irk::hutucker_codec<char> codec(frequencies);

    // Second pass: encode and decode each term independently.
    std::ifstream in_terms(terms_file);
    ASSERT_TRUE(in_terms.is_open()) << "cannot reopen " << terms_file;
    std::size_t checked = 0;
    std::string term;
    while (std::getline(in_terms, term)) {
        auto encoded = codec.encode(term.begin(), term.end());
        // Presumably encoded.size() is a bit count: size / 8 + 1 bytes leaves
        // room for a trailing partial byte.
        std::vector<char> data(encoded.size() / 8 + 1, 0);
        irk::bitptr<char> bp(data.data());
        irk::bitcpy(bp, encoded);

        std::ostringstream decoded;
        auto reader = bp.reader();
        codec.decode(reader, decoded, term.size());
        ASSERT_THAT(decoded.str(), ::testing::ElementsAreArray(term)) << term;
        ++checked;
    }
    ASSERT_GT(checked, 0u) << terms_file << " contains no terms";
}

/// Writes terms from terms.txt through a prefix codec (wrapping a Hu-Tucker
/// codec) into an in-memory bit stream, then reads them back and compares
/// them with the originals.
///
/// The test fails (instead of silently passing) when terms.txt cannot be
/// opened or yields no lines.
TEST(hutucker, prefix_coding)
{
    const std::string terms_file("terms.txt");

    // First pass: gather the byte frequencies the codec is built from.
    std::ifstream frequency_input(terms_file);
    ASSERT_TRUE(frequency_input.is_open()) << "cannot open " << terms_file;
    std::vector<std::size_t> frequencies =
        count_byte_frequencies(frequency_input);
    frequency_input.close();

    auto codec = irk::hutucker_codec<char>(frequencies);
    irk::prefix_codec<irk::hutucker_codec<char>> pref_codec(std::move(codec));

    int count = 0;
    std::vector<std::string> terms;
    std::ostringstream out_buffer;
    irk::output_bit_stream bout(out_buffer);

    std::ifstream in_terms(terms_file);
    ASSERT_TRUE(in_terms.is_open()) << "cannot reopen " << terms_file;
    std::string term;
    while (std::getline(in_terms, term)) {
        pref_codec.encode(term, bout);
        terms.push_back(term);
        ++count;
        // NOTE(review): this unconditional break stops after the first term,
        // so no term is ever coded after a predecessor. It looks like a
        // debugging leftover; confirm whether the codec keeps state that must
        // be reset between encoding and decoding before removing it.
        break;
    }
    ASSERT_GT(count, 0) << terms_file << " contains no terms";
    // Flush before snapshotting the buffer so pending output is included.
    bout.flush();

    std::istringstream in_buffer(out_buffer.str());
    irk::input_bit_stream bin(in_buffer);
    for (int idx = 0; idx < count; ++idx) {
        std::string decoded;
        pref_codec.decode(bin, decoded);
        ASSERT_THAT(decoded, ::testing::ElementsAreArray(terms[idx]))
            << terms[idx] << "(" << idx << ")";
    }
}

}  // namespace
/// Test-runner entry point: hands the command line to GoogleTest and reports
/// the result of running every registered test as the process exit code.
int main(int argc, char** argv)
{
    // InitGoogleTest receives argc by address and argv, so it may modify them.
    ::testing::InitGoogleTest(&argc, argv);
    const int exit_status = RUN_ALL_TESTS();
    return exit_status;
}