|
| 1 | +#include "deflate_compressor.hpp" |
| 2 | + |
| 3 | +#include <unistd.h> |
| 4 | + |
| 5 | +#include <cstdio> |
| 6 | +#include <iostream> |
| 7 | +#include <map> |
| 8 | +#include <memory> |
| 9 | +#include <queue> |
| 10 | +#include <stdexcept> |
| 11 | +#include <string> |
| 12 | + |
| 13 | +#include "bits.hpp" |
| 14 | +#include "stream.hpp" |
| 15 | + |
| 16 | +namespace Compression { |
| 17 | + |
| 18 | +const std::vector<DeflateTables::ExtraLength> DeflateTables::extra_lengths = { |
| 19 | + { 257, 0, 3 }, |
| 20 | + { 258, 0, 4 }, |
| 21 | + { 259, 0, 5 }, |
| 22 | + { 260, 0, 6 }, |
| 23 | + { 261, 0, 7 }, |
| 24 | + { 262, 0, 8 }, |
| 25 | + { 263, 0, 9 }, |
| 26 | + { 264, 0, 10 }, |
| 27 | + { 265, 1, 11 }, |
| 28 | + { 266, 1, 13 }, |
| 29 | + { 267, 1, 15 }, |
| 30 | + { 268, 1, 17 }, |
| 31 | + { 269, 2, 19 }, |
| 32 | + { 270, 2, 23 }, |
| 33 | + { 271, 2, 27 }, |
| 34 | + { 272, 2, 31 }, |
| 35 | + { 273, 3, 35 }, |
| 36 | + { 274, 3, 43 }, |
| 37 | + { 275, 3, 51 }, |
| 38 | + { 276, 3, 59 }, |
| 39 | + { 277, 4, 67 }, |
| 40 | + { 278, 4, 83 }, |
| 41 | + { 279, 4, 99 }, |
| 42 | + { 280, 4, 115 }, |
| 43 | + { 281, 5, 131 }, |
| 44 | + { 282, 5, 163 }, |
| 45 | + { 283, 5, 195 }, |
| 46 | + { 284, 5, 227 }, |
| 47 | + { 285, 0, 258 }, |
| 48 | +}; |
| 49 | + |
| 50 | +const std::vector<DeflateTables::ExtraDistance> DeflateTables::extra_distances = { |
| 51 | + { 0, 0, 1 }, |
| 52 | + { 1, 0, 2 }, |
| 53 | + { 2, 0, 3 }, |
| 54 | + { 3, 0, 4 }, |
| 55 | + { 4, 1, 5 }, |
| 56 | + { 5, 1, 7 }, |
| 57 | + { 6, 2, 9 }, |
| 58 | + { 7, 2, 13 }, |
| 59 | + { 8, 3, 17 }, |
| 60 | + { 9, 3, 25 }, |
| 61 | + { 10, 4, 33 }, |
| 62 | + { 11, 4, 49 }, |
| 63 | + { 12, 5, 65 }, |
| 64 | + { 13, 5, 97 }, |
| 65 | + { 14, 6, 129 }, |
| 66 | + { 15, 6, 193 }, |
| 67 | + { 16, 7, 257 }, |
| 68 | + { 17, 7, 385 }, |
| 69 | + { 18, 8, 513 }, |
| 70 | + { 19, 8, 769 }, |
| 71 | + { 20, 9, 1025 }, |
| 72 | + { 21, 9, 1537 }, |
| 73 | + { 22, 10, 2049 }, |
| 74 | + { 23, 10, 3073 }, |
| 75 | + { 24, 11, 4097 }, |
| 76 | + { 25, 11, 6145 }, |
| 77 | + { 26, 12, 8193 }, |
| 78 | + { 27, 12, 12289 }, |
| 79 | + { 28, 13, 16385 }, |
| 80 | + { 29, 13, 24577 }, |
| 81 | +}; |
| 82 | + |
| 83 | +DeflateTables::ExtraLength DeflateTables::get_extra_length(uint16_t code) |
| 84 | +{ |
| 85 | + for (auto it = extra_lengths.begin(); it != extra_lengths.end(); ++it) |
| 86 | + if (it->code == code) return *it; |
| 87 | + |
| 88 | + throw std::runtime_error { "Code not found in get_extra_length()." }; |
| 89 | +} |
| 90 | + |
| 91 | +DeflateTables::ExtraDistance DeflateTables::get_extra_distance(uint16_t code) |
| 92 | +{ |
| 93 | + for (auto it = extra_distances.begin(); it != extra_distances.end(); ++it) |
| 94 | + if (it->code == code) return *it; |
| 95 | + |
| 96 | + throw std::runtime_error { "Code not found in get_extra_distance()." }; |
| 97 | +} |
| 98 | + |
| 99 | +HuffmanNode::HuffmanNode(int length, int symbol) : |
| 100 | + length { length }, |
| 101 | + symbol { symbol } |
| 102 | +{ |
| 103 | +} |
| 104 | + |
| 105 | +HuffmanTree::HuffmanTree(std::vector<std::shared_ptr<HuffmanNode>> nodes) |
| 106 | +{ |
| 107 | + // Build code_to_length + length_count dictionnary |
| 108 | + int max_length = 0; |
| 109 | + std::map<uint32_t, uint32_t> code_to_length; |
| 110 | + std::map<uint32_t, uint32_t> length_count; |
| 111 | + for (auto it = nodes.begin(); it != nodes.end(); ++it) |
| 112 | + { |
| 113 | + code_to_length[(*it)->symbol] = (*it)->length; |
| 114 | + if ((*it)->length > max_length) max_length = (*it)->length; |
| 115 | + ++length_count[(*it)->length]; |
| 116 | + } |
| 117 | + |
| 118 | + // Build next_code dictionnary |
| 119 | + length_count[0] = 0; |
| 120 | + std::map<uint32_t, uint32_t> next_code; |
| 121 | + uint32_t code = 0, max_code = 0; |
| 122 | + for (int length = 1; length <= max_length; ++length) |
| 123 | + { |
| 124 | + code = (code + length_count[length - 1]) << 1; |
| 125 | + if (code > max_code) max_code = code; |
| 126 | + next_code[length] = code; |
| 127 | + } |
| 128 | + |
| 129 | + // Build final table |
| 130 | + for (uint32_t i = 0; i < nodes.size(); ++i) |
| 131 | + { |
| 132 | + int length = nodes[i]->length; |
| 133 | + if (length != 0) |
| 134 | + { |
| 135 | + nodes[i]->code = next_code[length]; |
| 136 | + next_code[length]++; |
| 137 | + } |
| 138 | + } |
| 139 | + |
| 140 | + for (auto it = nodes.begin(); it != nodes.end(); ++it) |
| 141 | + code_to_symbol[(*it)->code] = (*it)->symbol; |
| 142 | +} |
| 143 | + |
| 144 | +int HuffmanTree::get_value(InputStream& in) |
| 145 | +{ |
| 146 | + if (in.remaining_size() == 0) |
| 147 | + throw std::runtime_error { "HuffmanTree::get_value() cannot run with an empty InputStream." }; |
| 148 | + |
| 149 | + uint32_t read_value = 0; |
| 150 | + std::map<int, int>::iterator it; |
| 151 | + do |
| 152 | + { |
| 153 | + read_value = (read_value << 1) | in.read_u1_le(); |
| 154 | + it = code_to_symbol.find(read_value); |
| 155 | + } while (it == code_to_symbol.end()); |
| 156 | + return it->second; |
| 157 | +} |
| 158 | + |
| 159 | +DeflateCompressor::DeflateCompressor(InputStream& in) : |
| 160 | + in { in } |
| 161 | +{ |
| 162 | +} |
| 163 | + |
| 164 | +std::vector<uint8_t> DeflateCompressor::uncompress() |
| 165 | +{ |
| 166 | + out.clear(); |
| 167 | + |
| 168 | + const uint8_t method = in.read_bits_le<uint8_t>(4); |
| 169 | + const int8_t info = in.read_bits_le<uint8_t>(4); |
| 170 | + if (method != 8) |
| 171 | + throw std::runtime_error { "Cannot deflate with a method of " + std::to_string(method) + "." }; |
| 172 | + |
| 173 | + in.read_bits_le<uint8_t>(5); |
| 174 | + uint8_t fdict = in.read_bits_le<uint8_t>(1); |
| 175 | + uint8_t flevel = in.read_bits_le<uint8_t>(2); |
| 176 | + |
| 177 | + bool is_last_block = false; |
| 178 | + do |
| 179 | + { |
| 180 | + is_last_block = in.read_bits_le<uint8_t>(1); |
| 181 | + uint8_t compression_level = in.read_bits_le<uint8_t>(2); |
| 182 | + read_block(compression_level); |
| 183 | + |
| 184 | + // TODO: Check ALDER32 |
| 185 | + in.skip(4); |
| 186 | + } while (!is_last_block); |
| 187 | + |
| 188 | + return out; |
| 189 | +} |
| 190 | + |
| 191 | +void DeflateCompressor::read_block(uint8_t compression_level) |
| 192 | +{ |
| 193 | + switch (compression_level) |
| 194 | + { |
| 195 | + case 0: |
| 196 | + read_block_compression_level_0(); |
| 197 | + break; |
| 198 | + case 1: |
| 199 | + read_block_compression_level_1(); |
| 200 | + break; |
| 201 | + case 2: |
| 202 | + read_block_compression_level_2(); |
| 203 | + break; |
| 204 | + default: |
| 205 | + throw std::runtime_error { "The compression level " + std::to_string(compression_level) + " cannot be decompressed." }; |
| 206 | + } |
| 207 | +} |
| 208 | + |
| 209 | +void DeflateCompressor::read_block_compression_level_0() |
| 210 | +{ |
| 211 | + std::cout << "LEVEL 0" << std::endl; |
| 212 | + |
| 213 | + uint16_t length = in.read_u16_le(); |
| 214 | + uint16_t length_inverted = in.read_u16_le(); |
| 215 | + |
| 216 | + if (length != (uint16_t)~length_inverted) |
| 217 | + throw std::runtime_error { std::to_string(length) + " and " + std::to_string(~length_inverted) + " are not equals. Stream integrity error." }; |
| 218 | + |
| 219 | + for (int i = 0; i < length; ++i) |
| 220 | + out.push_back(in.read_u8()); |
| 221 | +} |
| 222 | + |
| 223 | +void DeflateCompressor::read_block_compression_level_1() |
| 224 | +{ |
| 225 | + std::cout << "LEVEL 1" << std::endl; |
| 226 | + |
| 227 | + std::vector<std::shared_ptr<HuffmanNode>> length_nodes {}; |
| 228 | + std::vector<std::shared_ptr<HuffmanNode>> distance_nodes {}; |
| 229 | + |
| 230 | + length_nodes.reserve(288); |
| 231 | + distance_nodes.reserve(32); |
| 232 | + |
| 233 | + for (int i = 0; i <= 287; ++i) |
| 234 | + length_nodes.push_back(std::make_shared<HuffmanNode>(size_of_code(i), i)); |
| 235 | + |
| 236 | + for (int i = 0; i <= 31; ++i) |
| 237 | + distance_nodes.push_back(std::make_shared<HuffmanNode>(5, i)); |
| 238 | + |
| 239 | + HuffmanTree length_tree { length_nodes }; |
| 240 | + HuffmanTree distance_tree { distance_nodes }; |
| 241 | + |
| 242 | + BufferStream stream; |
| 243 | + stream.write_u8(0b11111101); |
| 244 | + std::cout << length_tree.get_value(stream) << std::endl; |
| 245 | + // SHOULD BE 143 |
| 246 | + |
| 247 | + throw std::exception(); |
| 248 | + |
| 249 | + bool loop = true; |
| 250 | + while (loop) |
| 251 | + { |
| 252 | + std::cout << "========== (" << in.remaining_size() << ") ==========" << std::endl; |
| 253 | + |
| 254 | + int length_code = length_tree.get_value(in); |
| 255 | + |
| 256 | + std::cout << "CODE | " << length_code << std::endl; |
| 257 | + |
| 258 | + if (length_code == 256) |
| 259 | + loop = false; |
| 260 | + else if (length_code <= 255) |
| 261 | + out.push_back((uint8_t)length_code); |
| 262 | + else |
| 263 | + { |
| 264 | + std::cout << "REPEAT" << std::endl; |
| 265 | + DeflateTables::ExtraLength length = DeflateTables::get_extra_length(length_code); |
| 266 | + uint8_t length_additional_bits = length.bits; |
| 267 | + int length_value = length.length << length_additional_bits; |
| 268 | + length_value += in.read_bits_le<int>(length_additional_bits); |
| 269 | + |
| 270 | + std::cout << "LENGTH: " << length_value << std::endl; |
| 271 | + |
| 272 | + uint16_t distance_code = distance_tree.get_value(in); |
| 273 | + |
| 274 | + DeflateTables::ExtraDistance distance = DeflateTables::get_extra_distance(distance_code); |
| 275 | + uint8_t distance_additional_bits = distance.bits; |
| 276 | + int distance_value = distance.distance << distance_additional_bits; |
| 277 | + distance_value += in.read_bits_le<int>(distance_additional_bits); |
| 278 | + |
| 279 | + std::cout << "DISTANCE: " << distance_value << std::endl; |
| 280 | + |
| 281 | + auto start = out.end() - distance_value; |
| 282 | + auto end = start + length_value; |
| 283 | + if (end > out.end()) end = out.end(); |
| 284 | + std::vector<char> repeated { start, end }; |
| 285 | + |
| 286 | + std::string s { repeated.begin(), repeated.end() }; |
| 287 | + std::cout << "REPEATED: " << s << std::endl; |
| 288 | + |
| 289 | + // TODO: MISSING REPETITION |
| 290 | + out.insert(out.end(), repeated.begin(), repeated.end()); |
| 291 | + } |
| 292 | + std::string s { out.begin(), out.end() }; |
| 293 | + std::cout << "OUTPUT | " << s << std::endl; |
| 294 | + } |
| 295 | +} |
| 296 | + |
| 297 | +void DeflateCompressor::read_block_compression_level_2() |
| 298 | +{ |
| 299 | + std::cout << "LEVEL 2" << std::endl; |
| 300 | +} |
| 301 | + |
| 302 | +uint8_t DeflateCompressor::size_of_code(uint16_t code) const |
| 303 | +{ |
| 304 | + if (code < 0) |
| 305 | + throw std::runtime_error { "DeflateCompressor::size_of_code(): Code cannot be negative (" + std::to_string(code) + ")" }; |
| 306 | + |
| 307 | + if (code <= 143) |
| 308 | + return 8; |
| 309 | + if (code <= 255) |
| 310 | + return 9; |
| 311 | + if (code <= 279) |
| 312 | + return 7; |
| 313 | + if (code <= 287) |
| 314 | + return 8; |
| 315 | + |
| 316 | + throw std::runtime_error { std::to_string(code) + " is not a valid code." }; |
| 317 | +} |
| 318 | +} |
0 commit comments