|
| 1 | +/* |
| 2 | + p8_compress.c |
| 3 | + |
| 4 | + (c) Copyright 2014-2016 Lexaloffle Games LLP |
| 5 | + author: joseph@lexaloffle.com |
| 6 | +
|
| 7 | + compression used in code section of .p8.png format |
| 8 | + |
| 9 | + This software is provided 'as-is', without any express or implied |
| 10 | + warranty. In no event will the authors be held liable for any damages |
| 11 | + arising from the use of this software. |
| 12 | +
|
| 13 | + Permission is granted to anyone to use this software for any purpose, |
| 14 | + including commercial applications, and to alter it and redistribute it |
| 15 | + freely, subject to the following restrictions: |
| 16 | +
|
| 17 | + 1. The origin of this software must not be misrepresented; you must not |
| 18 | + claim that you wrote the original software. If you use this software |
| 19 | + in a product, an acknowledgment in the product documentation would be |
| 20 | + appreciated but is not required. |
| 21 | + 2. Altered source versions must be plainly marked as such, and must not be |
| 22 | + misrepresented as being the original software. |
| 23 | + 3. This notice may not be removed or altered from any source distribution. |
| 24 | +
|
| 25 | +*/ |
| 26 | + |
| 27 | +#include <stdlib.h> |
| 28 | +#include <stdio.h> |
| 29 | +#include <string.h> |
| 30 | + |
// MAX and MIN are guarded separately so that a pre-existing MAX from another
// header can not leave MIN undefined (find_repeatable_block needs MIN).
#ifndef MAX
	#define MAX(x, y) (((x) > (y)) ? (x) : (y))
#endif
#ifndef MIN
	#define MIN(x, y) (((x) < (y)) ? (x) : (y))
#endif

typedef unsigned char uint8;

// HIST_LEN is only mentioned in a comment in this file; the actual look-back
// window is computed in find_repeatable_block().
#define HIST_LEN 4096

// number of entries in the literal table below (including the dummy at index 0).
// token bytes below LITERALS are literals, bytes at or above it start a block.
#define LITERALS 60

#define PICO8_CODE_ALLOC_SIZE (0x10000+1)

// allocator hooks: plain C library functions in this build
#define codo_malloc malloc
#define codo_free free
#define codo_memset memset

// removed from end of decompressed if it exists
// (injected to maintain 0.1.7 forwards compatibility)
#define FUTURE_CODE "if(_update60)_update=function()_update60()_update60()end"
#define FUTURE_CODE2 "if(_update60)_update=function()_update60()_update_buttons()_update60()end"

// ^ is dummy -- not a literal. forgot '-', but nevermind! (gets encoded as rare literal)
// The table is only ever read, so it is const: writing through a pointer to a
// string literal is undefined behaviour.
const char *literal = "^\n 0123456789abcdefghijklmnopqrstuvwxyz!#%(){}[]<>+=/*:;.,~_";

// map literals to 0..LITERALS-1. 0 is reserved (not listed in literals string)
// and means "not in the table". Filled in by compress_mini().
int literal_index[256];
| 54 | + |
| 55 | +int find_repeatable_block(uint8 *dat, int pos, int len, int *block_offset) |
| 56 | +{ |
| 57 | + // block len starts from 2, so no need to record 0, 1 --> max is (15 + 2) |
| 58 | + int max_block_len = 17; // any more doesn't have much effect for code. more important to look back further. |
| 59 | + int max_hist_len = (255-LITERALS)*16; // less than HIST_LEN |
| 60 | + int i, j; |
| 61 | + int best_len = 0; |
| 62 | + int best_i = -100000; |
| 63 | + int max_len; |
| 64 | + |
| 65 | + // brute force search |
| 66 | + |
| 67 | + // block length can't be longer than remaining |
| 68 | + max_len = MIN(max_block_len, len - pos); |
| 69 | + |
| 70 | + // can't be longer than preceeding data |
| 71 | + max_hist_len = MIN(max_hist_len, pos); |
| 72 | + |
| 73 | + for (i = pos - max_hist_len; i < pos; i++) |
| 74 | + { |
| 75 | + // find length starting at i |
| 76 | + |
| 77 | + j = i; |
| 78 | + while ((j-i) < max_len && j < pos && dat[j] == dat[pos+j-i]) j++; |
| 79 | + |
| 80 | + if ((j-i) > best_len) |
| 81 | + { |
| 82 | + best_len = (j-i); |
| 83 | + best_i = i; |
| 84 | + } |
| 85 | + } |
| 86 | + |
| 87 | + *block_offset = (pos-best_i); |
| 88 | + |
| 89 | + return best_len; |
| 90 | +} |
| 91 | + |
| 92 | + |
| 93 | +#define WRITE_VAL(x) {*p_8 = (x); p_8++;} |
| 94 | + |
| 95 | +// returns compressed length |
| 96 | +int num_blocks, num_blocks_large, num_literals; |
| 97 | +int freq[256]; |
| 98 | + |
| 99 | +int compress_mini(uint8 *in_p, uint8 *out, int len) |
| 100 | +{ |
| 101 | + uint8 *p_8 = out; |
| 102 | + int pos = 0; |
| 103 | + int block_offset; |
| 104 | + int block_len; |
| 105 | + int i, j, best_i; |
| 106 | + uint8 *in; |
| 107 | + char *modified_code; |
| 108 | + |
| 109 | + // init literals search |
| 110 | + memset(literal_index, 0, 256); |
| 111 | + for (i = 1; i < LITERALS; i++) |
| 112 | + { |
| 113 | + literal_index[literal[i]] = i; |
| 114 | + } |
| 115 | + |
| 116 | + // 0.1.8 : inject future api implementation if _update60 found in in_p |
| 117 | + // note: doesn't apply to plain .p8 format |
| 118 | + |
| 119 | + modified_code = codo_malloc(strlen(in_p) + 1024); |
| 120 | + strcpy(modified_code, in_p); |
| 121 | + |
| 122 | + if (strstr(in_p, "_update60")) |
| 123 | + if (len < PICO8_CODE_ALLOC_SIZE - (strlen(FUTURE_CODE2)+1)) // skip if won't fit when decompressing |
| 124 | + { |
| 125 | + // 0.1.9: make sure there is some whitespace before future_code (0.1.8 bug) |
| 126 | + if (modified_code[strlen(modified_code)-1] != ' ' && modified_code[strlen(modified_code)-1] != '\n') |
| 127 | + { |
| 128 | + strcat(modified_code, "\n"); |
| 129 | + } |
| 130 | + strcat(modified_code, FUTURE_CODE2); |
| 131 | + len += strlen(FUTURE_CODE2)+1; |
| 132 | + } |
| 133 | + |
| 134 | + in = modified_code; |
| 135 | + |
| 136 | + // header tag: ":c:" |
| 137 | + // will show up in code section of old versions of pico-8 |
| 138 | + WRITE_VAL(':'); |
| 139 | + WRITE_VAL('c'); |
| 140 | + WRITE_VAL(':'); |
| 141 | + WRITE_VAL(0); |
| 142 | + |
| 143 | + // write uncompressed size |
| 144 | + WRITE_VAL(len/256); |
| 145 | + WRITE_VAL(len%256); |
| 146 | + |
| 147 | + // compressed size (fill in later). used for robust/safe decompression |
| 148 | + WRITE_VAL(0); |
| 149 | + WRITE_VAL(0); |
| 150 | + |
| 151 | + num_blocks = 0; |
| 152 | + num_literals = 0; |
| 153 | + |
| 154 | + memset(freq, 0, sizeof(freq)); |
| 155 | + #if 0 |
| 156 | + // generate histogram |
| 157 | + for (i = 0; i < len; i++) |
| 158 | + freq[in[i]]++; |
| 159 | + |
| 160 | + // show highest |
| 161 | + for (i = 0; i < 256; i++) |
| 162 | + if (freq[i] > len / 64) |
| 163 | + printf("[%c] : %d\n", i, freq[i]); |
| 164 | + #endif |
| 165 | + |
| 166 | + while (pos < len) |
| 167 | + { |
| 168 | + // either copy or literal |
| 169 | + |
| 170 | + //printf("pos: %d\n", pos); |
| 171 | + |
| 172 | + block_len = find_repeatable_block(in, pos, len, &block_offset); |
| 173 | + |
| 174 | + // use block when 3 or more long. performs better than 2, because after |
| 175 | + // writing first literal, second one might be part of a block. |
| 176 | + if (block_len >= 3) |
| 177 | + { |
| 178 | + // block: 2 bytes |
| 179 | + |
| 180 | + // printf(":: block. offset: %d len: %d\n", block_offset, block_len); |
| 181 | + |
| 182 | + WRITE_VAL((block_offset / 16) + LITERALS); |
| 183 | + WRITE_VAL((block_offset % 16) + (block_len-2) * 16); |
| 184 | + pos += block_len; |
| 185 | + |
| 186 | + // stats |
| 187 | + num_blocks ++; |
| 188 | + |
| 189 | + if (block_len > 17) num_blocks_large++; |
| 190 | + } |
| 191 | + else |
| 192 | + { |
| 193 | + // literal: 0 means read next byte |
| 194 | + // printf(":: literal: %d [%c]\n", in[pos], in[pos]); |
| 195 | + |
| 196 | + WRITE_VAL(literal_index[in[pos]]); |
| 197 | + |
| 198 | + if (literal_index[in[pos]] == 0) |
| 199 | + WRITE_VAL(in[pos]); |
| 200 | + |
| 201 | + pos ++; |
| 202 | + |
| 203 | + // stats |
| 204 | + |
| 205 | + //printf("%c",in[pos]); |
| 206 | + |
| 207 | + num_literals ++; |
| 208 | + freq[in[pos]]++; |
| 209 | + } |
| 210 | + } |
| 211 | + |
| 212 | + // compressed is larger than input -> just return input |
| 213 | + if ((p_8 - out) >= strlen(in)) |
| 214 | + { |
| 215 | + memcpy(out, in, strlen(in)); |
| 216 | + return strlen(in); |
| 217 | + } |
| 218 | + |
| 219 | + //printf("size: %d blocks: %d (%d large) literals: %d\n", (p_8 - out), num_blocks, num_blocks_large, num_literals); |
| 220 | + |
| 221 | + codo_free(modified_code); |
| 222 | + |
| 223 | + return p_8 - out; |
| 224 | +} |
| 225 | + |
| 226 | +#define READ_VAL(val) {val = *in; in++;} |
| 227 | +int decompress_mini(uint8 *in_p, uint8 *out_p, int max_len) |
| 228 | +{ |
| 229 | + int block_offset; |
| 230 | + int block_length; |
| 231 | + int val; |
| 232 | + uint8 *in = in_p; |
| 233 | + uint8 *out = out_p; |
| 234 | + int len; |
| 235 | + |
| 236 | + // header tag ":c:" |
| 237 | + READ_VAL(val); |
| 238 | + READ_VAL(val); |
| 239 | + READ_VAL(val); |
| 240 | + READ_VAL(val); |
| 241 | + |
| 242 | + // uncompressed length |
| 243 | + READ_VAL(val); |
| 244 | + len = val * 256; |
| 245 | + READ_VAL(val); |
| 246 | + len += val; |
| 247 | + |
| 248 | + // compressed length (to do: use to check) |
| 249 | + READ_VAL(val); |
| 250 | + READ_VAL(val); |
| 251 | + |
| 252 | + codo_memset(out_p, 0, max_len); |
| 253 | + |
| 254 | + if (len > max_len) return 1; // corrupt data |
| 255 | + |
| 256 | + while (out < out_p + len) |
| 257 | + { |
| 258 | + READ_VAL(val); |
| 259 | + |
| 260 | + if (val < LITERALS) |
| 261 | + { |
| 262 | + // literal |
| 263 | + if (val == 0) |
| 264 | + { |
| 265 | + READ_VAL(val); |
| 266 | + //printf("rare literal: %d\n", val); |
| 267 | + *out = val; |
| 268 | + } |
| 269 | + else |
| 270 | + { |
| 271 | + // printf("common literal: %d (%c)\n", literal[val], literal[val]); |
| 272 | + *out = literal[val]; |
| 273 | + } |
| 274 | + out++; |
| 275 | + } |
| 276 | + else |
| 277 | + { |
| 278 | + // block |
| 279 | + block_offset = val - LITERALS; |
| 280 | + block_offset *= 16; |
| 281 | + READ_VAL(val); |
| 282 | + block_offset += val % 16; |
| 283 | + block_length = (val / 16) + 2; |
| 284 | + |
| 285 | + memcpy(out, out - block_offset, block_length); |
| 286 | + out += block_length; |
| 287 | + } |
| 288 | + } |
| 289 | + |
| 290 | + |
| 291 | + // remove injected code (needed to be future compatible with PICO-8 C 0.1.7 / FILE_VERSION 8) |
| 292 | + // older versions will leave this code intact, allowing it to implement fallback 60fps support |
| 293 | + |
| 294 | + if (strstr(out_p, FUTURE_CODE)) |
| 295 | + if (strlen(out_p)-((char *)strstr(out_p, FUTURE_CODE) - (char *)out_p) == strlen(FUTURE_CODE)) // at end |
| 296 | + { |
| 297 | + out = out_p + strlen(out_p) - strlen(FUTURE_CODE); |
| 298 | + *out = 0; |
| 299 | + } |
| 300 | + |
| 301 | + // queue circus music |
| 302 | + if (strstr(out_p, FUTURE_CODE2)) |
| 303 | + if (strlen(out_p)-((char *)strstr(out_p, FUTURE_CODE2) - (char *)out_p) == strlen(FUTURE_CODE2)) // at end |
| 304 | + { |
| 305 | + out = out_p + strlen(out_p) - strlen(FUTURE_CODE2); |
| 306 | + *out = 0; |
| 307 | + } |
| 308 | + |
| 309 | + |
| 310 | + return out - out_p; |
| 311 | +} |
| 312 | + |
| 313 | + |
| 314 | +void compress_test(char *fn) |
| 315 | +{ |
| 316 | + FILE *f; |
| 317 | + uint8 *dat; |
| 318 | + uint8 *out; |
| 319 | + int len; |
| 320 | + int comp_len; |
| 321 | + int decomp_len; |
| 322 | + int i; |
| 323 | + |
| 324 | + dat = malloc(65536); |
| 325 | + out = malloc(65536); |
| 326 | + |
| 327 | + f = fopen(fn, "r"); |
| 328 | + |
| 329 | + len = fread(dat, 1, 65536, f); |
| 330 | + fclose(f); |
| 331 | + |
| 332 | + //comp_len = codo_compress_lz4_hc(dat, out, len); // not as good as compress_mini() |
| 333 | + comp_len = compress_mini(dat, out, len); |
| 334 | + |
| 335 | + memset(dat, 0, 65536); |
| 336 | + |
| 337 | + decomp_len = decompress_mini(out, dat, 65536); |
| 338 | + |
| 339 | + // show highest freq of literals |
| 340 | + #if 0 |
| 341 | + for (i = 0; i < 256; i++) |
| 342 | + if (freq[i] > 50) |
| 343 | + printf("[%c] : %d\n", i, freq[i]); |
| 344 | + #endif |
| 345 | + printf("len %d --> comp_len %d\n", len, comp_len); |
| 346 | + printf("decomp_len: %d\n", decomp_len); |
| 347 | + |
| 348 | + printf("blocks: %d literals %d\n", num_blocks, num_literals); |
| 349 | + printf("block len: %3.3f\n", (float)(len - num_literals) / (float)num_blocks); |
| 350 | + |
| 351 | + //printf("output: %s\n", dat); |
| 352 | + f = fopen("out.txt", "wb"); |
| 353 | + fwrite(dat, 1, strlen(dat), f); |
| 354 | + fclose(f); |
| 355 | + |
| 356 | + free(dat); |
| 357 | + free(out); |
| 358 | +} |
| 359 | + |
// Entry point: runs the round-trip test on the file named by the first
// command line argument. Does nothing when no argument is given.
int main(int argc, char *argv[])
{
	if (argc <= 1)
	{
		return 0;
	}

	compress_test(argv[1]);

	return 0;
}
| 365 | + |
0 commit comments