Skip to content

Commit 7cca1f9

Browse files
committed
Official PICO-8 compression code in C
1 parent 8087386 commit 7cca1f9

File tree

3 files changed

+1086
-2
lines changed

3 files changed

+1086
-2
lines changed

README.md

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,19 @@
1-
# pico8
2-
Code and material related to PICO-8 by Lexaloffle Games LLP
1+
# lexaloffle
2+
3+
This is a miscellaneous collection of code and material related to products by Lexaloffle Games LLP.
4+
5+
## PICO-8 file format routines
6+
7+
The PICO-8 fantasy game console stores game cartridges in one of several documented file formats. The most canonical file format is the [P8PNG](https://pico-8.fandom.com/wiki/P8PNGFileFormat) format (`.p8.png`), an encoding of the game data stored steganographically within a PNG image of the game's "cartridge." The Lua code for a cartridge is compressed using one of two documented proprietary methods: the legacy `:c:` method and the newer `pxa` method.
8+
9+
This repository contains C routines that can compress and decompress data using the two methods:
10+
11+
* `pxa_compress_snippets.c`: the PXA method, supported by PICO-8 versions 0.2.0 and newer
12+
* `p8_compress.c`: the legacy `:c:` method, supported by all versions of PICO-8
13+
* This includes `FUTURE_CODE` that was injected for forwards compatibility with PICO-8 0.1.7. This was added to the default wrapper code in PICO-8 0.1.8 and no longer needs to be injected by the save routine.
14+
15+
This code was created and officially released by Lexaloffle Games LLP under open source licenses. See each file for the text of the license.
16+
17+
For a Python implementation of the complete P8PNG format including steganographic decoding, see [picotool](https://github.com/dansanderson/picotool). (As of this writing, picotool only supports `:c:` compression.)
18+
19+
For a prose description of the file formats and compression algorithms, see [P8PNGFileFormat in the PICO-8 wiki](https://pico-8.fandom.com/wiki/P8PNGFileFormat).

p8_compress.c

Lines changed: 365 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,365 @@
1+
/*
2+
p8_compress.c
3+
4+
(c) Copyright 2014-2016 Lexaloffle Games LLP
5+
author: joseph@lexaloffle.com
6+
7+
compression used in code section of .p8.png format
8+
9+
This software is provided 'as-is', without any express or implied
10+
warranty. In no event will the authors be held liable for any damages
11+
arising from the use of this software.
12+
13+
Permission is granted to anyone to use this software for any purpose,
14+
including commercial applications, and to alter it and redistribute it
15+
freely, subject to the following restrictions:
16+
17+
1. The origin of this software must not be misrepresented; you must not
18+
claim that you wrote the original software. If you use this software
19+
in a product, an acknowledgment in the product documentation would be
20+
appreciated but is not required.
21+
2. Altered source versions must be plainly marked as such, and must not be
22+
misrepresented as being the original software.
23+
3. This notice may not be removed or altered from any source distribution.
24+
25+
*/
26+
27+
#include <stdlib.h>
28+
#include <stdio.h>
29+
#include <string.h>
30+
31+
// two-argument max / min helpers; both are defined only when MAX is not already defined.
// note: each argument is evaluated more than once, so do not pass expressions with side effects.
#ifndef MAX
32+
#define MAX(x, y) (((x) > (y)) ? (x) : (y))
33+
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
34+
#endif
35+
36+
// byte type used for the source text and the compressed stream
typedef unsigned char uint8;
37+
38+
// HIST_LEN is only mentioned in a comment by the code visible here (the search window
// in find_repeatable_block() is (255-LITERALS)*16, which is smaller)
#define HIST_LEN 4096
39+
// number of entries in the literal table below, counting the dummy entry at index 0.
// in the compressed stream, a first byte below LITERALS is a literal code and a
// first byte of LITERALS or more starts a 2 byte block reference.
#define LITERALS 60
40+
// compress_mini() skips injecting FUTURE_CODE2 when the result would not fit in this many bytes
#define PICO8_CODE_ALLOC_SIZE (0x10000+1)
41+
42+
// allocation / memset wrappers, mapped straight to the C library
#define codo_malloc malloc
43+
#define codo_free free
44+
#define codo_memset memset
45+
46+
// removed from end of decompressed if it exists
47+
// (injected to maintain 0.1.7 forwards compatibility)
48+
// decompress_mini() strips either string; compress_mini() only ever appends FUTURE_CODE2
#define FUTURE_CODE "if(_update60)_update=function()_update60()_update60()end"
49+
#define FUTURE_CODE2 "if(_update60)_update=function()_update60()_update_buttons()_update60()end"
50+
51+
// ^ is dummy -- not a literal. forgot '-', but nevermind! (gets encoded as rare literal)
52+
// literal[n] is the character emitted for literal code n (1..LITERALS-1).
// code 0 is the escape for a "rare" literal: the raw byte follows in the stream.
char *literal = "^\n 0123456789abcdefghijklmnopqrstuvwxyz!#%(){}[]<>+=/*:;.,~_";
53+
int literal_index[256]; // map literals to 0..LITERALS-1. 0 is reserved (not listed in literals string)
54+
55+
/*
	find_repeatable_block

	Brute force search of the data before pos for the longest run that
	matches the data starting at dat[pos].

	dat           buffer being compressed
	pos           current position; candidates start in [pos - window, pos)
	len           total number of bytes in dat
	block_offset  out: distance from pos back to the start of the best match.
	              only meaningful when the returned length is non-zero.

	returns the length of the best match (0 when there is none). the first
	candidate wins when several have the same length. a match never runs
	past pos, so source and destination never overlap.
*/
int find_repeatable_block(uint8 *dat, int pos, int len, int *block_offset)
{
	// block len starts from 2, so no need to record 0, 1 --> max is (15 + 2)
	// any more doesn't have much effect for code. more important to look back further.
	int longest_allowed = 17;

	// furthest distance a block reference can encode. less than HIST_LEN
	int window = (255-LITERALS)*16;

	int winner_len = 0;
	int winner_start = -100000;
	int start;
	int run;

	// a block can't be longer than what is left to encode
	longest_allowed = MIN(longest_allowed, len - pos);

	// and can't reach back further than the preceding data
	window = MIN(window, pos);

	for (start = pos - window; start < pos; start++)
	{
		// count how many bytes starting at 'start' repeat at 'pos'
		run = 0;
		while (run < longest_allowed && start + run < pos && dat[start + run] == dat[pos + run])
		{
			run++;
		}

		if (run > winner_len)
		{
			winner_len = run;
			winner_start = start;
		}
	}

	*block_offset = pos - winner_start;

	return winner_len;
}
91+
92+
93+
// append one byte to the output stream and advance the cursor.
// expects a byte pointer named p_8 to be in scope where the macro is used.
#define WRITE_VAL(x) {*p_8 = (x); p_8++;}
94+
95+
// compress_mini() (below) returns the compressed length.
// these counters hold statistics from compress_mini(): blocks written,
// blocks longer than 17 bytes, and literals written.
96+
int num_blocks, num_blocks_large, num_literals;
97+
// per byte value counters; cleared by compress_mini() and updated as it writes literals
int freq[256];
98+
99+
/*
	compress_mini

	Compress PICO-8 source text with the legacy ":c:" method.

	in_p   source text. must be NUL-terminated: strlen() and strstr() are
	       applied to it.
	out    destination buffer. no size is passed in, so the caller must
	       provide enough room: 8 header bytes plus up to 2 bytes for every
	       input byte (a character missing from the literal table costs 2
	       bytes), where the input may first grow by strlen(FUTURE_CODE2)+1.
	len    number of bytes to compress; expected to be strlen(in_p).

	returns the number of bytes written to out, or 0 when the working copy
	of the source could not be allocated. when compression does not make
	the data smaller, the (possibly extended) source text is copied to out
	as-is -- no header, no NUL terminator -- and its length is returned.

	stream layout: ':' 'c' ':' 0, uncompressed length (2 bytes, high byte
	first), 2 bytes reserved for the compressed length (written as 0), then
	a sequence of literals (1 byte, or 0 followed by the raw byte) and block
	references (2 bytes).

	side effects: rebuilds literal_index[] and resets / updates num_blocks,
	num_blocks_large, num_literals and freq[].
*/
int compress_mini(uint8 *in_p, uint8 *out, int len)
{
	uint8 *p_8 = out;
	int pos = 0;
	int block_offset;
	int block_len;
	int i;
	int code;
	int result;
	size_t src_len;
	uint8 *in;
	char *modified_code;

	// init literals search
	// (clear the whole table: a count of 256 would only cover 256 bytes, not 256 ints)
	memset(literal_index, 0, sizeof(literal_index));
	for (i = 1; i < LITERALS; i++)
	{
		literal_index[(uint8)literal[i]] = i;
	}

	// 0.1.8 : inject future api implementation if _update60 found in in_p
	// note: doesn't apply to plain .p8 format

	// work on a private copy that has room for the injected code
	src_len = strlen((char *)in_p);
	modified_code = codo_malloc(src_len + 1024);
	if (modified_code == NULL)
	{
		return 0;
	}
	memcpy(modified_code, in_p, src_len + 1);

	if (strstr(modified_code, "_update60"))
	if (len < PICO8_CODE_ALLOC_SIZE - (strlen(FUTURE_CODE2)+1)) // skip if won't fit when decompressing
	{
		// 0.1.9: make sure there is some whitespace before future_code (0.1.8 bug)
		// (src_len can't be 0 here: "_update60" was found in the text)
		if (modified_code[src_len-1] != ' ' && modified_code[src_len-1] != '\n')
		{
			modified_code[src_len] = '\n';
			src_len++;
		}
		memcpy(modified_code + src_len, FUTURE_CODE2, strlen(FUTURE_CODE2) + 1);
		src_len += strlen(FUTURE_CODE2);

		// the +1 is added even when no newline was needed, in which case the
		// terminating NUL becomes part of the compressed data. kept as it is
		// so that the output stays byte-for-byte the same as before.
		len += strlen(FUTURE_CODE2)+1;
	}

	in = (uint8 *)modified_code;

	// header tag: ":c:"
	// will show up in code section of old versions of pico-8
	WRITE_VAL(':');
	WRITE_VAL('c');
	WRITE_VAL(':');
	WRITE_VAL(0);

	// write uncompressed size
	WRITE_VAL(len/256);
	WRITE_VAL(len%256);

	// compressed size. reserved for robust/safe decompression; nothing
	// fills it in at the moment, so it stays 0
	WRITE_VAL(0);
	WRITE_VAL(0);

	num_blocks = 0;
	num_blocks_large = 0;
	num_literals = 0;

	memset(freq, 0, sizeof(freq));
	#if 0
		// generate histogram
		for (i = 0; i < len; i++)
			freq[in[i]]++;

		// show highest
		for (i = 0; i < 256; i++)
			if (freq[i] > len / 64)
				printf("[%c] : %d\n", i, freq[i]);
	#endif

	while (pos < len)
	{
		// either copy or literal

		block_len = find_repeatable_block(in, pos, len, &block_offset);

		// use block when 3 or more long. performs better than 2, because after
		// writing first literal, second one might be part of a block.
		if (block_len >= 3)
		{
			// block: 2 bytes
			// first byte:  high part of the offset, shifted up past the literal codes
			// second byte: low 4 bits of the offset, and (length - 2) in the upper 4 bits

			WRITE_VAL((block_offset / 16) + LITERALS);
			WRITE_VAL((block_offset % 16) + (block_len-2) * 16);
			pos += block_len;

			// stats
			num_blocks ++;

			if (block_len > 17) num_blocks_large++;
		}
		else
		{
			// literal: 0 means read next byte
			code = literal_index[in[pos]];

			WRITE_VAL(code);

			if (code == 0)
			{
				WRITE_VAL(in[pos]);
			}

			// stats. count the byte that was just written, before moving on
			// (counting after pos++ tallied the following byte instead)
			num_literals ++;
			freq[in[pos]]++;

			pos ++;
		}
	}

	result = (int)(p_8 - out);

	// compressed is larger than input -> just return input
	if ((size_t)result >= src_len)
	{
		memcpy(out, in, src_len);
		result = (int)src_len;
	}

	//printf("size: %d blocks: %d (%d large) literals: %d\n", (p_8 - out), num_blocks, num_blocks_large, num_literals);

	// released on every path, including the "return input" case above
	codo_free(modified_code);

	return result;
}
225+
226+
// read the next byte of the input stream into val and advance the cursor.
// expects a byte pointer named in to be in scope where the macro is used.
#define READ_VAL(val) {val = *in; in++;}
227+
/*
	decompress_mini

	Decompress a ":c:" stream (see compress_mini) into out_p.

	in_p     compressed data, starting with the 8 byte header. the header tag
	         is skipped without being verified, and no input length is
	         available, so the caller must only pass data that really is a
	         ":c:" stream.
	out_p    destination buffer of at least max_len bytes. it is cleared
	         before decoding.
	max_len  size of out_p in bytes.

	returns the length of the decompressed text. if the text ends with
	FUTURE_CODE or FUTURE_CODE2, that tail is cut off (a NUL is written at
	its first byte) and the shortened length is returned.
	returns 1 when the data is found to be corrupt: the declared length is
	larger than max_len, or a block refers to data before the start of the
	output. (this can't be told apart from a real length of 1.)
*/
int decompress_mini(uint8 *in_p, uint8 *out_p, int max_len)
{
	static const char *const injected[2] = { FUTURE_CODE, FUTURE_CODE2 };
	int block_offset;
	int block_length;
	int val;
	uint8 *in = in_p;
	uint8 *out = out_p;
	uint8 *out_end;
	uint8 *first_nul;
	int len;
	int text_len;
	int tail_len;
	int i;

	// header tag ":c:" and the 0 that follows it
	READ_VAL(val);
	READ_VAL(val);
	READ_VAL(val);
	READ_VAL(val);

	// uncompressed length
	READ_VAL(val);
	len = val * 256;
	READ_VAL(val);
	len += val;

	// compressed length (to do: use to check)
	READ_VAL(val);
	READ_VAL(val);

	codo_memset(out_p, 0, max_len);

	if (len > max_len) return 1; // corrupt data

	out_end = out_p + len;

	while (out < out_end)
	{
		READ_VAL(val);

		if (val < LITERALS)
		{
			// literal
			if (val == 0)
			{
				// rare literal: the raw byte follows
				READ_VAL(val);
				*out = val;
			}
			else
			{
				// common literal: look it up
				*out = literal[val];
			}
			out++;
		}
		else
		{
			// block
			block_offset = val - LITERALS;
			block_offset *= 16;
			READ_VAL(val);
			block_offset += val % 16;
			block_length = (val / 16) + 2;

			// a block has to start inside the data decoded so far
			if (block_offset < 1 || block_offset > out - out_p) return 1; // corrupt data

			// never write beyond the declared length (and so beyond max_len)
			if (block_length > out_end - out) block_length = (int)(out_end - out);

			// copy front to back, one byte at a time. unlike memcpy this is
			// well defined when the block overlaps the bytes being written.
			for (i = 0; i < block_length; i++)
			{
				out[i] = out[i - block_offset];
			}
			out += block_length;
		}
	}


	// remove injected code (needed to be future compatible with PICO-8 C 0.1.7 / FILE_VERSION 8)
	// older versions will leave this code intact, allowing it to implement fallback 60fps support

	// the text ends at the first NUL (compress_mini can include one in the
	// data) or at the declared length. searching only the decoded bytes
	// keeps this inside out_p even when len == max_len.
	first_nul = memchr(out_p, 0, (size_t)len);
	text_len = first_nul ? (int)(first_nul - out_p) : len;

	// compare the tail of the text directly, so that the injected code is
	// still removed when the same text also appears earlier in the program
	for (i = 0; i < 2; i++)
	{
		tail_len = (int)strlen(injected[i]);

		if (text_len >= tail_len && memcmp(out_p + text_len - tail_len, injected[i], (size_t)tail_len) == 0) // at end
		{
			text_len -= tail_len;
			out = out_p + text_len;
			*out = 0;
		}
	}


	return (int)(out - out_p);
}
312+
313+
314+
void compress_test(char *fn)
315+
{
316+
FILE *f;
317+
uint8 *dat;
318+
uint8 *out;
319+
int len;
320+
int comp_len;
321+
int decomp_len;
322+
int i;
323+
324+
dat = malloc(65536);
325+
out = malloc(65536);
326+
327+
f = fopen(fn, "r");
328+
329+
len = fread(dat, 1, 65536, f);
330+
fclose(f);
331+
332+
//comp_len = codo_compress_lz4_hc(dat, out, len); // not as good as compress_mini()
333+
comp_len = compress_mini(dat, out, len);
334+
335+
memset(dat, 0, 65536);
336+
337+
decomp_len = decompress_mini(out, dat, 65536);
338+
339+
// show highest freq of literals
340+
#if 0
341+
for (i = 0; i < 256; i++)
342+
if (freq[i] > 50)
343+
printf("[%c] : %d\n", i, freq[i]);
344+
#endif
345+
printf("len %d --> comp_len %d\n", len, comp_len);
346+
printf("decomp_len: %d\n", decomp_len);
347+
348+
printf("blocks: %d literals %d\n", num_blocks, num_literals);
349+
printf("block len: %3.3f\n", (float)(len - num_literals) / (float)num_blocks);
350+
351+
//printf("output: %s\n", dat);
352+
f = fopen("out.txt", "wb");
353+
fwrite(dat, 1, strlen(dat), f);
354+
fclose(f);
355+
356+
free(dat);
357+
free(out);
358+
}
359+
360+
// entry point: runs the round trip test on the file named by the first
// command line argument. does nothing when no argument is given.
int main(int argc, char *argv[])
{
	if (argc <= 1)
	{
		return 0;
	}

	compress_test(argv[1]);

	return 0;
}
365+

0 commit comments

Comments
 (0)