Skip to content

Commit 8ce8cba

Browse files
committed
Fix binary chunk decoding and inflating
Git uses a unique Base85 encoding with different characters than the ascii85 encoding implemented by Go, so add a custom decoding function. Once decoded, use zlib instead of the raw DEFLATE algorithm to decompress the data. These issues were caught by some basic parsing tests which are added here as well.
1 parent 22fb507 commit 8ce8cba

File tree

3 files changed

+135
-22
lines changed

3 files changed

+135
-22
lines changed

gitdiff/base85.go

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
package gitdiff
2+
3+
import (
4+
"fmt"
5+
)
6+
7+
const (
	// base85Alphabet lists the 85 characters of the Base85 encoding in value
	// order: the character at index i encodes the digit i.
	base85Alphabet = "0123456789" +
		"ABCDEFGHIJKLMNOPQRSTUVWXYZ" +
		"abcdefghijklmnopqrstuvwxyz" +
		"!#$%&()*+-;<=>?@^_`{|}~"
)

var (
	// de85 maps each character of base85Alphabet to its digit value. It is
	// populated by init.
	de85 map[byte]byte
)

func init() {
	de85 = make(map[byte]byte, len(base85Alphabet))
	for i, c := range base85Alphabet {
		de85[byte(c)] = byte(i)
	}
}

// base85Decode decodes Base85-encoded data from src into dst. It uses the
// alphabet defined by base85.c in the Git source tree, which appears to be
// unique. src must contain at least len(dst) bytes of encoded data.
//
// Every group of 5 characters in src decodes to 4 bytes, most significant
// byte first. Decoded bytes that do not fit in dst are discarded, which is how
// trailing padding in the final group is dropped.
//
// An error is returned if src contains a byte outside the alphabet, if a
// group encodes a value that does not fit in 32 bits, if len(src) is not a
// multiple of 5, or if fewer than len(dst) bytes were decoded.
func base85Decode(dst, src []byte) error {
	// Accumulate in 64 bits: five digits can encode up to 85^5-1, which is
	// larger than a uint32 can hold and would otherwise wrap silently.
	var v uint64
	var n, ndst int
	for i, b := range src {
		digit, ok := de85[b]
		if !ok {
			// Report the offending input byte, not the (zero) lookup result.
			return fmt.Errorf("invalid base85 byte at index %d: 0x%x", i, b)
		}
		v = 85*v + uint64(digit)
		n++
		if n < 5 {
			continue
		}

		if v > 0xFFFFFFFF {
			return fmt.Errorf("base85 sequence at index %d overflows 32 bits", i-4)
		}

		// Emit the group big-endian, stopping early once dst is full.
		word := uint32(v)
		for j := 0; j < 4 && ndst < len(dst); j++ {
			dst[ndst] = byte(word >> 24)
			ndst++
			word <<= 8
		}
		v = 0
		n = 0
	}
	if n > 0 {
		return fmt.Errorf("base85 data terminated by underpadded sequence")
	}
	if ndst < len(dst) {
		return fmt.Errorf("base85 data is too short: %d < %d", ndst, len(dst))
	}
	return nil
}

gitdiff/parser.go

Lines changed: 18 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,7 @@ package gitdiff
33
import (
44
"bufio"
55
"bytes"
6-
"compress/flate"
7-
"encoding/ascii85"
6+
"compress/zlib"
87
"fmt"
98
"io"
109
"io/ioutil"
@@ -423,35 +422,30 @@ func (p *parser) ParseBinaryChunk(frag *BinaryFragment) error {
423422
if line == "\n" {
424423
break
425424
}
426-
427425
if len(line) < len(shortestValidLine) || (len(line)-2)%5 != 0 {
428426
return p.Errorf(0, "binary patch: corrupt data line")
429427
}
430428

431-
byteCount := int(line[0])
429+
byteCount, seq := int(line[0]), line[1:len(line)-1]
432430
switch {
433431
case 'A' <= byteCount && byteCount <= 'Z':
434432
byteCount = byteCount - 'A' + 1
435433
case 'a' <= byteCount && byteCount <= 'z':
436434
byteCount = byteCount - 'a' + 27
437435
default:
438-
return p.Errorf(0, "binary patch: invalid length byte: %q", line[0])
436+
return p.Errorf(0, "binary patch: invalid length byte")
439437
}
440438

441439
// base85 encodes every 4 bytes into 5 characters, with up to 3 bytes of end padding
442-
maxByteCount := (len(line) - 2) / 5 * 4
443-
if byteCount >= maxByteCount || byteCount < maxByteCount-3 {
444-
return p.Errorf(0, "binary patch: incorrect byte count: %d", byteCount)
440+
maxByteCount := len(seq) / 5 * 4
441+
if byteCount > maxByteCount || byteCount < maxByteCount-3 {
442+
return p.Errorf(0, "binary patch: incorrect byte count")
445443
}
446444

447-
ndst, _, err := ascii85.Decode(buf, []byte(line[1:]), byteCount < maxBytesPerLine)
448-
if err != nil {
445+
if err := base85Decode(buf[:byteCount], []byte(seq)); err != nil {
449446
return p.Errorf(0, "binary patch: %v", err)
450447
}
451-
if ndst != byteCount {
452-
return p.Errorf(0, "binary patch: %d byte line decoded as %d", byteCount, ndst)
453-
}
454-
data.Write(buf[:ndst])
448+
data.Write(buf[:byteCount])
455449

456450
if err := p.Next(); err != nil {
457451
if err == io.EOF {
@@ -472,18 +466,20 @@ func (p *parser) ParseBinaryChunk(frag *BinaryFragment) error {
472466
return nil
473467
}
474468

475-
func inflateBinaryChunk(frag *BinaryFragment, r io.Reader) (err error) {
476-
inflater := flate.NewReader(r)
477-
defer func() {
478-
if cerr := inflater.Close(); cerr != nil && err == nil {
479-
err = cerr
480-
}
481-
}()
469+
func inflateBinaryChunk(frag *BinaryFragment, r io.Reader) error {
470+
zr, err := zlib.NewReader(r)
471+
if err != nil {
472+
return err
473+
}
482474

483-
data, err := ioutil.ReadAll(inflater)
475+
data, err := ioutil.ReadAll(zr)
484476
if err != nil {
485477
return err
486478
}
479+
if err := zr.Close(); err != nil {
480+
return err
481+
}
482+
487483
if int64(len(data)) != frag.Size {
488484
return fmt.Errorf("%d byte fragment inflated to %d", frag.Size, len(data))
489485
}

gitdiff/parser_binary_test.go

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package gitdiff
22

33
import (
4+
"encoding/binary"
45
"io"
56
"reflect"
67
"testing"
@@ -108,3 +109,63 @@ func TestParseBinaryFragmentHeader(t *testing.T) {
108109
})
109110
}
110111
}
112+
113+
func TestParseBinaryChunk(t *testing.T) {
114+
tests := map[string]struct {
115+
Input string
116+
Fragment BinaryFragment
117+
Output []byte
118+
Err bool
119+
}{
120+
"newFile": {
121+
Input: "gcmZQzU|?i`U?w2V48*KJ%mKu_Kr9NxN<eH500b)lkN^Mx\n\n",
122+
Fragment: BinaryFragment{
123+
Size: 40,
124+
},
125+
Output: fib(10),
126+
},
127+
"newFileMultiline": {
128+
Input: "zcmZQzU|?i`U?w2V48*KJ%mKu_Kr9NxN<eH5#F0Qe0f=7$l~*z_FeL$%-)3N7vt?l5\n" +
129+
"zl3-vE2xVZ9%4J~CI>f->s?WfX|B-=Vs{#X~svra7Ekg#T|4s}nH;WnAZ)|1Y*`&cB\n" +
130+
"s(sh?X(Uz6L^!Ou&aF*u`J!eibJifSrv0z>$Q%Hd(^HIJ<Y?5`S0gT5UE&u=k\n\n",
131+
Fragment: BinaryFragment{
132+
Size: 160,
133+
},
134+
Output: fib(40),
135+
},
136+
}
137+
138+
for name, test := range tests {
139+
t.Run(name, func(t *testing.T) {
140+
p := newTestParser(test.Input, true)
141+
142+
frag := test.Fragment
143+
err := p.ParseBinaryChunk(&frag)
144+
if test.Err {
145+
if err == nil || err == io.EOF {
146+
t.Fatalf("expected error parsing binary chunk, but got %v", err)
147+
}
148+
return
149+
}
150+
if err != nil {
151+
t.Fatalf("unexpected error parsing binary chunk: %v", err)
152+
}
153+
if !reflect.DeepEqual(test.Output, frag.Data) {
154+
t.Errorf("incorrect binary chunk\nexpected: %+v\n actual: %+v", test.Output, frag.Data)
155+
}
156+
})
157+
}
158+
}
159+
160+
// fib returns the first n Fibonacci numbers (starting 1, 1) as a 4*n byte
// slice, each number written as a big-endian uint32. Values wrap modulo 2^32.
func fib(n int) []byte {
	out := make([]byte, 4*n)

	// Walk the sequence with a sliding pair instead of storing every term.
	cur, next := uint32(1), uint32(1)
	for off := 0; off < len(out); off += 4 {
		binary.BigEndian.PutUint32(out[off:], cur)
		cur, next = next, cur+next
	}
	return out
}

0 commit comments

Comments
 (0)