Skip to content

Commit ddafbc8

Browse files
committed
Split up binary parsing to match text parsing
Parse the fragment header separately from the fragment chunk, which makes each function a bit more understandable.
1 parent 0cfd720 commit ddafbc8

File tree

2 files changed

+79
-54
lines changed

2 files changed

+79
-54
lines changed

gitdiff/gitdiff.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ func (op LineOp) String() string {
9898
// BinaryFragment describes changes to a binary file.
9999
type BinaryFragment struct {
100100
Method BinaryPatchMethod
101+
Size int64
101102
Data []byte
102103
}
103104

gitdiff/parser.go

Lines changed: 78 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"encoding/ascii85"
88
"fmt"
99
"io"
10+
"io/ioutil"
1011
"strconv"
1112
"strings"
1213
)
@@ -326,26 +327,36 @@ func (p *parser) ParseBinaryFragments(f *File) (n int, err error) {
326327
}
327328

328329
f.IsBinary = true
329-
if hasData {
330-
forward, err := p.ParseBinaryFragment()
331-
if err != nil {
332-
return n, err
333-
}
334-
if forward == nil {
335-
return n, p.Errorf(0, "missing data for binary patch")
336-
}
337-
f.BinaryFragment = forward
338-
n++
330+
if !hasData {
331+
return 0, nil
332+
}
339333

340-
// valid for reverse to not exist, but it must be valid if present
341-
reverse, err := p.ParseBinaryFragment()
342-
if err != nil {
343-
return n, err
344-
}
345-
f.ReverseBinaryFragment = reverse
334+
forward, err := p.ParseBinaryFragmentHeader()
335+
if err != nil {
336+
return 0, err
337+
}
338+
if forward == nil {
339+
return 0, p.Errorf(0, "missing data for binary patch")
340+
}
341+
if err := p.ParseBinaryChunk(forward); err != nil {
342+
return 0, err
343+
}
344+
f.BinaryFragment = forward
345+
346+
// valid for reverse to not exist, but it must be valid if present
347+
reverse, err := p.ParseBinaryFragmentHeader()
348+
if err != nil {
349+
return 1, err
350+
}
351+
if reverse == nil {
352+
return 1, nil
353+
}
354+
if err := p.ParseBinaryChunk(reverse); err != nil {
355+
return 1, err
346356
}
357+
f.ReverseBinaryFragment = reverse
347358

348-
return n, nil
359+
return 1, nil
349360
}
350361

351362
func (p *parser) ParseBinaryMarker() (isBinary bool, hasData bool, err error) {
@@ -364,14 +375,7 @@ func (p *parser) ParseBinaryMarker() (isBinary bool, hasData bool, err error) {
364375
return true, hasData, nil
365376
}
366377

367-
func (p *parser) ParseBinaryFragment() (*BinaryFragment, error) {
368-
// TODO(bkeyes): split this function into small parts
369-
// TODO(bkeyes): add summary of data format so this is less mysterious
370-
const (
371-
shortestValidLine = "A00000\n"
372-
maxBytesPerLine = 52
373-
)
374-
378+
func (p *parser) ParseBinaryFragmentHeader() (*BinaryFragment, error) {
375379
parts := strings.SplitN(p.Line(0), " ", 2)
376380
if len(parts) < 2 {
377381
return nil, nil
@@ -387,32 +391,41 @@ func (p *parser) ParseBinaryFragment() (*BinaryFragment, error) {
387391
return nil, nil
388392
}
389393

390-
totalBytes, err := strconv.ParseInt(parts[1], 10, 64)
391-
if err != nil {
394+
var err error
395+
if frag.Size, err = strconv.ParseInt(parts[1], 10, 64); err != nil {
392396
nerr := err.(*strconv.NumError)
393-
return nil, p.Errorf(0, "binary patch: invalid data length: %v", nerr.Err)
397+
return nil, p.Errorf(0, "binary patch: invalid size: %v", nerr.Err)
398+
}
399+
400+
if err := p.Next(); err != nil && err != io.EOF {
401+
return nil, err
394402
}
403+
return frag, nil
404+
}
405+
406+
func (p *parser) ParseBinaryChunk(frag *BinaryFragment) error {
407+
// Binary fragments are encoded as a series of base85 encoded lines. Each
408+
// line starts with a character in [A-Za-z] giving the number of bytes on
409+
// the line, where A = 1 and z = 52, and ends with a newline character.
410+
//
411+
// The base85 encoding means each line is a multiple of 5 characters + 2
412+
// additional characters for the length byte and the newline. The fragment
413+
// ends with a blank line.
414+
const (
415+
shortestValidLine = "A00000\n"
416+
maxBytesPerLine = 52
417+
)
395418

396419
var data bytes.Buffer
397420
buf := make([]byte, maxBytesPerLine)
398-
399421
for {
400-
if err := p.Next(); err != nil {
401-
if err == io.EOF {
402-
break
403-
}
404-
return nil, err
405-
}
406422
line := p.Line(0)
407-
408423
if line == "\n" {
409-
// blank line indicates the end of the fragment
410424
break
411425
}
412426

413-
// base85 encoding means each line is a multiple of 5 + first char and newline
414427
if len(line) < len(shortestValidLine) || (len(line)-2)%5 != 0 {
415-
return nil, p.Errorf(0, "binary patch: corrupt data line")
428+
return p.Errorf(0, "binary patch: corrupt data line")
416429
}
417430

418431
byteCount := int(line[0])
@@ -422,47 +435,58 @@ func (p *parser) ParseBinaryFragment() (*BinaryFragment, error) {
422435
case 'a' <= byteCount && byteCount <= 'z':
423436
byteCount = byteCount - 'a' + 27
424437
default:
425-
return nil, p.Errorf(0, "binary patch: invalid length byte: %q", line[0])
438+
return p.Errorf(0, "binary patch: invalid length byte: %q", line[0])
426439
}
427440

428441
// base85 encodes every 4 bytes into 5 characters, with up to 3 bytes of end padding
429442
maxByteCount := (len(line) - 2) / 5 * 4
430443
if byteCount >= maxByteCount || byteCount < maxByteCount-3 {
431-
return nil, p.Errorf(0, "binary patch: incorrect byte count: %d", byteCount)
444+
return p.Errorf(0, "binary patch: incorrect byte count: %d", byteCount)
432445
}
433446

434447
ndst, _, err := ascii85.Decode(buf, []byte(line[1:]), byteCount < maxBytesPerLine)
435448
if err != nil {
436-
return nil, p.Errorf(0, "binary patch: %v", err)
449+
return p.Errorf(0, "binary patch: %v", err)
437450
}
438451
if ndst != byteCount {
439-
return nil, p.Errorf(0, "binary patch: expected %d bytes, but decoded %d", byteCount, ndst)
452+
return p.Errorf(0, "binary patch: %d byte line decoded as %d", byteCount, ndst)
440453
}
441454
data.Write(buf[:ndst])
455+
456+
if err := p.Next(); err != nil {
457+
if err == io.EOF {
458+
return p.Errorf(0, "binary patch: unexpected EOF")
459+
}
460+
return err
461+
}
442462
}
443463

444-
if err := inflateBinaryChunk(frag, &data, totalBytes); err != nil {
445-
return nil, p.Errorf(0, "binary patch: %v", err)
464+
if err := inflateBinaryChunk(frag, &data); err != nil {
465+
return p.Errorf(0, "binary patch: %v", err)
446466
}
447467

448468
// consume the empty line that ended the fragment
449469
if err := p.Next(); err != nil && err != io.EOF {
450-
return nil, err
470+
return err
451471
}
452-
return frag, nil
472+
return nil
453473
}
454474

455-
func inflateBinaryChunk(frag *BinaryFragment, r io.Reader, length int64) error {
456-
data := make([]byte, length)
457-
475+
func inflateBinaryChunk(frag *BinaryFragment, r io.Reader) (err error) {
458476
inflater := flate.NewReader(r)
459-
if _, err := io.ReadFull(inflater, frag.Data); err != nil {
477+
defer func() {
478+
if cerr := inflater.Close(); cerr != nil && err == nil {
479+
err = cerr
480+
}
481+
}()
482+
483+
data, err := ioutil.ReadAll(inflater)
484+
if err != nil {
460485
return err
461486
}
462-
if err := inflater.Close(); err != nil {
463-
return err
487+
if int64(len(data)) != frag.Size {
488+
return fmt.Errorf("%d byte fragment inflated to %d", frag.Size, len(data))
464489
}
465-
466490
frag.Data = data
467491
return nil
468492
}

0 commit comments

Comments
 (0)