Skip to content

Commit 8459203

Browse files
committed
Implementation of file parsing
1 parent da6e40a commit 8459203

File tree

11 files changed

+417
-0
lines changed

11 files changed

+417
-0
lines changed

projects/file-parsing/go.mod

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
module github.com/CodeYourFuture/immersive-go-course/projects/file-parsing
2+
3+
go 1.19
4+
5+
require github.com/stretchr/testify v1.8.1
6+
7+
require (
8+
github.com/davecgh/go-spew v1.1.1 // indirect
9+
github.com/pmezard/go-difflib v1.0.0 // indirect
10+
gopkg.in/yaml.v3 v3.0.1 // indirect
11+
)

projects/file-parsing/go.sum

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
2+
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
3+
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
4+
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
5+
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
6+
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
7+
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
8+
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
9+
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
10+
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
11+
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
12+
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
13+
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
14+
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
15+
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
16+
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
17+
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

projects/file-parsing/main.go

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
package main
2+
3+
import (
4+
"flag"
5+
"fmt"
6+
"log"
7+
"math"
8+
"os"
9+
10+
"github.com/CodeYourFuture/immersive-go-course/projects/file-parsing/parsers"
11+
"github.com/CodeYourFuture/immersive-go-course/projects/file-parsing/parsers/binary"
12+
"github.com/CodeYourFuture/immersive-go-course/projects/file-parsing/parsers/csv"
13+
"github.com/CodeYourFuture/immersive-go-course/projects/file-parsing/parsers/json"
14+
"github.com/CodeYourFuture/immersive-go-course/projects/file-parsing/parsers/repeated_json"
15+
)
16+
17+
func main() {
18+
format := flag.String("format", "", "Format the file is serialised in. Accepted values: json,repeated-json,csv,binary")
19+
file := flag.String("file", "", "Path to the file to read data from")
20+
flag.Parse()
21+
22+
var parser parsers.Parser
23+
switch *format {
24+
case "json":
25+
parser = &json.Parser{}
26+
case "repeated-json":
27+
parser = &repeated_json.Parser{}
28+
case "csv":
29+
parser = &csv.Parser{}
30+
case "bin":
31+
parser = &binary.Parser{}
32+
case "":
33+
log.Fatal("format is a required argument")
34+
default:
35+
log.Fatalf("Didn't know how to parse format %q", *format)
36+
}
37+
38+
if *file == "" {
39+
log.Fatal("file is a required argument")
40+
}
41+
f, err := os.Open(*file)
42+
if err != nil {
43+
log.Fatalf("Failed to open file %s: %v", *file, err)
44+
}
45+
defer f.Close()
46+
47+
records, err := parser.Parse(f)
48+
if err != nil {
49+
log.Fatalf("Failed to parse file %s as %s: %v", *file, *format, err)
50+
}
51+
52+
if len(records) == 0 {
53+
log.Fatal("No scores were found")
54+
}
55+
56+
lowScore := parsers.ScoreRecord{
57+
HighScore: math.MaxInt32,
58+
}
59+
highScore := parsers.ScoreRecord{
60+
HighScore: math.MinInt32,
61+
}
62+
63+
for _, record := range records {
64+
if record.HighScore > highScore.HighScore {
65+
highScore = record
66+
}
67+
if lowScore.HighScore < lowScore.HighScore {
68+
lowScore = record
69+
}
70+
}
71+
fmt.Printf("High score: %d from %s - congratulations!\n", highScore.HighScore, highScore.Name)
72+
fmt.Printf("Low score: %d from %s - commiserations!\n", lowScore.HighScore, lowScore.Name)
73+
}
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
package binary
2+
3+
import (
4+
"bufio"
5+
"encoding/binary"
6+
"errors"
7+
"fmt"
8+
"io"
9+
10+
"github.com/CodeYourFuture/immersive-go-course/projects/file-parsing/parsers"
11+
)
12+
13+
type Parser struct{}
14+
15+
func (p *Parser) Parse(r io.Reader) ([]parsers.ScoreRecord, error) {
16+
bufRead := bufio.NewReader(r)
17+
18+
var records []parsers.ScoreRecord
19+
20+
byteOrder, err := parseByteOrder(r)
21+
if err != nil {
22+
return nil, fmt.Errorf("failed to determine endian-ness: %w", err)
23+
}
24+
25+
for {
26+
if _, err := bufRead.Peek(1); errors.Is(err, io.EOF) {
27+
break
28+
}
29+
var score int32
30+
if err := binary.Read(bufRead, byteOrder, &score); err != nil {
31+
return nil, fmt.Errorf("failed to parse score: %w", err)
32+
}
33+
nameWithTrailingNull, err := bufRead.ReadString('\x00')
34+
if err != nil {
35+
return nil, fmt.Errorf("failed to parse name: %w", err)
36+
}
37+
name := nameWithTrailingNull[:len(nameWithTrailingNull)-1]
38+
records = append(records, parsers.ScoreRecord{
39+
Name: name,
40+
HighScore: score,
41+
})
42+
}
43+
44+
return records, nil
45+
}
46+
47+
// parseByteOrder consumes exactly two bytes from r and interprets them as a
// byte-order mark: FE FF selects big-endian and FF FE selects little-endian.
//
// If two bytes cannot be read, the error from io.ReadFull is returned as-is.
// Any other pair of bytes yields an error.
func parseByteOrder(r io.Reader) (binary.ByteOrder, error) {
	var mark [2]byte
	if _, err := io.ReadFull(r, mark[:]); err != nil {
		return nil, err
	}

	switch mark {
	case [2]byte{0xFE, 0xFF}:
		return binary.BigEndian, nil
	case [2]byte{0xFF, 0xFE}:
		return binary.LittleEndian, nil
	}
	return nil, fmt.Errorf("didn't recognise byte-order mark")
}
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
package binary
2+
3+
import (
4+
"bytes"
5+
"encoding/binary"
6+
"testing"
7+
8+
"github.com/stretchr/testify/require"
9+
)
10+
11+
func TestParseByteOrderBE(t *testing.T) {
12+
buf := bytes.NewBuffer([]byte("\xFE\xFF"))
13+
bo, err := parseByteOrder(buf)
14+
require.NoError(t, err)
15+
require.Equal(t, binary.BigEndian, bo)
16+
}
17+
18+
func TestParseByteOrderLE(t *testing.T) {
19+
buf := bytes.NewBuffer([]byte("\xFF\xFE"))
20+
bo, err := parseByteOrder(buf)
21+
require.NoError(t, err)
22+
require.Equal(t, binary.LittleEndian, bo)
23+
}
24+
25+
func TestParseByteOrderWrong(t *testing.T) {
26+
buf := bytes.NewBuffer([]byte("\xFF\xFF"))
27+
_, err := parseByteOrder(buf)
28+
require.ErrorContains(t, err, "didn't recognise byte-order mark")
29+
}
30+
31+
func TestParseByteOrderNotEnoughBytes(t *testing.T) {
32+
buf := bytes.NewBuffer([]byte("\xFF"))
33+
_, err := parseByteOrder(buf)
34+
require.ErrorContains(t, err, "EOF")
35+
}
36+
37+
func TestNotEnoughBytesForScore(t *testing.T) {
38+
buf := bytes.NewBuffer([]byte("\xFE\xFF\x01\x00"))
39+
parser := &Parser{}
40+
_, err := parser.Parse(buf)
41+
require.ErrorContains(t, err, "failed to parse score")
42+
}
43+
44+
func TestMissingNullTerminator(t *testing.T) {
45+
buf := bytes.NewBuffer([]byte("\xFE\xFF\x01\x00\x00\x00Aya"))
46+
parser := &Parser{}
47+
_, err := parser.Parse(buf)
48+
require.ErrorContains(t, err, "failed to parse name")
49+
}
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
package csv
2+
3+
import (
4+
"encoding/csv"
5+
"errors"
6+
"fmt"
7+
"io"
8+
"strconv"
9+
10+
"github.com/CodeYourFuture/immersive-go-course/projects/file-parsing/parsers"
11+
)
12+
13+
type Parser struct{}
14+
15+
func (p *Parser) Parse(r io.Reader) ([]parsers.ScoreRecord, error) {
16+
reader := csv.NewReader(r)
17+
headerRecord, err := reader.Read()
18+
if err != nil {
19+
return nil, err
20+
}
21+
nameIndex := -1
22+
highScoreIndex := -1
23+
for i, col := range headerRecord {
24+
if col == "name" {
25+
nameIndex = i
26+
} else if col == "high score" {
27+
highScoreIndex = i
28+
} else {
29+
return nil, fmt.Errorf("unexpected header %q - expected %q and %q", col, "name", "high score")
30+
}
31+
}
32+
if nameIndex == -1 || highScoreIndex == -1 {
33+
return nil, fmt.Errorf("incorrect headers - expected to find %q and %q", "name", "high score")
34+
}
35+
36+
var records []parsers.ScoreRecord
37+
for {
38+
record, err := reader.Read()
39+
if err != nil {
40+
if errors.Is(err, io.EOF) {
41+
break
42+
} else {
43+
return nil, err
44+
}
45+
}
46+
highScoreString := record[highScoreIndex]
47+
highScore, err := strconv.ParseInt(highScoreString, 10, 32)
48+
if err != nil {
49+
return nil, fmt.Errorf("saw high score which wasn't an int32: %q: %w", highScoreString, err)
50+
}
51+
records = append(records, parsers.ScoreRecord{
52+
Name: record[nameIndex],
53+
HighScore: int32(highScore),
54+
})
55+
}
56+
return records, nil
57+
}
58+
59+
// TODO: Add some edge-case tests
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
package csv_test
2+
3+
import (
4+
"strings"
5+
"testing"
6+
7+
"github.com/CodeYourFuture/immersive-go-course/projects/file-parsing/parsers/csv"
8+
"github.com/stretchr/testify/require"
9+
)
10+
11+
func TestTooManyColumns(t *testing.T) {
12+
parser := &csv.Parser{}
13+
_, err := parser.Parse(strings.NewReader("name,high score,opponent\nAya,12,Prisha\n"))
14+
require.ErrorContains(t, err, "unexpected header \"opponent\"")
15+
}
16+
17+
func TestNotEnoughColumns(t *testing.T) {
18+
parser := &csv.Parser{}
19+
_, err := parser.Parse(strings.NewReader("name\nAya\n"))
20+
require.ErrorContains(t, err, "incorrect headers")
21+
require.ErrorContains(t, err, "high score")
22+
}
23+
24+
func TestWrongColumns(t *testing.T) {
25+
parser := &csv.Parser{}
26+
_, err := parser.Parse(strings.NewReader("name,low score\nAya,12\n"))
27+
require.ErrorContains(t, err, "unexpected header")
28+
require.ErrorContains(t, err, "low score")
29+
require.ErrorContains(t, err, "high score")
30+
}
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
package parsers_test
2+
3+
import (
4+
"os"
5+
"testing"
6+
7+
"github.com/CodeYourFuture/immersive-go-course/projects/file-parsing/parsers"
8+
"github.com/CodeYourFuture/immersive-go-course/projects/file-parsing/parsers/binary"
9+
"github.com/CodeYourFuture/immersive-go-course/projects/file-parsing/parsers/csv"
10+
"github.com/CodeYourFuture/immersive-go-course/projects/file-parsing/parsers/json"
11+
"github.com/CodeYourFuture/immersive-go-course/projects/file-parsing/parsers/repeated_json"
12+
"github.com/stretchr/testify/require"
13+
)
14+
15+
func TestExample(t *testing.T) {
16+
wantRecords := []parsers.ScoreRecord{
17+
{Name: "Aya", HighScore: 10},
18+
{Name: "Prisha", HighScore: 30},
19+
{Name: "Charlie", HighScore: -1},
20+
{Name: "Margot", HighScore: 25},
21+
}
22+
23+
for name, tc := range map[string]struct {
24+
filename string
25+
parser parsers.Parser
26+
}{
27+
"binary-be": {
28+
filename: "custom-binary-be.bin",
29+
parser: &binary.Parser{},
30+
},
31+
"binary-le": {
32+
filename: "custom-binary-le.bin",
33+
parser: &binary.Parser{},
34+
},
35+
"csv": {
36+
filename: "data.csv",
37+
parser: &csv.Parser{},
38+
},
39+
"json": {
40+
filename: "json.txt",
41+
parser: &json.Parser{},
42+
},
43+
"repeated_json": {
44+
filename: "repeated-json.txt",
45+
parser: &repeated_json.Parser{},
46+
},
47+
} {
48+
f, err := os.Open("../examples/" + tc.filename)
49+
require.NoError(t, err, "%s: failed to open %s", name, tc.filename)
50+
gotRecords, err := tc.parser.Parse(f)
51+
require.NoError(t, err, "%s: failed to parse %s", name, tc.filename)
52+
require.Equal(t, wantRecords, gotRecords, "%s: wrong records returned", name)
53+
}
54+
}
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
package json
2+
3+
import (
4+
"encoding/json"
5+
"io"
6+
7+
"github.com/CodeYourFuture/immersive-go-course/projects/file-parsing/parsers"
8+
)
9+
10+
type Parser struct{}
11+
12+
func (p *Parser) Parse(r io.Reader) ([]parsers.ScoreRecord, error) {
13+
var records []parsers.ScoreRecord
14+
if err := json.NewDecoder(r).Decode(&records); err != nil {
15+
return nil, err
16+
}
17+
return records, nil
18+
}
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
package parsers
2+
3+
import (
4+
"io"
5+
)
6+
7+
// ScoreRecord pairs a player's name with a score. The struct tags define the
// keys used when a record is encoded to or decoded from JSON.
type ScoreRecord struct {
	// Name is stored under the JSON key "name".
	Name string `json:"name"`
	// HighScore is stored under the JSON key "high_score".
	HighScore int32 `json:"high_score"`
}
11+
12+
type Parser interface {
13+
Parse(file io.Reader) ([]ScoreRecord, error)
14+
}

0 commit comments

Comments
 (0)