Skip to content

Fix up errors in unmarshalling strings that are escaped #3

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 18 additions & 21 deletions unmarshal.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
// in which case the surrounding double quotes '"' (0x22) are removed before
// processing. Inside a quoted field a double quote may be escaped by a preceding
// second double quote which will be removed during parsing.
//
package csv

import (
Expand Down Expand Up @@ -164,15 +163,14 @@ func (d *Decoder) Buffer(buf []byte) *Decoder {
// is called for each record. Otherwise, CSV record fields are assigned to the
// struct fields with a corresponding name in their csv struct tag.
//
// // CSV field "name" will be assigned to struct field "Field".
// Field int64 `csv:"name"`
// // CSV field "name" will be assigned to struct field "Field".
// Field int64 `csv:"name"`
//
// // Field is used to store all unmapped CSV fields.
// Field map[string]string `csv:",any"`
// // Field is used to store all unmapped CSV fields.
// Field map[string]string `csv:",any"`
//
// The special flag 'any' can be used on a map or any other field type implementing
// the TextUnmarshaler interface to capture all unmapped CSV fields of a record.
//
func Unmarshal(data []byte, v interface{}) error {
	// Expose the byte slice as a reader so a Decoder can be built on top of it;
	// the decoding itself is delegated entirely to Decoder.Decode.
	dec := NewDecoder(bytes.NewReader(data))
	return dec.Decode(v)
}
Expand All @@ -185,19 +183,19 @@ func Unmarshal(data []byte, v interface{}) error {
//
// The canonical way of using ReadLine is (error handling omitted)
//
// dec := csv.NewDecoder(r)
// line, _ := dec.ReadLine()
// head, _ := dec.DecodeHeader(line)
// for {
// line, err = dec.ReadLine()
// if err != nil {
// return err
// }
// if line == "" {
// break
// }
// // process the next record here
// }
// dec := csv.NewDecoder(r)
// line, _ := dec.ReadLine()
// head, _ := dec.DecodeHeader(line)
// for {
// line, err = dec.ReadLine()
// if err != nil {
// return err
// }
// if line == "" {
// break
// }
// // process the next record here
// }
func (d *Decoder) ReadLine() (string, error) {
for d.s.Scan() {
line := d.s.Text()
Expand Down Expand Up @@ -324,13 +322,12 @@ func (d *Decoder) unmarshal(val reflect.Value, line string) error {
if merged == "" {
merged += string(d.sep)
} else {
merged += string(d.sep)
combined = append(combined, merged)
merged = ""
}
case len(v) >= 2 && strings.HasPrefix(v, Wrapper) && strings.HasSuffix(v, Wrapper):
// (1) .. ,"", .. (2) ..," text text ", ..
combined = append(combined, v[1:len(v)])
combined = append(combined, v[1:len(v)-1])
merged = ""
case strings.HasPrefix(v, Wrapper):
// .. ," text, more text", .. (1st part)
Expand Down
70 changes: 66 additions & 4 deletions unmarshal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,10 +79,14 @@ func (x SpecialStruct) String() string {
const (
CsvWithHeader = `s,i,f,b
Hello,42,23.45,true`
CsvWithoutHeader = `Hello,true,42,23.45`
CsvWhitespace = ` Hello , true , 42 , 23.45`
CsvSemicolon = `Hello;true;42;23.45`
CsvComment = `# Comment line
CsvWithoutHeader = `Hello,true,42,23.45`
CsvWhitespace = ` Hello , true , 42 , 23.45`
CsvSemicolon = `Hello;true;42;23.45`
CsvQuotedString = `"Hello",true,42,23.45`
CsvQuotedWhiteSpace = `"Hello World",false,43,24.56`
CsvQuotedSeparator = `"Hello,World",false,43,24.56`
CsvQuotedSeparatorOnly = `",",false,43,24.56`
CsvComment = `# Comment line
Hello,true,42,23.45
#
# another comment
Expand All @@ -108,6 +112,8 @@ var (
A1 = A{"Hello", true, 42, 23.45}
A2 = A{"Hello World", false, 43, 24.56}
A3 = A{" Hello ", true, 42, 23.45}
A4 = A{"Hello,World", false, 43, 24.56}
A5 = A{",", false, 43, 24.56}
E1 = A{"", true, 42, 23.45}
E2 = A{"Hello", false, 42, 23.45}
E3 = A{"Hello", true, 0, 23.45}
Expand Down Expand Up @@ -370,6 +376,62 @@ func TestUnmarshalWithSeparator(t *testing.T) {
CheckA(t, a[0], A1)
}

func TestUnmarshalQuotedString(t *testing.T) {
r := bytes.NewReader([]byte(CsvQuotedString))
dec := NewDecoder(r).Header(false)
a := make([]*A, 0)
if err := dec.Decode(&a); err != nil {
t.Error(err)
}
if len(a) != 1 {
t.Errorf("invalid record count, got=%d expected=%d", len(a), 1)
return
}
CheckA(t, a[0], A1)
}

func TestUnmarshalQuotedWhiteSpace(t *testing.T) {
r := bytes.NewReader([]byte(CsvQuotedWhiteSpace))
dec := NewDecoder(r).Header(false)
a := make([]*A, 0)
if err := dec.Decode(&a); err != nil {
t.Error(err)
}
if len(a) != 1 {
t.Errorf("invalid record count, got=%d expected=%d", len(a), 1)
return
}
CheckA(t, a[0], A2)
}

func TestUnmarshalQuotedSeparator(t *testing.T) {
r := bytes.NewReader([]byte(CsvQuotedSeparator))
dec := NewDecoder(r).Header(false)
a := make([]*A, 0)
if err := dec.Decode(&a); err != nil {
t.Error(err)
}
if len(a) != 1 {
t.Errorf("invalid record count, got=%d expected=%d", len(a), 1)
return
}
CheckA(t, a[0], A4)
}

func TestUnmarshalQuotedSeparatorOnly(t *testing.T) {
r := bytes.NewReader([]byte(CsvQuotedSeparatorOnly))
dec := NewDecoder(r).Header(false)
a := make([]*A, 0)
if err := dec.Decode(&a); err != nil {
t.Error(err)
}
if len(a) != 1 {
t.Errorf("invalid record count, got=%d expected=%d", len(a), 1)
return
}
CheckA(t, a[0], A5)
}

func TestUnmarshalWithComments(t *testing.T) {
r := bytes.NewReader([]byte(CsvComment))
dec := NewDecoder(r).Header(false)
Expand Down