Skip to content

Commit

Permalink
lighting/parser: align NULL and ESCAPED BY with LOAD DATA (pingcap#40909
Browse files Browse the repository at this point in the history
  • Loading branch information
lance6716 authored and blacktear23 committed Feb 15, 2023
1 parent 767e623 commit 302c3a2
Show file tree
Hide file tree
Showing 11 changed files with 422 additions and 119 deletions.
78 changes: 61 additions & 17 deletions br/pkg/lightning/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -529,20 +529,48 @@ type PostRestore struct {
Compact bool `toml:"compact" json:"compact"`
}

// StringOrStringSlice can unmarshal a TOML string as string slice with one element.
type StringOrStringSlice []string

func (s *StringOrStringSlice) UnmarshalTOML(in interface{}) error {
switch v := in.(type) {
case string:
*s = []string{v}
case []interface{}:
*s = make([]string, 0, len(v))
for _, vv := range v {
vs, ok := vv.(string)
if !ok {
return errors.Errorf("invalid string slice '%v'", in)
}
*s = append(*s, vs)
}
default:
return errors.Errorf("invalid string slice '%v'", in)
}
return nil
}

type CSVConfig struct {
// Separator, Delimiter and Terminator should all be in utf8mb4 encoding.
Separator string `toml:"separator" json:"separator"`
Delimiter string `toml:"delimiter" json:"delimiter"`
Terminator string `toml:"terminator" json:"terminator"`
Null string `toml:"null" json:"null"`
Header bool `toml:"header" json:"header"`
HeaderSchemaMatch bool `toml:"header-schema-match" json:"header-schema-match"`
TrimLastSep bool `toml:"trim-last-separator" json:"trim-last-separator"`
NotNull bool `toml:"not-null" json:"not-null"`
BackslashEscape bool `toml:"backslash-escape" json:"backslash-escape"`
Separator string `toml:"separator" json:"separator"`
Delimiter string `toml:"delimiter" json:"delimiter"`
Terminator string `toml:"terminator" json:"terminator"`
Null StringOrStringSlice `toml:"null" json:"null"`
Header bool `toml:"header" json:"header"`
HeaderSchemaMatch bool `toml:"header-schema-match" json:"header-schema-match"`
TrimLastSep bool `toml:"trim-last-separator" json:"trim-last-separator"`
NotNull bool `toml:"not-null" json:"not-null"`
// deprecated, use `escaped-by` instead.
BackslashEscape bool `toml:"backslash-escape" json:"backslash-escape"`
// EscapedBy has higher priority than BackslashEscape, currently it must be a single character if set.
EscapedBy string `toml:"escaped-by" json:"escaped-by"`
// hide these options for lightning configuration file, they can only be used by LOAD DATA
// https://dev.mysql.com/doc/refman/8.0/en/load-data.html#load-data-field-line-handling
StartingBy string `toml:"-" json:"-"`
// For non-empty Delimiter (for example quotes), null elements inside quotes are not considered as null except for
// `\N` (when escape-by is `\`). That is to say, `\N` is special for null because it always means null.
QuotedNullIsText bool
}

type MydumperRuntime struct {
Expand Down Expand Up @@ -802,8 +830,9 @@ func NewConfig() *Config {
Header: true,
HeaderSchemaMatch: true,
NotNull: false,
Null: `\N`,
Null: []string{`\N`},
BackslashEscape: true,
EscapedBy: `\`,
TrimLastSep: false,
},
StrictFormat: false,
Expand Down Expand Up @@ -935,15 +964,30 @@ func (cfg *Config) Adjust(ctx context.Context) error {
return common.ErrInvalidConfig.GenWithStack("`mydumper.csv.separator` and `mydumper.csv.delimiter` must not be prefix of each other")
}

if csv.BackslashEscape {
if csv.Separator == `\` {
return common.ErrInvalidConfig.GenWithStack("cannot use '\\' as CSV separator when `mydumper.csv.backslash-escape` is true")
if len(csv.EscapedBy) > 1 {
return common.ErrInvalidConfig.GenWithStack("`mydumper.csv.escaped-by` must be empty or a single character")
}
if csv.BackslashEscape && csv.EscapedBy == "" {
csv.EscapedBy = `\`
}
if !csv.BackslashEscape && csv.EscapedBy == `\` {
csv.EscapedBy = ""
}

// keep compatibility with old behaviour
if !csv.NotNull && len(csv.Null) == 0 {
csv.Null = []string{""}
}

if len(csv.EscapedBy) > 0 {
if csv.Separator == csv.EscapedBy {
return common.ErrInvalidConfig.GenWithStack("cannot use '%s' both as CSV separator and `mydumper.csv.escaped-by`", csv.EscapedBy)
}
if csv.Delimiter == `\` {
return common.ErrInvalidConfig.GenWithStack("cannot use '\\' as CSV delimiter when `mydumper.csv.backslash-escape` is true")
if csv.Delimiter == csv.EscapedBy {
return common.ErrInvalidConfig.GenWithStack("cannot use '%s' both as CSV delimiter and `mydumper.csv.escaped-by`", csv.EscapedBy)
}
if csv.Terminator == `\` {
return common.ErrInvalidConfig.GenWithStack("cannot use '\\' as CSV terminator when `mydumper.csv.backslash-escape` is true")
if csv.Terminator == csv.EscapedBy {
return common.ErrInvalidConfig.GenWithStack("cannot use '%s' both as CSV terminator and `mydumper.csv.escaped-by`", csv.EscapedBy)
}
}

Expand Down
27 changes: 23 additions & 4 deletions br/pkg/lightning/config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -477,15 +477,15 @@ func TestInvalidCSV(t *testing.T) {
separator = '\'
backslash-escape = true
`,
err: "[Lightning:Config:ErrInvalidConfig]cannot use '\\' as CSV separator when `mydumper.csv.backslash-escape` is true",
err: "[Lightning:Config:ErrInvalidConfig]cannot use '\\' both as CSV separator and `mydumper.csv.escaped-by`",
},
{
input: `
[mydumper.csv]
delimiter = '\'
backslash-escape = true
escaped-by = '\'
`,
err: "[Lightning:Config:ErrInvalidConfig]cannot use '\\' as CSV delimiter when `mydumper.csv.backslash-escape` is true",
err: "[Lightning:Config:ErrInvalidConfig]cannot use '\\' both as CSV delimiter and `mydumper.csv.escaped-by`",
},
{
input: `
Expand Down Expand Up @@ -528,7 +528,7 @@ func TestInvalidCSV(t *testing.T) {
if tc.err != "" {
require.EqualError(t, err, tc.err, comment)
} else {
require.NoError(t, err)
require.NoError(t, err, tc.input)
}
}
}
Expand All @@ -543,6 +543,25 @@ func TestInvalidTOML(t *testing.T) {
require.EqualError(t, err, "toml: line 2: expected '.' or '=', but got '[' instead")
}

func TestStringOrStringSlice(t *testing.T) {
cfg := &config.Config{}
err := cfg.LoadFromTOML([]byte(`
[mydumper.csv]
null = '\N'
`))
require.NoError(t, err)
err = cfg.LoadFromTOML([]byte(`
[mydumper.csv]
null = [ '\N', 'NULL' ]
`))
require.NoError(t, err)
err = cfg.LoadFromTOML([]byte(`
[mydumper.csv]
null = [ '\N', 123 ]
`))
require.ErrorContains(t, err, "invalid string slice")
}

func TestTOMLUnusedKeys(t *testing.T) {
cfg := &config.Config{}
err := cfg.LoadFromTOML([]byte(`
Expand Down
1 change: 1 addition & 0 deletions br/pkg/lightning/mydump/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ go_library(
"@com_github_xitongsys_parquet_go//parquet",
"@com_github_xitongsys_parquet_go//reader",
"@com_github_xitongsys_parquet_go//source",
"@org_golang_x_exp//slices",
"@org_golang_x_text//encoding",
"@org_golang_x_text//encoding/simplifiedchinese",
"@org_uber_go_zap//:zap",
Expand Down
Loading

0 comments on commit 302c3a2

Please sign in to comment.