Skip to content
This repository has been archived by the owner on Dec 8, 2021. It is now read-only.

config: allow four byte-size config to be specified using human-readable units ("100 GiB") #471

Merged
merged 4 commits into from
Nov 13, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ default: clean lightning lightning-ctl checksuccess
prepare:
$(PREPARE_MOD)

finish-prepare:
$(FINISH_MOD)

clean:
rm -f $(LIGHTNING_BIN) $(LIGHTNING_CTRL_BIN) $(FAILPOINT_CTL_BIN) $(REVIVE_BIN) $(VFSGENDEV_BIN) go.mod go.sum

Expand Down
1 change: 1 addition & 0 deletions go.mod1
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ require (
github.com/cockroachdb/pebble v0.0.0-20201023120638-f1224da22976
github.com/coreos/go-semver v0.3.0
github.com/dgraph-io/ristretto v0.0.2-0.20200115201040-8f368f2f2ab3 // indirect
github.com/docker/go-units v0.4.0
github.com/fsouza/fake-gcs-server v1.19.0 // indirect
github.com/go-sql-driver/mysql v1.5.0
github.com/gogo/protobuf v1.3.1
Expand Down
44 changes: 44 additions & 0 deletions lightning/config/bytesize.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// Copyright 2020 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package config

import (
"encoding/json"

"github.com/docker/go-units"
)

// ByteSize is a defined type whose underlying type is int64, holding a size in
// bytes. Through UnmarshalText it accepts human-friendly strings like '10G'
// when read from TOML.
type ByteSize int64

// UnmarshalText implements encoding.TextUnmarshaler.
//
// The text is handed to units.RAMInBytes; on success the parsed byte count is
// stored in *size. On failure *size is left untouched and the parser's error
// is returned as-is (not wrapped).
func (size *ByteSize) UnmarshalText(b []byte) error {
	parsed, err := units.RAMInBytes(string(b))
	if err == nil {
		*size = ByteSize(parsed)
	}
	return err
}

// UnmarshalJSON implements json.Unmarshaler (for testing). It decodes the JSON
// value into an int64, so human-readable strings are not accepted here.
func (size *ByteSize) UnmarshalJSON(b []byte) error {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shall we also implement MarshalText and MarshalJSON?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems no need to implement the Marshal methods as the unit test passes without them.

var res int64
if err := json.Unmarshal(b, &res); err != nil {
return err
}
*size = ByteSize(res)
return nil
}
129 changes: 129 additions & 0 deletions lightning/config/bytesize_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
// Copyright 2020 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package config_test

import (
"encoding/json"
"strings"

"github.com/BurntSushi/toml"
. "github.com/pingcap/check"

"github.com/pingcap/tidb-lightning/lightning/config"
)

// byteSizeTestSuite groups the tests for config.ByteSize decoding and encoding.
type byteSizeTestSuite struct{}

// Hand the suite to the check framework's Suite function so its Test* methods
// are picked up by the test runner.
var _ = Suite(&byteSizeTestSuite{})

// TestByteSizeTOMLDecode runs a table of single-key TOML documents through
// toml.Unmarshal and checks, per case, either the decoded ByteSize value or
// the pattern the returned error must match.
func (s *byteSizeTestSuite) TestByteSizeTOMLDecode(c *C) {
	type decodeCase struct {
		doc     string          // TOML source assigning a value to key x
		want    config.ByteSize // expected value when decoding succeeds
		wantErr string          // expected error pattern; empty means success
	}

	cases := []decodeCase{
		// Bare integers, with and without digit separators.
		{doc: "x = 10000", want: 10000},
		{doc: "x = 107_374_182_400", want: 107_374_182_400},
		// Quoted strings, with and without a unit suffix.
		{doc: "x = '10k'", want: 10 * 1024},
		{doc: "x = '10PiB'", want: 10 * 1024 * 1024 * 1024 * 1024 * 1024},
		{doc: "x = '10 KB'", want: 10 * 1024},
		{doc: "x = '32768'", want: 32768},
		// Values that must be rejected as sizes.
		{doc: "x = -1", wantErr: "invalid size: '-1'"},
		{doc: "x = 'invalid value'", wantErr: "invalid size: 'invalid value'"},
		{doc: "x = true", wantErr: "invalid size: 'true'"},
		// Fractional inputs: the expected results drop the fractional bytes.
		{doc: "x = 256.0", want: 256},
		{doc: "x = 256.9", want: 256},
		{doc: "x = 10e+9", want: 10_000_000_000},
		{doc: "x = '2.5MB'", want: 5 * 512 * 1024},
		// Other TOML value kinds.
		{doc: "x = 2020-01-01T00:00:00Z", wantErr: "invalid size: '2020-01-01T00:00:00Z'"},
		{doc: "x = ['100000']", wantErr: "toml: cannot load TOML value.*"},
		{doc: "x = { size = '100000' }", wantErr: "toml: cannot load TOML value.*"},
	}

	for _, tt := range cases {
		note := Commentf("input: `%s`", tt.doc)
		var decoded struct{ X config.ByteSize }
		decodeErr := toml.Unmarshal([]byte(tt.doc), &decoded)

		if tt.wantErr == "" {
			c.Assert(decodeErr, IsNil, note)
			c.Assert(decoded.X, Equals, tt.want, note)
			continue
		}
		c.Assert(decodeErr, ErrorMatches, tt.wantErr, note)
	}
}

// TestByteSizeTOMLAndJSONEncode checks that a ByteSize field is written as a
// plain integer by both the TOML encoder and json.Marshal.
func (s *byteSizeTestSuite) TestByteSizeTOMLAndJSONEncode(c *C) {
	value := struct {
		X config.ByteSize `toml:"x" json:"x"`
	}{X: 1048576}

	// TOML encoding.
	var tomlOut strings.Builder
	tomlErr := toml.NewEncoder(&tomlOut).Encode(value)
	c.Assert(tomlErr, IsNil)
	c.Assert(tomlOut.String(), Equals, "x = 1048576\n")

	// JSON encoding.
	jsonOut, jsonErr := json.Marshal(value)
	c.Assert(jsonErr, IsNil)
	c.Assert(string(jsonOut), Equals, `{"x":1048576}`)
}
24 changes: 12 additions & 12 deletions lightning/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -220,16 +220,16 @@ type CSVConfig struct {
}

type MydumperRuntime struct {
ReadBlockSize int64 `toml:"read-block-size" json:"read-block-size"`
BatchSize int64 `toml:"batch-size" json:"batch-size"`
ReadBlockSize ByteSize `toml:"read-block-size" json:"read-block-size"`
BatchSize ByteSize `toml:"batch-size" json:"batch-size"`
BatchImportRatio float64 `toml:"batch-import-ratio" json:"batch-import-ratio"`
SourceDir string `toml:"data-source-dir" json:"data-source-dir"`
NoSchema bool `toml:"no-schema" json:"no-schema"`
CharacterSet string `toml:"character-set" json:"character-set"`
CSV CSVConfig `toml:"csv" json:"csv"`
CaseSensitive bool `toml:"case-sensitive" json:"case-sensitive"`
StrictFormat bool `toml:"strict-format" json:"strict-format"`
MaxRegionSize int64 `toml:"max-region-size" json:"max-region-size"`
MaxRegionSize ByteSize `toml:"max-region-size" json:"max-region-size"`
Filter []string `toml:"filter" json:"filter"`
FileRouters []*FileRouteRule `toml:"files" json:"files"`
DefaultFileRules bool `toml:"default-file-rules" json:"default-file-rules"`
Expand All @@ -246,14 +246,14 @@ type FileRouteRule struct {
}

type TikvImporter struct {
Addr string `toml:"addr" json:"addr"`
Backend string `toml:"backend" json:"backend"`
OnDuplicate string `toml:"on-duplicate" json:"on-duplicate"`
MaxKVPairs int `toml:"max-kv-pairs" json:"max-kv-pairs"`
SendKVPairs int `toml:"send-kv-pairs" json:"send-kv-pairs"`
RegionSplitSize int64 `toml:"region-split-size" json:"region-split-size"`
SortedKVDir string `toml:"sorted-kv-dir" json:"sorted-kv-dir"`
RangeConcurrency int `toml:"range-concurrency" json:"range-concurrency"`
Addr string `toml:"addr" json:"addr"`
Backend string `toml:"backend" json:"backend"`
OnDuplicate string `toml:"on-duplicate" json:"on-duplicate"`
MaxKVPairs int `toml:"max-kv-pairs" json:"max-kv-pairs"`
SendKVPairs int `toml:"send-kv-pairs" json:"send-kv-pairs"`
RegionSplitSize ByteSize `toml:"region-split-size" json:"region-split-size"`
SortedKVDir string `toml:"sorted-kv-dir" json:"sorted-kv-dir"`
RangeConcurrency int `toml:"range-concurrency" json:"range-concurrency"`
}

type Checkpoint struct {
Expand Down Expand Up @@ -618,7 +618,7 @@ func (cfg *Config) Adjust() error {
if cfg.Mydumper.BatchSize <= 0 {
// if rows in source files are not sorted by primary key(if primary is number or cluster index enabled),
// the key range in each data engine may have overlap, thus a bigger engine size can somewhat alleviate it.
cfg.Mydumper.BatchSize = 100 * _G
cfg.Mydumper.BatchSize = defaultBatchSize

}
if cfg.Mydumper.BatchImportRatio < 0.0 || cfg.Mydumper.BatchImportRatio >= 1.0 {
Expand Down
20 changes: 11 additions & 9 deletions lightning/config/const.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,20 @@

package config

const (
_K = int64(1 << 10)
_M = _K << 10
_G = _M << 10
import (
"github.com/docker/go-units"
)

const (
// mydumper
ReadBlockSize int64 = 64 * _K
MinRegionSize int64 = 256 * _M
MaxRegionSize int64 = 256 * _M
SplitRegionSize int64 = 96 * _M
ReadBlockSize ByteSize = 64 * units.KiB
MinRegionSize ByteSize = 256 * units.MiB
MaxRegionSize ByteSize = 256 * units.MiB
SplitRegionSize ByteSize = 96 * units.MiB

BufferSizeScale = 5

defaultMaxAllowedPacket = 64 * 1024 * 1024
defaultMaxAllowedPacket = 64 * units.MiB

defaultBatchSize ByteSize = 100 * units.GiB
)
Loading