
Commit

support both filesize and rows arguments (pingcap#177)
* support both filesize and rows arguments

* fix bash

* add unit test for the situation that both filesize and rows are enabled

* address comment

* address comment
lichunzhu authored Nov 9, 2020
1 parent a2a0658 commit 7cacb8f
Showing 16 changed files with 153 additions and 50 deletions.
4 changes: 2 additions & 2 deletions dumpling/tests/consistency/run.sh
@@ -34,7 +34,7 @@ run_sql "insert into $DB_NAME.$TABLE_NAME values $(seq -s, 100 | sed 's/,*$//g'
wait

# check data record count
-cnt=`grep -o "(1)" ${DUMPLING_OUTPUT_DIR}/${DB_NAME}.${TABLE_NAME}.0.sql|wc -l`
+cnt=`grep -o "(1)" ${DUMPLING_OUTPUT_DIR}/${DB_NAME}.${TABLE_NAME}.000000000.sql|wc -l`
echo "1st records count is ${cnt}"
[ $cnt = 100 ]

@@ -55,6 +55,6 @@ fi
# test dumpling normally
export GO_FAILPOINTS=""
run_dumpling
-cnt=`grep -o "(1)" ${DUMPLING_OUTPUT_DIR}/${DB_NAME}.${TABLE_NAME}.0.sql|wc -l`
+cnt=`grep -o "(1)" ${DUMPLING_OUTPUT_DIR}/${DB_NAME}.${TABLE_NAME}.000000000.sql|wc -l`
echo "2nd records count is ${cnt}"
[ $cnt = 200 ]
4 changes: 2 additions & 2 deletions dumpling/tests/naughty_strings/run.sh
@@ -9,9 +9,9 @@ run_sql_file "$DUMPLING_BASE_NAME/data/naughty_strings.t-schema.sql"
run_sql_file "$DUMPLING_BASE_NAME/data/naughty_strings.t.sql"
run_dumpling --escape-backslash=false
# FIXME should compare the schemas too, but they differ too much among MySQL versions.
-diff "$DUMPLING_BASE_NAME/expect/naughty_strings.t.sql" "$DUMPLING_OUTPUT_DIR/naughty_strings.t.0.sql"
+diff "$DUMPLING_BASE_NAME/expect/naughty_strings.t.sql" "$DUMPLING_OUTPUT_DIR/naughty_strings.t.000000000.sql"
run_sql_file "$DUMPLING_BASE_NAME/data/naughty_strings.escape-schema.sql"
run_sql_file "$DUMPLING_BASE_NAME/data/naughty_strings.escape.sql"
run_dumpling --escape-backslash=true
# FIXME should compare the schemas too, but they differ too much among MySQL versions.
-diff "$DUMPLING_BASE_NAME/expect/naughty_strings.escape.sql" "$DUMPLING_OUTPUT_DIR/naughty_strings.escape.0.sql"
+diff "$DUMPLING_BASE_NAME/expect/naughty_strings.escape.sql" "$DUMPLING_OUTPUT_DIR/naughty_strings.escape.000000000.sql"
2 changes: 1 addition & 1 deletion dumpling/tests/null_unique_index/run.sh
@@ -19,6 +19,6 @@ export DUMPLING_TEST_DATABASE=$DB_NAME
run_dumpling -r 1

data="NULL"
-cnt=$(sed "s/$data/$data\n/g" $DUMPLING_OUTPUT_DIR/$DB_NAME.t.1.sql | grep -c "$data") || true
+cnt=$(sed "s/$data/$data\n/g" $DUMPLING_OUTPUT_DIR/$DB_NAME.t.000000001.sql | grep -c "$data") || true
[ $cnt = 1 ]

4 changes: 2 additions & 2 deletions dumpling/tests/primary_key/run.sh
@@ -16,6 +16,6 @@ for file_path in "$DUMPLING_BASE_NAME"/data/*; do
base_name=$(basename "$file_path")
table_name="${base_name%.sql}"
file_should_exist "$DUMPLING_BASE_NAME/result/$table_name.sql"
-file_should_exist "$DUMPLING_OUTPUT_DIR/primary_key.$table_name.0.sql"
-diff "$DUMPLING_BASE_NAME/result/$table_name.sql" "$DUMPLING_OUTPUT_DIR/primary_key.$table_name.0.sql"
+file_should_exist "$DUMPLING_OUTPUT_DIR/primary_key.$table_name.000000000.sql"
+diff "$DUMPLING_BASE_NAME/result/$table_name.sql" "$DUMPLING_OUTPUT_DIR/primary_key.$table_name.000000000.sql"
done
4 changes: 2 additions & 2 deletions dumpling/tests/quote/run.sh
@@ -3,7 +3,7 @@
set -eu

mkdir -p "$DUMPLING_OUTPUT_DIR"/data
-cp "$DUMPLING_BASE_NAME/data/quote-database.quote-table.0.sql" "$DUMPLING_OUTPUT_DIR/data/quo\`te%2Fdatabase.quo\`te%2Ftable.0.sql"
+cp "$DUMPLING_BASE_NAME/data/quote-database.quote-table.000000000.sql" "$DUMPLING_OUTPUT_DIR/data/quo\`te%2Fdatabase.quo\`te%2Ftable.000000000.sql"
cp "$DUMPLING_BASE_NAME/data/quote-database.quote-table-schema.sql" "$DUMPLING_OUTPUT_DIR/data/quo\`te%2Fdatabase.quo\`te%2Ftable-schema.sql"
cp "$DUMPLING_BASE_NAME/data/quote-database-schema-create.sql" "$DUMPLING_OUTPUT_DIR/data/quo\`te%2Fdatabase-schema-create.sql"

@@ -13,7 +13,7 @@ run_sql_file "$DUMPLING_OUTPUT_DIR/data/quo\`te%2Fdatabase-schema-create.sql"
export DUMPLING_TEST_DATABASE=$db

run_sql_file "$DUMPLING_OUTPUT_DIR/data/quo\`te%2Fdatabase.quo\`te%2Ftable-schema.sql"
-run_sql_file "$DUMPLING_OUTPUT_DIR/data/quo\`te%2Fdatabase.quo\`te%2Ftable.0.sql"
+run_sql_file "$DUMPLING_OUTPUT_DIR/data/quo\`te%2Fdatabase.quo\`te%2Ftable.000000000.sql"

run_dumpling

File renamed without changes.
45 changes: 39 additions & 6 deletions dumpling/tests/rows/run.sh
@@ -19,19 +19,21 @@ run_sql "create database $DB_NAME;"
run_sql "create table $DB_NAME.$TABLE_NAME (id int not null auto_increment primary key, a varchar(24));"

# insert 100 records
-run_sql_file "$cur/data/rows.t.0.sql"
+run_sql_file "$cur/data/rows.t.000000000.sql"

+# make sure the estimated count is accurate
+run_sql "analyze table $DB_NAME.$TABLE_NAME"

# dumping
export DUMPLING_TEST_DATABASE=$DB_NAME
run_dumpling --rows 10 --loglevel debug

# the dumping result is expected to be:
# 10 files for insertion
-# FIXME the result of EXPLAIN SELECT `id` FROM `rows`.`t` randomly equal to 1 or 100, this could affect on file num.
-# file_num=$(find "$DUMPLING_OUTPUT_DIR" -maxdepth 1 -iname "$DB_NAME.$TABLE_NAME.*.sql" | wc -l)
-# if [ "$file_num" -ne 10 ]; then
-# echo "obtain file number: $file_num, but expect: 10" && exit 1
-# fi
+file_num=$(find "$DUMPLING_OUTPUT_DIR" -maxdepth 1 -iname "$DB_NAME.$TABLE_NAME.*.sql" | wc -l)
+if [ "$file_num" -ne 10 ]; then
+  echo "obtain file number: $file_num, but expect: 10" && exit 1
+fi

cat "$cur/conf/lightning.toml"
# use lightning import data to tidb
@@ -40,4 +42,35 @@ run_lightning $cur/conf/lightning.toml
# check mysql and tidb data
check_sync_diff $cur/conf/diff_config.toml

+# test dumpling with both rows and filesize
+rm -rf "$DUMPLING_OUTPUT_DIR"
+run_dumpling --rows 10 --filesize 100B --loglevel debug
+# the dumping result is expected to be:
+# 50 files for insertion
+file_num=$(find "$DUMPLING_OUTPUT_DIR" -maxdepth 1 -iname "$DB_NAME.$TABLE_NAME.*.sql" | wc -l)
+if [ "$file_num" -ne 50 ]; then
+  echo "obtain file number: $file_num, but expect: 50" && exit 1
+fi
+
+for i in `seq 1 10`
+do
+  r=$(printf "%02d" $i)
+  for j in `seq 0 4`
+  do
+    file_name="$DUMPLING_OUTPUT_DIR/$DB_NAME.$TABLE_NAME.0000000${r}000${j}.sql"
+    if [ ! -f "$file_name" ]; then
+      echo "file $file_name doesn't exist, which is not expected" && exit 1
+    fi
+  done
+done
+
+# drop database on tidb
+export DUMPLING_TEST_PORT=4000
+run_sql "drop database if exists \`$DB_NAME\`;"
+
+cat "$cur/conf/lightning.toml"
+# use lightning import data to tidb
+run_lightning $cur/conf/lightning.toml
+
+# check mysql and tidb data
+check_sync_diff $cur/conf/diff_config.toml
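Note: the 13-digit suffixes asserted above are the 9-digit chunk index immediately followed by a 4-digit per-chunk file counter (the "%09d%04d" format introduced in writer.go below). A minimal runnable Go sketch that prints the names this test expects; the counts mirror the test (100 rows, --rows 10, --filesize 100B), everything else is illustrative:

```go
package main

import "fmt"

func main() {
	// 10 chunks of 10 rows each, and --filesize 100B splits every
	// chunk into 5 files in this test.
	for chunk := 1; chunk <= 10; chunk++ {
		for file := 0; file < 5; file++ {
			// 9-digit chunk index + 4-digit file index, as in "%09d%04d"
			fmt.Printf("rows.t.%09d%04d.sql\n", chunk, file)
		}
	}
	// prints rows.t.0000000010000.sql ... rows.t.0000000100004.sql
}
```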
4 changes: 2 additions & 2 deletions dumpling/tests/s3/run.sh
@@ -45,8 +45,8 @@ ls "${HOST_DIR}"

curl -o "${HOST_DIR}/s3-schema-create.sql" http://localhost:5000/mybucket/dump/s3-schema-create.sql
curl -o "${HOST_DIR}/s3.t-schema.sql" http://localhost:5000/mybucket/dump/s3.t-schema.sql
-curl -o "${HOST_DIR}/s3.t.0.sql" http://localhost:5000/mybucket/dump/s3.t.0.sql
+curl -o "${HOST_DIR}/s3.t.000000000.sql" http://localhost:5000/mybucket/dump/s3.t.000000000.sql

file_should_exist "$HOST_DIR/s3-schema-create.sql"
file_should_exist "$HOST_DIR/s3.t-schema.sql"
-file_should_exist "$HOST_DIR/s3.t.0.sql"
+file_should_exist "$HOST_DIR/s3.t.000000000.sql"
2 changes: 1 addition & 1 deletion dumpling/tests/tls/run.sh
@@ -24,4 +24,4 @@ run_dumpling --ca "$DUMPLING_TEST_DIR/ca.pem" --cert "$DUMPLING_TEST_DIR/dumplin

file_should_exist "$DUMPLING_OUTPUT_DIR/tls-schema-create.sql"
file_should_exist "$DUMPLING_OUTPUT_DIR/tls.t-schema.sql"
-file_should_exist "$DUMPLING_OUTPUT_DIR/tls.t.0.sql"
+file_should_exist "$DUMPLING_OUTPUT_DIR/tls.t.000000000.sql"
2 changes: 1 addition & 1 deletion dumpling/v4/export/config.go
@@ -95,7 +95,7 @@ func DefaultConfig() *Config {
NoViews: true,
Rows: UnspecifiedSize,
Where: "",
-FileType: "SQL",
+FileType: "sql",
NoHeader: false,
NoSchemas: false,
NoData: false,
4 changes: 3 additions & 1 deletion dumpling/v4/export/dump.go
@@ -3,7 +3,6 @@ package export
import (
"context"
"database/sql"
-"errors"
"strconv"
"strings"
"time"
@@ -12,6 +11,7 @@ import (

_ "github.com/go-sql-driver/mysql"
"github.com/pingcap/br/pkg/utils"
+"github.com/pingcap/errors"
"github.com/pingcap/failpoint"
pd "github.com/tikv/pd/client"
"go.uber.org/zap"
@@ -218,6 +218,8 @@ func Dump(pCtx context.Context, conf *Config) (err error) {
writer = SQLWriter{SimpleWriter: simpleWriter}
case "csv":
writer = CSVWriter{SimpleWriter: simpleWriter}
+default:
+return errors.Errorf("unsupported filetype %s", conf.FileType)
}

if conf.Sql == "" {
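Note: the import swap above (the standard library's errors for github.com/pingcap/errors) is what makes the new default branch informative: like its pkg/errors ancestor, pingcap/errors attaches a stack trace to errors built with Errorf. A small runnable sketch, with a made-up filetype value:

```go
package main

import (
	"fmt"

	"github.com/pingcap/errors"
)

func main() {
	// errors.Errorf wraps fmt-style formatting and records the call stack.
	err := errors.Errorf("unsupported filetype %s", "parquet")
	fmt.Printf("%v\n", err)  // prints the message only
	fmt.Printf("%+v\n", err) // prints the message plus the stack trace
}
```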
4 changes: 0 additions & 4 deletions dumpling/v4/export/prepare.go
@@ -99,10 +99,6 @@ func adjustConfig(ctx context.Context, conf *Config) error {
}
}

-if conf.Rows != UnspecifiedSize {
-// Disable filesize if rows was set
-conf.FileSize = UnspecifiedSize
-}
if conf.SessionParams == nil {
conf.SessionParams = make(map[string]interface{})
}
4 changes: 2 additions & 2 deletions dumpling/v4/export/prepare_test.go
@@ -119,7 +119,7 @@ func (s *testPrepareSuite) TestAdjustConfig(c *C) {
c.Assert(adjustConfig(nil, conf), IsNil)
conf.Sql = ""
conf.Rows = 5000
-conf.FileSize = uint64(5000)
+conf.FileSize = 5000
c.Assert(adjustConfig(nil, conf), IsNil)
-c.Assert(conf.FileSize, Equals, uint64(UnspecifiedSize))
+c.Assert(conf.FileSize, Equals, uint64(5000))
}
53 changes: 34 additions & 19 deletions dumpling/v4/export/writer.go
@@ -3,6 +3,8 @@ package export
import (
"bytes"
"context"
+"fmt"
+"strings"
"text/template"

"github.com/pingcap/br/pkg/storage"
@@ -72,12 +74,12 @@ func (f SQLWriter) WriteTableData(ctx context.Context, ir TableDataIR) error {
fileName = fmt.Sprintf("%s.%s.%d.sql", ir.DatabaseName(), ir.TableName(), 0)
}
}*/
-namer := newOutputFileNamer(ir)
-fileName, err := namer.NextName(f.cfg.OutputFileTemplate)
+namer := newOutputFileNamer(ir, f.cfg.Rows != UnspecifiedSize, f.cfg.FileSize != UnspecifiedSize)
+fileType := strings.ToLower(f.cfg.FileType)
+fileName, err := namer.NextName(f.cfg.OutputFileTemplate, fileType)
if err != nil {
return err
}
-fileName += ".sql"
chunksIter := ir
defer chunksIter.Rows().Close()

@@ -96,11 +98,10 @@ func (f SQLWriter) WriteTableData(ctx context.Context, ir TableDataIR) error {
if f.cfg.FileSize == UnspecifiedSize {
break
}
-fileName, err = namer.NextName(f.cfg.OutputFileTemplate)
+fileName, err = namer.NextName(f.cfg.OutputFileTemplate, fileType)
if err != nil {
return err
}
-fileName += ".sql"
}
log.Debug("dumping table successfully",
zap.String("table", ir.TableName()))
@@ -126,9 +127,11 @@ func writeMetaToFile(ctx context.Context, target, metaSQL string, s storage.Exte
type CSVWriter struct{ SimpleWriter }

type outputFileNamer struct {
-Index int
-DB string
-Table string
+ChunkIndex int
+FileIndex int
+DB string
+Table string
+format string
}

type csvOption struct {
@@ -137,12 +140,21 @@ type csvOption struct {
delimiter []byte
}

-func newOutputFileNamer(ir TableDataIR) *outputFileNamer {
-return &outputFileNamer{
-Index: ir.ChunkIndex(),
+func newOutputFileNamer(ir TableDataIR, rows, fileSize bool) *outputFileNamer {
+o := &outputFileNamer{
DB: ir.DatabaseName(),
Table: ir.TableName(),
}
+o.ChunkIndex = ir.ChunkIndex()
+o.FileIndex = 0
+if rows && fileSize {
+o.format = "%09d%04d"
+} else if fileSize {
+o.format = "%09[2]d"
+} else {
+o.format = "%09[1]d"
+}
+return o
}
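Note: the single-counter formats rely on fmt's explicit argument indexes, so the namer can always pass both counters while the verb picks one: "%09[2]d" formats only the second argument (FileIndex) and "%09[1]d" only the first (ChunkIndex). A quick runnable check with illustrative values:

```go
package main

import "fmt"

func main() {
	chunk, file := 1, 3

	// both --rows and --filesize set: 9-digit chunk index + 4-digit file index
	fmt.Printf("%09d%04d\n", chunk, file) // 0000000010003

	// only --filesize set: 9-digit file index (argument 2)
	fmt.Printf("%09[2]d\n", chunk, file) // 000000003

	// only --rows set (or neither): 9-digit chunk index (argument 1)
	fmt.Printf("%09[1]d\n", chunk, file) // 000000001
}
```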

func (namer *outputFileNamer) render(tmpl *template.Template, subName string) (string, error) {
@@ -153,21 +165,25 @@ func (namer *outputFileNamer) render(tmpl *template.Template, subName string) (s
return bf.String(), nil
}

-func (namer *outputFileNamer) NextName(tmpl *template.Template) (string, error) {
+func (namer *outputFileNamer) Index() string {
+return fmt.Sprintf(namer.format, namer.ChunkIndex, namer.FileIndex)
+}
+
+func (namer *outputFileNamer) NextName(tmpl *template.Template, fileType string) (string, error) {
res, err := namer.render(tmpl, outputFileTemplateData)
-namer.Index++
-return res, err
+namer.FileIndex++
+return res + "." + fileType, err
}
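Note: Index used to be a plain int field rendered by the output file template; it is now a method, so the template receives an already-formatted string. A self-contained sketch of that interaction; the mock struct and template text are assumptions that mirror this diff, not dumpling's actual OutputFileTemplate:

```go
package main

import (
	"bytes"
	"fmt"
	"text/template"
)

type mockNamer struct {
	DB, Table  string
	ChunkIndex int
	FileIndex  int
	format     string
}

// text/template invokes this method when the template references {{.Index}}.
func (n *mockNamer) Index() string {
	return fmt.Sprintf(n.format, n.ChunkIndex, n.FileIndex)
}

func main() {
	tmpl := template.Must(template.New("data").Parse("{{.DB}}.{{.Table}}.{{.Index}}"))
	n := &mockNamer{DB: "rows", Table: "t", ChunkIndex: 1, FileIndex: 3, format: "%09d%04d"}
	var bf bytes.Buffer
	if err := tmpl.Execute(&bf, n); err != nil {
		panic(err)
	}
	fmt.Println(bf.String() + ".sql") // rows.t.0000000010003.sql
}
```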

func (f CSVWriter) WriteTableData(ctx context.Context, ir TableDataIR) error {
log.Debug("start dumping table in csv format...", zap.String("table", ir.TableName()))

-namer := newOutputFileNamer(ir)
-fileName, err := namer.NextName(f.cfg.OutputFileTemplate)
+namer := newOutputFileNamer(ir, f.cfg.Rows != UnspecifiedSize, f.cfg.FileSize != UnspecifiedSize)
+fileType := strings.ToLower(f.cfg.FileType)
+fileName, err := namer.NextName(f.cfg.OutputFileTemplate, fileType)
if err != nil {
return err
}
-fileName += ".csv"
chunksIter := ir
defer chunksIter.Rows().Close()

@@ -192,11 +208,10 @@ func (f CSVWriter) WriteTableData(ctx context.Context, ir TableDataIR) error {
if f.cfg.FileSize == UnspecifiedSize {
break
}
-fileName, err = namer.NextName(f.cfg.OutputFileTemplate)
+fileName, err = namer.NextName(f.cfg.OutputFileTemplate, fileType)
if err != nil {
return err
}
-fileName += ".csv"
}
log.Debug("dumping table in csv format successfully",
zap.String("table", ir.TableName()))
