-
Notifications
You must be signed in to change notification settings - Fork 289
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
codec(ticdc): fix some avro encoding bugs #4704
Changes from 1 commit
078c562
e85f8c3
11e8526
961134e
8a296f3
4741fc6
14b7490
b9b05cb
ee85c59
2612e5b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,6 +24,7 @@ import ( | |
model2 "github.com/pingcap/tidb/parser/model" | ||
"github.com/pingcap/tidb/parser/mysql" | ||
"github.com/pingcap/tidb/types" | ||
"github.com/pingcap/tidb/util/rowcodec" | ||
"github.com/pingcap/tiflow/cdc/model" | ||
"github.com/pingcap/tiflow/cdc/puller" | ||
"github.com/pingcap/tiflow/pkg/regionspan" | ||
|
@@ -58,6 +59,21 @@ func (s *avroBatchEncoderSuite) TearDownSuite(c *check.C) { | |
stopHTTPInterceptForTestingRegistry() | ||
} | ||
|
||
func setBinChsClnFlag(ft *types.FieldType) *types.FieldType { | ||
types.SetBinChsClnFlag(ft) | ||
return ft | ||
} | ||
|
||
func setFlag(ft *types.FieldType, flag uint) *types.FieldType { | ||
types.SetTypeFlag(&ft.Flag, flag, true) | ||
return ft | ||
} | ||
|
||
func setElems(ft *types.FieldType, elems []string) *types.FieldType { | ||
ft.Elems = elems | ||
return ft | ||
} | ||
|
||
func (s *avroBatchEncoderSuite) TestAvroEncodeOnly(c *check.C) { | ||
defer testleak.AfterTest(c)() | ||
|
||
|
@@ -85,23 +101,56 @@ func (s *avroBatchEncoderSuite) TestAvroEncodeOnly(c *check.C) { | |
{Name: "mystring5", Value: "Hello World", Type: mysql.TypeVarString}, | ||
{Name: "mystring6", Value: "Hello World", Type: mysql.TypeString}, | ||
{Name: "mystring7", Value: "Hello World", Type: mysql.TypeVarchar}, | ||
{Name: "myenum", Value: types.Enum{Value: 1, Name: "v"}, Type: mysql.TypeEnum}, | ||
{Name: "myset", Value: types.Set{Value: 1, Name: "v"}, Type: mysql.TypeSet}, | ||
{Name: "myenum", Value: uint64(1), Type: mysql.TypeEnum}, | ||
{Name: "myset", Value: uint64(1), Type: mysql.TypeSet}, | ||
{Name: "ts", Value: time.Now().Format(types.TimeFSPFormat), Type: mysql.TypeTimestamp}, | ||
{Name: "myjson", Value: "{\"foo\": \"bar\"}", Type: mysql.TypeJSON}, | ||
} | ||
|
||
colInfos := []rowcodec.ColInfo{ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe we can organize this data by structured arrays? Controlling correspondence by index may be difficult with large test data. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes. Actually using arrays restricts the usage pattern. We have to iterate the array(currently no problem). I tried to use the tp map, but it can't be used since the column |
||
{ID: 1, IsPKHandle: true, VirtualGenCol: false, Ft: types.NewFieldType(mysql.TypeLong)}, | ||
{ID: 2, IsPKHandle: false, VirtualGenCol: false, Ft: types.NewFieldType(mysql.TypeLong)}, | ||
{ID: 3, IsPKHandle: false, VirtualGenCol: false, Ft: types.NewFieldType(mysql.TypeTiny)}, | ||
{ID: 4, IsPKHandle: false, VirtualGenCol: false, Ft: types.NewFieldType(mysql.TypeFloat)}, | ||
{ID: 5, IsPKHandle: false, VirtualGenCol: false, Ft: setBinChsClnFlag(types.NewFieldType(mysql.TypeBlob))}, | ||
{ID: 6, IsPKHandle: false, VirtualGenCol: false, Ft: setBinChsClnFlag(types.NewFieldType(mysql.TypeMediumBlob))}, | ||
{ID: 7, IsPKHandle: false, VirtualGenCol: false, Ft: setBinChsClnFlag(types.NewFieldType(mysql.TypeTinyBlob))}, | ||
{ID: 8, IsPKHandle: false, VirtualGenCol: false, Ft: setBinChsClnFlag(types.NewFieldType(mysql.TypeLongBlob))}, | ||
{ID: 9, IsPKHandle: false, VirtualGenCol: false, Ft: setBinChsClnFlag(types.NewFieldType(mysql.TypeVarString))}, | ||
{ID: 10, IsPKHandle: false, VirtualGenCol: false, Ft: setBinChsClnFlag(types.NewFieldType(mysql.TypeString))}, | ||
{ID: 11, IsPKHandle: false, VirtualGenCol: false, Ft: setBinChsClnFlag(types.NewFieldType(mysql.TypeVarchar))}, | ||
{ID: 12, IsPKHandle: false, VirtualGenCol: false, Ft: types.NewFieldType(mysql.TypeBlob)}, | ||
{ID: 13, IsPKHandle: false, VirtualGenCol: false, Ft: types.NewFieldType(mysql.TypeMediumBlob)}, | ||
{ID: 14, IsPKHandle: false, VirtualGenCol: false, Ft: types.NewFieldType(mysql.TypeTinyBlob)}, | ||
{ID: 15, IsPKHandle: false, VirtualGenCol: false, Ft: types.NewFieldType(mysql.TypeLongBlob)}, | ||
{ID: 16, IsPKHandle: false, VirtualGenCol: false, Ft: types.NewFieldType(mysql.TypeVarString)}, | ||
{ID: 17, IsPKHandle: false, VirtualGenCol: false, Ft: types.NewFieldType(mysql.TypeString)}, | ||
{ID: 18, IsPKHandle: false, VirtualGenCol: false, Ft: types.NewFieldType(mysql.TypeVarchar)}, | ||
{ID: 19, IsPKHandle: false, VirtualGenCol: false, Ft: setElems(types.NewFieldType(mysql.TypeEnum), []string{"a", "b"})}, | ||
{ID: 20, IsPKHandle: false, VirtualGenCol: false, Ft: setElems(types.NewFieldType(mysql.TypeSet), []string{"a", "b"})}, | ||
{ID: 21, IsPKHandle: false, VirtualGenCol: false, Ft: types.NewFieldType(mysql.TypeTimestamp)}, | ||
{ID: 22, IsPKHandle: false, VirtualGenCol: false, Ft: types.NewFieldType(mysql.TypeJSON)}, | ||
} | ||
|
||
schema, err := ColumnInfoToAvroSchema(table.Table, cols) | ||
c.Assert(err, check.IsNil) | ||
avroCodec, err := goavro.NewCodec(schema) | ||
c.Assert(err, check.IsNil) | ||
|
||
r, err := avroEncode(&table, s.encoder.valueSchemaManager, 1, cols, time.Local) | ||
r, err := avroEncode(&table, s.encoder.valueSchemaManager, 1, cols, colInfos, time.Local) | ||
c.Assert(err, check.IsNil) | ||
|
||
res, _, err := avroCodec.NativeFromBinary(r.data) | ||
c.Check(err, check.IsNil) | ||
c.Check(res, check.NotNil) | ||
for k, v := range res.(map[string]interface{}) { | ||
if k == "myenum" || k == "myset" { | ||
if vmap, ok := v.(map[string]interface{}); ok { | ||
_, exists := vmap["string"] | ||
c.Check(exists, check.IsTrue) | ||
} | ||
} | ||
} | ||
|
||
txt, err := avroCodec.TextualFromNative(nil, res) | ||
c.Check(err, check.IsNil) | ||
|
@@ -110,21 +159,6 @@ func (s *avroBatchEncoderSuite) TestAvroEncodeOnly(c *check.C) { | |
|
||
func (s *avroBatchEncoderSuite) TestAvroTimeZone(c *check.C) { | ||
defer testleak.AfterTest(c)() | ||
avroCodec, err := goavro.NewCodec(` | ||
{ | ||
"type": "record", | ||
"name": "TestAvroTimeZone", | ||
"fields" : [ | ||
{"name": "id", "type": ["null", "int"], "default": null}, | ||
{"name": "myint", "type": ["null", "int"], "default": null}, | ||
{"name": "mybool", "type": ["null", "int"], "default": null}, | ||
{"name": "myfloat", "type": ["null", "float"], "default": null}, | ||
{"name": "mystring", "type": ["null", "string"], "default": null}, | ||
{"name": "ts", "type": ["null", {"type": "long", "logicalType": "timestamp-millis"}], "default": null} | ||
] | ||
}`) | ||
|
||
c.Assert(err, check.IsNil) | ||
|
||
table := model.TableName{ | ||
Schema: "testdb", | ||
|
@@ -135,14 +169,30 @@ func (s *avroBatchEncoderSuite) TestAvroTimeZone(c *check.C) { | |
c.Check(err, check.IsNil) | ||
|
||
timestamp := time.Now() | ||
r, err := avroEncode(&table, s.encoder.valueSchemaManager, 1, []*model.Column{ | ||
cols := []*model.Column{ | ||
{Name: "id", Value: int64(1), Type: mysql.TypeLong}, | ||
{Name: "myint", Value: int64(2), Type: mysql.TypeLong}, | ||
{Name: "mybool", Value: int64(1), Type: mysql.TypeTiny}, | ||
{Name: "myfloat", Value: float64(3.14), Type: mysql.TypeFloat}, | ||
{Name: "mystring", Value: []byte("Hello World"), Type: mysql.TypeBlob}, | ||
{Name: "ts", Value: timestamp.In(location).Format(types.TimeFSPFormat), Type: mysql.TypeTimestamp}, | ||
}, location) | ||
} | ||
|
||
colInfos := []rowcodec.ColInfo{ | ||
{ID: 1, IsPKHandle: true, VirtualGenCol: false, Ft: types.NewFieldType(mysql.TypeLong)}, | ||
{ID: 2, IsPKHandle: false, VirtualGenCol: false, Ft: types.NewFieldType(mysql.TypeLong)}, | ||
{ID: 3, IsPKHandle: false, VirtualGenCol: false, Ft: types.NewFieldType(mysql.TypeTiny)}, | ||
{ID: 4, IsPKHandle: false, VirtualGenCol: false, Ft: types.NewFieldType(mysql.TypeFloat)}, | ||
{ID: 5, IsPKHandle: false, VirtualGenCol: false, Ft: types.NewFieldType(mysql.TypeBlob)}, | ||
{ID: 6, IsPKHandle: false, VirtualGenCol: false, Ft: types.NewFieldType(mysql.TypeTimestamp)}, | ||
} | ||
|
||
schema, err := ColumnInfoToAvroSchema(table.Table, cols) | ||
c.Assert(err, check.IsNil) | ||
avroCodec, err := goavro.NewCodec(schema) | ||
c.Assert(err, check.IsNil) | ||
|
||
r, err := avroEncode(&table, s.encoder.valueSchemaManager, 1, cols, colInfos, location) | ||
c.Assert(err, check.IsNil) | ||
|
||
res, _, err := avroCodec.NativeFromBinary(r.data) | ||
|
@@ -206,6 +256,13 @@ func (s *avroBatchEncoderSuite) TestAvroEncode(c *check.C) { | |
{Name: "utiny", Type: mysql.TypeTiny, Flag: model.UnsignedFlag, Value: uint64(100)}, | ||
{Name: "comment", Type: mysql.TypeBlob, Value: []byte("测试")}, | ||
}, | ||
ColInfos: []rowcodec.ColInfo{ | ||
{ID: 1, IsPKHandle: true, VirtualGenCol: false, Ft: types.NewFieldType(mysql.TypeLong)}, | ||
{ID: 2, IsPKHandle: false, VirtualGenCol: false, Ft: types.NewFieldType(mysql.TypeVarchar)}, | ||
{ID: 3, IsPKHandle: false, VirtualGenCol: false, Ft: types.NewFieldType(mysql.TypeTiny)}, | ||
{ID: 4, IsPKHandle: false, VirtualGenCol: false, Ft: setFlag(types.NewFieldType(mysql.TypeTiny), uint(model.UnsignedFlag))}, | ||
{ID: 5, IsPKHandle: false, VirtualGenCol: false, Ft: types.NewFieldType(mysql.TypeBlob)}, | ||
}, | ||
} | ||
|
||
testCaseDdl := &model.DDLEvent{ | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please use
column_infos
.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry I don't really get it. Do you mean change the
json:"column-infos"
tojson:"column_infos"
? But all other fields use hyphens instead of underscores.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
msg:"-"
seems to prevent the information from being persisted for Redo. Could you verify the compatibility of this change with Redo?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Redo only applies to mysql sink currently so does none of avro business. So prevent
ColInfos
from being persisted for Redo seems right.But I think the
ColInfos
is important and maybe in future Redo could support avro? PersistColInfos
seems bring no trouble. Upgrade is also safe since anil
ColInfos
field is safe for other codecs. But as @liuzix tells me, it will make the redo log large. So I'd like to leave it unpersist in this pull request.Could you take a look @ben1009 ?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
lgtm for redo part