Skip to content

Commit

Permalink
charset: add utf8_unicode_ci and utf8mb4_unicode_ci interface (#18678) (#22099)
Browse files Browse the repository at this point in the history

Signed-off-by: ti-srebot <ti-srebot@pingcap.com>
  • Loading branch information
ti-srebot authored Jan 27, 2021
1 parent f0a757b commit 52a350a
Show file tree
Hide file tree
Showing 7 changed files with 96 additions and 23 deletions.
31 changes: 18 additions & 13 deletions ddl/serial_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1096,21 +1096,26 @@ func (s *testSerialSuite) TestModifyingColumn4NewCollations(c *C) {
// Column collation can be changed as long as there is no index defined.
tk.MustExec("alter table t modify b varchar(10) collate utf8_general_ci")
tk.MustExec("alter table t modify c varchar(10) collate utf8_bin")
tk.MustExec("alter table t modify c varchar(10) collate utf8_unicode_ci")
tk.MustExec("alter table t charset utf8 collate utf8_general_ci")
tk.MustExec("alter table t convert to charset utf8 collate utf8_bin")
tk.MustExec("alter table t convert to charset utf8 collate utf8_unicode_ci")
tk.MustExec("alter table t convert to charset utf8 collate utf8_general_ci")
tk.MustExec("alter table t modify b varchar(10) collate utf8_unicode_ci")
tk.MustExec("alter table t modify b varchar(10) collate utf8_bin")

tk.MustExec("alter table t add index b_idx(b)")
tk.MustExec("alter table t add index c_idx(c)")
tk.MustGetErrMsg("alter table t modify b varchar(10) collate utf8_general_ci", "[ddl:8200]Unsupported modifying collation of column 'b' from 'utf8_bin' to 'utf8_general_ci' when index is defined on it.")
tk.MustGetErrMsg("alter table t modify c varchar(10) collate utf8_bin", "[ddl:8200]Unsupported modifying collation of column 'c' from 'utf8_general_ci' to 'utf8_bin' when index is defined on it.")
tk.MustGetErrMsg("alter table t modify c varchar(10) collate utf8_unicode_ci", "[ddl:8200]Unsupported modifying collation of column 'c' from 'utf8_general_ci' to 'utf8_unicode_ci' when index is defined on it.")
tk.MustGetErrMsg("alter table t convert to charset utf8 collate utf8_general_ci", "[ddl:8200]Unsupported converting collation of column 'b' from 'utf8_bin' to 'utf8_general_ci' when index is defined on it.")
// Change to a compatible collation is allowed.
tk.MustExec("alter table t modify c varchar(10) collate utf8mb4_general_ci")
// Change the default collation of table is allowed.
tk.MustExec("alter table t collate utf8mb4_general_ci")
tk.MustExec("alter table t charset utf8mb4 collate utf8mb4_bin")
tk.MustExec("alter table t charset utf8mb4 collate utf8mb4_unicode_ci")
// Change the default collation of database is allowed.
tk.MustExec("alter database dct charset utf8mb4 collate utf8mb4_general_ci")
}
Expand All @@ -1124,27 +1129,27 @@ func (s *testSerialSuite) TestForbidUnsupportedCollations(c *C) {
tk.MustGetErrMsg(sql, fmt.Sprintf("[ddl:1273]Unsupported collation when new collation is enabled: '%s'", coll))
}
// Test default collation of database.
mustGetUnsupportedCollation("create database ucd charset utf8mb4 collate utf8mb4_unicode_ci", "utf8mb4_unicode_ci")
mustGetUnsupportedCollation("create database ucd charset utf8 collate utf8_unicode_ci", "utf8_unicode_ci")
mustGetUnsupportedCollation("create database ucd charset utf8mb4 collate utf8mb4_roman_ci", "utf8mb4_roman_ci")
mustGetUnsupportedCollation("create database ucd charset utf8 collate utf8_roman_ci", "utf8_roman_ci")
tk.MustExec("create database ucd")
mustGetUnsupportedCollation("alter database ucd charset utf8mb4 collate utf8mb4_unicode_ci", "utf8mb4_unicode_ci")
mustGetUnsupportedCollation("alter database ucd collate utf8mb4_unicode_ci", "utf8mb4_unicode_ci")
mustGetUnsupportedCollation("alter database ucd charset utf8mb4 collate utf8mb4_roman_ci", "utf8mb4_roman_ci")
mustGetUnsupportedCollation("alter database ucd collate utf8mb4_roman_ci", "utf8mb4_roman_ci")

// Test default collation of table.
tk.MustExec("use ucd")
mustGetUnsupportedCollation("create table t(a varchar(20)) charset utf8mb4 collate utf8mb4_unicode_ci", "utf8mb4_unicode_ci")
mustGetUnsupportedCollation("create table t(a varchar(20)) collate utf8_unicode_ci", "utf8_unicode_ci")
mustGetUnsupportedCollation("create table t(a varchar(20)) charset utf8mb4 collate utf8mb4_roman_ci", "utf8mb4_roman_ci")
mustGetUnsupportedCollation("create table t(a varchar(20)) collate utf8_roman_ci", "utf8_roman_ci")
tk.MustExec("create table t(a varchar(20)) collate utf8mb4_general_ci")
mustGetUnsupportedCollation("alter table t default collate utf8mb4_unicode_ci", "utf8mb4_unicode_ci")
mustGetUnsupportedCollation("alter table t convert to charset utf8mb4 collate utf8mb4_unicode_ci", "utf8mb4_unicode_ci")
mustGetUnsupportedCollation("alter table t default collate utf8mb4_roman_ci", "utf8mb4_roman_ci")
mustGetUnsupportedCollation("alter table t convert to charset utf8mb4 collate utf8mb4_roman_ci", "utf8mb4_roman_ci")

// Test collation of columns.
mustGetUnsupportedCollation("create table t1(a varchar(20)) collate utf8mb4_unicode_ci", "utf8mb4_unicode_ci")
mustGetUnsupportedCollation("create table t1(a varchar(20)) charset utf8 collate utf8_unicode_ci", "utf8_unicode_ci")
mustGetUnsupportedCollation("create table t1(a varchar(20)) collate utf8mb4_roman_ci", "utf8mb4_roman_ci")
mustGetUnsupportedCollation("create table t1(a varchar(20)) charset utf8 collate utf8_roman_ci", "utf8_roman_ci")
tk.MustExec("create table t1(a varchar(20))")
mustGetUnsupportedCollation("alter table t1 modify a varchar(20) collate utf8mb4_unicode_ci", "utf8mb4_unicode_ci")
mustGetUnsupportedCollation("alter table t1 modify a varchar(20) charset utf8 collate utf8_unicode_ci", "utf8_unicode_ci")
mustGetUnsupportedCollation("alter table t1 modify a varchar(20) charset utf8 collate utf8_unicode_ci", "utf8_unicode_ci")
mustGetUnsupportedCollation("alter table t1 modify a varchar(20) collate utf8mb4_roman_ci", "utf8mb4_roman_ci")
mustGetUnsupportedCollation("alter table t1 modify a varchar(20) charset utf8 collate utf8_roman_ci", "utf8_roman_ci")
mustGetUnsupportedCollation("alter table t1 modify a varchar(20) charset utf8 collate utf8_roman_ci", "utf8_roman_ci")

// TODO(bb7133): fix the following cases by setting charset from collate firstly.
// mustGetUnsupportedCollation("create database ucd collate utf8mb4_unicode_ci", errMsgUnsupportedUnicodeCI)
Expand Down
20 changes: 11 additions & 9 deletions executor/seqtest/seq_executor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1237,8 +1237,10 @@ func (s *seqTestSuite) TestShowForNewCollations(c *C) {
"latin1_bin latin1 47 Yes Yes 1",
"utf8_bin utf8 83 Yes Yes 1",
"utf8_general_ci utf8 33 Yes 1",
"utf8_unicode_ci utf8 192 Yes 1",
"utf8mb4_bin utf8mb4 46 Yes Yes 1",
"utf8mb4_general_ci utf8mb4 45 Yes 1",
"utf8mb4_unicode_ci utf8mb4 224 Yes 1",
)
tk.MustQuery("show collation").Check(expectRows)
tk.MustQuery("select * from information_schema.COLLATIONS").Check(expectRows)
Expand All @@ -1253,15 +1255,15 @@ func (s *seqTestSuite) TestForbidUnsupportedCollations(c *C) {
tk.MustGetErrMsg(sql, fmt.Sprintf("[ddl:1273]Unsupported collation when new collation is enabled: '%s'", coll))
}

mustGetUnsupportedCollation("select 'a' collate utf8_unicode_ci", "utf8_unicode_ci")
mustGetUnsupportedCollation("select cast('a' as char) collate utf8_unicode_ci", "utf8_unicode_ci")
mustGetUnsupportedCollation("set names utf8 collate utf8_unicode_ci", "utf8_unicode_ci")
mustGetUnsupportedCollation("set session collation_server = 'utf8_unicode_ci'", "utf8_unicode_ci")
mustGetUnsupportedCollation("set session collation_database = 'utf8_unicode_ci'", "utf8_unicode_ci")
mustGetUnsupportedCollation("set session collation_connection = 'utf8_unicode_ci'", "utf8_unicode_ci")
mustGetUnsupportedCollation("set global collation_server = 'utf8_unicode_ci'", "utf8_unicode_ci")
mustGetUnsupportedCollation("set global collation_database = 'utf8_unicode_ci'", "utf8_unicode_ci")
mustGetUnsupportedCollation("set global collation_connection = 'utf8_unicode_ci'", "utf8_unicode_ci")
mustGetUnsupportedCollation("select 'a' collate utf8_roman_ci", "utf8_roman_ci")
mustGetUnsupportedCollation("select cast('a' as char) collate utf8_roman_ci", "utf8_roman_ci")
mustGetUnsupportedCollation("set names utf8 collate utf8_roman_ci", "utf8_roman_ci")
mustGetUnsupportedCollation("set session collation_server = 'utf8_roman_ci'", "utf8_roman_ci")
mustGetUnsupportedCollation("set session collation_database = 'utf8_roman_ci'", "utf8_roman_ci")
mustGetUnsupportedCollation("set session collation_connection = 'utf8_roman_ci'", "utf8_roman_ci")
mustGetUnsupportedCollation("set global collation_server = 'utf8_roman_ci'", "utf8_roman_ci")
mustGetUnsupportedCollation("set global collation_database = 'utf8_roman_ci'", "utf8_roman_ci")
mustGetUnsupportedCollation("set global collation_connection = 'utf8_roman_ci'", "utf8_roman_ci")
}

func (s *seqTestSuite) TestAutoIncIDInRetry(c *C) {
Expand Down
2 changes: 2 additions & 0 deletions expression/distsql_builtin_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ func (s *testEvalSerialSuite) TestPBToExprWithNewCollation(c *C) {
{"utf8mb4_general_ci", "utf8mb4_general_ci", 45, 45},
{"", "utf8mb4_bin", 46, 46},
{"some_error_collation", "utf8mb4_bin", 46, 46},
{"utf8_unicode_ci", "utf8_unicode_ci", 192, 192},
{"utf8mb4_unicode_ci", "utf8mb4_unicode_ci", 224, 224},
}

for _, cs := range cases {
Expand Down
2 changes: 2 additions & 0 deletions expression/expr_to_pb_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -884,6 +884,7 @@ func (s *testEvaluatorSerialSuites) TestNewCollationsEnabled(c *C) {
colExprs = append(colExprs, columnCollation(dg.genColumn(mysql.TypeVarString, 3), "utf8mb4_general_ci"))
colExprs = append(colExprs, columnCollation(dg.genColumn(mysql.TypeString, 4), "utf8mb4_0900_ai_ci"))
colExprs = append(colExprs, columnCollation(dg.genColumn(mysql.TypeVarchar, 5), "utf8_bin"))
colExprs = append(colExprs, columnCollation(dg.genColumn(mysql.TypeVarchar, 6), "utf8_unicode_ci"))
pushed, _ := PushDownExprs(sc, colExprs, client, kv.UnSpecified)
c.Assert(len(pushed), Equals, len(colExprs))
pbExprs, err := ExpressionsToPBList(sc, colExprs, client)
Expand All @@ -894,6 +895,7 @@ func (s *testEvaluatorSerialSuites) TestNewCollationsEnabled(c *C) {
"{\"tp\":201,\"val\":\"gAAAAAAAAAM=\",\"sig\":0,\"field_type\":{\"tp\":253,\"flag\":0,\"flen\":-1,\"decimal\":-1,\"collate\":-45,\"charset\":\"\"}}",
"{\"tp\":201,\"val\":\"gAAAAAAAAAQ=\",\"sig\":0,\"field_type\":{\"tp\":254,\"flag\":0,\"flen\":-1,\"decimal\":-1,\"collate\":-255,\"charset\":\"\"}}",
"{\"tp\":201,\"val\":\"gAAAAAAAAAU=\",\"sig\":0,\"field_type\":{\"tp\":15,\"flag\":0,\"flen\":-1,\"decimal\":-1,\"collate\":-83,\"charset\":\"\"}}",
"{\"tp\":201,\"val\":\"gAAAAAAAAAY=\",\"sig\":0,\"field_type\":{\"tp\":15,\"flag\":0,\"flen\":-1,\"decimal\":-1,\"collate\":-192,\"charset\":\"\"}}",
}
for i, pbExpr := range pbExprs {
c.Assert(pbExprs, NotNil)
Expand Down
9 changes: 8 additions & 1 deletion util/collate/collate.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,8 @@ func CompatibleCollate(collate1, collate2 string) bool {
return true
} else if (collate1 == "utf8mb4_bin" || collate1 == "utf8_bin") && (collate2 == "utf8mb4_bin" || collate2 == "utf8_bin") {
return true
} else if (collate1 == "utf8mb4_unicode_ci" || collate1 == "utf8_unicode_ci") && (collate2 == "utf8mb4_unicode_ci" || collate2 == "utf8_unicode_ci") {
return true
} else {
return collate1 == collate2
}
Expand Down Expand Up @@ -217,7 +219,8 @@ func truncateTailingSpace(str string) string {

// IsCICollation reports whether collate is one of the recognized
// case-insensitive ("_ci") collation names.
func IsCICollation(collate string) bool {
return collate == "utf8_general_ci" || collate == "utf8mb4_general_ci"
return collate == "utf8_general_ci" || collate == "utf8mb4_general_ci" ||
collate == "utf8_unicode_ci" || collate == "utf8mb4_unicode_ci"
}

func init() {
Expand All @@ -238,4 +241,8 @@ func init() {
newCollatorIDMap[int(mysql.CollationNames["utf8mb4_general_ci"])] = &generalCICollator{}
newCollatorMap["utf8_general_ci"] = &generalCICollator{}
newCollatorIDMap[int(mysql.CollationNames["utf8_general_ci"])] = &generalCICollator{}
newCollatorMap["utf8mb4_unicode_ci"] = &unicodeCICollator{}
newCollatorIDMap[int(mysql.CollationNames["utf8mb4_unicode_ci"])] = &unicodeCICollator{}
newCollatorMap["utf8_unicode_ci"] = &unicodeCICollator{}
newCollatorIDMap[int(mysql.CollationNames["utf8_unicode_ci"])] = &unicodeCICollator{}
}
8 changes: 8 additions & 0 deletions util/collate/collate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -164,12 +164,16 @@ func (s *testCollateSuite) TestGetCollator(c *C) {
c.Assert(GetCollator("utf8_bin"), FitsTypeOf, &binPaddingCollator{})
c.Assert(GetCollator("utf8mb4_general_ci"), FitsTypeOf, &generalCICollator{})
c.Assert(GetCollator("utf8_general_ci"), FitsTypeOf, &generalCICollator{})
c.Assert(GetCollator("utf8mb4_unicode_ci"), FitsTypeOf, &unicodeCICollator{})
c.Assert(GetCollator("utf8_unicode_ci"), FitsTypeOf, &unicodeCICollator{})
c.Assert(GetCollator("default_test"), FitsTypeOf, &binPaddingCollator{})
c.Assert(GetCollatorByID(63), FitsTypeOf, &binCollator{})
c.Assert(GetCollatorByID(46), FitsTypeOf, &binPaddingCollator{})
c.Assert(GetCollatorByID(83), FitsTypeOf, &binPaddingCollator{})
c.Assert(GetCollatorByID(45), FitsTypeOf, &generalCICollator{})
c.Assert(GetCollatorByID(33), FitsTypeOf, &generalCICollator{})
c.Assert(GetCollatorByID(224), FitsTypeOf, &unicodeCICollator{})
c.Assert(GetCollatorByID(192), FitsTypeOf, &unicodeCICollator{})
c.Assert(GetCollatorByID(9999), FitsTypeOf, &binPaddingCollator{})

SetNewCollationEnabledForTest(false)
Expand All @@ -178,11 +182,15 @@ func (s *testCollateSuite) TestGetCollator(c *C) {
c.Assert(GetCollator("utf8_bin"), FitsTypeOf, &binCollator{})
c.Assert(GetCollator("utf8mb4_general_ci"), FitsTypeOf, &binCollator{})
c.Assert(GetCollator("utf8_general_ci"), FitsTypeOf, &binCollator{})
c.Assert(GetCollator("utf8mb4_unicode_ci"), FitsTypeOf, &binCollator{})
c.Assert(GetCollator("utf8_unicode_ci"), FitsTypeOf, &binCollator{})
c.Assert(GetCollator("default_test"), FitsTypeOf, &binCollator{})
c.Assert(GetCollatorByID(63), FitsTypeOf, &binCollator{})
c.Assert(GetCollatorByID(46), FitsTypeOf, &binCollator{})
c.Assert(GetCollatorByID(83), FitsTypeOf, &binCollator{})
c.Assert(GetCollatorByID(45), FitsTypeOf, &binCollator{})
c.Assert(GetCollatorByID(33), FitsTypeOf, &binCollator{})
c.Assert(GetCollatorByID(224), FitsTypeOf, &binCollator{})
c.Assert(GetCollatorByID(192), FitsTypeOf, &binCollator{})
c.Assert(GetCollatorByID(9999), FitsTypeOf, &binCollator{})
}
47 changes: 47 additions & 0 deletions util/collate/unicode_ci.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// Copyright 2020 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package collate

// unicodeCICollator is the placeholder collator for the utf8_unicode_ci and
// utf8mb4_unicode_ci collations. Its methods are stubs: no comparison or
// key-generation logic exists yet.
type unicodeCICollator struct{}

// Compare implements the Collator interface.
//
// Stub: both arguments are ignored and 0 is returned for every pair of
// strings until the real comparison is implemented.
func (*unicodeCICollator) Compare(a, b string) int {
	return 0
}

// Key implements the Collator interface.
//
// Stub: str is ignored and an empty, non-nil byte slice is returned until
// real key generation is implemented.
func (*unicodeCICollator) Key(str string) []byte {
	return make([]byte, 0)
}

// Pattern implements the Collator interface. Each call returns a fresh,
// zero-valued unicodePattern.
func (*unicodeCICollator) Pattern() WildcardPattern {
	pattern := new(unicodePattern)
	return pattern
}

// unicodePattern is the WildcardPattern returned by
// unicodeCICollator.Pattern. It is a stub for now.
type unicodePattern struct {
	// NOTE(review): neither field is read or written in this file yet;
	// presumably they will hold the compiled pattern. Confirm once the
	// real implementation lands.
	patChars []rune
	patTypes []byte
}

// Compile implements the WildcardPattern interface.
//
// Stub: it is currently a no-op, so patternStr and escape are ignored.
func (*unicodePattern) Compile(patternStr string, escape byte) {}

// DoMatch implements the WildcardPattern interface.
//
// Stub: it reports false for every str until real matching is implemented.
func (*unicodePattern) DoMatch(str string) bool {
	return false
}

0 comments on commit 52a350a

Please sign in to comment.