Skip to content

Commit

Permalink
expression: Implementation of Vitess hashing algorithm. (#23493) (#23915
Browse files Browse the repository at this point in the history
)
  • Loading branch information
ti-srebot authored Apr 8, 2021
1 parent bdac088 commit f3951ec
Show file tree
Hide file tree
Showing 11 changed files with 316 additions and 8 deletions.
30 changes: 30 additions & 0 deletions cmd/explaintest/r/vitess_hash.result
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
use test;
drop table if exists t;
create table t(
customer_id bigint,
id bigint,
expected_shard bigint unsigned,
computed_shard bigint unsigned null,
primary key (customer_id, id)
);
create index t_vitess_shard on t((vitess_hash(customer_id) >> 56));
explain format = 'brief' select customer_id from t where (vitess_hash(customer_id) >> 56) = x'd6' ORDER BY id;
id estRows task access object operator info
Projection 10.00 root test.t.customer_id
└─Sort 10.00 root test.t.id
└─IndexLookUp 10.00 root
├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:t_vitess_shard(vitess_hash(`customer_id`) >> 56) range:[214,214], keep order:false, stats:pseudo
└─TableRowIDScan(Probe) 10.00 cop[tikv] table:t keep order:false, stats:pseudo
explain format = 'brief' select id from t where (vitess_hash(customer_id) >> 56) IN (x'e0', x'e1') AND id BETWEEN 2 AND 5 ORDER BY id;
id estRows task access object operator info
Projection 0.50 root test.t.id
└─Sort 0.50 root test.t.id
└─IndexLookUp 0.50 root
├─IndexRangeScan(Build) 20.00 cop[tikv] table:t, index:t_vitess_shard(vitess_hash(`customer_id`) >> 56) range:[224,224], [225,225], keep order:false, stats:pseudo
└─Selection(Probe) 0.50 cop[tikv] ge(test.t.id, 2), le(test.t.id, 5)
└─TableRowIDScan 20.00 cop[tikv] table:t keep order:false, stats:pseudo
explain format = 'brief' select hex(vitess_hash(1123)) from t;
id estRows task access object operator info
Projection 10000.00 root 31B565D41BDF8CA->Column#7
└─IndexReader 10000.00 root index:IndexFullScan
└─IndexFullScan 10000.00 cop[tikv] table:t, index:t_vitess_shard(vitess_hash(`customer_id`) >> 56) keep order:false, stats:pseudo
13 changes: 13 additions & 0 deletions cmd/explaintest/t/vitess_hash.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
use test;
drop table if exists t;
create table t(
customer_id bigint,
id bigint,
expected_shard bigint unsigned,
computed_shard bigint unsigned null,
primary key (customer_id, id)
);
create index t_vitess_shard on t((vitess_hash(customer_id) >> 56));
explain format = 'brief' select customer_id from t where (vitess_hash(customer_id) >> 56) = x'd6' ORDER BY id;
explain format = 'brief' select id from t where (vitess_hash(customer_id) >> 56) IN (x'e0', x'e1') AND id BETWEEN 2 AND 5 ORDER BY id;
explain format = 'brief' select hex(vitess_hash(1123)) from t;
4 changes: 2 additions & 2 deletions executor/show_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1078,9 +1078,9 @@ func (s *testSuite5) TestShowBuiltin(c *C) {
res := tk.MustQuery("show builtins;")
c.Assert(res, NotNil)
rows := res.Rows()
c.Assert(267, Equals, len(rows))
c.Assert(268, Equals, len(rows))
c.Assert("abs", Equals, rows[0][0].(string))
c.Assert("yearweek", Equals, rows[266][0].(string))
c.Assert("yearweek", Equals, rows[267][0].(string))
}

func (s *testSuite5) TestShowClusterConfig(c *C) {
Expand Down
1 change: 1 addition & 0 deletions expression/builtin.go
Original file line number Diff line number Diff line change
Expand Up @@ -787,6 +787,7 @@ var funcs = map[string]functionClass{
ast.ReleaseAllLocks: &releaseAllLocksFunctionClass{baseFunctionClass{ast.ReleaseAllLocks, 0, 0}},
ast.UUID: &uuidFunctionClass{baseFunctionClass{ast.UUID, 0, 0}},
ast.UUIDShort: &uuidShortFunctionClass{baseFunctionClass{ast.UUIDShort, 0, 0}},
ast.VitessHash: &vitessHashFunctionClass{baseFunctionClass{ast.VitessHash, 1, 1}},

// get_lock() and release_lock() are parsed but do nothing.
// It is used for preventing error in Ruby's activerecord migrations.
Expand Down
48 changes: 48 additions & 0 deletions expression/builtin_miscellaneous.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
"github.com/pingcap/tidb/types"
"github.com/pingcap/tidb/types/json"
"github.com/pingcap/tidb/util/chunk"
"github.com/pingcap/tidb/util/vitess"
"github.com/pingcap/tipb/go-tipb"
)

Expand All @@ -51,6 +52,7 @@ var (
_ functionClass = &releaseAllLocksFunctionClass{}
_ functionClass = &uuidFunctionClass{}
_ functionClass = &uuidShortFunctionClass{}
_ functionClass = &vitessHashFunctionClass{}
)

var (
Expand All @@ -73,6 +75,7 @@ var (
_ builtinFunc = &builtinIsIPv4MappedSig{}
_ builtinFunc = &builtinIsIPv6Sig{}
_ builtinFunc = &builtinUUIDSig{}
_ builtinFunc = &builtinVitessHashSig{}

_ builtinFunc = &builtinNameConstIntSig{}
_ builtinFunc = &builtinNameConstRealSig{}
Expand Down Expand Up @@ -1046,3 +1049,48 @@ type uuidShortFunctionClass struct {
func (c *uuidShortFunctionClass) getFunction(ctx sessionctx.Context, args []Expression) (builtinFunc, error) {
return nil, errFunctionNotExists.GenWithStackByArgs("FUNCTION", "UUID_SHORT")
}

// vitessHashFunctionClass is the function class for the VITESS_HASH builtin.
// It embeds baseFunctionClass and supplies getFunction, which produces a
// builtinVitessHashSig.
type vitessHashFunctionClass struct {
baseFunctionClass
}

// getFunction validates the arguments and builds the VITESS_HASH signature.
//
// The signature takes one integer argument and evaluates to an integer whose
// result type is flagged unsigned. Any error from argument verification or
// from constructing the base builtin function is returned unchanged.
func (c *vitessHashFunctionClass) getFunction(ctx sessionctx.Context, args []Expression) (builtinFunc, error) {
	err := c.verifyArgs(args)
	if err != nil {
		return nil, err
	}

	base, err := newBaseBuiltinFuncWithTp(ctx, c.funcName, args, types.ETInt, types.ETInt)
	if err != nil {
		return nil, err
	}

	// The result is a 64-bit unsigned value, so reserve a display width of 20.
	base.tp.Flen = 20
	base.tp.Flag |= mysql.UnsignedFlag
	types.SetBinChsClnFlag(base.tp)

	hashSig := &builtinVitessHashSig{base}
	hashSig.setPbCode(tipb.ScalarFuncSig_VitessHash)
	return hashSig, nil
}

// builtinVitessHashSig is the builtin function signature for VITESS_HASH.
// It embeds baseBuiltinFunc and implements evalInt and vecEvalInt.
type builtinVitessHashSig struct {
baseBuiltinFunc
}

// Clone returns a fresh builtinVitessHashSig whose embedded base function
// state is copied from b via cloneFrom.
func (b *builtinVitessHashSig) Clone() builtinFunc {
	cloned := new(builtinVitessHashSig)
	cloned.cloneFrom(&b.baseBuiltinFunc)
	return cloned
}

// evalInt evaluates VITESS_HASH(int64) for a single row.
//
// The first argument is evaluated as an integer, reinterpreted as uint64 and
// passed to vitess.HashUint64; the hash is returned reinterpreted as int64.
// The result is NULL when the argument is NULL or when any step reports an
// error, in which case that error is returned as well.
func (b *builtinVitessHashSig) evalInt(row chunk.Row) (int64, bool, error) {
	key, keyIsNull, err := b.args[0].EvalInt(b.ctx, row)
	if err != nil || keyIsNull {
		return 0, true, err
	}

	digest, err := vitess.HashUint64(uint64(key))
	if err != nil {
		return 0, true, err
	}
	return int64(digest), false, nil
}
36 changes: 36 additions & 0 deletions expression/builtin_miscellaneous_vec.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
"github.com/pingcap/tidb/sessionctx/variable"
"github.com/pingcap/tidb/types"
"github.com/pingcap/tidb/util/chunk"
"github.com/pingcap/tidb/util/vitess"
)

func (b *builtinInetNtoaSig) vecEvalString(input *chunk.Chunk, result *chunk.Column) error {
Expand Down Expand Up @@ -617,3 +618,38 @@ func (b *builtinReleaseLockSig) vecEvalInt(input *chunk.Chunk, result *chunk.Col
}
return nil
}

// vectorized reports that builtinVitessHashSig has a vectorized
// implementation (vecEvalInt); it always returns true.
func (b *builtinVitessHashSig) vectorized() bool {
return true
}

// vecEvalInt evaluates VITESS_HASH for every row of input and stores the
// hashes in result.
//
// The argument column is evaluated into a temporary buffer obtained from
// b.bufAllocator (returned to it on exit). result is resized to the row count
// and inherits the argument's NULLs; rows whose argument is NULL are skipped,
// and every other row is hashed with vitess.HashUint64. The first error from
// buffer allocation, argument evaluation or hashing aborts the evaluation.
func (b *builtinVitessHashSig) vecEvalInt(input *chunk.Chunk, result *chunk.Column) error {
	rowCount := input.NumRows()

	keys, err := b.bufAllocator.get(types.ETInt, rowCount)
	if err != nil {
		return err
	}
	defer b.bufAllocator.put(keys)

	err = b.args[0].VecEvalInt(b.ctx, input, keys)
	if err != nil {
		return err
	}

	result.ResizeInt64(rowCount, false)
	hashes := result.Uint64s()
	result.MergeNulls(keys)

	for idx := 0; idx < rowCount; idx++ {
		if keys.IsNull(idx) {
			continue
		}
		hashed, hashErr := vitess.HashUint64(keys.GetUint64(idx))
		if hashErr != nil {
			return hashErr
		}
		hashes[idx] = hashed
	}

	return nil
}
72 changes: 72 additions & 0 deletions expression/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8998,3 +8998,75 @@ func (s *testIntegrationSerialSuite) TestIssue23805(c *C) {
" UNIQUE KEY `idx_10` (`col_26`(5)));")
tk.MustExec("insert ignore into tbl_5 set col_28 = 'ZmZIdSnq' , col_25 = '18:50:52.00' on duplicate key update col_26 = 'y';\n")
}

// TestVitessHash checks vitess_hash against fixed hex digests for a handful of
// bigint unsigned keys (including the maximum uint64 value and NULL), and
// verifies that the function also accepts the result of a nested convert().
func (s *testIntegrationSuite) TestVitessHash(c *C) {
	defer s.cleanEnv(c)
	tk := testkit.NewTestKit(c, s.store)
	tk.MustExec("use test")
	tk.MustExec("drop table if exists t_int, t_blob, t_varchar;")
	tk.MustExec("create table t_int(id int, a bigint unsigned null);")

	maxKey := fmt.Sprintf("%d", uint64(math.MaxUint64))
	insertRows := "insert into t_int values " +
		"(1, 30375298039), " +
		"(2, 1123), " +
		"(3, 30573721600), " +
		"(4, " + maxKey + ")," +
		"(5, 116)," +
		"(6, null);"
	tk.MustExec(insertRows)

	// Integers: one expected digest per inserted row, NULL in gives NULL out.
	hashed := tk.MustQuery("select hex(vitess_hash(a)) from t_int")
	hashed.Check(testkit.Rows(
		"31265661E5F1133",
		"31B565D41BDF8CA",
		"1EFD6439F2050FFD",
		"355550B2150E2451",
		"1E1788FF0FDE093C",
		"<nil>"))

	// Nested function sanity test: row 5 must hash to the same digest as above.
	nested := tk.MustQuery("select hex(vitess_hash(convert(a, decimal(8,4)))) from t_int where id = 5")
	nested.Check(testkit.Rows("1E1788FF0FDE093C"))
}

// TestVitessHashMatchesVitessShards checks that the top byte of
// vitess_hash(customer_id) equals a recorded shard value for each fixture row.
//
// Each inserted row is (customer_id, id, expected_shard). The test then stores
// vitess_hash(customer_id) >> 56 in computed_shard and asserts that no row has
// expected_shard <> computed_shard.
// NOTE(review): the expected_shard values are presumably taken from a real
// Vitess keyspace; their origin is not visible here, so confirm before editing.
func (s *testIntegrationSuite) TestVitessHashMatchesVitessShards(c *C) {
defer s.cleanEnv(c)
tk := testkit.NewTestKit(c, s.store)
tk.MustExec("use test")
tk.MustExec("drop table if exists t;")
tk.MustExec("create table t(customer_id bigint, id bigint, expected_shard bigint unsigned, computed_shard bigint unsigned null, primary key (customer_id, id));")

// Fixture rows: customer_id, row id, and the expected shard byte as a hex literal.
tk.MustExec("insert into t (customer_id, id, expected_shard) values " +
"(30370720100, 1, x'd6'), " +
"(30370670010, 2, x'd6'), " +
"(30370689320, 3, x'e1'), " +
"(30370693008, 4, x'e0'), " +
"(30370656005, 5, x'89'), " +
"(30370702638, 6, x'89'), " +
"(30370658809, 7, x'ce'), " +
"(30370665369, 8, x'cf'), " +
"(30370706138, 9, x'85'), " +
"(30370708769, 10, x'85'), " +
"(30370711915, 11, x'a3'), " +
"(30370712595, 12, x'a3'), " +
"(30370656340, 13, x'7d'), " +
"(30370660143, 14, x'7c'), " +
"(30371738450, 15, x'fc'), " +
"(30371683979, 16, x'fd'), " +
"(30370664597, 17, x'92'), " +
"(30370667361, 18, x'93'), " +
"(30370656406, 19, x'd2'), " +
"(30370716959, 20, x'd3'), " +
"(30375207698, 21, x'9a'), " +
"(30375168766, 22, x'9a'), " +
"(30370711813, 23, x'ca'), " +
"(30370721803, 24, x'ca'), " +
"(30370717957, 25, x'97'), " +
"(30370734969, 26, x'96'), " +
"(30375203572, 27, x'98'), " +
"(30375292643, 28, x'99'); ")

// Sanity check the shards being computed correctly
// (>> 56 keeps only the most significant byte of the 64-bit hash; the query
// below must return no mismatching rows).
tk.MustExec("update t set computed_shard = (vitess_hash(customer_id) >> 56);")
tk.MustQuery("select customer_id, id, hex(expected_shard), hex(computed_shard) from t where expected_shard <> computed_shard").
Check(testkit.Rows())
}
4 changes: 2 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ require (
github.com/pingcap/parser v0.0.0-20210325072920-0d17053a8a69
github.com/pingcap/sysutil v0.0.0-20210221112134-a07bda3bde99
github.com/pingcap/tidb-tools v4.0.9-0.20201127090955-2707c97b3853+incompatible
github.com/pingcap/tipb v0.0.0-20210309080453-72c4feaa6da7
github.com/pingcap/tipb v0.0.0-20210326161441-1164ca065d1b
github.com/prometheus/client_golang v1.5.1
github.com/prometheus/client_model v0.2.0
github.com/prometheus/common v0.9.1
Expand All @@ -79,7 +79,7 @@ require (
golang.org/x/mod v0.4.2 // indirect
golang.org/x/net v0.0.0-20210323141857-08027d57d8cf
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9
golang.org/x/sys v0.0.0-20210320140829-1e4c9ba3b0c4
golang.org/x/sys v0.0.0-20210324051608-47abb6519492
golang.org/x/text v0.3.5
golang.org/x/tools v0.1.0
google.golang.org/grpc v1.27.1
Expand Down
8 changes: 4 additions & 4 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -474,8 +474,8 @@ github.com/pingcap/sysutil v0.0.0-20210221112134-a07bda3bde99/go.mod h1:EB/852NM
github.com/pingcap/tidb-dashboard v0.0.0-20210312062513-eef5d6404638/go.mod h1:OzFN8H0EDMMqeulPhPMw2i2JaiZWOKFQ7zdRPhENNgo=
github.com/pingcap/tidb-tools v4.0.9-0.20201127090955-2707c97b3853+incompatible h1:ceznmu/lLseGHP/jKyOa/3u/5H3wtLLLqkH2V3ssSjg=
github.com/pingcap/tidb-tools v4.0.9-0.20201127090955-2707c97b3853+incompatible/go.mod h1:XGdcy9+yqlDSEMTpOXnwf3hiTeqrV6MN/u1se9N8yIM=
github.com/pingcap/tipb v0.0.0-20210309080453-72c4feaa6da7 h1:j8MkWmy5tduhHVsdsgZJugN1U9OWTMSBQoZIpn8kqPc=
github.com/pingcap/tipb v0.0.0-20210309080453-72c4feaa6da7/go.mod h1:nsEhnMokcn7MRqd2J60yxpn/ac3ZH8A6GOJ9NslabUo=
github.com/pingcap/tipb v0.0.0-20210326161441-1164ca065d1b h1:sZHSH0mh8PcRbmZlsIqP7CEwnfFuBpmkGt5i9JStLWA=
github.com/pingcap/tipb v0.0.0-20210326161441-1164ca065d1b/go.mod h1:nsEhnMokcn7MRqd2J60yxpn/ac3ZH8A6GOJ9NslabUo=
github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
Expand Down Expand Up @@ -798,8 +798,8 @@ golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210315160823-c6e025ad8005/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210320140829-1e4c9ba3b0c4 h1:EZ2mChiOa8udjfp6rRmswTbtZN/QzUQp4ptM4rnjHvc=
golang.org/x/sys v0.0.0-20210320140829-1e4c9ba3b0c4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210324051608-47abb6519492 h1:Paq34FxTluEPvVyayQqMPgHm+vTOrIifmcYxFBx9TLg=
golang.org/x/sys v0.0.0-20210324051608-47abb6519492/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
Expand Down
42 changes: 42 additions & 0 deletions util/vitess/vitess_hash.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// Copyright 2021 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package vitess

import (
"crypto/cipher"
"crypto/des"
"encoding/binary"

"github.com/pingcap/errors"
)

// nullKeyBlock is the DES block cipher keyed with eight zero bytes. It is
// assigned once in init and used by HashUint64 to encrypt shard keys.
var nullKeyBlock cipher.Block

func init() {
var err error
nullKeyBlock, err = des.NewCipher(make([]byte, 8))
if err != nil {
panic(errors.Trace(err))
}
}

// HashUint64 implements Vitess' method of calculating the hash used for
// determining a shard key range.
//
// shardKey is written as 8 big-endian bytes, encrypted as one 64-bit block
// with nullKeyBlock (DES under the null key), and the ciphertext is read back
// as a big-endian uint64. The returned error is always nil.
func HashUint64(shardKey uint64) (uint64, error) {
	var plain, sealed [8]byte
	binary.BigEndian.PutUint64(plain[:], shardKey)
	nullKeyBlock.Encrypt(sealed[:], plain[:])
	return binary.BigEndian.Uint64(sealed[:]), nil
}
Loading

0 comments on commit f3951ec

Please sign in to comment.