Skip to content

Commit

Permalink
Adding split clone vtworker test.
Browse files: browse the repository at this point in the history
  • Loading branch information
alainjobart committed Oct 1, 2014
1 parent 0a07d3e commit c5b138a
Show file tree
Hide file tree
Showing 3 changed files with 86 additions and 53 deletions.
2 changes: 1 addition & 1 deletion go/vt/worker/split_clone.go
Original file line number Diff line number Diff line change
Expand Up @@ -387,7 +387,7 @@ func (scw *SplitCloneWorker) copy() error {
return fmt.Errorf("cannot get schema from source %v: %v", scw.sourceAliases[0], err)
}
if len(sourceSchemaDefinition.TableDefinitions) == 0 {
return fmt.Errorf("no tables matching the table filter")
return fmt.Errorf("no tables matching the table filter in tablet %v", scw.sourceAliases[0])
}
scw.wr.Logger().Infof("Source tablet 0 has %v tables to copy", len(sourceSchemaDefinition.TableDefinitions))
scw.mu.Lock()
Expand Down
123 changes: 71 additions & 52 deletions test/resharding.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
import utils
import tablet

# Selects how test_resharding clones data into the destination shards:
# when True it runs the vtworker SplitClone command, otherwise it uses the
# MultiSnapshot / ShardMultiRestore vtctl commands. Defaults to False;
# resharding_vtworker.py sets it to True before running this module.
use_clone_worker = False

# Keyspace id type passed as keyspace_id_type= when the test sets up the
# keyspace.
keyspace_id_type = keyrange_constants.KIT_UINT64
# Packs a keyspace id as a network-order (big-endian) unsigned 64-bit value.
pack_keyspace_id = struct.Struct('!Q').pack

Expand Down Expand Up @@ -513,59 +515,76 @@ def test_resharding(self):
'TabletTypes: master,rdonly,replica',
keyspace_id_type=keyspace_id_type)

utils.pause("AAAAAAAAAAAAAAAAAAAAAAAAA")

# take the snapshot for the split
utils.run_vtctl(['MultiSnapshot', '--spec=80-c0-',
'--exclude_tables=unrelated',
shard_1_slave1.tablet_alias], auto_log=True)

# the snapshot_copy hook will copy the snapshot files to
# VTDATAROOT/tmp/... as a test. We want to use these for one half,
# but not for the other, so we test both scenarios.
os.unlink(os.path.join(environment.tmproot, "snapshot-from-%s-for-%s.tar" %
(shard_1_slave1.tablet_alias, "80-c0")))

# wait for tablet's binlog server service to be enabled after snapshot
shard_1_slave1.wait_for_binlog_server_state("Enabled")

# perform the restores: first one from source tablet. We removed the
# storage backup, so it's coming from the tablet itself.
# we also delay starting the binlog player, then enable it.
utils.run_vtctl(['ShardMultiRestore',
'-strategy=populateBlpCheckpoint,dontStartBinlogPlayer',
'test_keyspace/80-c0', shard_1_slave1.tablet_alias],
auto_log=True)

timeout = 10
while True:
shard_2_master_status = shard_2_master.get_status()
if not "not starting because flag 'DontStart' is set" in shard_2_master_status:
timeout = utils.wait_step('shard 2 master has not failed starting yet', timeout)
continue
logging.debug("shard 2 master is waiting on flag removal, good")
break

qr = utils.run_vtctl_json(['ExecuteFetch', shard_2_master.tablet_alias, 'update _vt.blp_checkpoint set flags="" where source_shard_uid=0'])
self.assertEqual(qr['RowsAffected'], 1)
if use_clone_worker:
utils.pause("AAAAAAAAAAAAAAAAAAAAAAAAA")
# the worker will do everything. We test with source_reader_count=10
# (down from default=20) as connection pool is not big enough for 20.
# min_table_size_for_split is set to 1 as to force a split even on the
# small table we have.
utils.run_vtworker(['--cell', 'test_nj',
'--command_display_interval', '10ms',
'SplitClone',
'--exclude_tables' ,'unrelated',
'--strategy', 'populateBlpCheckpoint',
'--source_reader_count', '10',
'--min_table_size_for_split', '1',
'test_keyspace/80-c0'],
auto_log=True)

# TODO(alainjobart): experiment with the dontStartBinlogPlayer option

timeout = 10
while True:
shard_2_master_status = shard_2_master.get_status()
if "not starting because flag 'DontStart' is set" in shard_2_master_status:
timeout = utils.wait_step('shard 2 master has not started replication yet', timeout)
continue
logging.debug("shard 2 master has started replication, good")
break

# second restore from storage: to be sure, we stop vttablet, and restart
# it afterwards
shard_1_slave1.kill_vttablet()
utils.run_vtctl(['ShardMultiRestore', '-strategy=populateBlpCheckpoint',
'test_keyspace/c0-', shard_1_slave1.tablet_alias],
auto_log=True)
shard_1_slave1.start_vttablet(wait_for_state=None)
shard_1_slave1.wait_for_binlog_server_state("Enabled")
else:
# take the snapshot for the split
utils.run_vtctl(['MultiSnapshot', '--spec=80-c0-',
'--exclude_tables=unrelated',
shard_1_slave1.tablet_alias], auto_log=True)

# the snapshot_copy hook will copy the snapshot files to
# VTDATAROOT/tmp/... as a test. We want to use these for one half,
# but not for the other, so we test both scenarios.
os.unlink(os.path.join(environment.tmproot, "snapshot-from-%s-for-%s.tar" %
(shard_1_slave1.tablet_alias, "80-c0")))

# wait for tablet's binlog server service to be enabled after snapshot
shard_1_slave1.wait_for_binlog_server_state("Enabled")

# perform the restores: first one from source tablet. We removed the
# storage backup, so it's coming from the tablet itself.
# we also delay starting the binlog player, then enable it.
utils.run_vtctl(['ShardMultiRestore',
'-strategy=populateBlpCheckpoint,dontStartBinlogPlayer',
'test_keyspace/80-c0', shard_1_slave1.tablet_alias],
auto_log=True)

timeout = 10
while True:
shard_2_master_status = shard_2_master.get_status()
if not "not starting because flag 'DontStart' is set" in shard_2_master_status:
timeout = utils.wait_step('shard 2 master has not failed starting yet', timeout)
continue
logging.debug("shard 2 master is waiting on flag removal, good")
break

qr = utils.run_vtctl_json(['ExecuteFetch', shard_2_master.tablet_alias, 'update _vt.blp_checkpoint set flags="" where source_shard_uid=0'])
self.assertEqual(qr['RowsAffected'], 1)

timeout = 10
while True:
shard_2_master_status = shard_2_master.get_status()
if "not starting because flag 'DontStart' is set" in shard_2_master_status:
timeout = utils.wait_step('shard 2 master has not started replication yet', timeout)
continue
logging.debug("shard 2 master has started replication, good")
break

# second restore from storage: to be sure, we stop vttablet, and restart
# it afterwards
shard_1_slave1.kill_vttablet()
utils.run_vtctl(['ShardMultiRestore', '-strategy=populateBlpCheckpoint',
'test_keyspace/c0-', shard_1_slave1.tablet_alias],
auto_log=True)
shard_1_slave1.start_vttablet(wait_for_state=None)
shard_1_slave1.wait_for_binlog_server_state("Enabled")

# check the startup values are in the right place
self._check_startup_values()
Expand Down
14 changes: 14 additions & 0 deletions test/resharding_vtworker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/usr/bin/python
#
# Copyright 2014, Google Inc. All rights reserved.
# Use of this source code is governed by a BSD-style license that can
# be found in the LICENSE file.

import utils
import resharding

# This test runs the same scenario as resharding.py, but with
# resharding.use_clone_worker enabled so that vtworker does the clone.
if __name__ == '__main__':
    # The flag must be set before utils.main() is given the resharding
    # module, since test_resharding reads it while it runs.
    resharding.use_clone_worker = True
    utils.main(resharding)

0 comments on commit c5b138a

Please sign in to comment.