Travis: Fix flaky tests
Make min rdonly instances configurable for vtworker tasks
Remove unnecessary tablets wherever possible
Use lower buffer pool size in MySQL to save memory
Track performance using New Relic
Anand Henry committed Dec 22, 2014
1 parent 2e24f76 commit 5935349
Showing 8 changed files with 58 additions and 120 deletions.
Makefile: 6 changes (6 additions, 0 deletions)
@@ -75,6 +75,12 @@ site_integration_test_files = \
# - medium: 30 secs - 1 min
# - large: over 1 min
small_integration_test_files = \
+initial_sharding.py \
+initial_sharding_bytes.py \
+vertical_split.py \
+vertical_split_vtgate.py \
+schema.py \
+keyspace_test.py \
keyrange_test.py \
mysqlctl.py \
sharded.py \
config/mycnf/default.cnf: 4 changes (2 additions, 2 deletions)
@@ -11,14 +11,14 @@ default-storage-engine = innodb
expire_logs_days = 3
innodb_additional_mem_pool_size = 32M
innodb_autoextend_increment = 64
-innodb_buffer_pool_size = 64M
+innodb_buffer_pool_size = 32M
innodb_data_file_path = ibdata1:10M:autoextend
innodb_data_home_dir = {{.InnodbDataHomeDir}}
innodb_file_per_table
innodb_flush_log_at_trx_commit = 2
innodb_flush_method = O_DIRECT
innodb_lock_wait_timeout = 20
-innodb_log_buffer_size = 64M
+innodb_log_buffer_size = 8M
innodb_log_file_size = 64M
innodb_log_files_in_group = 2
innodb_log_group_home_dir = {{.InnodbLogGroupHomeDir}}
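For scale: each MySQL instance now allocates a 32M buffer pool instead of 64M and an 8M log buffer instead of 64M, roughly 88M less memory per instance. A back-of-the-envelope sketch of the savings (the instance count below is an assumption for illustration, not taken from this diff):

# Rough memory savings implied by the default.cnf change above.
MB = 1024 * 1024
saved_per_instance = (64 - 32) * MB + (64 - 8) * MB  # buffer pool + log buffer
num_mysql_instances = 12  # assumed: tablets a sharding test might bring up
print('per instance: %d MB' % (saved_per_instance // MB))  # 88 MB
print('per test run: %d MB' % (num_mysql_instances * saved_per_instance // MB))  # 1056 MB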
go/vt/worker/topo_utils.go: 13 changes (9 additions, 4 deletions)
@@ -5,6 +5,7 @@
package worker

import (
"flag"
"fmt"
"math/rand"
"time"
@@ -14,9 +15,13 @@ import (
"github.com/youtube/vitess/go/vt/wrangler"
)

+var (
+minHealthyEndPoints = flag.Int("min_healthy_rdonly_endpoints", 2, "minimum number of healthy rdonly endpoints required for checker")
+)
+
// findHealthyRdonlyEndPoint returns a random healthy endpoint.
-// Since we don't want to use them all, we require at least 2 servers
-// are healthy.
+// Since we don't want to use them all, we require at least
+// minHealthyEndPoints servers to be healthy.
func findHealthyRdonlyEndPoint(wr *wrangler.Wrangler, cell, keyspace, shard string) (topo.TabletAlias, error) {
endPoints, err := wr.TopoServer().GetEndPoints(cell, keyspace, shard, topo.TYPE_RDONLY)
if err != nil {
@@ -28,8 +33,8 @@ func findHealthyRdonlyEndPoint(wr *wrangler.Wrangler, cell, keyspace, shard stri
healthyEndpoints = append(healthyEndpoints, entry)
}
}
-if len(healthyEndpoints) <= 1 {
-return topo.TabletAlias{}, fmt.Errorf("Not enough endpoints to chose from in (%v,%v/%v), have %v healthy ones", cell, keyspace, shard, len(healthyEndpoints))
+if len(healthyEndpoints) < *minHealthyEndPoints {
+return topo.TabletAlias{}, fmt.Errorf("Not enough endpoints to chose from in (%v,%v/%v), have %v healthy ones, need at least %v", cell, keyspace, shard, len(healthyEndpoints), *minHealthyEndPoints)
}

// random server in the list is what we want
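Note: with the rdonly tablets trimmed in the test below, each shard keeps only one healthy rdonly endpoint, which is under the new default of 2, so the updated tests presumably lower the threshold through the flag added above. A minimal, hypothetical sketch in the style of the existing test helpers (the real call sites are in changed files not shown in this diff):

# Allow vtworker to proceed with a single healthy rdonly tablet per shard.
utils.run_vtworker(['-cell', 'test_nj',
                    '-min_healthy_rdonly_endpoints', '1',
                    'SplitDiff', 'test_keyspace/80-'],
                   auto_log=True)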
test/initial_sharding.py: 66 changes (15 additions, 51 deletions)
@@ -34,19 +34,16 @@
shard_master = tablet.Tablet()
shard_replica = tablet.Tablet()
shard_rdonly1 = tablet.Tablet()
-shard_rdonly2 = tablet.Tablet()

# split shards
# range "" - 80
shard_0_master = tablet.Tablet()
shard_0_replica = tablet.Tablet()
shard_0_rdonly1 = tablet.Tablet()
-shard_0_rdonly2 = tablet.Tablet()
# range 80 - ""
shard_1_master = tablet.Tablet()
shard_1_replica = tablet.Tablet()
shard_1_rdonly1 = tablet.Tablet()
-shard_1_rdonly2 = tablet.Tablet()


def setUpModule():
@@ -57,15 +54,12 @@ def setUpModule():
shard_master.init_mysql(),
shard_replica.init_mysql(),
shard_rdonly1.init_mysql(),
-shard_rdonly2.init_mysql(),
shard_0_master.init_mysql(),
shard_0_replica.init_mysql(),
shard_0_rdonly1.init_mysql(),
-shard_0_rdonly2.init_mysql(),
shard_1_master.init_mysql(),
shard_1_replica.init_mysql(),
shard_1_rdonly1.init_mysql(),
-shard_1_rdonly2.init_mysql(),
]
utils.wait_procs(setup_procs)
except:
@@ -81,15 +75,12 @@ def tearDownModule():
shard_master.teardown_mysql(),
shard_replica.teardown_mysql(),
shard_rdonly1.teardown_mysql(),
-shard_rdonly2.teardown_mysql(),
shard_0_master.teardown_mysql(),
shard_0_replica.teardown_mysql(),
shard_0_rdonly1.teardown_mysql(),
-shard_0_rdonly2.teardown_mysql(),
shard_1_master.teardown_mysql(),
shard_1_replica.teardown_mysql(),
shard_1_rdonly1.teardown_mysql(),
-shard_1_rdonly2.teardown_mysql(),
]
utils.wait_procs(teardown_procs, raise_on_error=False)

@@ -100,15 +91,12 @@ def tearDownModule():
shard_master.remove_tree()
shard_replica.remove_tree()
shard_rdonly1.remove_tree()
-shard_rdonly2.remove_tree()
shard_0_master.remove_tree()
shard_0_replica.remove_tree()
shard_0_rdonly1.remove_tree()
-shard_0_rdonly2.remove_tree()
shard_1_master.remove_tree()
shard_1_replica.remove_tree()
shard_1_rdonly1.remove_tree()
-shard_1_rdonly2.remove_tree()


class TestInitialSharding(unittest.TestCase):
@@ -230,30 +218,24 @@ def _is_value_present_and_correct(self, tablet, table, id, msg, keyspace_id):

def _check_startup_values(self):
# check first value is in the right shard
-for t in [shard_0_master, shard_0_replica, shard_0_rdonly1,
-shard_0_rdonly2]:
+for t in [shard_0_master, shard_0_replica, shard_0_rdonly1]:
self._check_value(t, 'resharding1', 1, 'msg1', 0x1000000000000000)
-for t in [shard_1_master, shard_1_replica, shard_1_rdonly1,
-shard_1_rdonly2]:
+for t in [shard_1_master, shard_1_replica, shard_1_rdonly1]:
self._check_value(t, 'resharding1', 1, 'msg1',
0x1000000000000000, should_be_here=False)

# check second value is in the right shard
-for t in [shard_0_master, shard_0_replica, shard_0_rdonly1,
-shard_0_rdonly2]:
+for t in [shard_0_master, shard_0_replica, shard_0_rdonly1]:
self._check_value(t, 'resharding1', 2, 'msg2', 0x9000000000000000,
should_be_here=False)
-for t in [shard_1_master, shard_1_replica, shard_1_rdonly1,
-shard_1_rdonly2]:
+for t in [shard_1_master, shard_1_replica, shard_1_rdonly1]:
self._check_value(t, 'resharding1', 2, 'msg2', 0x9000000000000000)

# check third value is in the right shard too
-for t in [shard_0_master, shard_0_replica, shard_0_rdonly1,
-shard_0_rdonly2]:
+for t in [shard_0_master, shard_0_replica, shard_0_rdonly1]:
self._check_value(t, 'resharding1', 3, 'msg3', 0xD000000000000000,
should_be_here=False)
-for t in [shard_1_master, shard_1_replica, shard_1_rdonly1,
-shard_1_rdonly2]:
+for t in [shard_1_master, shard_1_replica, shard_1_rdonly1]:
self._check_value(t, 'resharding1', 3, 'msg3', 0xD000000000000000)

def _insert_lots(self, count, base=0):
@@ -307,20 +289,18 @@ def test_resharding(self):
shard_master.init_tablet( 'master', 'test_keyspace', '0')
shard_replica.init_tablet('replica', 'test_keyspace', '0')
shard_rdonly1.init_tablet( 'rdonly', 'test_keyspace', '0')
-shard_rdonly2.init_tablet( 'rdonly', 'test_keyspace', '0')

utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True)

# create databases so vttablet can start behaving normally
-for t in [shard_master, shard_replica, shard_rdonly1, shard_rdonly2]:
+for t in [shard_master, shard_replica, shard_rdonly1]:
t.create_db('vt_test_keyspace')
t.start_vttablet(wait_for_state=None)

# wait for the tablets
shard_master.wait_for_vttablet_state('SERVING')
shard_replica.wait_for_vttablet_state('SERVING')
shard_rdonly1.wait_for_vttablet_state('SERVING')
-shard_rdonly2.wait_for_vttablet_state('SERVING')

# reparent to make the tablets work
utils.run_vtctl(['ReparentShard', '-force', 'test_keyspace/0',
@@ -339,21 +319,17 @@ def test_resharding(self):
shard_0_master.init_tablet( 'master', 'test_keyspace', '-80')
shard_0_replica.init_tablet('replica', 'test_keyspace', '-80')
shard_0_rdonly1.init_tablet( 'rdonly', 'test_keyspace', '-80')
-shard_0_rdonly2.init_tablet( 'rdonly', 'test_keyspace', '-80')
shard_1_master.init_tablet( 'master', 'test_keyspace', '80-')
shard_1_replica.init_tablet('replica', 'test_keyspace', '80-')
shard_1_rdonly1.init_tablet( 'rdonly', 'test_keyspace', '80-')
-shard_1_rdonly2.init_tablet( 'rdonly', 'test_keyspace', '80-')

# start vttablet on the split shards (no db created,
# so they're all not serving)
-for t in [shard_0_master, shard_0_replica, shard_0_rdonly1, shard_0_rdonly2,
-shard_1_master, shard_1_replica, shard_1_rdonly1,
-shard_1_rdonly2]:
+for t in [shard_0_master, shard_0_replica, shard_0_rdonly1,
+shard_1_master, shard_1_replica, shard_1_rdonly1]:
t.start_vttablet(wait_for_state=None)
-for t in [shard_0_master, shard_0_replica, shard_0_rdonly1, shard_0_rdonly2,
-shard_1_master, shard_1_replica, shard_1_rdonly1,
-shard_1_rdonly2]:
+for t in [shard_0_master, shard_0_replica, shard_0_rdonly1,
+shard_1_master, shard_1_replica, shard_1_rdonly1]:
t.wait_for_vttablet_state('NOT_SERVING')

utils.run_vtctl(['ReparentShard', '-force', 'test_keyspace/-80',
@@ -389,8 +365,6 @@ def test_resharding(self):
auto_log=True)
utils.run_vtctl(['ChangeSlaveType', shard_rdonly1.tablet_alias, 'rdonly'],
auto_log=True)
-utils.run_vtctl(['ChangeSlaveType', shard_rdonly2.tablet_alias, 'rdonly'],
-auto_log=True)

# check the startup values are in the right place
self._check_startup_values()
@@ -421,24 +395,16 @@ def test_resharding(self):
auto_log=True)
utils.run_vtctl(['ChangeSlaveType', shard_rdonly1.tablet_alias, 'rdonly'],
auto_log=True)
-utils.run_vtctl(['ChangeSlaveType', shard_rdonly2.tablet_alias, 'rdonly'],
-auto_log=True)
utils.run_vtctl(['ChangeSlaveType', shard_0_rdonly1.tablet_alias, 'rdonly'],
auto_log=True)
-utils.run_vtctl(['ChangeSlaveType', shard_0_rdonly2.tablet_alias, 'rdonly'],
-auto_log=True)

logging.debug("Running vtworker SplitDiff for 80-")
utils.run_vtworker(['-cell', 'test_nj', 'SplitDiff', 'test_keyspace/80-'],
auto_log=True)
utils.run_vtctl(['ChangeSlaveType', shard_rdonly1.tablet_alias, 'rdonly'],
auto_log=True)
-utils.run_vtctl(['ChangeSlaveType', shard_rdonly2.tablet_alias, 'rdonly'],
-auto_log=True)
utils.run_vtctl(['ChangeSlaveType', shard_1_rdonly1.tablet_alias, 'rdonly'],
auto_log=True)
-utils.run_vtctl(['ChangeSlaveType', shard_1_rdonly2.tablet_alias, 'rdonly'],
-auto_log=True)

utils.pause("Good time to test vtworker for diffs")

@@ -502,11 +468,10 @@ def test_resharding(self):
utils.run_vtctl(['DeleteShard', 'test_keyspace/0'], expect_fail=True)

# scrap the original tablets in the original shard
-for t in [shard_master, shard_replica, shard_rdonly1, shard_rdonly2]:
+for t in [shard_master, shard_replica, shard_rdonly1]:
utils.run_vtctl(['ScrapTablet', t.tablet_alias], auto_log=True)
-tablet.kill_tablets([shard_master, shard_replica, shard_rdonly1,
-shard_rdonly2])
-for t in [shard_master, shard_replica, shard_rdonly1, shard_rdonly2]:
+tablet.kill_tablets([shard_master, shard_replica, shard_rdonly1])
+for t in [shard_master, shard_replica, shard_rdonly1]:
utils.run_vtctl(['DeleteTablet', t.tablet_alias], auto_log=True)

# rebuild the serving graph, all mentions of the old shards shoud be gone
@@ -517,8 +482,7 @@ def test_resharding(self):

# kill everything else
tablet.kill_tablets([shard_0_master, shard_0_replica, shard_0_rdonly1,
-shard_0_rdonly2, shard_1_master, shard_1_replica,
-shard_1_rdonly1, shard_1_rdonly2])
+shard_1_master, shard_1_replica, shard_1_rdonly1])

if __name__ == '__main__':
utils.main()
