Skip to content

Commit 0373de3

Browse files
committed
[PLAT-2609] Backups: Handle case when TS Web UI is not available
Summary: If a TS node was just ADDed or REMOVEd, the TS Web UI can be unavailable. This must be handled correctly in the `yb_backup` script: retry the current step after several seconds. Fixes in the `yb_backup` script: * Move all the places where the TS config is loaded into a single new function - `load_ts_config`. * Store all TS-related config values in a single top-level object - `YBTSConfig` (`ts_cfgs`) (it includes the data from the old `tserver_ip_to_web_port`). * In the backup-create pipeline, the retryable step is the "tablet leaders searching" phase in `find_tablet_leaders`. * In the backup-restore pipeline, the retryable step is the existing "Regenerate list of tservers for every tablet" phase in `restore_table`. Updated `find_data_dirs` and `generate_snapshot_dirs`, which can now ignore TSes whose config download failed - until the next top-level Data Downloading round. `find_data_dirs` and `generate_snapshot_dirs` are called from `download_snapshot_directories`, which is called from a retry loop in `restore_table`. Fixes in the test framework: * Added a new test module for `yb_backup` testing: `cql-backup-test.cc`. The tests are based on the single-process `MiniCluster` + the C++ Cassandra YCQL driver. * Backup-related helpers were added to `CqlTestBase`. * Added the `TEST_mini_cluster_mode` g-flag. In this mode the `MiniTabletServer` can show the correct `--fs_data_dirs` value in the TS Web UI. * TS Web UI `<ts-ip>/vars` was updated accordingly (in the test mode only). * `RpcAndWebServerBase::first_http_address()` can now return an error status if the Web Server is not started. (Used in the test framework only.) * `RpcAndWebServerBase` can provide a writable WebServer object for test purposes. * `RunBackupCommand` can accept & handle an empty (unused) YSQL host/port. * `MiniTabletServer` now uses the same host IP for the WebServer as for its RPC interface. * `MiniTabletServer::bound_http_addr()` returns the "provided input HTTP address" if the internal WebServer is not running. 
Added 2 new tests to cover the case of an unavailable TS Web UI in the backup-create & backup-restore pipelines. (See "Test Plan" section.) Previous related diff: https://phabricator.dev.yugabyte.com/D16554 Commit: yugabyte@8cec33a Test Plan: Jenkins: all tests New tests: ybd --cxx-test cql-backup-test --gtest_filter CqlBackupTest.TestBackupWithoutTSWebUI ybd --cxx-test cql-backup-test --gtest_filter CqlBackupTest.TestBackupRestoreWithoutTSWebUI Existing specific tests: ybd --java-test org.yb.pgsql.TestYbBackup#testIndex --tp 1 ybd --cxx-test yb-backup-test_ent --gtest_filter YBBackupTest.TestYCQLKeyspaceBackupWithLB ybd --cxx-test yb-backup-test_ent --gtest_filter YBBackupTest.TestYSQLBackupWithLearnerTS Generic backup tests: ybd --cxx-test tools_yb-backup-test_ent ybd --java-test org.yb.pgsql.TestYbBackup --tp 1 ybd --java-test org.yb.cql.TestYbBackup --tp 1 ybd --java-test org.yb.cql.ParameterizedTestYbBackup --tp 1 Reviewers: mihnea, achauhan, jhe Reviewed By: jhe Subscribers: bogdan, jenkins-bot, yql Differential Revision: https://phabricator.dev.yugabyte.com/D16650
1 parent 44a07b1 commit 0373de3

File tree

16 files changed

+396
-101
lines changed

16 files changed

+396
-101
lines changed

ent/src/yb/integration-tests/CMakeLists-include.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ set(INTEGRATION_TESTS_EXTENSIONS_TESTS
4444
cdc_service-txn-test
4545
cdcsdk_stream-test
4646
cdcsdk_ysql-test
47+
cql-backup-test
4748
twodc_ysql-test
4849
twodc-test
4950
PARENT_SCOPE)
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
// Copyright (c) YugaByte, Inc.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
4+
// in compliance with the License. You may obtain a copy of the License at
5+
//
6+
// http://www.apache.org/licenses/LICENSE-2.0
7+
//
8+
// Unless required by applicable law or agreed to in writing, software distributed under the License
9+
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
10+
// or implied. See the License for the specific language governing permissions and limitations
11+
// under the License.
12+
//
13+
14+
#include "yb/integration-tests/cql_test_base.h"
15+
16+
#include "yb/tserver/mini_tablet_server.h"
17+
#include "yb/tserver/tablet_server.h"
18+
#include "yb/util/test_util.h"
19+
#include "yb/util/thread.h"
20+
21+
using std::make_unique;
22+
using std::string;
23+
using std::unique_ptr;
24+
25+
DECLARE_bool(TEST_mini_cluster_mode);
26+
27+
namespace yb {
28+
29+
// Name of the YCQL table used when a test does not pass its own table name.
// Kept as a std::string because the tests build queries with operator+ on it.
const string kDefaultTableName = "users";
30+
31+
// Test fixture for exercising the backup command against a MiniCluster through a YCQL
// (Cassandra driver) session.
//
// NOTE: cql() and createTestTable() use ASSERT_* macros. A fatal gtest failure inside them only
// returns from the helper itself; a caller that must stop on such a failure should wrap the call
// in ASSERT_NO_FATAL_FAILURE(...).
class CqlBackupTest : public CqlTestBase<MiniCluster> {
 public:
  virtual ~CqlBackupTest() = default;

  // Turns on the mini-cluster test mode, runs the base fixture's SetUp, then picks a temporary
  // backup directory and opens the CQL session used by the helpers below.
  void SetUp() override {
    FLAGS_TEST_mini_cluster_mode = true; // Provide correct '--fs_data_dirs' via TS Web UI.
    CqlTestBase<MiniCluster>::SetUp();

    backup_dir_ = GetTempDir("backup");
    session_ = make_unique<CassandraSession>(
        ASSERT_RESULT(EstablishSession(driver_.get())));
  }

  // Executes a single CQL statement and asserts that it succeeded.
  void cql(const string& query) {
    ASSERT_OK(session_->ExecuteQuery(query));
  }

  // Creates the table 'table_name' with columns (userid INT PRIMARY KEY, fullname TEXT), inserts
  // one row and verifies that SELECT count(*) reports exactly one row.
  void createTestTable(const string& table_name = kDefaultTableName) {
    cql("CREATE TABLE " + table_name + "(userid INT PRIMARY KEY, fullname TEXT)");
    cql("INSERT INTO " + table_name + " (userid, fullname) values (1, 'yb');");

    auto result = ASSERT_RESULT(session_->ExecuteWithResult("SELECT count(*) FROM " + table_name));
    auto iterator = result.CreateIterator();
    ASSERT_TRUE(iterator.Next());
    auto count = iterator.Row().Value(0).As<int64>();
    EXPECT_EQ(count, 1);
  }

  // Stops the Web server of the tablet server with index 'tsIdx' and spawns a thread that starts
  // it again after 'startAfterSec' seconds.
  //
  // Returns the spawned thread. The thread keeps a raw pointer to the Web server, so the caller
  // must Join() it before the cluster is torn down.
  yb::ThreadPtr stopWebServerAndStartAfter(size_t tsIdx, double startAfterSec) {
    Webserver* web_server = cluster_->mini_tablet_server(tsIdx)->server()->TEST_web_server();
    CHECK_NOTNULL(web_server)->Stop();
    SleepFor(MonoDelta::FromSeconds(0.5)); // Let the server stop listening.

    // A thread that starts the stopped WebServer after some time.
    yb::ThreadPtr thread;
    // Thread creation must not fail silently: callers dereference the returned pointer, and
    // without the thread nothing would ever restart the Web server. Fail fast instead of
    // returning a null ThreadPtr.
    CHECK_OK(yb::Thread::Create(
        CURRENT_TEST_NAME(), "web_server_starter",
        [web_server, startAfterSec]() {
          // Start the server after the specified number of seconds.
          SleepFor(MonoDelta::FromSeconds(startAfterSec));
          EXPECT_OK(web_server->Start());
        }, &thread));
    return thread;
  }

 protected:
  // Directory passed to the backup command as '--backup_location'.
  string backup_dir_;
  // CQL session used by cql() and createTestTable().
  unique_ptr<CassandraSession> session_;
};
80+
81+
// Checks that 'create' backup succeeds when the Web UI of one tablet server is unavailable at
// the start of the backup and only comes back after the first retry round.
TEST_F(CqlBackupTest, YB_DISABLE_TEST_IN_SANITIZERS_OR_MAC(TestBackupWithoutTSWebUI)) {
  // A fatal failure inside the helper only returns from the helper, so propagate it explicitly.
  ASSERT_NO_FATAL_FAILURE(createTestTable());

  // A thread that starts the stopped WebServer after 130 sec. as retry round = 110 sec.
  yb::ThreadPtr thread = stopWebServerAndStartAfter(0, 130);
  ASSERT_TRUE(thread.get() != nullptr);

  auto backup_status = RunBackupCommand(
      {"--backup_location", backup_dir_, "--keyspace", kCqlTestKeyspace, "create"});

  // Join before asserting on the result: the starter thread holds a raw pointer to the Web
  // server and must not outlive the cluster if the backup command failed.
  thread->Join();
  ASSERT_OK(backup_status);

  cql("DROP TABLE " + kDefaultTableName);
  LOG(INFO) << "Test finished: " << CURRENT_TEST_CASE_AND_TEST_NAME_STR();
}
94+
95+
// Checks that 'restore' succeeds when the Web UI of one tablet server is unavailable at the
// start of the restore and only comes back after the first retry round.
TEST_F(CqlBackupTest, YB_DISABLE_TEST_IN_SANITIZERS_OR_MAC(TestBackupRestoreWithoutTSWebUI)) {
  // A fatal failure inside the helper only returns from the helper, so propagate it explicitly.
  ASSERT_NO_FATAL_FAILURE(createTestTable());

  // The backup itself is taken while every Web server is still running.
  ASSERT_OK(RunBackupCommand(
      {"--backup_location", backup_dir_, "--keyspace", kCqlTestKeyspace, "create"}));

  // A thread that starts the stopped WebServer after 110 sec. as retry round = 90 sec.
  yb::ThreadPtr thread = stopWebServerAndStartAfter(0, 110);
  ASSERT_TRUE(thread.get() != nullptr);

  auto restore_status = RunBackupCommand(
      {"--backup_location", backup_dir_, "--keyspace", kCqlTestKeyspace, "restore"});

  // Join before asserting on the result: the starter thread holds a raw pointer to the Web
  // server and must not outlive the cluster if the restore command failed.
  thread->Join();
  ASSERT_OK(restore_status);

  cql("DROP TABLE " + kDefaultTableName);
  LOG(INFO) << "Test finished: " << CURRENT_TEST_CASE_AND_TEST_NAME_STR();
}
111+
112+
} // namespace yb

0 commit comments

Comments
 (0)