Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Etcd Cluster Downgrade #11362

Closed
wants to merge 25 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
9298f84
etcdserver: Add downgrade support POC
wenjiaswe Oct 25, 2019
62280cc
etcdserver: add downgrade proto api
YoyinZyc Oct 24, 2019
1aa02da
clientV3: add downgrade api in maintenance
YoyinZyc Oct 24, 2019
336cef4
v3rpc: add downgrader api in maintenance server
YoyinZyc Oct 24, 2019
9da21e9
membership: add downgrade field into raftCluster
YoyinZyc Oct 24, 2019
2023979
etcdserver: implement EtcdServer.Downgrade; implement downgrade valid…
YoyinZyc Oct 24, 2019
5182d5e
membership: add downgrade backend support
YoyinZyc Oct 24, 2019
a81969f
membership: recover downgrade from backend when restart the server or…
YoyinZyc Oct 26, 2019
5df9185
etcdserver: add downgrade start and downgrade cancel
YoyinZyc Oct 28, 2019
1d80e74
etcdserver: add monitorDowngrade for monitoring the downgrade status
YoyinZyc Oct 28, 2019
4e429b7
etcdctl: add downgrade commands.
YoyinZyc Oct 29, 2019
fa150ce
etcdserver: update downgrade check for version when starting/restarti…
YoyinZyc Oct 31, 2019
ce75238
etcdserver: add unit&integration test for downgrade; etcdctl: add pri…
YoyinZyc Nov 6, 2019
87aaab6
etcdserver: add new http handler to fetch the downgrade status of a c…
YoyinZyc Nov 11, 2019
dbc383a
e2e: add etcd downgrade/upgrade test
YoyinZyc Nov 11, 2019
f01694a
test: ctl_downgrade_test
wenjiaswe Nov 11, 2019
2311bc4
e2e: fix ctl_v3_downgrade_test; etcdctl: update output messages
YoyinZyc Nov 12, 2019
fe53e35
etcdserver: remove Downgrade Action Status in rpc.proto
YoyinZyc Nov 12, 2019
5c7f7ee
etcdserver: change downgrade raft request type from ConfChange to Int…
YoyinZyc Nov 13, 2019
61082c8
etcdserver: allow previous binary to restart when downgrade. add new …
YoyinZyc Nov 14, 2019
197a8a6
etcdserver: fix TestCtlV3Migrate nil pointer issue
YoyinZyc Nov 14, 2019
6efeb79
etcdserver: change "/downgrade/enabled" endpoint to serve linearized …
YoyinZyc Nov 16, 2019
f961ac9
etcdserver: separate function isValidDowngrade from mustDetectDowngra…
YoyinZyc Nov 19, 2019
00a36b4
etcdserver: define error string and link grpc error code.
YoyinZyc Nov 23, 2019
22ddb23
etcdserver: add new raft internal request DowngradeInfoSetRequest
YoyinZyc Dec 16, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions Documentation/dev-guide/api_reference_v3.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ This is a generated documentation. Please read the proto files for more.
| HashKV | HashKVRequest | HashKVResponse | HashKV computes the hash of all MVCC keys up to a given revision. It only iterates "key" bucket in backend storage. |
| Snapshot | SnapshotRequest | SnapshotResponse | Snapshot sends a snapshot of the entire backend from a member over a stream to a client. |
| MoveLeader | MoveLeaderRequest | MoveLeaderResponse | MoveLeader requests current leader node to transfer its leadership to transferee. |
| Downgrade | DowngradeRequest | DowngradeResponse | Downgrade validates, enables, or cancels a downgrade of the cluster version. |



Expand Down Expand Up @@ -445,6 +446,24 @@ Empty field.



##### message `DowngradeRequest` (etcdserver/etcdserverpb/rpc.proto)

| Field | Description | Type |
| ----- | ----------- | ---- |
| action | action is the kind of downgrade request to issue. The action may VALIDATE the target version, ENABLE the downgrade of the cluster version, or CANCEL the current downgrading job. | DowngradeAction |
| version | version is the target version to downgrade. | string |



##### message `DowngradeResponse` (etcdserver/etcdserverpb/rpc.proto)

| Field | Description | Type |
| ----- | ----------- | ---- |
| header | | ResponseHeader |
| version | version is the current cluster version. | string |



##### message `HashKVRequest` (etcdserver/etcdserverpb/rpc.proto)

| Field | Description | Type |
Expand Down
61 changes: 61 additions & 0 deletions Documentation/dev-guide/apispec/swagger/rpc.swagger.json
Original file line number Diff line number Diff line change
Expand Up @@ -988,6 +988,33 @@
}
}
},
"/v3/maintenance/downgrade": {
"post": {
"tags": [
"Maintenance"
],
"summary": "Downgrade validates, enables, or cancels a downgrade of the cluster version.",
"operationId": "Downgrade",
"parameters": [
{
"name": "body",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/etcdserverpbDowngradeRequest"
}
}
],
"responses": {
"200": {
"description": "A successful response.",
"schema": {
"$ref": "#/definitions/etcdserverpbDowngradeResponse"
}
}
}
}
},
"/v3/maintenance/hash": {
"post": {
"tags": [
Expand Down Expand Up @@ -1156,6 +1183,15 @@
"LEASE"
]
},
"DowngradeRequestDowngradeAction": {
"type": "string",
"default": "VALIDATE",
"enum": [
"VALIDATE",
"ENABLE",
"CANCEL"
]
},
"EventEventType": {
"type": "string",
"default": "PUT",
Expand Down Expand Up @@ -1708,6 +1744,31 @@
}
}
},
"etcdserverpbDowngradeRequest": {
"type": "object",
"properties": {
"action": {
"description": "action is the kind of downgrade request to issue. The action may\nVALIDATE the target version, ENABLE the downgrade of the cluster version,\nor CANCEL the current downgrading job.",
"$ref": "#/definitions/DowngradeRequestDowngradeAction"
},
"version": {
"description": "version is the target version to downgrade.",
"type": "string"
}
}
},
"etcdserverpbDowngradeResponse": {
"type": "object",
"properties": {
"header": {
"$ref": "#/definitions/etcdserverpbResponseHeader"
},
"version": {
"description": "version is the current cluster version.",
"type": "string"
}
}
},
"etcdserverpbHashKVRequest": {
"type": "object",
"properties": {
Expand Down
106 changes: 106 additions & 0 deletions clientv3/integration/maintenance_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ import (
"testing"
"time"

"github.com/coreos/go-semver/semver"
"go.etcd.io/etcd/version"
"go.uber.org/zap"
"google.golang.org/grpc"

Expand Down Expand Up @@ -240,3 +242,107 @@ func TestMaintenanceStatus(t *testing.T) {
t.Fatal("no leader found")
}
}

// TestDowngradeValidateFromClient starts a 3-member cluster, asks it through
// the client to validate a downgrade to the previous minor version, and checks
// that the response carries the major.minor of the running server version.
func TestDowngradeValidateFromClient(t *testing.T) {
	defer testutil.AfterTest(t)

	clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 3})
	defer clus.Terminate(t)

	clus.WaitLeader(t)

	// Connect the client to every member of the cluster.
	endpoints := make([]string, 3)
	for i := range endpoints {
		endpoints[i] = clus.Members[i].GRPCAddr()
	}

	cfg := clientv3.Config{Endpoints: endpoints, DialOptions: []grpc.DialOption{grpc.WithBlock()}}
	cli, err := clientv3.New(cfg)
	if err != nil {
		t.Fatal(err)
	}
	defer cli.Close()

	current := semver.Must(semver.NewVersion(version.Version))
	clusterVersion := semver.Version{Major: current.Major, Minor: current.Minor}
	// The downgrade target is exactly one minor version below the current one.
	target := semver.Version{Major: current.Major, Minor: current.Minor - 1}

	resp, err := cli.DowngradeValidate(context.TODO(), target.String())
	if err != nil {
		t.Fatalf("failed to validate downgrade against target version (%v)", err)
	}

	if want := clusterVersion.String(); resp.Version != want {
		t.Errorf("expected %v; got %v", want, resp.Version)
	}
}

// TestDowngradeEnableFromClient starts a 3-member cluster, enables a downgrade
// to the previous minor version through the client, and checks that the
// response carries the major.minor of the running server version.
func TestDowngradeEnableFromClient(t *testing.T) {
	defer testutil.AfterTest(t)

	clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 3})
	defer clus.Terminate(t)

	clus.WaitLeader(t)

	// Connect the client to every member of the cluster.
	endpoints := make([]string, 3)
	for i := range endpoints {
		endpoints[i] = clus.Members[i].GRPCAddr()
	}

	cfg := clientv3.Config{Endpoints: endpoints, DialOptions: []grpc.DialOption{grpc.WithBlock()}}
	cli, err := clientv3.New(cfg)
	if err != nil {
		t.Fatal(err)
	}
	defer cli.Close()

	current := semver.Must(semver.NewVersion(version.Version))
	clusterVersion := semver.Version{Major: current.Major, Minor: current.Minor}
	// The downgrade target is exactly one minor version below the current one.
	target := semver.Version{Major: current.Major, Minor: current.Minor - 1}

	resp, err := cli.DowngradeEnable(context.TODO(), target.String())
	if err != nil {
		t.Fatalf("failed to enable downgrade to target version (%v)", err)
	}

	if want := clusterVersion.String(); resp.Version != want {
		t.Errorf("expected %v; got %v", want, resp.Version)
	}
}

// TestDowngradeCancelFromClient starts a 3-member cluster, enables a downgrade
// to the previous minor version, cancels it again through the client, and
// checks that the cancel response carries the major.minor of the running
// server version.
func TestDowngradeCancelFromClient(t *testing.T) {
	defer testutil.AfterTest(t)

	clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 3})
	defer clus.Terminate(t)

	clus.WaitLeader(t)

	// Connect the client to every member of the cluster.
	endpoints := make([]string, 3)
	for i := range endpoints {
		endpoints[i] = clus.Members[i].GRPCAddr()
	}

	cfg := clientv3.Config{Endpoints: endpoints, DialOptions: []grpc.DialOption{grpc.WithBlock()}}
	cli, err := clientv3.New(cfg)
	if err != nil {
		t.Fatal(err)
	}
	defer cli.Close()

	current := semver.Must(semver.NewVersion(version.Version))
	clusterVersion := semver.Version{Major: current.Major, Minor: current.Minor}
	// The downgrade target is exactly one minor version below the current one.
	target := semver.Version{Major: current.Major, Minor: current.Minor - 1}

	// A downgrade must be in progress before it can be cancelled; only the
	// error of the enable call matters here.
	if _, err := cli.DowngradeEnable(context.TODO(), target.String()); err != nil {
		t.Fatalf("failed to enable downgrade to target version (%v)", err)
	}

	// Cancel the downgrade that was just enabled.
	resp, err := cli.DowngradeCancel(context.TODO())
	if err != nil {
		t.Fatalf("failed to cancel downgrade (%v)", err)
	}

	if want := clusterVersion.String(); resp.Version != want {
		t.Errorf("expected %v; got %v", want, resp.Version)
	}
}
59 changes: 59 additions & 0 deletions clientv3/maintenance.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ type (
StatusResponse pb.StatusResponse
HashKVResponse pb.HashKVResponse
MoveLeaderResponse pb.MoveLeaderResponse
DowngradeResponse pb.DowngradeResponse
)

type Maintenance interface {
Expand Down Expand Up @@ -65,6 +66,23 @@ type Maintenance interface {
// MoveLeader requests current leader to transfer its leadership to the transferee.
// Request must be made to the leader.
MoveLeader(ctx context.Context, transfereeID uint64) (*MoveLeaderResponse, error)

// DowngradeValidate requests validation of the downgrade request against the target version.
// Version should follow the version format "Major.Minor.Patch" (e.g. 3.4.0).
// The cluster can only be downgraded to one minor version lower.
// Any other input version will fail the validation.
DowngradeValidate(ctx context.Context, version string) (*DowngradeResponse, error)

// DowngradeEnable requests to downgrade the current cluster version to target version.
// It will first validate the target version.
// After all servers have been downgraded to target version,
// the downgrade will be disabled automatically.
// Redundant DowngradeEnable will error out.
DowngradeEnable(ctx context.Context, version string) (*DowngradeResponse, error)

// DowngradeCancel cancels the current downgrade job.
// If there is no current downgrade job, the request returns an error.
DowngradeCancel(ctx context.Context) (*DowngradeResponse, error)
}

type maintenance struct {
Expand Down Expand Up @@ -214,6 +232,47 @@ func (m *maintenance) Snapshot(ctx context.Context) (io.ReadCloser, error) {
return &snapshotReadCloser{ctx: ctx, ReadCloser: pr}, nil
}

// DowngradeValidate issues a Downgrade RPC with the VALIDATE action for the
// given target version. On failure the RPC error is passed through toErr and
// a nil response is returned.
func (m *maintenance) DowngradeValidate(ctx context.Context, version string) (*DowngradeResponse, error) {
	resp, err := m.remote.Downgrade(ctx, &pb.DowngradeRequest{
		Action:  pb.DowngradeRequest_VALIDATE,
		Version: version,
	})
	if err != nil {
		return nil, toErr(ctx, err)
	}
	return (*DowngradeResponse)(resp), nil
}

// DowngradeEnable issues a Downgrade RPC with the ENABLE action for the given
// target version. On failure the RPC error is passed through toErr and a nil
// response is returned.
func (m *maintenance) DowngradeEnable(ctx context.Context, version string) (*DowngradeResponse, error) {
	resp, err := m.remote.Downgrade(ctx, &pb.DowngradeRequest{
		Action:  pb.DowngradeRequest_ENABLE,
		Version: version,
	})
	if err != nil {
		return nil, toErr(ctx, err)
	}
	return (*DowngradeResponse)(resp), nil
}

// DowngradeCancel issues a Downgrade RPC with the CANCEL action; no target
// version is sent. On failure the RPC error is passed through toErr and a nil
// response is returned.
func (m *maintenance) DowngradeCancel(ctx context.Context) (*DowngradeResponse, error) {
	resp, err := m.remote.Downgrade(ctx, &pb.DowngradeRequest{
		Action: pb.DowngradeRequest_CANCEL,
	})
	if err != nil {
		return nil, toErr(ctx, err)
	}
	return (*DowngradeResponse)(resp), nil
}

type snapshotReadCloser struct {
ctx context.Context
io.ReadCloser
Expand Down
4 changes: 4 additions & 0 deletions clientv3/retry.go
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,10 @@ func (rmc *retryMaintenanceClient) Defragment(ctx context.Context, in *pb.Defrag
return rmc.mc.Defragment(ctx, in, opts...)
}

// Downgrade forwards the request and call options unchanged to the wrapped
// maintenance client and returns its result as-is.
func (rmc *retryMaintenanceClient) Downgrade(ctx context.Context, in *pb.DowngradeRequest, opts ...grpc.CallOption) (resp *pb.DowngradeResponse, err error) {
	resp, err = rmc.mc.Downgrade(ctx, in, opts...)
	return resp, err
}

type retryAuthClient struct {
ac pb.AuthClient
}
Expand Down
Loading