Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Etcd Cluster Downgrade #11362

Closed
wants to merge 25 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
9298f84
etcdserver: Add downgrade support POC
wenjiaswe Oct 25, 2019
62280cc
etcdserver: add downgrade proto api
YoyinZyc Oct 24, 2019
1aa02da
clientV3: add downgrade api in maintenance
YoyinZyc Oct 24, 2019
336cef4
v3rpc: add downgrader api in maintenance server
YoyinZyc Oct 24, 2019
9da21e9
membership: add downgrade field into raftCluster
YoyinZyc Oct 24, 2019
2023979
etcdserver: implement EtcdServer.Downgrade; implement downgrade valid…
YoyinZyc Oct 24, 2019
5182d5e
membership: add downgrade backend support
YoyinZyc Oct 24, 2019
a81969f
membership: recover downgrade from backend when restart the server or…
YoyinZyc Oct 26, 2019
5df9185
etcdserver: add downgrade start and downgrade cancel
YoyinZyc Oct 28, 2019
1d80e74
etcdserver: add monitorDowngrade for monitoring the downgrade status
YoyinZyc Oct 28, 2019
4e429b7
etcdctl: add downgrade commands.
YoyinZyc Oct 29, 2019
fa150ce
etcdserver: update downgrade check for version when starting/restarti…
YoyinZyc Oct 31, 2019
ce75238
etcdserver: add unit&integration test for downgrade; etcdctl: add pri…
YoyinZyc Nov 6, 2019
87aaab6
etcdserver: add new http handler to fetch the downgrade status of a c…
YoyinZyc Nov 11, 2019
dbc383a
e2e: add etcd downgrade/upgrade test
YoyinZyc Nov 11, 2019
f01694a
test: ctl_downgrade_test
wenjiaswe Nov 11, 2019
2311bc4
e2e: fix ctl_v3_downgrade_test; etcdctl: update output messages
YoyinZyc Nov 12, 2019
fe53e35
etcdserver: remove Downgrade Action Status in rpc.proto
YoyinZyc Nov 12, 2019
5c7f7ee
etcdserver: change downgrade raft request type from ConfChange to Int…
YoyinZyc Nov 13, 2019
61082c8
etcdserver: allow previous binary to restart when downgrade. add new …
YoyinZyc Nov 14, 2019
197a8a6
etcdserver: fix TestCtlV3Migrate nil pointer issue
YoyinZyc Nov 14, 2019
6efeb79
etcdserver: change "/downgrade/enabled" endpoint to serve linearized …
YoyinZyc Nov 16, 2019
f961ac9
etcdserver: separate function isValidDowngrade from mustDetectDowngra…
YoyinZyc Nov 19, 2019
00a36b4
etcdserver: define error string and link grpc error code.
YoyinZyc Nov 23, 2019
22ddb23
etcdserver: add new raft internal request DowngradeInfoSetRequest
YoyinZyc Dec 16, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
etcdserver: Add downgrade support POC
  • Loading branch information
wenjiaswe authored and YoyinZyc committed Dec 12, 2019
commit 9298f84937e1164ea1da01e7580e3d4edaa7c288
3 changes: 2 additions & 1 deletion etcdserver/api/capability.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ package api
import (
"sync"

"go.etcd.io/etcd/etcdserver/api/membership"
"go.etcd.io/etcd/version"
"go.uber.org/zap"

Expand Down Expand Up @@ -65,7 +66,7 @@ func UpdateCapability(lg *zap.Logger, v *semver.Version) {
return
}
enableMapMu.Lock()
if curVersion != nil && !curVersion.LessThan(*v) {
if curVersion != nil && !membership.IsVersionChangable(v, curVersion) {
enableMapMu.Unlock()
return
}
Expand Down
17 changes: 10 additions & 7 deletions etcdserver/api/membership/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -816,18 +816,21 @@ func mustDetectDowngrade(lg *zap.Logger, cv *semver.Version) {
// only keep major.minor version for comparison against cluster version
lv = &semver.Version{Major: lv.Major, Minor: lv.Minor}
if cv != nil && lv.LessThan(*cv) {
if lg != nil {
lg.Fatal(
"invalid downgrade; server version is lower than determined cluster version",
zap.String("current-server-version", version.Version),
zap.String("determined-cluster-version", version.Cluster(cv.String())),
)
if IsVersionChangable(cv, lv) {
plog.Infof("cluster is downgrading to current version: %s from determined cluster version: %s).", version.Version, version.Cluster(cv.String()))
} else {
plog.Fatalf("cluster cannot be downgraded (current version: %s is lower than determined cluster version: %s).", version.Version, version.Cluster(cv.String()))
plog.Fatalf("cluster cannot be downgraded (current version: %s is too much lower than determined cluster version: %s).", version.Version, version.Cluster(cv.String()))
}
}
}

// IsVersionChangable reports whether a version change between cv and lv is
// permitted. The two versions must share the same major version, and in
// addition either lv's minor version is exactly one below cv's minor version
// or lv's minor version is above cv's minor version.
// Both arguments are dereferenced, so neither may be nil.
func IsVersionChangable(cv *semver.Version, lv *semver.Version) bool {
	// A change across major versions is never accepted.
	if cv.Major != lv.Major {
		return false
	}
	oneMinorBelow := (cv.Minor - lv.Minor) == 1
	minorAbove := lv.Minor > cv.Minor
	return oneMinorBelow || minorAbove
}

// IsLocalMemberLearner returns if the local member is raft learner
func (c *RaftCluster) IsLocalMemberLearner() bool {
c.Lock()
Expand Down
25 changes: 23 additions & 2 deletions etcdserver/cluster_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -224,19 +224,40 @@ func decideClusterVersion(lg *zap.Logger, vers map[string]*version.Versions) *se
return cv
}

// canUpdateClusterVersion reports whether the cluster version may be moved
// from currentClusterVersion to decidedClusterVersion:
//   - when clusterDowngradeEnabled is false, only if the decided version is
//     greater than the current cluster version;
//   - when clusterDowngradeEnabled is true, only if
//     membership.IsVersionChangable accepts the change, i.e. both versions
//     share the same major version and the decided minor version is either
//     exactly one lower than, or higher than, the current minor version.
func canUpdateClusterVersion(clusterDowngradeEnabled bool, decidedClusterVersion *semver.Version, currentClusterVersion *semver.Version) bool {
	if clusterDowngradeEnabled {
		return membership.IsVersionChangable(currentClusterVersion, decidedClusterVersion)
	}
	return currentClusterVersion.LessThan(*decidedClusterVersion)
}

// isCompatibleWithCluster returns true if the local member has a compatible version with
// the current running cluster.
// The version is considered as compatible when at least one of the other members in the cluster has a
// cluster version in the range of [MinClusterVersion, Version] and no known member has a cluster version
// out of the range.
// We set this rule since when the local member joins, another member might be offline.
func isCompatibleWithCluster(lg *zap.Logger, cl *membership.RaftCluster, local types.ID, rt http.RoundTripper) bool {
// When cluster downgrade support is enabled, set the maximum cluster version to be 1 minor version higher
// to allow the current local member to join a cluster that is 1 minor version higher.
func isCompatibleWithCluster(lg *zap.Logger, cl *membership.RaftCluster, local types.ID, rt http.RoundTripper, clusterDowngradeEnabled bool) bool {
vers := getVersions(lg, cl, local, rt)
minV := semver.Must(semver.NewVersion(version.MinClusterVersion))
maxV := semver.Must(semver.NewVersion(version.Version))
allowedClusterMinor := maxV.Minor
if clusterDowngradeEnabled {
allowedClusterMinor = maxV.Minor + 1
}
// Rebuild the upper bound from major and minor only; no other field is set.
maxV = &semver.Version{
Major: maxV.Major,
Minor: maxV.Minor,
Minor: allowedClusterMinor,
}
return isCompatibleWithVers(lg, vers, local, minV, maxV)
}
Expand Down
2 changes: 2 additions & 0 deletions etcdserver/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,8 @@ type ServerConfig struct {
LeaseCheckpointInterval time.Duration

EnableGRPCGateway bool

EnableClusterDowngrade bool
}

// VerifyBootstrap sanity-checks the initial config for bootstrap case
Expand Down
9 changes: 5 additions & 4 deletions etcdserver/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -364,7 +364,7 @@ func NewServer(cfg ServerConfig) (srv *EtcdServer, err error) {
if err = membership.ValidateClusterAndAssignIDs(cfg.Logger, cl, existingCluster); err != nil {
return nil, fmt.Errorf("error validating peerURLs %s: %v", existingCluster, err)
}
if !isCompatibleWithCluster(cfg.Logger, cl, cl.MemberByName(cfg.Name).ID, prt) {
if !isCompatibleWithCluster(cfg.Logger, cl, cl.MemberByName(cfg.Name).ID, prt, cfg.EnableClusterDowngrade) {
return nil, fmt.Errorf("incompatible with current running cluster")
}

Expand Down Expand Up @@ -2555,9 +2555,10 @@ func (s *EtcdServer) monitorVersions() {
continue
}

// update cluster version only if the decided version is greater than
// the current cluster version
if v != nil && s.cluster.Version().LessThan(*v) {
// The original etcd v3.1.26 only updates the cluster version if the decided version is
// greater than the current cluster version. In this patched etcd, we relax the rule
// and allow a +1 or -1 minor version change of the cluster version.
if v != nil && canUpdateClusterVersion(s.Cfg.EnableClusterDowngrade, v, s.cluster.Version()) {
s.goAttach(func() { s.updateClusterVersion(v.String()) })
}
}
Expand Down