Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Proxies & Config API #582

Merged
merged 18 commits into from
Mar 24, 2014
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Proxy promotion.
  • Loading branch information
benbjohnson committed Feb 25, 2014
commit f5698d3566419b75c98eb015ed089d2fde76ee7f
4 changes: 0 additions & 4 deletions config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,6 @@ type Config struct {
KeyFile string `toml:"key_file" env:"ETCD_KEY_FILE"`
Peers []string `toml:"peers" env:"ETCD_PEERS"`
PeersFile string `toml:"peers_file" env:"ETCD_PEERS_FILE"`
MaxClusterSize int `toml:"max_cluster_size" env:"ETCD_MAX_CLUSTER_SIZE"`
MaxResultBuffer int `toml:"max_result_buffer" env:"ETCD_MAX_RESULT_BUFFER"`
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the plan here? Just flag and ignore?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you want it as a deprecation warning or an actual CLI error? It seems important enough that users should be notified to remove their -max-cluster-size.

MaxRetryAttempts int `toml:"max_retry_attempts" env:"ETCD_MAX_RETRY_ATTEMPTS"`
RetryInterval float64 `toml:"retry_interval" env:"ETCD_RETRY_INTERVAL"`
Expand Down Expand Up @@ -90,7 +89,6 @@ func New() *Config {
c := new(Config)
c.SystemPath = DefaultSystemConfigPath
c.Addr = "127.0.0.1:4001"
c.MaxClusterSize = 9
c.MaxResultBuffer = 1024
c.MaxRetryAttempts = 3
c.RetryInterval = 10.0
Expand Down Expand Up @@ -247,7 +245,6 @@ func (c *Config) LoadFlags(arguments []string) error {
f.IntVar(&c.MaxResultBuffer, "max-result-buffer", c.MaxResultBuffer, "")
f.IntVar(&c.MaxRetryAttempts, "max-retry-attempts", c.MaxRetryAttempts, "")
f.Float64Var(&c.RetryInterval, "retry-interval", c.RetryInterval, "")
f.IntVar(&c.MaxClusterSize, "max-cluster-size", c.MaxClusterSize, "")
f.IntVar(&c.Peer.HeartbeatTimeout, "peer-heartbeat-timeout", c.Peer.HeartbeatTimeout, "")
f.IntVar(&c.Peer.ElectionTimeout, "peer-election-timeout", c.Peer.ElectionTimeout, "")

Expand Down Expand Up @@ -281,7 +278,6 @@ func (c *Config) LoadFlags(arguments []string) error {
f.StringVar(&c.DataDir, "d", c.DataDir, "(deprecated)")
f.IntVar(&c.MaxResultBuffer, "m", c.MaxResultBuffer, "(deprecated)")
f.IntVar(&c.MaxRetryAttempts, "r", c.MaxRetryAttempts, "(deprecated)")
f.IntVar(&c.MaxClusterSize, "maxsize", c.MaxClusterSize, "(deprecated)")
f.IntVar(&c.SnapshotCount, "snapshotCount", c.SnapshotCount, "(deprecated)")
// END DEPRECATED FLAGS

Expand Down
37 changes: 0 additions & 37 deletions config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ func TestConfigTOML(t *testing.T) {
assert.Equal(t, c.BindAddr, "127.0.0.1:4003", "")
assert.Equal(t, c.Peers, []string{"coreos.com:4001", "coreos.com:4002"}, "")
assert.Equal(t, c.PeersFile, "/tmp/peers", "")
assert.Equal(t, c.MaxClusterSize, 10, "")
assert.Equal(t, c.MaxResultBuffer, 512, "")
assert.Equal(t, c.MaxRetryAttempts, 5, "")
assert.Equal(t, c.Name, "test-name", "")
Expand Down Expand Up @@ -101,7 +100,6 @@ func TestConfigEnv(t *testing.T) {
assert.Equal(t, c.BindAddr, "127.0.0.1:4003", "")
assert.Equal(t, c.Peers, []string{"coreos.com:4001", "coreos.com:4002"}, "")
assert.Equal(t, c.PeersFile, "/tmp/peers", "")
assert.Equal(t, c.MaxClusterSize, 10, "")
assert.Equal(t, c.MaxResultBuffer, 512, "")
assert.Equal(t, c.MaxRetryAttempts, 5, "")
assert.Equal(t, c.Name, "test-name", "")
Expand Down Expand Up @@ -281,21 +279,6 @@ func TestConfigPeersFileFlag(t *testing.T) {
assert.Equal(t, c.PeersFile, "/tmp/peers", "")
}

// Ensures that the Max Cluster Size can be parsed from the environment.
func TestConfigMaxClusterSizeEnv(t *testing.T) {
withEnv("ETCD_MAX_CLUSTER_SIZE", "5", func(c *Config) {
assert.Nil(t, c.LoadEnv(), "")
assert.Equal(t, c.MaxClusterSize, 5, "")
})
}

// Ensures that the Max Cluster Size flag can be parsed.
func TestConfigMaxClusterSizeFlag(t *testing.T) {
c := New()
assert.Nil(t, c.LoadFlags([]string{"-max-cluster-size", "5"}), "")
assert.Equal(t, c.MaxClusterSize, 5, "")
}

// Ensures that the Max Result Buffer can be parsed from the environment.
func TestConfigMaxResultBufferEnv(t *testing.T) {
withEnv("ETCD_MAX_RESULT_BUFFER", "512", func(c *Config) {
Expand Down Expand Up @@ -600,26 +583,6 @@ func TestConfigDeprecatedPeersFileFlag(t *testing.T) {
assert.Equal(t, stderr, "[deprecated] use -peers-file, not -CF\n", "")
}

func TestConfigDeprecatedMaxClusterSizeFlag(t *testing.T) {
_, stderr := capture(func() {
c := New()
err := c.LoadFlags([]string{"-maxsize", "5"})
assert.NoError(t, err)
assert.Equal(t, c.MaxClusterSize, 5, "")
})
assert.Equal(t, stderr, "[deprecated] use -max-cluster-size, not -maxsize\n", "")
}

func TestConfigDeprecatedMaxResultBufferFlag(t *testing.T) {
_, stderr := capture(func() {
c := New()
err := c.LoadFlags([]string{"-m", "512"})
assert.NoError(t, err)
assert.Equal(t, c.MaxResultBuffer, 512, "")
})
assert.Equal(t, stderr, "[deprecated] use -max-result-buffer, not -m\n", "")
}

func TestConfigDeprecatedMaxRetryAttemptsFlag(t *testing.T) {
_, stderr := capture(func() {
c := New()
Expand Down
1 change: 0 additions & 1 deletion etcd.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,6 @@ func main() {
Scheme: config.PeerTLSInfo().Scheme(),
URL: config.Peer.Addr,
SnapshotCount: config.SnapshotCount,
MaxClusterSize: config.MaxClusterSize,
RetryTimes: config.MaxRetryAttempts,
RetryInterval: config.RetryInterval,
}
Expand Down
2 changes: 1 addition & 1 deletion server/cluster_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ type ClusterConfig struct {

// PromoteDelay is the amount of time, in seconds, after a node is
// unreachable that it will be swapped out for a proxy node, if available.
PromoteDelay int `json:"PromoteDelay"`
PromoteDelay int `json:"promoteDelay"`
}

// NewClusterConfig returns a cluster configuration with default settings.
Expand Down
5 changes: 5 additions & 0 deletions server/join_command.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,11 @@ func (c *JoinCommand) Apply(context raft.Context) (interface{}, error) {
return buf.Bytes(), nil
}

// Remove it as a proxy if it is one.
if ps.registry.ProxyExists(c.Name) {
ps.registry.UnregisterProxy(c.Name)
}

// Add to shared peer registry.
ps.registry.RegisterPeer(c.Name, c.RaftURL, c.EtcdURL)

Expand Down
31 changes: 29 additions & 2 deletions server/peer_server.go
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,6 @@ func (s *PeerServer) ClusterConfig() *ClusterConfig {
// SetClusterConfig updates the current cluster configuration.
// Adjusting the active size will
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

don't leave me hanging! :)

func (s *PeerServer) SetClusterConfig(c *ClusterConfig) error {
prevActiveSize := s.clusterConfig.ActiveSize
s.clusterConfig = c

// Validate configuration.
Expand Down Expand Up @@ -294,9 +293,10 @@ func (s *PeerServer) HTTPHandler() http.Handler {
router.HandleFunc("/version/{version:[0-9]+}/check", s.VersionCheckHttpHandler)
router.HandleFunc("/upgrade", s.UpgradeHttpHandler)
router.HandleFunc("/join", s.JoinHttpHandler)
router.HandleFunc("/promote", s.PromoteHttpHandler).Methods("POST")
router.HandleFunc("/remove/{name:.+}", s.RemoveHttpHandler)
router.HandleFunc("/config", s.getClusterConfigHttpHandler).Methods("GET")
router.HandleFunc("/config", s.setClusterConfigHttpHandler).Methods("POST")
router.HandleFunc("/config", s.setClusterConfigHttpHandler).Methods("PUT")
router.HandleFunc("/vote", s.VoteHttpHandler)
router.HandleFunc("/log", s.GetLogHttpHandler)
router.HandleFunc("/log/append", s.AppendEntriesHttpHandler)
Expand Down Expand Up @@ -632,18 +632,45 @@ func (s *PeerServer) monitorActive(closeChan chan bool) {
peerCount := s.registry.PeerCount()
proxies := s.registry.Proxies()
peers := s.registry.Peers()
fmt.Println("active.3»", peers)
if index := sort.SearchStrings(peers, s.Config.Name); index < len(peers) && peers[index] == s.Config.Name {
peers = append(peers[:index], peers[index+1:]...)
}

fmt.Println("active.1»", activeSize, peerCount)
fmt.Println("active.2»", proxies)

// If we have more active nodes than we should then demote.
if peerCount > activeSize {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

seems like a loggable event.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@benbjohnson I do not think we should handle this case here.
peerCount > activeSize appears only when we manually change the activeSize. So we should handle this when the user sends a configuration change request.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@benbjohnson Any thought on this?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@xiangli-cmu I'd rather keep the cluster size management in one spot. The goal of the command is to set a target size but this function provides an abstraction about how to move the cluster to that size. Moving the functionality to the actual request poses two problems:

  1. Multiple calls to change the config at the same time could cause them to try to resize the cluster to different sizes.
  2. Retries could go on indefinitely if there is a problem demoting the other members. Timing out the request would then invalidate the setting of the target size.

peer := peers[rand.Intn(len(peers))]
fmt.Println("active.demote»", peer)
if _, err := s.raftServer.Do(&RemoveCommand{Name: peer}); err != nil {
log.Infof("%s: warning: demotion error: %v", s.Config.Name, err)
}
continue
}

// If we don't have enough active nodes then try to promote a proxy.
if peerCount < activeSize && len(proxies) > 0 {
proxy := proxies[rand.Intn(len(proxies))]
proxyPeerURL, _ := s.registry.ProxyPeerURL(proxy)
log.Infof("%s: promoting: %v (%s)", s.Config.Name, proxy, proxyPeerURL)

// Notify proxy to promote itself.
client := &http.Client{
Transport: &http.Transport{
DisableKeepAlives: false,
ResponseHeaderTimeout: ActiveMonitorTimeout,
},
}
resp, err := client.Post(fmt.Sprintf("%s/promote", proxyPeerURL), "application/json", nil)
if err != nil {
log.Infof("%s: warning: promotion error: %v", s.Config.Name, err)
} else if resp.StatusCode != http.StatusOK {
log.Infof("%s: warning: promotion failure: %v", s.Config.Name, resp.StatusCode)
}
continue
}
}
}

Expand Down
20 changes: 20 additions & 0 deletions server/peer_server_handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package server
import (
"encoding/json"
"net/http"
"net/url"
"strconv"
"time"

Expand Down Expand Up @@ -171,6 +172,25 @@ func (ps *PeerServer) JoinHttpHandler(w http.ResponseWriter, req *http.Request)
}
}

// Attempt to rejoin the cluster as a peer.
// PromoteHttpHandler responds to POST /promote. It is invoked on a proxy
// node (the active-size monitor POSTs to the proxy's /promote endpoint)
// and tries to rejoin the cluster as a full peer via the peer URL stored
// in ps.proxyPeerURL — presumably recorded when this node was demoted to
// a proxy; TODO confirm where proxyPeerURL is assigned.
// Responds 200 on success, 500 if the stored URL is malformed or the
// join attempt fails.
func (ps *PeerServer) PromoteHttpHandler(w http.ResponseWriter, req *http.Request) {
log.Infof("%s attempting to promote in cluster: %s", ps.Config.Name, ps.proxyPeerURL)
// Parse the stored peer URL; only its Host portion is used below.
url, err := url.Parse(ps.proxyPeerURL)
if err != nil {
w.WriteHeader(http.StatusInternalServerError)
return
}

// Rejoin the cluster through that peer using this server's scheme.
err = ps.joinByPeer(ps.raftServer, url.Host, ps.Config.Scheme)
if err != nil {
log.Infof("%s error while promoting: %v", ps.Config.Name, err)
w.WriteHeader(http.StatusInternalServerError)
return
}
log.Infof("%s promoted in the cluster", ps.Config.Name)
w.WriteHeader(http.StatusOK)
}

// Response to remove request
func (ps *PeerServer) RemoveHttpHandler(w http.ResponseWriter, req *http.Request) {
if req.Method != "DELETE" {
Expand Down
64 changes: 0 additions & 64 deletions server/promote_command.go

This file was deleted.

29 changes: 22 additions & 7 deletions server/registry.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"net/url"
"path"
"path/filepath"
"sort"
"strings"
"sync"

Expand Down Expand Up @@ -48,6 +49,7 @@ func (r *Registry) Peers() []string {
for name, _ := range r.peers {
names = append(names, name)
}
sort.Sort(sort.StringSlice(names))
return names
}

Expand All @@ -57,6 +59,7 @@ func (r *Registry) Proxies() []string {
for name, _ := range r.proxies {
names = append(names, name)
}
sort.Sort(sort.StringSlice(names))
return names
}

Expand All @@ -70,7 +73,11 @@ func (r *Registry) RegisterPeer(name string, peerURL string, machURL string) err
// RegisterProxy adds a proxy to the registry.
// The entry is first persisted under RegistryProxyKey via register(); only
// after that succeeds is the in-memory proxies cache updated by reloading
// the node from the store, keeping cache and store consistent.
func (r *Registry) RegisterProxy(name string, peerURL string, machURL string) error {
// TODO(benbjohnson): Disallow proxies that are already peers.
if err := r.register(RegistryProxyKey, name, peerURL, machURL); err != nil {
return err
}
r.proxies[name] = r.load(RegistryProxyKey, name)
return nil
}

func (r *Registry) register(key, name string, peerURL string, machURL string) error {
Expand Down Expand Up @@ -153,7 +160,9 @@ func (r *Registry) ClientURL(name string) (string, bool) {

func (r *Registry) clientURL(key, name string) (string, bool) {
if r.peers[name] == nil {
r.peers[name] = r.load(key, name)
if node := r.load(key, name); node != nil {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

s/node/peer ?

r.peers[name] = node
}
}

if node := r.peers[name]; node != nil {
Expand Down Expand Up @@ -184,7 +193,9 @@ func (r *Registry) PeerURL(name string) (string, bool) {

func (r *Registry) peerURL(key, name string) (string, bool) {
if r.peers[name] == nil {
r.peers[name] = r.load(key, name)
if node := r.load(key, name); node != nil {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

s/node/peer?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I left it as node since the peer is a node. The peers list is just a collection of node structs.

r.peers[name] = node
}
}

if node := r.peers[name]; node != nil {
Expand All @@ -203,7 +214,9 @@ func (r *Registry) ProxyClientURL(name string) (string, bool) {

func (r *Registry) proxyClientURL(key, name string) (string, bool) {
if r.proxies[name] == nil {
r.proxies[name] = r.load(key, name)
if node := r.load(key, name); node != nil {
r.proxies[name] = node
}
}
if node := r.proxies[name]; node != nil {
return node.url, true
Expand All @@ -215,12 +228,14 @@ func (r *Registry) proxyClientURL(key, name string) (string, bool) {
func (r *Registry) ProxyPeerURL(name string) (string, bool) {
r.Lock()
defer r.Unlock()
return r.proxyPeerURL(RegistryProxyKey,name)
return r.proxyPeerURL(RegistryProxyKey, name)
}

func (r *Registry) proxyPeerURL(key, name string) (string, bool) {
if r.proxies[name] == nil {
r.proxies[name] = r.load(key, name)
if node := r.load(key, name); node != nil {
r.proxies[name] = node
}
}
if node := r.proxies[name]; node != nil {
return node.peerURL, true
Expand Down Expand Up @@ -278,7 +293,7 @@ func (r *Registry) load(key, name string) *node {
}

// Retrieve from store.
e, err := r.store.Get(path.Join(RegistryPeerKey, name), false, false)
e, err := r.store.Get(path.Join(key, name), false, false)
if err != nil {
return nil
}
Expand Down
Loading