Skip to content

Commit

Permalink
client: add a mechanism for various endpoint selection mode
Browse files Browse the repository at this point in the history
The current etcd client library chooses a default destination node at
random from all members of a cluster. However, write requests and
(consistent) read requests need to be forwarded to the leader node due
to the nature of the Raft algorithm. If the chosen node is a follower,
additional network traffic is caused by forwarding the request from the
follower to the leader.

Mainly to reduce this forwarding traffic, this commit adds a mechanism
to the client library that supports various endpoint selection modes,
which can be configured with client.Config.SelectionMode.

Currently, three modes are provided:
 - EndpointSelectionDefault: the default, same as the existing behavior
   (pick a node at random)
 - EndpointSelectionPrioritizeLeader: prioritize the leader, for the
   purpose described above
 - EndpointSelectionPreference: with this mode, the client program
   selects a prioritized endpoint manually. This option is targeting a
   new local (not quorum) and consistent read request which will be
   implemented in the future.

I evaluated the effectiveness of EndpointSelectionPrioritizeLeader
with 4 AWS t1.micro instances (3 nodes for the etcd cluster and 1 node
for the etcd client). The client executes this simple benchmark
(https://github.com/mitake/etcd-things/tree/master/prioritize-leader-bench),
which just writes 10000 keys. When PrioritizeLeader is false, the
benchmark took 1 min 32.102 sec to finish. When it is true, the
benchmark took 1 min 4.760 sec.
  • Loading branch information
mitake committed Dec 21, 2015
1 parent c1c59f9 commit 767491e
Showing 1 changed file with 66 additions and 6 deletions.
72 changes: 66 additions & 6 deletions client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,23 @@ var DefaultTransport CancelableTransport = &http.Transport{
TLSHandshakeTimeout: 10 * time.Second,
}

// EndpointSelectionMode selects how the client chooses the endpoint it
// pins (tries first) whenever its endpoint list is reset.
type EndpointSelectionMode int

const (
// EndpointSelectionDefault shuffles the endpoint list randomly and pins
// the first entry. Being the zero value, it is the mode used when
// Config.SelectionMode is left unset.
EndpointSelectionDefault EndpointSelectionMode = iota

// EndpointSelectionPrioritizeLeader pins the endpoint of the current
// cluster leader, to avoid needless forwarding of requests from a
// follower to the leader.
//
// This mode should be used with Client.AutoSync().
EndpointSelectionPrioritizeLeader

// EndpointSelectionPreference keeps the endpoints in the given order and
// pins the one at index Config.PreferenceIndex, letting the caller pick
// the preferred endpoint manually.
EndpointSelectionPreference
)

type Config struct {
// Endpoints defines a set of URLs (schemes, hosts and ports only)
// that can be used to communicate with a logical etcd cluster. For
Expand Down Expand Up @@ -104,6 +121,9 @@ type Config struct {
//
// A HeaderTimeoutPerRequest of zero means no timeout.
HeaderTimeoutPerRequest time.Duration

SelectionMode EndpointSelectionMode
PreferenceIndex int
}

func (cfg *Config) transport() CancelableTransport {
Expand Down Expand Up @@ -167,8 +187,10 @@ type Client interface {

func New(cfg Config) (Client, error) {
c := &httpClusterClient{
clientFactory: newHTTPClientFactory(cfg.transport(), cfg.checkRedirect(), cfg.HeaderTimeoutPerRequest),
rand: rand.New(rand.NewSource(int64(time.Now().Nanosecond()))),
clientFactory: newHTTPClientFactory(cfg.transport(), cfg.checkRedirect(), cfg.HeaderTimeoutPerRequest),
rand: rand.New(rand.NewSource(int64(time.Now().Nanosecond()))),
selectionMode: cfg.SelectionMode,
preferenceIndex: cfg.PreferenceIndex,
}
if cfg.Username != "" {
c.credentials = &credentials{
Expand Down Expand Up @@ -216,7 +238,19 @@ type httpClusterClient struct {
pinned int
credentials *credentials
sync.RWMutex
rand *rand.Rand
rand *rand.Rand
selectionMode EndpointSelectionMode
preferenceIndex int
}

// getLeaderEndpoint asks the cluster, through the members API, which member
// is currently the leader and returns the first client URL that member
// advertises.
//
// It returns the error from the leader lookup unchanged, and ErrNoEndpoints
// when the lookup succeeds but yields no usable client URL.
func (c *httpClusterClient) getLeaderEndpoint() (string, error) {
	mAPI := NewMembersAPI(c)
	leader, err := mAPI.Leader(context.Background())
	if err != nil {
		return "", err
	}

	// A leader that advertises no client URLs gives us nothing to pin;
	// indexing ClientURLs[0] unconditionally would panic in that case.
	if leader == nil || len(leader.ClientURLs) == 0 {
		return "", ErrNoEndpoints
	}

	return leader.ClientURLs[0], nil // TODO: how to handle multiple client URLs?
}

func (c *httpClusterClient) reset(eps []string) error {
Expand All @@ -233,9 +267,35 @@ func (c *httpClusterClient) reset(eps []string) error {
neps[i] = *u
}

c.endpoints = shuffleEndpoints(c.rand, neps)
// TODO: pin old endpoint if possible, and rebalance when new endpoint appears
c.pinned = 0
switch c.selectionMode {
case EndpointSelectionDefault:
c.endpoints = shuffleEndpoints(c.rand, neps)
c.pinned = 0
case EndpointSelectionPreference:
c.endpoints = neps
c.pinned = c.preferenceIndex
case EndpointSelectionPrioritizeLeader:
c.endpoints = neps
// TODO: should return ErrNoEndpoints in a case of getting leader fail?
lep, err := c.getLeaderEndpoint()
if err != nil {
return ErrNoEndpoints
}

lu, err := url.Parse(lep)
if err != nil {
return ErrNoEndpoints
}

for i := range c.endpoints {
if c.endpoints[i].String() == lu.String() {
c.pinned = i
break
}
}
default:
return errors.New(fmt.Sprintf("invalid mode of endpoint selection: %d", c.selectionMode))
}

return nil
}
Expand Down

0 comments on commit 767491e

Please sign in to comment.