Skip to content

Fixing how ring ownership is calculated on the ring status page #5889

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Apr 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
* [ENHANCEMENT] Querier: Add context error check when merging slices from ingesters for GetLabel operations. #5837
* [ENHANCEMENT] Ring: Add experimental `-ingester.tokens-generator-strategy=minimize-spread` flag to enable the new minimize spread token generator strategy. #5855
* [ENHANCEMENT] Query Frontend: Ensure error response returned by Query Frontend follows Prometheus API error response format. #5811
* [ENHANCEMENT] Ring Status Page: Add `Ownership Diff From Expected` column in the ring table to indicate the extent to which the ownership of a specific ingester differs from the expected ownership. #5889
* [BUGFIX] Distributor: Do not use label with empty values for sharding #5717
* [BUGFIX] Query Frontend: queries with negative offset should check whether it is cacheable or not. #5719
* [BUGFIX] Redis Cache: pass `cache_size` config correctly. #5734
Expand Down
22 changes: 20 additions & 2 deletions pkg/ring/http.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ const pageContent = `
<th>Last Heartbeat</th>
<th>Tokens</th>
<th>Ownership</th>
<th>Ownership Diff From Expected</th>
<th>Actions</th>
</tr>
</thead>
Expand All @@ -56,6 +57,7 @@ const pageContent = `
<td>{{ .HeartbeatTimestamp }}</td>
<td>{{ .NumTokens }}</td>
<td>{{ .Ownership }}%</td>
<td>{{ .DiffOwnership }}%</td>
<td><button name="forget" value="{{ .ID }}" type="submit">Forget</button></td>
</tr>
{{ end }}
Expand Down Expand Up @@ -114,6 +116,7 @@ type ingesterDesc struct {
Tokens []uint32 `json:"tokens"`
NumTokens int `json:"-"`
Ownership float64 `json:"-"`
DiffOwnership float64 `json:"-"`
}

type httpResponse struct {
Expand Down Expand Up @@ -153,7 +156,6 @@ func (r *Ring) ServeHTTP(w http.ResponseWriter, req *http.Request) {

storageLastUpdate := r.KVClient.LastUpdateTime(r.key)
var ingesters []ingesterDesc
_, owned := r.countTokens()
for _, id := range ingesterIDs {
ing := r.ringDesc.Ingesters[id]
heartbeatTimestamp := time.Unix(ing.Timestamp, 0)
Expand All @@ -168,6 +170,21 @@ func (r *Ring) ServeHTTP(w http.ResponseWriter, req *http.Request) {
registeredTimestamp = ing.GetRegisteredAt().String()
}

var ownership float64
var deltaOwnership float64

if r.cfg.ZoneAwarenessEnabled {
numTokens, ownedByAz := r.countTokensByAz()
ownership = (float64(ownedByAz[id]) / float64(math.MaxUint32+1)) * 100
expectedOwnership := 1 / float64(len(numTokens[ing.Zone])) * 100
deltaOwnership = (1 - expectedOwnership/ownership) * 100
} else {
_, owned := r.countTokens()
ownership = (float64(owned[id]) / float64(math.MaxUint32+1)) * 100
expectedOwnership := 1 / float64(len(owned)) * 100
deltaOwnership = (1 - expectedOwnership/ownership) * 100
}

ingesters = append(ingesters, ingesterDesc{
ID: id,
State: state,
Expand All @@ -177,7 +194,8 @@ func (r *Ring) ServeHTTP(w http.ResponseWriter, req *http.Request) {
Tokens: ing.Tokens,
Zone: ing.Zone,
NumTokens: len(ing.Tokens),
Ownership: (float64(owned[id]) / float64(math.MaxUint32)) * 100,
Ownership: ownership,
DiffOwnership: deltaOwnership,
})
}

Expand Down
46 changes: 33 additions & 13 deletions pkg/ring/ring.go
Original file line number Diff line number Diff line change
Expand Up @@ -590,22 +590,42 @@ func (r *Ring) GetReplicationSetForOperation(op Operation) (ReplicationSet, erro
}, nil
}

func (r *Ring) countTokensByAz() (map[string]map[string]uint32, map[string]int64) {
numTokens := map[string]map[string]uint32{}
owned := map[string]int64{}

for zone, zonalTokens := range r.ringDesc.getTokensByZone() {
numTokens[zone] = map[string]uint32{}
for i := 1; i <= len(zonalTokens); i++ {
index := i % len(zonalTokens)
diff := tokenDistance(zonalTokens[i-1], zonalTokens[index])
info := r.ringInstanceByToken[zonalTokens[index]]
owned[info.InstanceID] = owned[info.InstanceID] + diff
numTokens[zone][info.InstanceID] = numTokens[zone][info.InstanceID] + 1
}
}

// Set to 0 the number of owned tokens by instances which don't have tokens yet.
for id, info := range r.ringDesc.Ingesters {
if _, ok := owned[id]; !ok {
owned[id] = 0
numTokens[info.Zone][id] = 0
}
}

return numTokens, owned
}

// countTokens returns the number of tokens and tokens within the range for each instance.
// The ring read lock must be already taken when calling this function.
func (r *Ring) countTokens() (map[string]uint32, map[string]uint32) {
owned := map[string]uint32{}
func (r *Ring) countTokens() (map[string]uint32, map[string]int64) {
owned := map[string]int64{}
numTokens := map[string]uint32{}
for i, token := range r.ringTokens {
var diff uint32

// Compute how many tokens are within the range.
if i+1 == len(r.ringTokens) {
diff = (math.MaxUint32 - token) + r.ringTokens[0]
} else {
diff = r.ringTokens[i+1] - token
}
for i := 1; i <= len(r.ringTokens); i++ { // Compute how many tokens are within the range.
index := i % len(r.ringTokens)
diff := tokenDistance(r.ringTokens[i-1], r.ringTokens[index])

info := r.ringInstanceByToken[token]
info := r.ringInstanceByToken[r.ringTokens[index]]
numTokens[info.InstanceID] = numTokens[info.InstanceID] + 1
owned[info.InstanceID] = owned[info.InstanceID] + diff
}
Expand Down Expand Up @@ -662,7 +682,7 @@ func (r *Ring) updateRingMetrics(compareResult CompareResult) {
r.reportedOwners = make(map[string]struct{})
numTokens, ownedRange := r.countTokens()
for id, totalOwned := range ownedRange {
r.memberOwnershipGaugeVec.WithLabelValues(id).Set(float64(totalOwned) / float64(math.MaxUint32))
r.memberOwnershipGaugeVec.WithLabelValues(id).Set(float64(totalOwned) / float64(math.MaxUint32+1))
r.numTokensGaugeVec.WithLabelValues(id).Set(float64(numTokens[id]))
delete(prevOwners, id)
r.reportedOwners[id] = struct{}{}
Expand Down
8 changes: 4 additions & 4 deletions pkg/ring/ring_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3008,8 +3008,8 @@ func TestUpdateMetrics(t *testing.T) {
err = testutil.GatherAndCompare(registry, bytes.NewBufferString(`
# HELP ring_member_ownership_percent The percent ownership of the ring by member
# TYPE ring_member_ownership_percent gauge
ring_member_ownership_percent{member="A",name="test"} 0.500000000349246
ring_member_ownership_percent{member="B",name="test"} 0.49999999965075403
ring_member_ownership_percent{member="A",name="test"} 0.49999999976716936
ring_member_ownership_percent{member="B",name="test"} 0.5000000002328306
# HELP ring_members Number of members in the ring
# TYPE ring_members gauge
ring_members{name="test",state="ACTIVE"} 2
Expand Down Expand Up @@ -3060,8 +3060,8 @@ func TestUpdateMetricsWithRemoval(t *testing.T) {
err = testutil.GatherAndCompare(registry, bytes.NewBufferString(`
# HELP ring_member_ownership_percent The percent ownership of the ring by member
# TYPE ring_member_ownership_percent gauge
ring_member_ownership_percent{member="A",name="test"} 0.500000000349246
ring_member_ownership_percent{member="B",name="test"} 0.49999999965075403
ring_member_ownership_percent{member="A",name="test"} 0.49999999976716936
ring_member_ownership_percent{member="B",name="test"} 0.5000000002328306
# HELP ring_members Number of members in the ring
# TYPE ring_members gauge
ring_members{name="test",state="ACTIVE"} 2
Expand Down