@@ -252,9 +252,18 @@ func tokenFor(userID string, name model.LabelValue) uint32 {
252
252
}
253
253
254
254
type sampleTracker struct {
255
- sample * model.Sample
256
- minSuccess int
257
- succeeded int32
255
+ sample * model.Sample
256
+ minSuccess int
257
+ maxFailures int
258
+ succeeded int32
259
+ failed int32
260
+ }
261
+
262
+ type pushTracker struct {
263
+ samplesPending int32
264
+ samplesFailed int32
265
+ done chan struct {}
266
+ err chan error
258
267
}
259
268
260
269
// Push implements cortex.IngesterServer
@@ -285,11 +294,15 @@ func (d *Distributor) Push(ctx context.Context, req *remote.WriteRequest) (*cort
285
294
sampleTrackers := make ([]sampleTracker , len (samples ), len (samples ))
286
295
samplesByIngester := map [* ring.IngesterDesc ][]* sampleTracker {}
287
296
for i := range samples {
297
+ // We need a response from a quorum of ingesters, which is n/2 + 1.
298
+ minSuccess := (len (ingesters [i ]) / 2 ) + 1
299
+
288
300
sampleTrackers [i ] = sampleTracker {
289
- sample : samples [i ],
290
- // We need a response from a quorum of ingesters, which is n/2 + 1.
291
- minSuccess : (len (ingesters [i ]) / 2 ) + 1 ,
292
- succeeded : 0 ,
301
+ sample : samples [i ],
302
+ minSuccess : minSuccess ,
303
+ maxFailures : len (ingesters [i ]) - minSuccess ,
304
+ succeeded : 0 ,
305
+ failed : 0 ,
293
306
}
294
307
295
308
// Skip those that have not heartbeated in a while. NB these are still
@@ -315,26 +328,23 @@ func (d *Distributor) Push(ctx context.Context, req *remote.WriteRequest) (*cort
315
328
}
316
329
}
317
330
318
- errs := make ( chan error )
319
- for hostname , samples := range samplesByIngester {
320
- go func ( ingester * ring. IngesterDesc , samples [] * sampleTracker ) {
321
- errs <- d . sendSamples ( ctx , ingester , samples )
322
- }( hostname , samples )
331
+ pushTracker := pushTracker {
332
+ samplesPending : int32 ( len ( samples )),
333
+ samplesFailed : 0 ,
334
+ done : make ( chan struct {}),
335
+ err : make ( chan error ),
323
336
}
324
- var lastErr error
325
- for i := 0 ; i < len (samplesByIngester ); i ++ {
326
- if err := <- errs ; err != nil {
327
- lastErr = err
328
- continue
329
- }
337
+ for ingester , samples := range samplesByIngester {
338
+ go func (ingester * ring.IngesterDesc , samples []* sampleTracker ) {
339
+ d .sendSamples (ctx , ingester , samples , & pushTracker )
340
+ }(ingester , samples )
330
341
}
331
- for i := range sampleTrackers {
332
- if sampleTrackers [ i ]. succeeded < int32 ( sampleTrackers [ i ]. minSuccess ) {
333
- return nil , fmt . Errorf ( "need %d successful writes, only got %d, last error was: %v" ,
334
- sampleTrackers [ i ]. minSuccess , sampleTrackers [ i ]. succeeded , lastErr )
335
- }
342
+ select {
343
+ case err := <- pushTracker . err :
344
+ return nil , err
345
+ case <- pushTracker . done :
346
+ return & cortex. WriteResponse {}, nil
336
347
}
337
- return & cortex.WriteResponse {}, nil
338
348
}
339
349
340
350
func (d * Distributor ) getOrCreateIngestLimiter (userID string ) * rate.Limiter {
@@ -350,7 +360,38 @@ func (d *Distributor) getOrCreateIngestLimiter(userID string) *rate.Limiter {
350
360
return limiter
351
361
}
352
362
353
- func (d * Distributor ) sendSamples (ctx context.Context , ingester * ring.IngesterDesc , sampleTrackers []* sampleTracker ) error {
363
+ func (d * Distributor ) sendSamples (ctx context.Context , ingester * ring.IngesterDesc , sampleTrackers []* sampleTracker , pushTracker * pushTracker ) {
364
+ err := d .sendSamplesErr (ctx , ingester , sampleTrackers )
365
+
366
+ // If we suceed, decrement each sample's pending count by one. If we reach
367
+ // the requred number of successful puts on this sample, then decrement the
368
+ // number of pending samples by one. If we successfully push all samples to
369
+ // min success ingesters, wake up the waiting rpc so it can return early.
370
+ // Similarly, track the number of errors, and if it exceeds maxFailures
371
+ // shortcut the waiting rpc.
372
+ //
373
+ // The use of atomic increments here guarantees only a single sendSamples
374
+ // goroutine will write to either channel.
375
+ for i := range sampleTrackers {
376
+ if err != nil {
377
+ if atomic .AddInt32 (& sampleTrackers [i ].failed , 1 ) > int32 (sampleTrackers [i ].maxFailures ) {
378
+ continue
379
+ }
380
+ if atomic .AddInt32 (& pushTracker .samplesFailed , 1 ) == 1 {
381
+ pushTracker .err <- err
382
+ }
383
+ } else {
384
+ if atomic .AddInt32 (& sampleTrackers [i ].succeeded , 1 ) != int32 (sampleTrackers [i ].minSuccess ) {
385
+ continue
386
+ }
387
+ if atomic .AddInt32 (& pushTracker .samplesPending , - 1 ) == 0 {
388
+ pushTracker .done <- struct {}{}
389
+ }
390
+ }
391
+ }
392
+ }
393
+
394
+ func (d * Distributor ) sendSamplesErr (ctx context.Context , ingester * ring.IngesterDesc , sampleTrackers []* sampleTracker ) error {
354
395
client , err := d .getClientFor (ingester )
355
396
if err != nil {
356
397
return err
@@ -366,25 +407,8 @@ func (d *Distributor) sendSamples(ctx context.Context, ingester *ring.IngesterDe
366
407
d .ingesterAppends .WithLabelValues (ingester .Addr ).Inc ()
367
408
if err != nil {
368
409
d .ingesterAppendFailures .WithLabelValues (ingester .Addr ).Inc ()
369
- return err
370
- }
371
-
372
- for i := range sampleTrackers {
373
- atomic .AddInt32 (& sampleTrackers [i ].succeeded , 1 )
374
- }
375
- return nil
376
- }
377
-
378
- func metricNameFromLabelMatchers (matchers ... * metric.LabelMatcher ) (model.LabelValue , error ) {
379
- for _ , m := range matchers {
380
- if m .Name == model .MetricNameLabel {
381
- if m .Type != metric .Equal {
382
- return "" , fmt .Errorf ("non-equality matchers are not supported on the metric name" )
383
- }
384
- return m .Value , nil
385
- }
386
410
}
387
- return "" , fmt . Errorf ( "no metric name matcher found" )
411
+ return err
388
412
}
389
413
390
414
// Query implements Querier.
@@ -393,7 +417,7 @@ func (d *Distributor) Query(ctx context.Context, from, to model.Time, matchers .
393
417
err := instrument .TimeRequestHistogram (ctx , "Distributor.Query" , d .queryDuration , func (ctx context.Context ) error {
394
418
fpToSampleStream := map [model.Fingerprint ]* model.SampleStream {}
395
419
396
- metricName , err := metricNameFromLabelMatchers (matchers ... )
420
+ metricName , _ , err := util . ExtractMetricNameFromMatchers (matchers )
397
421
if err != nil {
398
422
return err
399
423
}
0 commit comments