@@ -87,6 +87,9 @@ type Config struct {
87
87
ClientCleanupPeriod time.Duration
88
88
IngestionRateLimit float64
89
89
IngestionBurstSize int
90
+
91
+ // for testing
92
+ ingesterClientFactory func (string ) cortex.IngesterClient
90
93
}
91
94
92
95
// RegisterFlags adds the flags required to config this to the given FlagSet
@@ -218,22 +221,27 @@ func (d *Distributor) getClientFor(ingester *ring.IngesterDesc) (cortex.Ingester
218
221
return client , nil
219
222
}
220
223
221
- conn , err := grpc .Dial (
222
- ingester .Addr ,
223
- grpc .WithTimeout (d .cfg .RemoteTimeout ),
224
- grpc .WithInsecure (),
225
- grpc .WithUnaryInterceptor (grpc_middleware .ChainUnaryClient (
226
- otgrpc .OpenTracingClientInterceptor (opentracing .GlobalTracer ()),
227
- middleware .ClientUserHeaderInterceptor ,
228
- )),
229
- )
230
- if err != nil {
231
- return nil , err
232
- }
233
-
234
- client = ingesterClient {
235
- IngesterClient : cortex .NewIngesterClient (conn ),
236
- conn : conn ,
224
+ if d .cfg .ingesterClientFactory != nil {
225
+ client = ingesterClient {
226
+ IngesterClient : d .cfg .ingesterClientFactory (ingester .Addr ),
227
+ }
228
+ } else {
229
+ conn , err := grpc .Dial (
230
+ ingester .Addr ,
231
+ grpc .WithTimeout (d .cfg .RemoteTimeout ),
232
+ grpc .WithInsecure (),
233
+ grpc .WithUnaryInterceptor (grpc_middleware .ChainUnaryClient (
234
+ otgrpc .OpenTracingClientInterceptor (opentracing .GlobalTracer ()),
235
+ middleware .ClientUserHeaderInterceptor ,
236
+ )),
237
+ )
238
+ if err != nil {
239
+ return nil , err
240
+ }
241
+ client = ingesterClient {
242
+ IngesterClient : cortex .NewIngesterClient (conn ),
243
+ conn : conn ,
244
+ }
237
245
}
238
246
d .clients [ingester .Addr ] = client
239
247
return client , nil
@@ -252,9 +260,18 @@ func tokenFor(userID string, name model.LabelValue) uint32 {
252
260
}
253
261
254
262
type sampleTracker struct {
255
- sample * model.Sample
256
- minSuccess int
257
- succeeded int32
263
+ sample * model.Sample
264
+ minSuccess int
265
+ maxFailures int
266
+ succeeded int32
267
+ failed int32
268
+ }
269
+
270
+ type pushTracker struct {
271
+ samplesPending int32
272
+ samplesFailed int32
273
+ done chan struct {}
274
+ err chan error
258
275
}
259
276
260
277
// Push implements cortex.IngesterServer
@@ -267,6 +284,10 @@ func (d *Distributor) Push(ctx context.Context, req *remote.WriteRequest) (*cort
267
284
samples := util .FromWriteRequest (req )
268
285
d .receivedSamples .Add (float64 (len (samples )))
269
286
287
+ if len (samples ) == 0 {
288
+ return & cortex.WriteResponse {}, nil
289
+ }
290
+
270
291
limiter := d .getOrCreateIngestLimiter (userID )
271
292
if ! limiter .AllowN (time .Now (), len (samples )) {
272
293
return nil , errIngestionRateLimitExceeded
@@ -285,11 +306,13 @@ func (d *Distributor) Push(ctx context.Context, req *remote.WriteRequest) (*cort
285
306
sampleTrackers := make ([]sampleTracker , len (samples ), len (samples ))
286
307
samplesByIngester := map [* ring.IngesterDesc ][]* sampleTracker {}
287
308
for i := range samples {
309
+ // We need a response from a quorum of ingesters, which is n/2 + 1.
310
+ minSuccess := (len (ingesters [i ]) / 2 ) + 1
311
+
288
312
sampleTrackers [i ] = sampleTracker {
289
- sample : samples [i ],
290
- // We need a response from a quorum of ingesters, which is n/2 + 1.
291
- minSuccess : (len (ingesters [i ]) / 2 ) + 1 ,
292
- succeeded : 0 ,
313
+ sample : samples [i ],
314
+ minSuccess : minSuccess ,
315
+ maxFailures : len (ingesters [i ]) - minSuccess ,
293
316
}
294
317
295
318
// Skip those that have not heartbeated in a while. NB these are still
@@ -315,26 +338,22 @@ func (d *Distributor) Push(ctx context.Context, req *remote.WriteRequest) (*cort
315
338
}
316
339
}
317
340
318
- errs := make (chan error )
319
- for hostname , samples := range samplesByIngester {
320
- go func (ingester * ring.IngesterDesc , samples []* sampleTracker ) {
321
- errs <- d .sendSamples (ctx , ingester , samples )
322
- }(hostname , samples )
341
+ pushTracker := pushTracker {
342
+ samplesPending : int32 (len (samples )),
343
+ done : make (chan struct {}),
344
+ err : make (chan error ),
323
345
}
324
- var lastErr error
325
- for i := 0 ; i < len (samplesByIngester ); i ++ {
326
- if err := <- errs ; err != nil {
327
- lastErr = err
328
- continue
329
- }
346
+ for ingester , samples := range samplesByIngester {
347
+ go func (ingester * ring.IngesterDesc , samples []* sampleTracker ) {
348
+ d .sendSamples (ctx , ingester , samples , & pushTracker )
349
+ }(ingester , samples )
330
350
}
331
- for i := range sampleTrackers {
332
- if sampleTrackers [ i ]. succeeded < int32 ( sampleTrackers [ i ]. minSuccess ) {
333
- return nil , fmt . Errorf ( "need %d successful writes, only got %d, last error was: %v" ,
334
- sampleTrackers [ i ]. minSuccess , sampleTrackers [ i ]. succeeded , lastErr )
335
- }
351
+ select {
352
+ case err := <- pushTracker . err :
353
+ return nil , err
354
+ case <- pushTracker . done :
355
+ return & cortex. WriteResponse {}, nil
336
356
}
337
- return & cortex.WriteResponse {}, nil
338
357
}
339
358
340
359
func (d * Distributor ) getOrCreateIngestLimiter (userID string ) * rate.Limiter {
@@ -350,7 +369,38 @@ func (d *Distributor) getOrCreateIngestLimiter(userID string) *rate.Limiter {
350
369
return limiter
351
370
}
352
371
353
- func (d * Distributor ) sendSamples (ctx context.Context , ingester * ring.IngesterDesc , sampleTrackers []* sampleTracker ) error {
372
+ func (d * Distributor ) sendSamples (ctx context.Context , ingester * ring.IngesterDesc , sampleTrackers []* sampleTracker , pushTracker * pushTracker ) {
373
+ err := d .sendSamplesErr (ctx , ingester , sampleTrackers )
374
+
375
+ // If we succeed, decrement each sample's pending count by one. If we reach
376
+ // the required number of successful puts on this sample, then decrement the
377
+ // number of pending samples by one. If we successfully push all samples to
378
+ // min success ingesters, wake up the waiting rpc so it can return early.
379
+ // Similarly, track the number of errors, and if it exceeds maxFailures
380
+ // shortcut the waiting rpc.
381
+ //
382
+ // The use of atomic increments here guarantees only a single sendSamples
383
+ // goroutine will write to either channel.
384
+ for i := range sampleTrackers {
385
+ if err != nil {
386
+ if atomic .AddInt32 (& sampleTrackers [i ].failed , 1 ) <= int32 (sampleTrackers [i ].maxFailures ) {
387
+ continue
388
+ }
389
+ if atomic .AddInt32 (& pushTracker .samplesFailed , 1 ) == 1 {
390
+ pushTracker .err <- err
391
+ }
392
+ } else {
393
+ if atomic .AddInt32 (& sampleTrackers [i ].succeeded , 1 ) != int32 (sampleTrackers [i ].minSuccess ) {
394
+ continue
395
+ }
396
+ if atomic .AddInt32 (& pushTracker .samplesPending , - 1 ) == 0 {
397
+ pushTracker .done <- struct {}{}
398
+ }
399
+ }
400
+ }
401
+ }
402
+
403
+ func (d * Distributor ) sendSamplesErr (ctx context.Context , ingester * ring.IngesterDesc , sampleTrackers []* sampleTracker ) error {
354
404
client , err := d .getClientFor (ingester )
355
405
if err != nil {
356
406
return err
@@ -366,25 +416,8 @@ func (d *Distributor) sendSamples(ctx context.Context, ingester *ring.IngesterDe
366
416
d .ingesterAppends .WithLabelValues (ingester .Addr ).Inc ()
367
417
if err != nil {
368
418
d .ingesterAppendFailures .WithLabelValues (ingester .Addr ).Inc ()
369
- return err
370
- }
371
-
372
- for i := range sampleTrackers {
373
- atomic .AddInt32 (& sampleTrackers [i ].succeeded , 1 )
374
- }
375
- return nil
376
- }
377
-
378
- func metricNameFromLabelMatchers (matchers ... * metric.LabelMatcher ) (model.LabelValue , error ) {
379
- for _ , m := range matchers {
380
- if m .Name == model .MetricNameLabel {
381
- if m .Type != metric .Equal {
382
- return "" , fmt .Errorf ("non-equality matchers are not supported on the metric name" )
383
- }
384
- return m .Value , nil
385
- }
386
419
}
387
- return "" , fmt . Errorf ( "no metric name matcher found" )
420
+ return err
388
421
}
389
422
390
423
// Query implements Querier.
@@ -393,7 +426,7 @@ func (d *Distributor) Query(ctx context.Context, from, to model.Time, matchers .
393
426
err := instrument .TimeRequestHistogram (ctx , "Distributor.Query" , d .queryDuration , func (ctx context.Context ) error {
394
427
fpToSampleStream := map [model.Fingerprint ]* model.SampleStream {}
395
428
396
- metricName , err := metricNameFromLabelMatchers (matchers ... )
429
+ metricName , _ , err := util . ExtractMetricNameFromMatchers (matchers )
397
430
if err != nil {
398
431
return err
399
432
}
0 commit comments