Skip to content

Commit

Permalink
Adds --analyze-labels to logcli series command. (#2497)
Browse files Browse the repository at this point in the history
Changes the series command to use the common matcher input found in the query and instant-query commands: instead of `logcli series --match='{foo="bar"}'`, it's now `logcli series '{foo="bar"}'`.
  • Loading branch information
slim-bean authored Aug 13, 2020
1 parent e69b64f commit 82845e4
Show file tree
Hide file tree
Showing 4 changed files with 97 additions and 12 deletions.
14 changes: 12 additions & 2 deletions cmd/logcli/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,16 @@ https://github.com/grafana/loki/blob/master/docs/logql.md`)
labelsCmd = app.Command("labels", "Find values for a given label.")
labelsQuery = newLabelQuery(labelsCmd)

seriesCmd = app.Command("series", "Run series query.")
seriesCmd = app.Command("series", `Run series query.
The "series" command will take the provided label matcher
and return all the log streams found in the time window.
It is possible to send an empty label matcher '{}' to return all streams.
Use the --analyze-labels flag to get a summary of the labels found in all streams.
This is helpful to find high cardinality labels.
`)
seriesQuery = newSeriesQuery(seriesCmd)
)

Expand Down Expand Up @@ -232,10 +241,11 @@ func newSeriesQuery(cmd *kingpin.CmdClause) *seriesquery.SeriesQuery {
return nil
})

cmd.Arg("matcher", "eg '{foo=\"bar\",baz=~\".*blip\"}'").Required().StringVar(&q.Matcher)
cmd.Flag("since", "Lookback window.").Default("1h").DurationVar(&since)
cmd.Flag("from", "Start looking for logs at this absolute time (inclusive)").StringVar(&from)
cmd.Flag("to", "Stop looking for logs at this absolute time (exclusive)").StringVar(&to)
cmd.Flag("match", "eg '{foo=\"bar\",baz=~\".*blip\"}'").Required().StringsVar(&q.Matchers)
cmd.Flag("analyze-labels", "Printout a summary of labels including count of label value combinations, useful for debugging high cardinality series").BoolVar(&q.AnalyzeLabels)

return q
}
Expand Down
27 changes: 26 additions & 1 deletion docs/sources/best-practices/current-best-practices.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,32 @@ Try to keep values bounded to as small a set as possible. We don't have perfect

Loki has several client options: [Promtail](https://github.com/grafana/loki/tree/master/docs/clients/promtail) (which also supports systemd journal ingestion and TCP-based syslog ingestion), [Fluentd](https://github.com/grafana/loki/tree/master/fluentd/fluent-plugin-grafana-loki), [Fluent Bit](https://github.com/grafana/loki/tree/master/cmd/fluent-bit), a [Docker plugin](https://grafana.com/blog/2019/07/15/lokis-path-to-ga-docker-logging-driver-plugin-support-for-systemd/), and more!

Each of these come with ways to configure what labels are applied to create log streams. But be aware of what dynamic labels might be applied. Use the Loki series API to get an idea of what your log streams look like and see if there might be ways to reduce streams and cardinality. Details of the Series API can be found [here](https://grafana.com/docs/loki/latest/api/#series), or you can use [logcli](https://grafana.com/docs/loki/latest/getting-started/logcli/) to query Loki for series information.
Each of these comes with ways to configure what labels are applied to create log streams. But be aware of what dynamic labels might be applied.
Use the Loki series API to get an idea of what your log streams look like and see if there might be ways to reduce streams and cardinality.
Details of the Series API can be found [here](https://grafana.com/docs/loki/latest/api/#series), or you can use [logcli](https://grafana.com/docs/loki/latest/getting-started/logcli/) to query Loki for series information.

In Loki 1.6.0 and newer, the logcli series command has an `--analyze-labels` flag specifically for debugging high cardinality labels:

```
Total Streams: 25017
Unique Labels: 8
Label Name Unique Values Found In Streams
requestId 24653 24979
logStream 1194 25016
logGroup 140 25016
accountId 13 25016
logger 1 25017
source 1 25016
transport 1 25017
format 1 25017
```

In this example you can see the `requestId` label had 24653 different values across the 24979 streams it was found in. This is bad!

This is a perfect example of something which should not be a label: `requestId` should be removed as a label, and
filter expressions should be used instead to query logs for a specific `requestId`. For example, if `requestId` is found in
the log line as a key=value pair, you could write a query like this: `{logGroup="group1"} |= "requestId=32422355"`

## 5. Configure caching

Expand Down
2 changes: 1 addition & 1 deletion docs/sources/getting-started/logcli.md
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ Commands:
labels [<flags>] [<label>]
Find values for a given label.
series --match=MATCH [<flags>]
series [<flags>] <matcher>
Run series query.
$ logcli help query
Expand Down
66 changes: 58 additions & 8 deletions pkg/logcli/seriesquery/series.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ package seriesquery
import (
"fmt"
"log"
"os"
"sort"
"text/tabwriter"
"time"

"github.com/grafana/loki/pkg/logcli/client"
Expand All @@ -11,24 +14,71 @@ import (

// SeriesQuery contains all necessary fields to execute series queries and print out the results
type SeriesQuery struct {
Matchers []string
Start time.Time
End time.Time
Quiet bool
// Matcher is the single label matcher passed to the client's Series call, eg '{foo="bar"}'.
Matcher string
Start time.Time
End time.Time
// AnalyzeLabels makes DoSeries print a per-label summary instead of each stream.
AnalyzeLabels bool
Quiet bool
}

// labelDetails accumulates the per-label statistics that DoSeries reports
// when label analysis is enabled.
type labelDetails struct {
// name is the label name.
name string
// inStreams counts the streams in which this label was seen.
inStreams int
// uniqueVals is the set of distinct values observed for this label.
uniqueVals map[string]struct{}
}

// DoSeries fetches the series via GetSeries and prints the results to stdout.
//
// When q.AnalyzeLabels is set, it prints the total stream count, the number of
// distinct label names, and a table with one row per label showing how many
// unique values it has and how many streams it was found in, ordered by unique
// value count (highest first). Otherwise each returned stream is printed on
// its own line.
func (q *SeriesQuery) DoSeries(c client.Client) {
values := q.GetSeries(c)
streams := q.GetSeries(c)

if q.AnalyzeLabels {
// Aggregate statistics keyed by label name across every returned stream.
labelMap := map[string]*labelDetails{}

for _, stream := range streams {
for labelName, labelValue := range stream {
if _, ok := labelMap[labelName]; ok {
labelMap[labelName].inStreams++
labelMap[labelName].uniqueVals[labelValue] = struct{}{}
} else {
labelMap[labelName] = &labelDetails{
name: labelName,
inStreams: 1,
uniqueVals: map[string]struct{}{labelValue: struct{}{}},
}
}
}
}

for _, value := range values {
fmt.Println(value)
lds := make([]*labelDetails, 0, len(labelMap))
for _, ld := range labelMap {
lds = append(lds, ld)
}
// Labels with the most unique values come first.
sort.Slice(lds, func(ld1, ld2 int) bool {
return len(lds[ld1].uniqueVals) > len(lds[ld2].uniqueVals)
})

fmt.Println("Total Streams: ", len(streams))
fmt.Println("Unique Labels: ", len(labelMap))
fmt.Println()

// tabwriter aligns the tab-separated columns; nothing is emitted until Flush.
w := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0)
fmt.Fprintf(w, "Label Name\tUnique Values\tFound In Streams\n")
for _, details := range lds {
fmt.Fprintf(w, "%v\t%v\t%v\n", details.name, len(details.uniqueVals), details.inStreams)
}
w.Flush()

} else {
for _, value := range streams {
fmt.Println(value)
}
}

}

// GetSeries returns an array of label sets
func (q *SeriesQuery) GetSeries(c client.Client) []loghttp.LabelSet {
seriesResponse, err := c.Series(q.Matchers, q.Start, q.End, q.Quiet)
seriesResponse, err := c.Series([]string{q.Matcher}, q.Start, q.End, q.Quiet)
if err != nil {
log.Fatalf("Error doing request: %+v", err)
}
Expand Down

0 comments on commit 82845e4

Please sign in to comment.