Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
121 changes: 67 additions & 54 deletions enhanced/scraper.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,73 +62,86 @@ func (s *scraper) start(ctx context.Context, interval time.Duration, ch chan<- m

// scrape performs a single scrape.
func (s *scraper) scrape(ctx context.Context) (map[string][]prometheus.Metric, map[string]string) {
input := &cloudwatchlogs.FilterLogEventsInput{
LogGroupName: aws.String("RDSOSMetrics"),
LogStreamNames: aws.StringSlice(s.logStreamNames),
StartTime: aws.Int64(aws.TimeUnixMilli(s.nextStartTime)),
}
s.logger.Debugf("Requesting metrics since %s (last %s).", s.nextStartTime.UTC(), time.Since(s.nextStartTime))

// collect all returned events and metrics/messages
allMetrics := make(map[string]map[time.Time][]prometheus.Metric) // ResourceID -> event timestamp -> metrics
allMessages := make(map[string]map[time.Time]string) // ResourceID -> event timestamp -> message
collectAllMetrics := func(output *cloudwatchlogs.FilterLogEventsOutput, lastPage bool) bool {
for _, event := range output.Events {
l := s.logger.With("EventId", *event.EventId).With("LogStreamName", *event.LogStreamName)
l = l.With("Timestamp", aws.MillisecondsTimeValue(event.Timestamp).UTC())
l = l.With("IngestionTime", aws.MillisecondsTimeValue(event.IngestionTime).UTC())

var instance *sessions.Instance
for _, i := range s.instances {
if i.ResourceID == *event.LogStreamName {
instance = &i
break

// LogStreamNames parameter supports up to 100 items.
// https://docs.aws.amazon.com/AmazonCloudWatchLogs/latest/APIReference/API_FilterLogEvents.html
streamCount := len(s.logStreamNames)
for i := 0; i < streamCount; i += 100 {
sliceStart := i
sliceEnd := i + 100
if sliceEnd > streamCount {
sliceEnd = streamCount
}

input := &cloudwatchlogs.FilterLogEventsInput{
LogGroupName: aws.String("RDSOSMetrics"),
LogStreamNames: aws.StringSlice(s.logStreamNames[sliceStart:sliceEnd]),
StartTime: aws.Int64(aws.TimeUnixMilli(s.nextStartTime)),
}

s.logger.With("next_start", s.nextStartTime.UTC()).With("since_last", time.Since(s.nextStartTime)).Debugf("Requesting metrics")

// collect all returned events and metrics/messages
collectAllMetrics := func(output *cloudwatchlogs.FilterLogEventsOutput, lastPage bool) bool {
for _, event := range output.Events {
l := s.logger.With("EventId", *event.EventId).With("LogStreamName", *event.LogStreamName)
l = l.With("Timestamp", aws.MillisecondsTimeValue(event.Timestamp).UTC())
l = l.With("IngestionTime", aws.MillisecondsTimeValue(event.IngestionTime).UTC())

var instance *sessions.Instance
for _, i := range s.instances {
if i.ResourceID == *event.LogStreamName {
instance = &i
break
}
}
if instance == nil {
l.Errorf("Failed to find instance.")
continue
}
}
if instance == nil {
l.Errorf("Failed to find instance.")
continue
}

if instance.DisableEnhancedMetrics {
l.Debugf("Enhanced Metrics are dissabled for instance %v.", instance)
continue
}
l = l.With("region", instance.Region).With("instance", instance.Instance)

// l.Debugf("Message:\n%s", *event.Message)
osMetrics, err := parseOSMetrics([]byte(*event.Message), s.testDisallowUnknownFields)
if err != nil {
// only for tests
if s.testDisallowUnknownFields {
panic(fmt.Sprintf("New metrics should be added: %s", err))
if instance.DisableEnhancedMetrics {
l.Debugf("Enhanced Metrics are dissabled for instance %v.", instance)
continue
}
l = l.With("region", instance.Region).With("instance", instance.Instance)

// l.Debugf("Message:\n%s", *event.Message)
osMetrics, err := parseOSMetrics([]byte(*event.Message), s.testDisallowUnknownFields)
if err != nil {
// only for tests
if s.testDisallowUnknownFields {
panic(fmt.Sprintf("New metrics should be added: %s", err))
}

l.Errorf("Failed to parse metrics: %s.", err)
continue
}
// l.Debugf("OS Metrics:\n%#v", osMetrics)

l.Errorf("Failed to parse metrics: %s.", err)
continue
}
// l.Debugf("OS Metrics:\n%#v", osMetrics)
timestamp := aws.MillisecondsTimeValue(event.Timestamp).UTC()
l.Debugf("Timestamp from message: %s; from event: %s.", osMetrics.Timestamp.UTC(), timestamp)

timestamp := aws.MillisecondsTimeValue(event.Timestamp).UTC()
l.Debugf("Timestamp from message: %s; from event: %s.", osMetrics.Timestamp.UTC(), timestamp)
if allMetrics[instance.ResourceID] == nil {
allMetrics[instance.ResourceID] = make(map[time.Time][]prometheus.Metric)
}
allMetrics[instance.ResourceID][timestamp] = osMetrics.makePrometheusMetrics(instance.Region, instance.Labels)

if allMetrics[instance.ResourceID] == nil {
allMetrics[instance.ResourceID] = make(map[time.Time][]prometheus.Metric)
if allMessages[instance.ResourceID] == nil {
allMessages[instance.ResourceID] = make(map[time.Time]string)
}
allMessages[instance.ResourceID][timestamp] = *event.Message
}
allMetrics[instance.ResourceID][timestamp] = osMetrics.makePrometheusMetrics(instance.Region, instance.Labels)

if allMessages[instance.ResourceID] == nil {
allMessages[instance.ResourceID] = make(map[time.Time]string)
}
allMessages[instance.ResourceID][timestamp] = *event.Message
return true // continue pagination
}
if err := s.svc.FilterLogEventsPagesWithContext(ctx, input, collectAllMetrics); err != nil {
s.logger.Errorf("Failed to filter log events: %s.", err)
}

return true // continue pagination
}
if err := s.svc.FilterLogEventsPagesWithContext(ctx, input, collectAllMetrics); err != nil {
s.logger.Errorf("Failed to filter log events: %s.", err)
}

// get better times
allTimes := make(map[string][]time.Time)
for resourceID, events := range allMetrics {
Expand Down