Skip to content

Commit

Permalink
Fix input panics causing telegraf to exit
Browse files Browse the repository at this point in the history
closes #585
closes #584
  • Loading branch information
Wu Taizeng authored and sparrc committed Jan 26, 2016
1 parent 47ea2d5 commit cf56848
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ specifying a docker endpoint to get metrics from.
- [#440](https://github.com/influxdata/telegraf/issues/440): Don't query filtered devices for disk stats.
- [#463](https://github.com/influxdata/telegraf/issues/463): Docker plugin not working on AWS Linux
- [#568](https://github.com/influxdata/telegraf/issues/568): Multiple output race condition.
- [#585](https://github.com/influxdata/telegraf/pull/585): Log stack trace and continue on Telegraf panic. Thanks @wutaizeng!

## v0.10.0 [2016-01-12]

Expand Down
16 changes: 16 additions & 0 deletions agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"math/big"
"math/rand"
"os"
"runtime"
"sync"
"time"

Expand Down Expand Up @@ -87,6 +88,18 @@ func (a *Agent) Close() error {
return err
}

// panicRecover recovers from a panic raised while running input and logs the
// panic value together with a stack dump of all goroutines, followed by a
// request to report the problem upstream.
//
// It calls recover itself, so it only has an effect when it is the deferred
// function, i.e. used as `defer panicRecover(input)` in the goroutine that
// may panic. If no panic is in flight it does nothing.
func panicRecover(input *models.RunningInput) {
	err := recover()
	if err == nil {
		return
	}

	// Upper bound on the stack dump so a process with a very large number
	// of goroutines cannot make the recovery path allocate without limit.
	const maxTraceSize = 1 << 20

	// runtime.Stack returns the number of bytes it wrote and silently
	// truncates when the buffer is too small. Grow the buffer until the
	// dump fits (or the cap is hit), then slice to the written length so
	// the log output does not carry trailing NUL bytes.
	trace := make([]byte, 2048)
	for {
		n := runtime.Stack(trace, true)
		if n < len(trace) || len(trace) >= maxTraceSize {
			trace = trace[:n]
			break
		}
		trace = make([]byte, 2*len(trace))
	}

	// %v rather than %s: the recovered value may be of any type, not only
	// a string or an error.
	log.Printf("FATAL: Input [%s] panicked: %v, Stack:\n%s\n",
		input.Name, err, trace)
	log.Println("PLEASE REPORT THIS PANIC ON GITHUB with " +
		"stack trace, configuration, and OS information: " +
		"https://github.com/influxdata/telegraf/issues/new")
}

// gatherParallel runs the inputs that are using the same reporting interval
// as the telegraf agent.
func (a *Agent) gatherParallel(pointChan chan *client.Point) error {
Expand All @@ -103,6 +116,7 @@ func (a *Agent) gatherParallel(pointChan chan *client.Point) error {
wg.Add(1)
counter++
go func(input *models.RunningInput) {
defer panicRecover(input)
defer wg.Done()

acc := NewAccumulator(input.Config, pointChan)
Expand Down Expand Up @@ -148,6 +162,8 @@ func (a *Agent) gatherSeparate(
input *models.RunningInput,
pointChan chan *client.Point,
) error {
defer panicRecover(input)

ticker := time.NewTicker(input.Config.Interval)

for {
Expand Down

0 comments on commit cf56848

Please sign in to comment.