Skip to content

Split OOM event into OOM and OOM Kill #648

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 15, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 7 additions & 6 deletions info/v1/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -497,18 +497,19 @@ type Event struct {
type EventType int

const (
EventOom EventType = iota
EventContainerCreation
EventContainerDeletion
EventOom EventType = 0
EventOomKill = 1
EventContainerCreation = 2
EventContainerDeletion = 3
)

// Extra information about an event. Only one type will be set.
type EventData struct {
// Information about a container creation event.
Created *CreatedEventData `json:"created,omitempty"`

// Information about an OOM event.
Oom *OomEventData `json:"oom,omitempty"`
// Information about an OOM kill event.
OomKill *OomKillEventData `json:"oom,omitempty"`
}

// Information related to a container creation event.
Expand All @@ -518,7 +519,7 @@ type CreatedEventData struct {
}

// Information related to an OOM kill instance
type OomEventData struct {
type OomKillEventData struct {
// process id of the killed process
Pid int `json:"pid"`

Expand Down
19 changes: 15 additions & 4 deletions manager/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -876,21 +876,32 @@ func (self *manager) watchForNewOoms() error {

go func() {
for oomInstance := range outStream {
// Surface OOM and OOM kill events.
newEvent := &info.Event{
ContainerName: oomInstance.ContainerName,
Timestamp: oomInstance.TimeOfDeath,
EventType: info.EventOom,
}
err := self.eventHandler.AddEvent(newEvent)
if err != nil {
glog.Errorf("failed to add OOM event for %q: %v", oomInstance.ContainerName, err)
}
glog.V(3).Infof("Created an OOM event in container %q at %v", oomInstance.ContainerName, oomInstance.TimeOfDeath)

newEvent = &info.Event{
ContainerName: oomInstance.VictimContainerName,
Timestamp: oomInstance.TimeOfDeath,
EventType: info.EventOomKill,
EventData: info.EventData{
Oom: &info.OomEventData{
OomKill: &info.OomKillEventData{
Pid: oomInstance.Pid,
ProcessName: oomInstance.ProcessName,
},
},
}
glog.V(2).Infof("Created an oom event in container %q at %v", oomInstance.ContainerName, oomInstance.TimeOfDeath)
err := self.eventHandler.AddEvent(newEvent)
err = self.eventHandler.AddEvent(newEvent)
if err != nil {
glog.Errorf("failed to add event %v, got error: %v", newEvent, err)
glog.Errorf("failed to add OOM kill event for %q: %v", oomInstance.ContainerName, err)
}
}
}()
Expand Down
6 changes: 5 additions & 1 deletion utils/oomparser/oomparser.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ import (
)

var containerRegexp *regexp.Regexp = regexp.MustCompile(
`Task in (.*) killed as a result of limit of `)
`Task in (.*) killed as a result of limit of (.*)`)
var lastLineRegexp *regexp.Regexp = regexp.MustCompile(
`(^[A-Z]{1}[a-z]{2} .*[0-9]{1,2} [0-9]{1,2}:[0-9]{2}:[0-9]{2}) .* Killed process ([0-9]+) \(([0-9A-Za-z_]+)\)`)
var firstLineRegexp *regexp.Regexp = regexp.MustCompile(
Expand All @@ -52,6 +52,9 @@ type OomInstance struct {
TimeOfDeath time.Time
// the absolute name of the container that OOMed
ContainerName string
// the absolute name of the container that was killed
// due to the OOM.
VictimContainerName string
}

// gets the container name from a line and adds it to the oomInstance.
Expand All @@ -61,6 +64,7 @@ func getContainerName(line string, currentOomInstance *OomInstance) error {
return nil
}
currentOomInstance.ContainerName = path.Join("/", parsedLine[1])
currentOomInstance.VictimContainerName = path.Join("/", parsedLine[2])
return nil
}

Expand Down
26 changes: 16 additions & 10 deletions utils/oomparser/oomparser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,14 @@ package oomparser
import (
"bufio"
"os"
"reflect"
"testing"
"time"
)

const startLine = "Jan 21 22:01:49 localhost kernel: [62278.816267] ruby invoked oom-killer: gfp_mask=0x201da, order=0, oom_score_adj=0"
const endLine = "Jan 21 22:01:49 localhost kernel: [62279.421192] Killed process 19667 (evilprogram2) total-vm:1460016kB, anon-rss:1414008kB, file-rss:4kB"
const containerLine = "Jan 26 14:10:07 kateknister0.mtv.corp.google.com kernel: [1814368.465205] Task in /mem2 killed as a result of limit of /mem2"
const containerLine = "Jan 26 14:10:07 kateknister0.mtv.corp.google.com kernel: [1814368.465205] Task in /mem2 killed as a result of limit of /mem3"
const containerLogFile = "containerOomExampleLog.txt"
const systemLogFile = "systemOomExampleLog.txt"

Expand All @@ -35,10 +36,11 @@ func createExpectedContainerOomInstance(t *testing.T) *OomInstance {
return nil
}
return &OomInstance{
Pid: 13536,
ProcessName: "memorymonster",
TimeOfDeath: deathTime,
ContainerName: "/mem2",
Pid: 13536,
ProcessName: "memorymonster",
TimeOfDeath: deathTime,
ContainerName: "/mem2",
VictimContainerName: "/mem3",
}
}

Expand All @@ -50,10 +52,11 @@ func createExpectedSystemOomInstance(t *testing.T) *OomInstance {
return nil
}
return &OomInstance{
Pid: 1532,
ProcessName: "badsysprogram",
TimeOfDeath: deathTime,
ContainerName: "/",
Pid: 1532,
ProcessName: "badsysprogram",
TimeOfDeath: deathTime,
ContainerName: "/",
VictimContainerName: "/",
}
}

Expand All @@ -73,6 +76,9 @@ func TestGetContainerName(t *testing.T) {
if currentOomInstance.ContainerName != "/mem2" {
t.Errorf("getContainerName should have set containerName to /mem2, not %s", currentOomInstance.ContainerName)
}
if currentOomInstance.VictimContainerName != "/mem3" {
t.Errorf("getContainerName should have set victimContainerName to /mem3, not %s", currentOomInstance.VictimContainerName)
}
}

func TestGetProcessNamePid(t *testing.T) {
Expand Down Expand Up @@ -139,7 +145,7 @@ func helpTestStreamOoms(oomCheckInstance *OomInstance, sysFile string, t *testin

select {
case oomInstance := <-outStream:
if *oomCheckInstance != *oomInstance {
if reflect.DeepEqual(*oomCheckInstance, *oomInstance) {
t.Errorf("wrong instance returned. Expected %v and got %v",
oomCheckInstance, oomInstance)
}
Expand Down