Skip to content

Commit

Permalink
Merge pull request #28110 from jeff-roche/rt-latency-tests
Browse files Browse the repository at this point in the history
OCPVE-292: OCPVE-293: OCPVE-294: feat: added a realtime latency test suite
  • Loading branch information
openshift-merge-robot authored Aug 3, 2023
2 parents 4a3ca7b + 8f43ef2 commit 6ee9dc5
Show file tree
Hide file tree
Showing 6 changed files with 269 additions and 9 deletions.
13 changes: 13 additions & 0 deletions pkg/testsuites/standard_suites.go
Original file line number Diff line number Diff line change
Expand Up @@ -367,4 +367,17 @@ var staticSuites = []ginkgo.TestSuite{
},
TestTimeout: 30 * time.Minute,
},
{
Name: "openshift/nodes/realtime/latency",
Description: templates.LongDesc(`
This test suite runs tests to validate realtime latency on nodes.
`),
Matches: func(name string) bool {
if isDisabled(name) {
return false
}
return strings.Contains(name, "[Suite:openshift/nodes/realtime/latency")
},
TestTimeout: 30 * time.Minute,
},
}
6 changes: 6 additions & 0 deletions test/extended/kernel/OWNERS
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
approvers:
- eggfoobar
- jeff-roche
- jerpeter1
- jakobmoellerdev

Original file line number Diff line number Diff line change
Expand Up @@ -21,21 +21,18 @@ var _ = g.Describe("[sig-node][Suite:openshift/nodes/realtime][Disruptive] Real
startRtTestPod(oc)
})

g.It("pi_stress to run successfully with the default algorithm", func() {
args := []string{rtPodName, "--", "pi_stress", "--duration=600", "--groups=1"}
_, err := oc.SetNamespace(rtNamespace).Run("exec").Args(args...).Output()
g.It("pi_stress to run successfully with the fifo algorithm", func() {
err := runPiStressFifo(oc)
o.Expect(err).NotTo(o.HaveOccurred(), "error occured running pi_stress with the fifo algorithm")
})

g.It("pi_stress to run successfully with the round robin algorithm", func() {
args := []string{rtPodName, "--", "pi_stress", "--duration=600", "--groups=1", "--rr"}
_, err := oc.SetNamespace(rtNamespace).Run("exec").Args(args...).Output()
err := runPiStressRR(oc)
o.Expect(err).NotTo(o.HaveOccurred(), "error occured running pi_stress with the round robin algorithm")
})

g.It("deadline_test to run successfully", func() {
args := []string{rtPodName, "--", "deadline_test"}
_, err := oc.SetNamespace(rtNamespace).Run("exec").Args(args...).Output()
err := runDeadlineTest(oc)
o.Expect(err).NotTo(o.HaveOccurred(), "error occured running deadline_test")
})

Expand All @@ -46,5 +43,4 @@ var _ = g.Describe("[sig-node][Suite:openshift/nodes/realtime][Disruptive] Real
g.AfterAll(func() {
cleanupRealtimeTestEnvironment(oc)
})

})
52 changes: 52 additions & 0 deletions test/extended/kernel/kernel_rt_latency.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
package kernel

import (
g "github.com/onsi/ginkgo/v2"
o "github.com/onsi/gomega"
exutil "github.com/openshift/origin/test/extended/util"
)

// Realtime latency suite. The container is Ordered, so the BeforeAll/AfterAll
// hooks below run once around the whole group of specs, while the
// BeforeEach/AfterEach hooks start and remove the test pod around every
// individual spec.
var _ = g.Describe("[sig-node][Suite:openshift/nodes/realtime/latency][Disruptive] Real time kernel should meet latency requirements when tested with", g.Ordered, func() {
	defer g.GinkgoRecover()

	oc := exutil.NewCLI(rtNamespace).AsAdmin()

	// Suite-wide setup and teardown.
	g.BeforeAll(func() {
		failIfNotRT(oc)
		configureRealtimeTestEnvironment(oc)
	})
	g.AfterAll(func() {
		cleanupRealtimeTestEnvironment(oc)
	})

	// Per-spec pod lifecycle.
	g.BeforeEach(func() {
		startRtTestPod(oc)
	})
	g.AfterEach(func() {
		cleanupRtTestPod(oc)
	})

	g.It("hwlatdetect", func() {
		hwlatErr := runHwlatdetect(oc)
		o.Expect(hwlatErr).NotTo(o.HaveOccurred(), "error occured running hwlatdetect")
	})

	g.It("oslat", func() {
		// The oslat helper needs the online processor count to build its CPU list.
		onlineCPUs, countErr := getProcessorCount(oc)
		o.Expect(countErr).NotTo(o.HaveOccurred(), "unable to get the number of processors online")

		oslatErr := runOslat(onlineCPUs, oc)
		o.Expect(oslatErr).NotTo(o.HaveOccurred(), "error occured running oslat")
	})

	g.It("cyclictest", func() {
		// The cyclictest helper derives thread count and affinity from the processor count.
		onlineCPUs, countErr := getProcessorCount(oc)
		o.Expect(countErr).NotTo(o.HaveOccurred(), "unable to get the number of processors online")

		cyclicErr := runCyclictest(onlineCPUs, oc)
		o.Expect(cyclicErr).NotTo(o.HaveOccurred(), "error occured running cyclictest")
	})
})
187 changes: 187 additions & 0 deletions test/extended/kernel/tools.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
package kernel

import (
"encoding/json"
"fmt"
"strconv"

exutil "github.com/openshift/origin/test/extended/util"
"github.com/pkg/errors"
)

// Latency thresholds used by the realtime latency helpers in this file.
// The "usec" suffix (and the "us" unit appended when the hwlatdetect value is
// formatted) indicates the values are expressed in microseconds.
const (
	// hwlatdetectThresholdusec is passed to hwlatdetect as --threshold=<value>us.
	hwlatdetectThresholdusec = 5000
	// oslatThresholdusec is the limit handed to parseOslatResults; any thread
	// whose reported "max" exceeds it is treated as a failure.
	oslatThresholdusec = 5000
	// cyclictestThresholdusec is passed to cyclictest as --breaktrace and is
	// also the limit handed to parseCyclictestResults.
	cyclictestThresholdusec = 5000
)

// runPiStressFifo executes `pi_stress --duration=600 --groups=1` inside the
// realtime test pod (rtPodName in rtNamespace) via `oc exec`, without the
// --rr flag. The command output is discarded; only the exec error, if any,
// is returned.
func runPiStressFifo(oc *exutil.CLI) error {
	cmd := oc.SetNamespace(rtNamespace).Run("exec").Args(
		rtPodName, "--", "pi_stress", "--duration=600", "--groups=1",
	)

	_, execErr := cmd.Output()
	return execErr
}

// runPiStressRR executes `pi_stress --duration=600 --groups=1 --rr` inside
// the realtime test pod (rtPodName in rtNamespace) via `oc exec`. The command
// output is discarded; only the exec error, if any, is returned.
func runPiStressRR(oc *exutil.CLI) error {
	cmd := oc.SetNamespace(rtNamespace).Run("exec").Args(
		rtPodName, "--", "pi_stress", "--duration=600", "--groups=1", "--rr",
	)

	_, execErr := cmd.Output()
	return execErr
}

// runDeadlineTest executes `deadline_test` with no extra arguments inside the
// realtime test pod (rtPodName in rtNamespace) via `oc exec`. The command
// output is discarded; only the exec error, if any, is returned.
func runDeadlineTest(oc *exutil.CLI) error {
	cmd := oc.SetNamespace(rtNamespace).Run("exec").Args(rtPodName, "--", "deadline_test")

	_, execErr := cmd.Output()
	return execErr
}

// runHwlatdetect executes hwlatdetect inside the realtime test pod via
// `oc exec`, using --duration=600s, --window=1s, --width=500ms and a
// --threshold built from hwlatdetectThresholdusec. The command output is
// discarded. A failing exec is returned wrapped with
// "error running hwlatdetect"; otherwise nil is returned.
func runHwlatdetect(oc *exutil.CLI) error {
	thresholdFlag := fmt.Sprintf("--threshold=%dus", hwlatdetectThresholdusec)
	cmd := oc.SetNamespace(rtNamespace).Run("exec").Args(
		rtPodName, "--", "hwlatdetect", "--duration=600s", "--window=1s", "--width=500ms", thresholdFlag,
	)

	if _, execErr := cmd.Output(); execErr != nil {
		// A failure here means either the latency threshold was exceeded or
		// the tool itself could not run properly.
		return errors.Wrap(execErr, "error running hwlatdetect")
	}

	return nil
}

// runOslat runs oslat inside the realtime test pod and validates its JSON
// report against oslatThresholdusec.
//
// cpuCount is the number of processors available to the pod; the tool is run
// on the CPU list "4-<cpuCount-1>", so more than 4 processors are required.
//
// An error is returned when cpuCount is 4 or less, when the oslat exec fails,
// when the report file cannot be read back from the pod, or when
// parseOslatResults rejects the report.
func runOslat(cpuCount int, oc *exutil.CLI) error {
	const reportPath = "/tmp/oslatresults.json"

	// Guard: CPUs 4 and up are the ones under test, so at least 5 must exist.
	if cpuCount <= 4 {
		return fmt.Errorf("more than 4 cores are required to run this oslat test. Found %d cores", cpuCount)
	}

	// Step 1: run oslat and have it write its JSON report inside the pod.
	cpuList := fmt.Sprintf("4-%d", cpuCount-1)
	runCmd := oc.SetNamespace(rtNamespace).Run("exec").Args(
		rtPodName, "--", "oslat", "--cpu-list", cpuList, "--rtprio", "1", "--duration", "600", "--json", reportPath,
	)
	if _, runErr := runCmd.Output(); runErr != nil {
		return errors.Wrap(runErr, "error running oslat")
	}

	// Step 2: read the report back out of the pod.
	catCmd := oc.SetNamespace(rtNamespace).Run("exec").Args(rtPodName, "--", "cat", reportPath)
	report, catErr := catCmd.Output()
	if catErr != nil {
		return errors.Wrap(catErr, "error retrieving oslat results")
	}

	// Step 3: check every reported thread against the threshold.
	if parseErr := parseOslatResults(report, oslatThresholdusec); parseErr != nil {
		return errors.Wrap(parseErr, "error parsing oslat report")
	}

	return nil
}

// parseOslatResults decodes an oslat JSON report and checks each entry of its
// "thread" object against maxThresholdusec.
//
// Only the "cpu" and "max" fields of each thread entry are read. An error is
// returned when the JSON cannot be decoded, when the report contains no
// thread entries, or when at least one thread's "max" is strictly greater
// than maxThresholdusec; in the last case the error lists the "cpu" value of
// every offending thread. The list order follows map iteration and is
// therefore not stable between calls.
func parseOslatResults(jsonReport string, maxThresholdusec int) error {
	type threadStats struct {
		Cpu int `json:"cpu"`
		Max int `json:"max"`
	}
	var report struct {
		Threads map[string]threadStats `json:"thread"`
	}

	if decodeErr := json.Unmarshal([]byte(jsonReport), &report); decodeErr != nil {
		return errors.Wrap(decodeErr, "unable to decode oslat report json")
	}

	threadCount := len(report.Threads)
	if threadCount == 0 {
		return fmt.Errorf("no thread reports found")
	}

	// Collect every CPU over the limit so the caller sees all of them at once.
	overLimit := make([]int, 0, threadCount)
	for _, stats := range report.Threads {
		if stats.Max <= maxThresholdusec {
			continue
		}
		overLimit = append(overLimit, stats.Cpu)
	}

	if len(overLimit) == 0 {
		return nil
	}

	return fmt.Errorf("the following CPUs were over the max latency threshold: %v", overLimit)
}

// runCyclictest runs cyclictest inside the realtime test pod and validates
// its JSON report against cyclictestThresholdusec.
//
// cpuCount is the number of processors available to the pod. The tool is
// invoked with --affinity=4-<cpuCount-1>, --mainaffinity=4 and
// --threads=<cpuCount-5>, so more than 4 processors are required.
//
// An error is returned when cpuCount is 4 or less, when the cyclictest exec
// fails, when the report file cannot be read back from the pod, or when
// parseCyclictestResults rejects the report.
func runCyclictest(cpuCount int, oc *exutil.CLI) error {
	cyclictestReportFile := "/tmp/cyclictestresults.json"

	// Make sure there is enough hardware for this test
	if cpuCount <= 4 {
		return fmt.Errorf("more than 4 cores are required to run this cyclictest test. Found %d cores", cpuCount)
	}

	// NOTE(review): with exactly 5 processors the guard above passes but the
	// command below is given --threads=0 — confirm cyclictest accepts that,
	// or tighten the guard.

	// Run the test
	args := []string{
		rtPodName, "--", "cyclictest",
		"--duration=10m",
		"--priority=95",
		fmt.Sprintf("--threads=%d", cpuCount-5),
		fmt.Sprintf("--affinity=4-%d", cpuCount-1),
		"--interval=1000",
		fmt.Sprintf("--breaktrace=%d", cyclictestThresholdusec),
		"--mainaffinity=4",
		"-m",
		fmt.Sprintf("--json=%s", cyclictestReportFile),
	}
	_, err := oc.SetNamespace(rtNamespace).Run("exec").Args(args...).Output()
	if err != nil {
		return errors.Wrap(err, "error running cyclictest")
	}

	// Gather the results
	args = []string{rtPodName, "--", "cat", cyclictestReportFile}
	report, err := oc.SetNamespace(rtNamespace).Run("exec").Args(args...).Output()
	if err != nil {
		return errors.Wrap(err, "error retrieving cyclictest results")
	}

	// Parse the results and return any errors detected
	if err = parseCyclictestResults(report, cyclictestThresholdusec); err != nil {
		return errors.Wrap(err, "error parsing cyclictest report")
	}

	return nil
}

// parseCyclictestResults decodes a cyclictest JSON report and checks each
// entry of its "thread" object against maxThresholdusec.
//
// Only the "cpu" and "max" fields of each thread entry are read. An error is
// returned when the JSON cannot be decoded, when the report contains no
// thread entries, or when at least one thread's "max" is strictly greater
// than maxThresholdusec; in the last case the error lists the "cpu" value of
// every offending thread. The list order follows map iteration and is
// therefore not stable between calls.
func parseCyclictestResults(jsonReport string, maxThresholdusec int) error {
	type threadResult struct {
		Cpu int `json:"cpu"`
		Max int `json:"max"`
	}
	var parsed struct {
		Threads map[string]threadResult `json:"thread"`
	}

	if decodeErr := json.Unmarshal([]byte(jsonReport), &parsed); decodeErr != nil {
		return errors.Wrap(decodeErr, "unable to decode cyclictest report json")
	}

	total := len(parsed.Threads)
	if total == 0 {
		return fmt.Errorf("no thread reports found")
	}

	// Gather all offenders rather than stopping at the first one.
	offenders := make([]int, 0, total)
	for _, result := range parsed.Threads {
		if result.Max <= maxThresholdusec {
			continue
		}
		offenders = append(offenders, result.Cpu)
	}

	if len(offenders) == 0 {
		return nil
	}

	return fmt.Errorf("the following CPUs were over the max latency threshold: %v", offenders)
}

// getProcessorCount returns the value of `getconf _NPROCESSORS_ONLN` as
// reported from inside the realtime test pod (rtPodName in rtNamespace).
//
// It returns 0 and a wrapped error when the `oc exec` call fails or when the
// command output cannot be parsed as an integer. Errors are wrapped with
// context so they read consistently with the other helpers in this file.
func getProcessorCount(oc *exutil.CLI) (int, error) {
	args := []string{rtPodName, "--", "getconf", "_NPROCESSORS_ONLN"}
	num, err := oc.SetNamespace(rtNamespace).Run("exec").Args(args...).Output()
	if err != nil {
		return 0, errors.Wrap(err, "error running getconf _NPROCESSORS_ONLN")
	}

	// Parse out the CPU count.
	// NOTE(review): assumes Output() returns the value without surrounding
	// whitespace — confirm against the exutil CLI implementation.
	count, err := strconv.Atoi(num)
	if err != nil {
		// Include the raw output so an unexpected format is easy to diagnose.
		return 0, errors.Wrapf(err, "unable to parse processor count from %q", num)
	}

	return count, nil
}

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 6ee9dc5

Please sign in to comment.