Skip to content

Commit

Permalink
Retain process logs on robustness tests
Browse files Browse the repository at this point in the history
Add a new flag to retain e2e etcd process logs after the process is
stopped, saving them next to the visualized model.

Spun out of #16018 where I used it for easier local debugging on model
violations.

Signed-off-by: Thomas Jungblut <tjungblu@redhat.com>
  • Loading branch information
tjungblu committed Jun 14, 2023
1 parent caee532 commit b599235
Show file tree
Hide file tree
Showing 6 changed files with 64 additions and 21 deletions.
5 changes: 5 additions & 0 deletions tests/framework/e2e/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@ type EtcdProcessClusterConfig struct {
DiscoveryEndpoints []string // v3 discovery
DiscoveryToken string
LogLevel string
KeepLogs bool

MaxConcurrentStreams uint32 // default is math.MaxUint32
CorruptCheckTime time.Duration
Expand Down Expand Up @@ -311,6 +312,10 @@ func WithLogLevel(level string) EPClusterOption {
return func(c *EtcdProcessClusterConfig) { c.LogLevel = level }
}

// WithKeepLogs controls whether each etcd process retains its log lines
// in memory after the process has been stopped, so they can still be
// collected for debugging afterwards.
func WithKeepLogs(keep bool) EPClusterOption {
	return func(c *EtcdProcessClusterConfig) {
		c.KeepLogs = keep
	}
}

// WithCorruptCheckTime sets the interval at which etcd runs its periodic
// corruption check.
//
// The parameter was previously named `time`, which shadowed the standard
// library `time` package inside the closure; renamed to avoid confusion.
// Callers are unaffected since Go arguments are positional.
func WithCorruptCheckTime(interval time.Duration) EPClusterOption {
	return func(c *EtcdProcessClusterConfig) { c.CorruptCheckTime = interval }
}
Expand Down
4 changes: 4 additions & 0 deletions tests/framework/e2e/cluster_proxy.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,10 @@ func (p *proxyEtcdProcess) Logs() LogsExpect {
return p.etcdProc.Logs()
}

// LogLines delegates to the wrapped etcd process and returns its captured
// log lines.
func (p *proxyEtcdProcess) LogLines() []string {
	lines := p.etcdProc.LogLines()
	return lines
}

// Kill forwards the kill request to the wrapped etcd process and reports
// its result.
func (p *proxyEtcdProcess) Kill() error {
	err := p.etcdProc.Kill()
	return err
}
Expand Down
19 changes: 19 additions & 0 deletions tests/framework/e2e/etcd_process.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ type EtcdProcess interface {
PeerProxy() proxy.Server
Failpoints() *BinaryFailpoints
Logs() LogsExpect
LogLines() []string
Kill() error
}

Expand All @@ -72,6 +73,8 @@ type EtcdServerProcess struct {
proxy proxy.Server
failpoints *BinaryFailpoints
donec chan struct{} // closed when Interact() terminates

keptLogLines []string // only filled when KeepLogs=true
}

type EtcdServerProcessConfig struct {
Expand All @@ -96,6 +99,8 @@ type EtcdServerProcessConfig struct {
InitialCluster string
GoFailPort int

KeepLogs bool

Proxy *proxy.ServerConfig
}

Expand Down Expand Up @@ -177,6 +182,9 @@ func (ep *EtcdServerProcess) Stop() (err error) {
return nil
}
defer func() {
if ep.cfg.KeepLogs {
ep.keptLogLines = ep.proc.Lines()
}
ep.proc = nil
}()

Expand Down Expand Up @@ -235,6 +243,17 @@ func (ep *EtcdServerProcess) Logs() LogsExpect {
return ep.proc
}

// LogLines returns the log output of the etcd process, one entry per line.
// While the process is running, the lines come straight from the live
// process; after Stop they are only available if KeepLogs was enabled,
// otherwise this panics via the configured logger.
func (ep *EtcdServerProcess) LogLines() []string {
	// Happy path: process still running, read directly from it.
	if ep.proc != nil {
		return ep.proc.Lines()
	}
	// Process already stopped: fall back to the retained copy, if any.
	if ep.cfg.KeepLogs {
		return ep.keptLogLines
	}
	ep.cfg.lg.Panic("Please grab logs before process is stopped or enable KeepLogs to retain them.")
	return nil // unreachable: lg.Panic does not return
}

func (ep *EtcdServerProcess) Kill() error {
ep.cfg.lg.Info("killing server...", zap.String("name", ep.cfg.Name))
return ep.proc.Signal(syscall.SIGKILL)
Expand Down
42 changes: 22 additions & 20 deletions tests/robustness/linearizability_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,12 @@ func TestRobustness(t *testing.T) {
name: "ClusterOfSize1/" + traffic.Name,
failpoint: RandomFailpoint,
traffic: traffic,
cluster: *e2e.NewConfig(
e2e.WithClusterSize(1),
clusterOpts: []e2e.EPClusterOption{e2e.WithClusterSize(1),
e2e.WithSnapshotCount(100),
e2e.WithGoFailEnabled(true),
e2e.WithCompactionBatchLimit(100), // required for compactBeforeCommitBatch and compactAfterCommitBatch failpoints
e2e.WithWatchProcessNotifyInterval(100*time.Millisecond),
),
e2e.WithWatchProcessNotifyInterval(100 * time.Millisecond),
},
})
clusterOfSize3Options := []e2e.EPClusterOption{
e2e.WithIsPeerTLS(true),
Expand All @@ -68,53 +67,53 @@ func TestRobustness(t *testing.T) {
watch: watchConfig{
expectUniqueRevision: traffic.Traffic.ExpectUniqueRevision(),
},
cluster: *e2e.NewConfig(clusterOfSize3Options...),
clusterOpts: clusterOfSize3Options,
})
}
scenarios = append(scenarios, testScenario{
name: "Issue14370",
failpoint: RaftBeforeSavePanic,
cluster: *e2e.NewConfig(
clusterOpts: []e2e.EPClusterOption{
e2e.WithClusterSize(1),
e2e.WithGoFailEnabled(true),
),
},
})
scenarios = append(scenarios, testScenario{
name: "Issue14685",
failpoint: DefragBeforeCopyPanic,
cluster: *e2e.NewConfig(
clusterOpts: []e2e.EPClusterOption{
e2e.WithClusterSize(1),
e2e.WithGoFailEnabled(true),
),
},
})
scenarios = append(scenarios, testScenario{
name: "Issue13766",
failpoint: KillFailpoint,
traffic: traffic.HighTraffic,
cluster: *e2e.NewConfig(
clusterOpts: []e2e.EPClusterOption{
e2e.WithSnapshotCount(100),
),
},
})
scenarios = append(scenarios, testScenario{
name: "Issue15220",
failpoint: RandomFailpoint,
watch: watchConfig{
requestProgress: true,
},
cluster: *e2e.NewConfig(
clusterOpts: []e2e.EPClusterOption{
e2e.WithClusterSize(1),
),
},
})
if v.Compare(version.V3_5) >= 0 {
scenarios = append(scenarios, testScenario{
name: "Issue15271",
failpoint: BlackholeUntilSnapshot,
traffic: traffic.HighTraffic,
cluster: *e2e.NewConfig(
clusterOpts: []e2e.EPClusterOption{
e2e.WithSnapshotCount(100),
e2e.WithPeerProxy(true),
e2e.WithIsPeerTLS(true),
),
},
})
}
for _, scenario := range scenarios {
Expand All @@ -124,6 +123,8 @@ func TestRobustness(t *testing.T) {

t.Run(scenario.name, func(t *testing.T) {
lg := zaptest.NewLogger(t)
scenario.clusterOpts = append(scenario.clusterOpts, e2e.WithKeepLogs(true))
scenario.cluster = *e2e.NewConfig(scenario.clusterOpts...)
scenario.cluster.Logger = lg
ctx := context.Background()
testRobustness(ctx, t, lg, scenario)
Expand All @@ -132,11 +133,12 @@ func TestRobustness(t *testing.T) {
}

type testScenario struct {
name string
failpoint Failpoint
cluster e2e.EtcdProcessClusterConfig
traffic traffic.Config
watch watchConfig
name string
failpoint Failpoint
clusterOpts []e2e.EPClusterOption
cluster e2e.EtcdProcessClusterConfig
traffic traffic.Config
watch watchConfig
}

func testRobustness(ctx context.Context, t *testing.T, lg *zap.Logger, s testScenario) {
Expand Down
4 changes: 3 additions & 1 deletion tests/robustness/model/history.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@ import (

// ValidateOperationHistoryAndReturnVisualize return visualize as porcupine.linearizationInfo used to generate visualization is private.
func ValidateOperationHistoryAndReturnVisualize(t *testing.T, lg *zap.Logger, operations []porcupine.Operation) (visualize func(basepath string)) {
linearizable, info := porcupine.CheckOperationsVerbose(NonDeterministicModel, operations, 5*time.Minute)
const timeout = 5 * time.Minute
lg.Info("Running porcupine to check operations", zap.String("model", "NonDeterministicModel"), zap.Duration("timeout", timeout))
linearizable, info := porcupine.CheckOperationsVerbose(NonDeterministicModel, operations, timeout)
if linearizable == porcupine.Illegal {
t.Error("Model is not linearizable")
}
Expand Down
11 changes: 11 additions & 0 deletions tests/robustness/report.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ func (r *report) Report(t *testing.T, force bool) {
for _, member := range r.clus.Procs {
memberDataDir := filepath.Join(path, fmt.Sprintf("server-%s", member.Config().Name))
persistMemberDataDir(t, r.lg, member, memberDataDir)

logsFile := filepath.Join(path, fmt.Sprintf("server-%s.log", member.Config().Name))
persistMemberLogs(t, r.lg, member, logsFile)
}
if r.clientReports != nil {
sort.Slice(r.clientReports, func(i, j int) bool {
Expand Down Expand Up @@ -118,6 +121,14 @@ func persistWatchResponses(t *testing.T, lg *zap.Logger, path string, responses
}
}

// persistMemberLogs writes the member's retained log lines to the file at
// path so they can be inspected after a robustness test run. Fails the
// test immediately if the file cannot be written.
func persistMemberLogs(t *testing.T, lg *zap.Logger, member e2e.EtcdProcess, path string) {
	// Fixed message: this saves a single log file, not a directory.
	lg.Info("Saving member logs", zap.String("member", member.Config().Name), zap.String("path", path))
	// Lines are joined without a separator; they are assumed to already
	// carry their trailing newlines — TODO confirm against proc.Lines().
	if err := os.WriteFile(path, []byte(strings.Join(member.LogLines(), "")), 0644); err != nil {
		t.Fatal(err)
	}
}

func persistWatchEvents(t *testing.T, lg *zap.Logger, path string, events []traffic.TimedWatchEvent) {
lg.Info("Saving watch events", zap.String("path", path))
file, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0755)
Expand Down

0 comments on commit b599235

Please sign in to comment.