diff --git a/pkg/changelog/changelog.go b/pkg/changelog/changelog.go index 64dc17074ddf..1847fda3adf9 100644 --- a/pkg/changelog/changelog.go +++ b/pkg/changelog/changelog.go @@ -1,7 +1,6 @@ package changelog import ( - "slices" "time" ) @@ -151,32 +150,48 @@ func (clv *Changelog[T]) findIndex(target time.Time) int { // enforceSizeBoundary ensures that the size of the inner array doesn't exceed the limit. // It applies two methods to reduce the log size to the maximum allowed: -// 1. Unite duplicate values that are trailing one another. +// 1. Unite duplicate values that are trailing one another, removing the oldest of the pair. // 2. Remove the oldest logs as they are likely less important. func (clv *Changelog[T]) enforceSizeBoundary() { - if len(clv.changes) > clv.maxSize { - // Get rid of oldest changes to keep max size boundary - boundaryDiff := len(clv.changes) - clv.maxSize - - // First try to unite states - clv.changes = slices.CompactFunc(clv.changes, func(i item[T], i2 item[T]) bool { - if i.value == i2.value && boundaryDiff > 0 { - delete(clv.timestamps, i2.timestamp) - boundaryDiff-- - return true - } - return false - }) - - if boundaryDiff == 0 { - return - } - removedChanges := clv.changes[:boundaryDiff] - clv.changes = clv.changes[boundaryDiff:] - for _, removedChange := range removedChanges { - delete(clv.timestamps, removedChange.timestamp) + if len(clv.changes) <= clv.maxSize { + // Nothing to do + return + } + + boundaryDiff := len(clv.changes) - clv.maxSize + + // Compact the slice in place + writeIdx := 0 + for readIdx := 0; readIdx < len(clv.changes); readIdx++ { + nextIdx := readIdx + 1 + if nextIdx < len(clv.changes) && + clv.changes[nextIdx].value == clv.changes[readIdx].value && + boundaryDiff > 0 { + // Remove the oldest (readIdx) from the duplicate pair + delete(clv.timestamps, clv.changes[readIdx].timestamp) + boundaryDiff-- + continue } + + clv.changes[writeIdx] = clv.changes[readIdx] + writeIdx++ + } + + clear(clv.changes[writeIdx:]) + clv.changes = clv.changes[:writeIdx] + + if len(clv.changes) <= clv.maxSize { + // Size is within limits after compaction + return + } + + // As it still exceeds maxSize, remove the oldest entries in the remaining slice + boundaryDiff = len(clv.changes) - clv.maxSize + removedChanges := clv.changes[:boundaryDiff] + clv.changes = clv.changes[boundaryDiff:] + for _, removedChange := range removedChanges { + delete(clv.timestamps, removedChange.timestamp) } } diff --git a/pkg/changelog/changelog_benchmark_test.go b/pkg/changelog/changelog_benchmark_test.go new file mode 100644 index 000000000000..bd9959cd3565 --- /dev/null +++ b/pkg/changelog/changelog_benchmark_test.go @@ -0,0 +1,77 @@ +package changelog + +import ( + "testing" + "time" +) + +func BenchmarkEnforceSizeBoundary(b *testing.B) { + // Define test cases + testCases := []struct { + name string + changelog Changelog[int] + }{ + { + name: "No change needed", + changelog: Changelog[int]{ + changes: []item[int]{ + {value: 1, timestamp: getTimeFromSec(1)}, + {value: 2, timestamp: getTimeFromSec(2)}, + }, + timestamps: map[time.Time]int{ + getTimeFromSec(1): 0, + getTimeFromSec(2): 1, + }, + maxSize: 5, + }, + }, + { + name: "Trim excess with duplicates", + changelog: Changelog[int]{ + changes: []item[int]{ + {value: 1, timestamp: getTimeFromSec(1)}, + {value: 1, timestamp: getTimeFromSec(2)}, + {value: 2, timestamp: getTimeFromSec(3)}, + {value: 3, timestamp: getTimeFromSec(4)}, + {value: 3, timestamp: getTimeFromSec(5)}, + }, + timestamps: map[time.Time]int{ + getTimeFromSec(1): 0, + getTimeFromSec(2): 1, + getTimeFromSec(3): 2, + getTimeFromSec(4): 3, + getTimeFromSec(5): 4, + }, + maxSize: 3, + }, + }, + { + name: "Remove oldest entries", + changelog: Changelog[int]{ + changes: []item[int]{ + {value: 1, timestamp: getTimeFromSec(1)}, + {value: 2, timestamp: getTimeFromSec(2)}, + {value: 3, timestamp: getTimeFromSec(3)}, + {value: 4, timestamp: getTimeFromSec(4)}, + }, + timestamps: map[time.Time]int{ + getTimeFromSec(1): 0, + getTimeFromSec(2): 1, + getTimeFromSec(3): 2, + getTimeFromSec(4): 3, + }, + maxSize: 2, + }, + }, + } + + // Run benchmarks + for _, tc := range testCases { + b.Run(tc.name, func(b *testing.B) { + for i := 0; i < b.N; i++ { + clv := tc.changelog // Create a copy for each iteration + clv.enforceSizeBoundary() + } + }) + } +} diff --git a/pkg/changelog/changelog_test.go b/pkg/changelog/changelog_test.go index 8dec19c8c1a2..888ec0c6011b 100644 --- a/pkg/changelog/changelog_test.go +++ b/pkg/changelog/changelog_test.go @@ -1,26 +1,25 @@ -package changelog_test +package changelog import ( + "reflect" "testing" "time" "github.com/stretchr/testify/assert" - - "github.com/aquasecurity/tracee/pkg/changelog" ) func TestChangelog(t *testing.T) { t.Parallel() t.Run("GetCurrent on an empty changelog", func(t *testing.T) { - cl := changelog.NewChangelog[int](3) + cl := NewChangelog[int](3) // Test GetCurrent on an empty changelog assert.Zero(t, cl.GetCurrent()) }) t.Run("Set and get", func(t *testing.T) { - cl := changelog.NewChangelog[int](3) + cl := NewChangelog[int](3) testVal := 42 cl.SetCurrent(testVal) @@ -28,7 +27,7 @@ func TestChangelog(t *testing.T) { }) t.Run("Set and get on set time", func(t *testing.T) { - cl := changelog.NewChangelog[int](3) + cl := NewChangelog[int](3) testVal1 := 42 testVal2 := 76 testVal3 := 76 @@ -50,7 +49,7 @@ func TestChangelog(t *testing.T) { }) t.Run("Set twice on the same time", func(t *testing.T) { - cl := changelog.NewChangelog[int](3) + cl := NewChangelog[int](3) testVal := 42 now := time.Now() @@ -72,13 +71,13 @@ func TestChangelog(t *testing.T) { }) t.Run("Get on an empty changelog", func(t *testing.T) { - cl := changelog.NewChangelog[int](3) + cl := NewChangelog[int](3) assert.Zero(t, cl.GetCurrent()) }) t.Run("Test 1 second interval among changes", func(t *testing.T) { - cl := changelog.NewChangelog[int](3) + cl := NewChangelog[int](3) cl.SetCurrent(1) time.Sleep(2 * time.Second) @@ -94,7 +93,7 @@ func TestChangelog(t *testing.T) { }) t.Run("Test 100 milliseconds interval among changes", func(t *testing.T) { - cl := changelog.NewChangelog[int](3) + cl := NewChangelog[int](3) cl.SetCurrent(1) time.Sleep(100 * time.Millisecond) @@ -110,7 +109,7 @@ func TestChangelog(t *testing.T) { }) t.Run("Test getting all values at once", func(t *testing.T) { - cl := changelog.NewChangelog[int](3) + cl := NewChangelog[int](3) cl.SetCurrent(1) time.Sleep(100 * time.Millisecond) @@ -123,7 +122,7 @@ func TestChangelog(t *testing.T) { }) t.Run("Pass max size wit repeated values", func(t *testing.T) { - cl := changelog.NewChangelog[int](3) + cl := NewChangelog[int](3) cl.SetCurrent(1) time.Sleep(100 * time.Millisecond) @@ -135,14 +134,14 @@ func TestChangelog(t *testing.T) { now := time.Now() assert.Equal(t, 1, cl.Get(now.Add(-300*time.Millisecond))) - assert.Equal(t, 2, cl.Get(now.Add(-200*time.Millisecond))) + assert.Equal(t, 1, cl.Get(now.Add(-200*time.Millisecond))) // oldest 2 is removed, so 1 is returned assert.Equal(t, 2, cl.Get(now.Add(-100*time.Millisecond))) assert.Equal(t, 3, cl.Get(now)) assert.Len(t, cl.GetAll(), 3) }) t.Run("Pass max size with unique values", func(t *testing.T) { - cl := changelog.NewChangelog[int](3) + cl := NewChangelog[int](3) cl.SetCurrent(1) time.Sleep(100 * time.Millisecond) @@ -161,7 +160,7 @@ func TestChangelog(t *testing.T) { }) t.Run("Pass max size with new old value", func(t *testing.T) { - cl := changelog.NewChangelog[int](3) + cl := NewChangelog[int](3) cl.SetCurrent(1) time.Sleep(100 * time.Millisecond) @@ -183,7 +182,7 @@ func TestChangelog(t *testing.T) { }) t.Run("Zero sized changelog", func(t *testing.T) { - cl := changelog.NewChangelog[int](0) + cl := NewChangelog[int](0) cl.SetCurrent(1) time.Sleep(100 * time.Millisecond) @@ -203,4 +202,125 @@ func TestChangelog(t *testing.T) { assert.Equal(t, 0, cl.Get(now)) assert.Empty(t, cl.GetAll()) }) + + t.Run("Test enforceSizeBoundary", func(t *testing.T) { + type TestCase struct { + name string + maxSize int + initialChanges []item[int] + expected []item[int] + } + + testCases := []TestCase{ + { + name: "No Action Required", + maxSize: 3, + initialChanges: []item[int]{ + {timestamp: getTimeFromSec(42), value: 1}, + {timestamp: getTimeFromSec(43), value: 2}, + {timestamp: getTimeFromSec(44), value: 3}, + }, + expected: []item[int]{ + {timestamp: getTimeFromSec(42), value: 1}, + {timestamp: getTimeFromSec(43), value: 2}, + {timestamp: getTimeFromSec(44), value: 3}, + }, + }, + { + name: "Basic Removal of Oldest Entries", + maxSize: 3, + initialChanges: []item[int]{ + {timestamp: getTimeFromSec(42), value: 1}, + {timestamp: getTimeFromSec(43), value: 2}, + {timestamp: getTimeFromSec(44), value: 3}, + {timestamp: getTimeFromSec(45), value: 4}, + }, + expected: []item[int]{ + {timestamp: getTimeFromSec(43), value: 2}, + {timestamp: getTimeFromSec(44), value: 3}, + {timestamp: getTimeFromSec(45), value: 4}, + }, + }, + { + name: "Compacting Duplicate Values - Start", + maxSize: 3, + initialChanges: []item[int]{ + {timestamp: getTimeFromSec(42), value: 1}, + {timestamp: getTimeFromSec(43), value: 1}, + {timestamp: getTimeFromSec(44), value: 2}, + {timestamp: getTimeFromSec(45), value: 3}, + }, + expected: []item[int]{ + {timestamp: getTimeFromSec(43), value: 1}, + {timestamp: getTimeFromSec(44), value: 2}, + {timestamp: getTimeFromSec(45), value: 3}, + }, + }, + { + name: "Compacting Duplicate Values - Middle", + maxSize: 3, + initialChanges: []item[int]{ + {timestamp: getTimeFromSec(42), value: 1}, + {timestamp: getTimeFromSec(43), value: 2}, + {timestamp: getTimeFromSec(44), value: 2}, + {timestamp: getTimeFromSec(45), value: 3}, + }, + expected: []item[int]{ + {timestamp: getTimeFromSec(42), value: 1}, + {timestamp: getTimeFromSec(44), value: 2}, + {timestamp: getTimeFromSec(45), value: 3}, + }, + }, + { + name: "Compacting Duplicate Values - End", + maxSize: 3, + initialChanges: []item[int]{ + {timestamp: getTimeFromSec(42), value: 1}, + {timestamp: getTimeFromSec(43), value: 2}, + {timestamp: getTimeFromSec(44), value: 3}, + {timestamp: getTimeFromSec(45), value: 3}, + }, + expected: []item[int]{ + {timestamp: getTimeFromSec(42), value: 1}, + {timestamp: getTimeFromSec(43), value: 2}, + {timestamp: getTimeFromSec(45), value: 3}, + }, + }, + { + name: "Combination of Compaction and Removal of Oldest Entries", + maxSize: 3, + initialChanges: []item[int]{ + {timestamp: getTimeFromSec(42), value: 1}, + {timestamp: getTimeFromSec(43), value: 2}, + {timestamp: getTimeFromSec(44), value: 2}, + {timestamp: getTimeFromSec(45), value: 2}, + {timestamp: getTimeFromSec(46), value: 3}, + {timestamp: getTimeFromSec(47), value: 4}, + }, + expected: []item[int]{ + {timestamp: getTimeFromSec(45), value: 2}, + {timestamp: getTimeFromSec(46), value: 3}, + {timestamp: getTimeFromSec(47), value: 4}, + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + cl := NewChangelog[int](tc.maxSize) + for _, change := range tc.initialChanges { + cl.Set(change.value, change.timestamp) + } + + cl.enforceSizeBoundary() + + eq := reflect.DeepEqual(cl.changes, tc.expected) + assert.True(t, eq) + }) + } + }) +} + +func getTimeFromSec(second int) time.Time { + return time.Unix(int64(second), 0) }