Skip to content

Commit

Permalink
perf(changelog): optimize enforceSizeBoundary
Browse files Browse the repository at this point in the history
These optimizations make the enforceSizeBoundary function more
efficient across various scenarios, leading to overall better
performance while maintaining the same behaviour (*).

Running tool: /home/gg/.goenv/versions/1.22.4/bin/go test -benchmem
-run=^$ -tags ebpf
-bench ^(BenchmarkEnforceSizeBoundaryOld|BenchmarkEnforceSizeBoundary)$
github.com/aquasecurity/tracee/pkg/changelog -benchtime=100000000x

goos: linux
goarch: amd64
pkg: github.com/aquasecurity/tracee/pkg/changelog
cpu: AMD Ryzen 9 7950X 16-Core Processor

| Test Case              | Old (ns/op) | New (ns/op) |   (%)  |
|------------------------|-------------|-------------|--------|
| No change needed       | 1.446       | 1.434       |  0.83% |
| Trim excess duplicates | 30.18       | 21.97       | 27.21% |
| Remove oldest entries  | 27.29       | 21.39       | 21.60% |

Since enforceSizeBoundary is called by setAt, the performance
improvements will be reflected in the latter:

-bench ^(setAtOld|setAt)$

| Test Case          | Old (ns/op) | New (ns/op) |   (%)  |
|--------------------|-------------|-------------|--------|
| All Scenarios      | 1267        | 1163        | 8.21%  |
| Within Limit       | 1770        | 1747        | 1.30%  |

Tests were fixed and added to ensure the correctness of the
implementation. Benchmarks were also added to track the performance
improvements.

(*) The behaviour of the function is the same, but the implementation
fixes a bug that when coalescing duplicate values, the removal of the
oldest entry was not being done correctly. Instead of removing the
oldest entry, the function was removing the newest one.
  • Loading branch information
geyslan committed Aug 23, 2024
1 parent 800a881 commit 4b04800
Show file tree
Hide file tree
Showing 3 changed files with 448 additions and 38 deletions.
67 changes: 45 additions & 22 deletions pkg/changelog/changelog.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package changelog

import (
"slices"
"time"

"github.com/aquasecurity/tracee/pkg/logger"
Expand Down Expand Up @@ -165,33 +164,57 @@ func (clv *Changelog[T]) findIndex(target time.Time) int {

// enforceSizeBoundary ensures that the size of the inner array doesn't exceed the limit.
// It applies two methods to reduce the log size to the maximum allowed:
// 1. Unite duplicate values that are trailing one another.
// 1. Unite duplicate values that are trailing one another, removing the oldest of the pair.
// 2. Remove the oldest logs as they are likely less important.

func (clv *Changelog[T]) enforceSizeBoundary() {
if len(clv.changes) > clv.maxSize {
// Get rid of oldest changes to keep max size boundary
boundaryDiff := len(clv.changes) - clv.maxSize

// First try to unite states
clv.changes = slices.CompactFunc(clv.changes, func(i item[T], i2 item[T]) bool {
if i.value == i2.value && boundaryDiff > 0 {
delete(clv.timestamps, i2.timestamp)
boundaryDiff--
return true
}
return false
})

if boundaryDiff == 0 {
return
if len(clv.changes) <= clv.maxSize {
// Nothing to do
return
}

boundaryDiff := len(clv.changes) - clv.maxSize
changed := false

// Compact the slice in place
writeIdx := 0
for readIdx := 0; readIdx < len(clv.changes); readIdx++ {
nextIdx := readIdx + 1
if nextIdx < len(clv.changes) &&
clv.changes[nextIdx].value == clv.changes[readIdx].value &&
boundaryDiff > 0 {
// Remove the oldest (readIdx) from the duplicate pair
delete(clv.timestamps, clv.changes[readIdx].timestamp)
boundaryDiff--
changed = true
continue
}
removedChanges := clv.changes[:boundaryDiff]
clv.changes = clv.changes[boundaryDiff:]
for _, removedChange := range removedChanges {
delete(clv.timestamps, removedChange.timestamp)

// If elements have been removed or moved, update the map and the slice
if changed {
clv.changes[writeIdx] = clv.changes[readIdx]
}

writeIdx++
}

if changed {
clear(clv.changes[writeIdx:])
clv.changes = clv.changes[:writeIdx]
}

if len(clv.changes) <= clv.maxSize {
// Size is within limits after compaction
return
}

// As it still exceeds maxSize, remove the oldest entries in the remaining slice
boundaryDiff = len(clv.changes) - clv.maxSize
for i := 0; i < boundaryDiff; i++ {
delete(clv.timestamps, clv.changes[i].timestamp)
}
clear(clv.changes[:boundaryDiff])
clv.changes = clv.changes[boundaryDiff:]
}

// returnZero returns the zero value of the type T.
Expand Down
233 changes: 233 additions & 0 deletions pkg/changelog/changelog_benchmark_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,233 @@
package changelog

import (
"testing"
"time"
)

func Benchmark_enforceSizeBoundary(b *testing.B) {
testCases := []struct {
name string
changelog Changelog[int]
}{
{
name: "No change needed",
changelog: Changelog[int]{
changes: []item[int]{
{value: 1, timestamp: getTimeFromSec(1)},
{value: 2, timestamp: getTimeFromSec(2)},
},
timestamps: map[time.Time]struct{}{
getTimeFromSec(1): {},
getTimeFromSec(2): {},
},
maxSize: 5,
},
},
{
name: "Trim excess with duplicates",
changelog: Changelog[int]{
changes: []item[int]{
{value: 1, timestamp: getTimeFromSec(1)},
{value: 1, timestamp: getTimeFromSec(2)},
{value: 2, timestamp: getTimeFromSec(3)},
{value: 3, timestamp: getTimeFromSec(4)},
{value: 3, timestamp: getTimeFromSec(5)},
},
timestamps: map[time.Time]struct{}{
getTimeFromSec(1): {},
getTimeFromSec(2): {},
getTimeFromSec(3): {},
getTimeFromSec(4): {},
getTimeFromSec(5): {},
},
maxSize: 3,
},
},
{
name: "Remove oldest entries",
changelog: Changelog[int]{
changes: []item[int]{
{value: 1, timestamp: getTimeFromSec(1)},
{value: 2, timestamp: getTimeFromSec(2)},
{value: 3, timestamp: getTimeFromSec(3)},
{value: 4, timestamp: getTimeFromSec(4)},
},
timestamps: map[time.Time]struct{}{
getTimeFromSec(1): {},
getTimeFromSec(2): {},
getTimeFromSec(3): {},
getTimeFromSec(4): {},
},
maxSize: 2,
},
},
}

for _, tc := range testCases {
b.Run(tc.name, func(b *testing.B) {
for i := 0; i < b.N; i++ {
clv := tc.changelog // Create a copy for each iteration
clv.enforceSizeBoundary()
}
})
}
}

func Benchmark_setAt(b *testing.B) {
// Test cases where the Changelog needs to enforce the size boundary
testCasesAllScenarios := []struct {
value int
time time.Time
}{
{
value: 42,
time: getTimeFromSec(0),
},
{
value: 72,
time: getTimeFromSec(1),
},
{
value: 642,
time: getTimeFromSec(2),
},
{
value: 672,
time: getTimeFromSec(3), // will trigger removal of oldest entry
},
{
value: 642,
time: getTimeFromSec(4), // will trigger coalescing of duplicate values
},
{
value: 672,
time: getTimeFromSec(5), // will trigger coalescing of duplicate values
},
{
value: 6642,
time: getTimeFromSec(6), // will trigger removal of oldest entry
},
{
value: 672,
time: getTimeFromSec(7), // will trigger coalescing of duplicate values
},
{
value: 642,
time: getTimeFromSec(8), // will trigger coalescing of duplicate values
},
{
value: 6672,
time: getTimeFromSec(9), // will trigger removal of oldest entry
},
{
value: 9642,
time: getTimeFromSec(10), // will trigger removal of oldest entry
},
{
value: 0,
time: getTimeFromSec(0), // will just update the value
},
{
value: 0,
time: getTimeFromSec(1), // will just update the value
},
{
value: 0,
time: getTimeFromSec(2), // will just update the value
},
{
value: 0,
time: getTimeFromSec(3), // will just update the value
},
}

b.Run("All Scenarios", func(b *testing.B) {
for i := 0; i < b.N; i++ {
b.StopTimer()
clv := NewChangelog[int](3)
b.StartTimer()
for _, tc := range testCasesAllScenarios {
clv.setAt(tc.value, tc.time)
}
}
})

// Test cases where the changelog is within the maximum size limit
testCasesWithinLimit := []struct {
value int
time time.Time
}{
{
value: 0,
time: getTimeFromSec(0),
},
{
value: 1,
time: getTimeFromSec(1),
},
{
value: 2,
time: getTimeFromSec(2),
},
{
value: 3,
time: getTimeFromSec(3),
},
{
value: 4,
time: getTimeFromSec(4),
},
{
value: 5,
time: getTimeFromSec(5),
},
{
value: 6,
time: getTimeFromSec(6),
},
{
value: 7,
time: getTimeFromSec(7),
},
{
value: 8,
time: getTimeFromSec(8),
},
{
value: 9,
time: getTimeFromSec(9),
},
{
value: 10,
time: getTimeFromSec(10),
},
{
value: 11,
time: getTimeFromSec(11),
},
{
value: 12,
time: getTimeFromSec(12),
},
{
value: 13,
time: getTimeFromSec(13),
},
{
value: 14,
time: getTimeFromSec(14),
},
}

b.Run("Within Limit", func(b *testing.B) {
for i := 0; i < b.N; i++ {
b.StopTimer()
clv := NewChangelog[int](15)
b.StartTimer()
for _, tc := range testCasesWithinLimit {
clv.setAt(tc.value, tc.time)
}
}
})
}
Loading

0 comments on commit 4b04800

Please sign in to comment.