Skip to content

Commit

Permalink
perf(changelog): optimize enforceSizeBoundary
Browse files Browse the repository at this point in the history
These optimizations make the enforceSizeBoundary function more
efficient across various scenarios, leading to overall better
performance while maintaining the same behaviour (*).

Running tool: /home/gg/.goenv/versions/1.22.4/bin/go test -benchmem
-run=^$ -tags ebpf
-bench ^(BenchmarkEnforceSizeBoundaryOld|BenchmarkEnforceSizeBoundary)$
github.com/aquasecurity/tracee/pkg/changelog -benchtime=100000000x

goos: linux
goarch: amd64
pkg: github.com/aquasecurity/tracee/pkg/changelog
cpu: AMD Ryzen 9 7950X 16-Core Processor

| Test Case              | Old (ns/op) | New (ns/op) |   (%)  |
|------------------------|-------------|-------------|--------|
| No change needed       | 1.494       | 1.395       | 6.63%  |
| Trim excess duplicates | 30.07       | 21.53       | 28.41% |
| Remove oldest entries  | 27.71       | 24.96       | 9.90%  |

Tests and benchmarks are included.

(*) The behaviour of the function is the same, but the implementation
fixes a bug that when coalescing duplicate values, the removal of the
oldest entry was not being done correctly. Instead of removing the
oldest entry, the function was removing the newest one.
  • Loading branch information
geyslan committed Aug 10, 2024
1 parent c20327c commit cd7004f
Show file tree
Hide file tree
Showing 3 changed files with 251 additions and 39 deletions.
61 changes: 38 additions & 23 deletions pkg/changelog/changelog.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package changelog

import (
"slices"
"time"
)

Expand Down Expand Up @@ -151,32 +150,48 @@ func (clv *Changelog[T]) findIndex(target time.Time) int {

// enforceSizeBoundary ensures that the size of the inner array doesn't exceed the limit.
// It applies two methods to reduce the log size to the maximum allowed:
// 1. Unite duplicate values that are trailing one another.
// 1. Unite duplicate values that are trailing one another, removing the oldest of the pair.
// 2. Remove the oldest logs as they are likely less important.

func (clv *Changelog[T]) enforceSizeBoundary() {
if len(clv.changes) > clv.maxSize {
// Get rid of oldest changes to keep max size boundary
boundaryDiff := len(clv.changes) - clv.maxSize

// First try to unite states
clv.changes = slices.CompactFunc(clv.changes, func(i item[T], i2 item[T]) bool {
if i.value == i2.value && boundaryDiff > 0 {
delete(clv.timestamps, i2.timestamp)
boundaryDiff--
return true
}
return false
})

if boundaryDiff == 0 {
return
}
removedChanges := clv.changes[:boundaryDiff]
clv.changes = clv.changes[boundaryDiff:]
for _, removedChange := range removedChanges {
delete(clv.timestamps, removedChange.timestamp)
if len(clv.changes) <= clv.maxSize {
// Nothing to do
return
}

boundaryDiff := len(clv.changes) - clv.maxSize

// Compact the slice in place
writeIdx := 0
for readIdx := 0; readIdx < len(clv.changes); readIdx++ {
nextIdx := readIdx + 1
if nextIdx < len(clv.changes) &&
clv.changes[nextIdx].value == clv.changes[readIdx].value &&
boundaryDiff > 0 {
// Remove the oldest (readIdx) from the duplicate pair
delete(clv.timestamps, clv.changes[readIdx].timestamp)
boundaryDiff--
continue
}

clv.changes[writeIdx] = clv.changes[readIdx]
writeIdx++
}

clear(clv.changes[writeIdx:])
clv.changes = clv.changes[:writeIdx]

if len(clv.changes) <= clv.maxSize {
// Size is within limits after compaction
return
}

// As it still exceeds maxSize, remove the oldest entries in the remaining slice
boundaryDiff = len(clv.changes) - clv.maxSize
removedChanges := clv.changes[:boundaryDiff]
clv.changes = clv.changes[boundaryDiff:]
for _, removedChange := range removedChanges {
delete(clv.timestamps, removedChange.timestamp)
}
}

Expand Down
77 changes: 77 additions & 0 deletions pkg/changelog/changelog_benchmark_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
package changelog

import (
"testing"
"time"
)

func BenchmarkEnforceSizeBoundary(b *testing.B) {
// Define test cases
testCases := []struct {
name string
changelog Changelog[int]
}{
{
name: "No change needed",
changelog: Changelog[int]{
changes: []item[int]{
{value: 1, timestamp: getTimeFromSec(1)},
{value: 2, timestamp: getTimeFromSec(2)},
},
timestamps: map[time.Time]int{
getTimeFromSec(1): 0,
getTimeFromSec(2): 1,
},
maxSize: 5,
},
},
{
name: "Trim excess with duplicates",
changelog: Changelog[int]{
changes: []item[int]{
{value: 1, timestamp: getTimeFromSec(1)},
{value: 1, timestamp: getTimeFromSec(2)},
{value: 2, timestamp: getTimeFromSec(3)},
{value: 3, timestamp: getTimeFromSec(4)},
{value: 3, timestamp: getTimeFromSec(5)},
},
timestamps: map[time.Time]int{
getTimeFromSec(1): 0,
getTimeFromSec(2): 1,
getTimeFromSec(3): 2,
getTimeFromSec(4): 3,
getTimeFromSec(5): 4,
},
maxSize: 3,
},
},
{
name: "Remove oldest entries",
changelog: Changelog[int]{
changes: []item[int]{
{value: 1, timestamp: getTimeFromSec(1)},
{value: 2, timestamp: getTimeFromSec(2)},
{value: 3, timestamp: getTimeFromSec(3)},
{value: 4, timestamp: getTimeFromSec(4)},
},
timestamps: map[time.Time]int{
getTimeFromSec(1): 0,
getTimeFromSec(2): 1,
getTimeFromSec(3): 2,
getTimeFromSec(4): 3,
},
maxSize: 2,
},
},
}

// Run benchmarks
for _, tc := range testCases {
b.Run(tc.name, func(b *testing.B) {
for i := 0; i < b.N; i++ {
clv := tc.changelog // Create a copy for each iteration
clv.enforceSizeBoundary()
}
})
}
}
Loading

0 comments on commit cd7004f

Please sign in to comment.