Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support multiple detectors per match #2065

Merged
merged 17 commits into from
Nov 3, 2023
Prev Previous commit
Next Next commit
populate slice
  • Loading branch information
rosecodym committed Nov 1, 2023
commit aca550d1a91a6124727a0d226ba26e1323a999b8
4 changes: 2 additions & 2 deletions pkg/engine/ahocorasickcore.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,13 +69,13 @@ func (ac *AhoCorasickCore) MatchString(input string) []*ahocorasick.Match {
// PopulateDetectorsByMatch appends to the given detectors slice every detector registered
// for the keyword matched by the given Aho-Corasick match result. It returns false, leaving
// the slice untouched, when the matched string has no entry in keywordsToDetectors, and true otherwise.
// The slice is passed by pointer so that a caller can reuse a single slice across calls,
// reducing the need for repeated allocations within each detector worker in the engine.
func (ac *AhoCorasickCore) PopulateDetectorsByMatch(match *ahocorasick.Match, detectors map[detectorspb.DetectorType]detectors.Detector) bool {
func (ac *AhoCorasickCore) PopulateDetectorsByMatch(match *ahocorasick.Match, detectors *[]detectors.Detector) bool {
matchedDetectorKeys, ok := ac.keywordsToDetectors[match.MatchString()]
if !ok {
return false
}
for _, key := range matchedDetectorKeys {
detectors[key.detectorType] = ac.detectorsByKey[key]
*detectors = append(*detectors, ac.detectorsByKey[key])
}
return true
}
Expand Down
8 changes: 4 additions & 4 deletions pkg/engine/ahocorasickcore_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,8 @@ func TestAhoCorasickCore_MultipleCustomDetectorsMatchable(t *testing.T) {
matches := ac.MatchString("a")
assert.Equal(t, 1, len(matches))

matchingDetectors := make(map[detectorspb.DetectorType]detectors.Detector)
ac.PopulateDetectorsByMatch(matches[0], matchingDetectors)
matchingDetectors := make([]detectors.Detector, 0, 2)
ac.PopulateDetectorsByMatch(matches[0], &matchingDetectors)
assert.Equal(t, 2, len(matchingDetectors))
assert.Contains(t, matchingDetectors, customDetector1)
assert.Contains(t, matchingDetectors, customDetector2)
Expand All @@ -95,8 +95,8 @@ func TestAhoCorasickCore_MultipleDetectorVersionsMatchable(t *testing.T) {
matches := ac.MatchString("a")
assert.Equal(t, 1, len(matches))

matchingDetectors := make(map[detectorspb.DetectorType]detectors.Detector)
ac.PopulateDetectorsByMatch(matches[0], matchingDetectors)
matchingDetectors := make([]detectors.Detector, 0, 2)
ac.PopulateDetectorsByMatch(matches[0], &matchingDetectors)
assert.Equal(t, 2, len(matchingDetectors))
assert.Contains(t, matchingDetectors, v1)
assert.Contains(t, matchingDetectors, v2)
Expand Down
8 changes: 4 additions & 4 deletions pkg/engine/engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -458,7 +458,7 @@ func (e *Engine) detectorWorker(ctx context.Context) {

// Reuse the same slice across chunks to avoid allocations.
const avgDetectorsPerChunk = 2
chunkSpecificDetectors := make(map[detectorspb.DetectorType]detectors.Detector, avgDetectorsPerChunk)
chunkSpecificDetectors := make([]detectors.Detector, 0, avgDetectorsPerChunk)
for originalChunk := range e.ChunksChan() {
for chunk := range sources.Chunker(originalChunk) {
atomic.AddUint64(&e.metrics.BytesScanned, uint64(len(chunk.Data)))
Expand All @@ -470,12 +470,12 @@ func (e *Engine) detectorWorker(ctx context.Context) {
}

for _, match := range e.ahoCorasickCore.MatchString(string(decoded.Chunk.Data)) {
if !e.ahoCorasickCore.PopulateDetectorsByMatch(match, chunkSpecificDetectors) {
if !e.ahoCorasickCore.PopulateDetectorsByMatch(match, &chunkSpecificDetectors) {
continue
}
}

for k, detector := range chunkSpecificDetectors {
for _, detector := range chunkSpecificDetectors {
decoded.Chunk.Verify = e.verify
wgDetect.Add(1)
e.detectableChunksChan <- detectableChunk{
Expand All @@ -484,8 +484,8 @@ func (e *Engine) detectorWorker(ctx context.Context) {
decoder: decoded.DecoderType,
wgDoneFn: wgDetect.Done,
}
delete(chunkSpecificDetectors, k)
}
clear(chunkSpecificDetectors)
}
}
atomic.AddUint64(&e.metrics.ChunksScanned, 1)
Expand Down