diff --git a/buildtools/aho-corasick/Cargo.lock b/buildtools/aho-corasick/Cargo.lock deleted file mode 100644 index d6cbe48ba1586..0000000000000 --- a/buildtools/aho-corasick/Cargo.lock +++ /dev/null @@ -1,25 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. -version = 3 - -[[package]] -name = "aho-corasick" -version = "0.7.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac" -dependencies = [ - "memchr", -] - -[[package]] -name = "aho-corasick-c" -version = "0.1.0" -dependencies = [ - "aho-corasick", -] - -[[package]] -name = "memchr" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" diff --git a/buildtools/aho-corasick/Cargo.toml b/buildtools/aho-corasick/Cargo.toml deleted file mode 100644 index 1c088800539a7..0000000000000 --- a/buildtools/aho-corasick/Cargo.toml +++ /dev/null @@ -1,13 +0,0 @@ -[workspace] - -[package] -name = "aho-corasick-c" -version = "0.1.0" -description = "C wrapper for aho-corasick for loading from Wasm" - -[lib] -crate-type = ["staticlib"] -name = "aho_corasick" - -[dependencies] -aho-corasick = "0.7.20" diff --git a/buildtools/aho-corasick/Dockerfile b/buildtools/aho-corasick/Dockerfile deleted file mode 100644 index 6cd20e403b16b..0000000000000 --- a/buildtools/aho-corasick/Dockerfile +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright 2022 The OWASP Coraza contributors -# SPDX-License-Identifier: Apache-2.0 - -FROM rust:1-alpine - -RUN rustup target add wasm32-wasi - -ADD . /aho-corasick -WORKDIR /aho-corasick -ENV RUSTFLAGS "-C target-feature=-crt-static" -RUN cargo build --release --target wasm32-wasi - -CMD ["cp", "target/wasm32-wasi/release/libaho_corasick.a", "/out/libaho_corasick.a"] diff --git a/buildtools/aho-corasick/src/lib.rs b/buildtools/aho-corasick/src/lib.rs deleted file mode 100644 index e5ff07efb4cb8..0000000000000 --- a/buildtools/aho-corasick/src/lib.rs +++ /dev/null @@ -1,99 +0,0 @@ -// Copyright The OWASP Coraza contributors -// SPDX-License-Identifier: Apache-2.0 - -extern crate aho_corasick; - -use std::mem::MaybeUninit; -use std::slice; -use std::str; -use aho_corasick::{AhoCorasick, AhoCorasickBuilder, MatchKind}; - -#[no_mangle] -pub unsafe extern "C" fn new_matcher(patterns_ptr: *mut u8, patterns_len: usize) -> Box { - let all_patterns = unsafe { - slice::from_raw_parts(patterns_ptr, patterns_len) - }; - - let mut patterns = Vec::new(); - - let mut off = 0; - while off < patterns_len { - let pattern_len = u32::from_le_bytes([all_patterns[off], all_patterns[off+1], all_patterns[off+2], all_patterns[off+3]]) as usize; - off += 4; - let pattern = unsafe { - str::from_utf8_unchecked(&all_patterns[off..off+pattern_len]) - }; - patterns.push(pattern); - off += pattern_len; - } - - let ac = AhoCorasickBuilder::new() - .ascii_case_insensitive(true) - .dfa(true) - .match_kind(MatchKind::LeftmostLongest) - .build(patterns); - - return Box::new(ac) -} - -#[no_mangle] -pub extern "C" fn matches(ac: &mut AhoCorasick, value_ptr: usize, value_len: usize, n: usize, matches: *mut usize) -> usize { - let value = ptr_to_string(value_ptr, value_len); - - let mut num = 0; - for value in ac.find_iter(value) { - if num == n { - break; - } - unsafe { - *matches.offset(2*num as isize) = value.start(); - *matches.offset((2*num+1) as isize) = value.end(); - } - num += 1; - } - - return num -} - -/// WebAssembly export that allocates a pointer (linear memory offset) that can -/// be used for a string. -/// -/// This is an ownership transfer, which means the caller must call -/// [`deallocate`] when finished. -#[cfg_attr(all(target_arch = "wasm32"), export_name = "allocate")] -#[no_mangle] -pub extern "C" fn _allocate(size: usize) -> *mut u8 { - allocate(size as usize) -} - -/// Allocates size bytes and leaks the pointer where they start. -fn allocate(size: usize) -> *mut u8 { - // Allocate the amount of bytes needed. - let vec: Vec> = Vec::with_capacity(size); - - // into_raw leaks the memory to the caller. - Box::into_raw(vec.into_boxed_slice()) as *mut u8 -} - - -/// WebAssembly export that deallocates a pointer of the given size (linear -/// memory offset, byteCount) allocated by [`allocate`]. -#[cfg_attr(all(target_arch = "wasm32"), export_name = "deallocate")] -#[no_mangle] -pub unsafe extern "C" fn _deallocate(ptr: usize, size: usize) { - deallocate(ptr as *mut u8, size); -} - -/// Retakes the pointer which allows its memory to be freed. -unsafe fn deallocate(ptr: *mut u8, size: usize) { - let _ = Vec::from_raw_parts(ptr, 0, size); -} - -/// Returns a string from WebAssembly compatible numeric types representing -/// its pointer and length. -fn ptr_to_string(ptr: usize, len: usize) -> &'static str { - unsafe { - let slice = slice::from_raw_parts_mut(ptr as *mut u8, len as usize); - return str::from_utf8_unchecked_mut(slice); - } -} diff --git a/go.mod b/go.mod index 22ebdc534e71e..1953dbaa1a068 100644 --- a/go.mod +++ b/go.mod @@ -5,8 +5,9 @@ go 1.19 require ( github.com/corazawaf/coraza/v3 v3.0.0-20230110223518-703d29668893 github.com/stretchr/testify v1.8.0 - github.com/tetratelabs/proxy-wasm-go-sdk v0.20.1-0.20221031045735-89d180d022a5 + github.com/tetratelabs/proxy-wasm-go-sdk v0.20.1-0.20230115020858-593cf0f7417a github.com/tidwall/gjson v1.14.3 + github.com/wasilibs/go-aho-corasick v0.2.0 github.com/wasilibs/go-re2 v0.0.0-20221219074959-3ec67f9038f0 ) @@ -17,7 +18,7 @@ require ( github.com/magefile/mage v1.14.0 // indirect github.com/petar-dambovaliev/aho-corasick v0.0.0-20211021192214-5ab2d9280aa9 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect - github.com/tetratelabs/wazero v1.0.0-pre.4.0.20221213074253-2e13f57f56a1 // indirect + github.com/tetratelabs/wazero v1.0.0-pre.7 // indirect github.com/tidwall/match v1.1.1 // indirect github.com/tidwall/pretty v1.2.1 // indirect golang.org/x/net v0.1.0 // indirect diff --git a/go.sum b/go.sum index c81d7329f5c20..c8c40a81bf3c5 100644 --- a/go.sum +++ b/go.sum @@ -23,10 +23,10 @@ github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSS github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0 h1:pSgiaMZlXftHpm5L7V1+rVB+AZJydKsMxsQBIJw4PKk= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= -github.com/tetratelabs/proxy-wasm-go-sdk v0.20.1-0.20221031045735-89d180d022a5 h1:gbsZkzeu+H7oX9xJA97eIuNHCuXfppuJh32mX2Cpeqc= -github.com/tetratelabs/proxy-wasm-go-sdk v0.20.1-0.20221031045735-89d180d022a5/go.mod h1:A0osZ5uU1yRt5ZOdRRzIHxJZf8xzsxvEkeL8Ae698+s= -github.com/tetratelabs/wazero v1.0.0-pre.4.0.20221213074253-2e13f57f56a1 h1:L+/AG1GzZc8u7tIl7ijAl508T/FHu9esMf+E3hZ1JVA= -github.com/tetratelabs/wazero v1.0.0-pre.4.0.20221213074253-2e13f57f56a1/go.mod h1:u8wrFmpdrykiFK0DFPiFm5a4+0RzsdmXYVtijBKqUVo= +github.com/tetratelabs/proxy-wasm-go-sdk v0.20.1-0.20230115020858-593cf0f7417a h1:uxfM0O1fvBKs3UyCZgz69LRzjyg2eBiPIvSD7xmgap8= +github.com/tetratelabs/proxy-wasm-go-sdk v0.20.1-0.20230115020858-593cf0f7417a/go.mod h1:62ObOye8ebDcihh92dIsVV+TgzjOehFeg8fruL6F12g= +github.com/tetratelabs/wazero v1.0.0-pre.7 h1:WI5N14XxoXw+ZWhcjSazJ6rEowhJbH/x8hglxC5gN7k= +github.com/tetratelabs/wazero v1.0.0-pre.7/go.mod h1:u8wrFmpdrykiFK0DFPiFm5a4+0RzsdmXYVtijBKqUVo= github.com/tidwall/gjson v1.14.3 h1:9jvXn7olKEHU1S9vwoMGliaT8jq1vJ7IH/n9zD9Dnlw= github.com/tidwall/gjson v1.14.3/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA= @@ -34,6 +34,8 @@ github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JT github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4= github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= +github.com/wasilibs/go-aho-corasick v0.2.0 h1:32cgC99Id42dzoUupwn0nMPMxz3QD6DlxdjYpOVfZOA= +github.com/wasilibs/go-aho-corasick v0.2.0/go.mod h1:70K0dlZi6vyp5xyczyd73SCZMYcxswRXLVnUwQSKpM4= github.com/wasilibs/go-re2 v0.0.0-20221219074959-3ec67f9038f0 h1:+dy0jRJ7Y0sMNJPUkTeZ8qC9qc9tNWJ/Noha+L6w2ZE= github.com/wasilibs/go-re2 v0.0.0-20221219074959-3ec67f9038f0/go.mod h1:9YbcVrlaRryN9yCvk1fAjJTn5MLKPEd9/LnCJPkGWxY= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4 h1:6zppjxzCulZykYSLyVDYbneBfbaBIQPYMevg0bEwv2s= diff --git a/init_tinygo.go b/init_tinygo.go index 5e63825316963..3c97817f0497f 100644 --- a/init_tinygo.go +++ b/init_tinygo.go @@ -7,5 +7,5 @@ package main import _ "github.com/corazawaf/coraza-proxy-wasm/internal/gc" -// #cgo LDFLAGS: lib/libinjection.a lib/libaho_corasick.a lib/libmimalloc.a lib/libgc.a +// #cgo LDFLAGS: lib/libinjection.a lib/libmimalloc.a lib/libgc.a import "C" diff --git a/internal/ahocorasick/ahocorasick.go b/internal/ahocorasick/ahocorasick.go deleted file mode 100644 index 4cfce9828a4b9..0000000000000 --- a/internal/ahocorasick/ahocorasick.go +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright The OWASP Coraza contributors -// SPDX-License-Identifier: Apache-2.0 - -//go:build tinygo - -package ahocorasick - -import ( - "encoding/binary" - "reflect" - "unsafe" -) - -//export new_matcher -func newMatcher(patternsPtr unsafe.Pointer, patternsLen uint32) uint32 - -//export matches -func matches(matcherPtr uint32, valuePtr unsafe.Pointer, valueLen uint32, n uint32, matchesPtr unsafe.Pointer) uint32 - -type Matcher struct { - ptr uint32 -} - -func NewMatcher(patterns []string) Matcher { - var bufSize int - for _, p := range patterns { - bufSize += 4 - bufSize += len(p) - } - - buf := make([]byte, 0, bufSize) - for _, p := range patterns { - buf = binary.LittleEndian.AppendUint32(buf, uint32(len(p))) - buf = append(buf, p...) - } - - ac := newMatcher(unsafe.Pointer(&buf[0]), uint32(bufSize)) - return Matcher{ptr: ac} -} - -func (ac Matcher) Matches(value string, n int) []string { - sh := (*reflect.StringHeader)(unsafe.Pointer(&value)) - matchOffs := make([]uint32, 2*n) - matchOffsPtr := unsafe.Pointer(&matchOffs[0]) - numMatches := matches(ac.ptr, unsafe.Pointer(sh.Data), uint32(sh.Len), uint32(n), matchOffsPtr) - var matches = make([]string, numMatches) - for i := 0; i < int(numMatches); i++ { - start := matchOffs[2*i] - end := matchOffs[2*i+1] - matches[i] = value[start:end] - } - return matches -} diff --git a/internal/operators/pm.go b/internal/operators/pm.go index 6236750c6de42..d7b46851079ca 100644 --- a/internal/operators/pm.go +++ b/internal/operators/pm.go @@ -9,30 +9,57 @@ import ( "strings" "github.com/corazawaf/coraza/v3/rules" - - "github.com/corazawaf/coraza-proxy-wasm/internal/ahocorasick" + ahocorasick "github.com/wasilibs/go-aho-corasick" ) type pm struct { - m ahocorasick.Matcher + matcher ahocorasick.AhoCorasick } var _ rules.Operator = (*pm)(nil) func newPM(options rules.OperatorOptions) (rules.Operator, error) { - return &pm{m: ahocorasick.NewMatcher(strings.Split(options.Arguments, " "))}, nil + data := options.Arguments + + data = strings.ToLower(data) + dict := strings.Split(data, " ") + builder := ahocorasick.NewAhoCorasickBuilder(ahocorasick.Opts{ + AsciiCaseInsensitive: true, + MatchOnlyWholeWords: false, + MatchKind: ahocorasick.LeftMostLongestMatch, + DFA: true, + }) + + // TODO this operator is supposed to support snort data syntax: "@pm A|42|C|44|F" + return &pm{matcher: builder.Build(dict)}, nil } func (o *pm) Evaluate(tx rules.TransactionState, value string) bool { - return pmEvaluate(o.m, tx, value) + return pmEvaluate(o.matcher, tx, value) } -func pmEvaluate(m ahocorasick.Matcher, tx rules.TransactionState, value string) bool { - matches := m.Matches(value, 8) - if tx.Capturing() { - for i, c := range matches { - tx.CaptureField(i, c) +func pmEvaluate(matcher ahocorasick.AhoCorasick, tx rules.TransactionState, value string) bool { + iter := matcher.Iter(value) + + if !tx.Capturing() { + // Not capturing so just one match is enough. + return iter.Next() != nil + } + + var numMatches int + for { + m := iter.Next() + if m == nil { + break + } + + tx.CaptureField(numMatches, value[m.Start():m.End()]) + + numMatches++ + if numMatches == 10 { + return true } } - return len(matches) > 0 + + return numMatches > 0 } diff --git a/internal/operators/pm_from_file.go b/internal/operators/pm_from_file.go index 2b6af3ec6b460..48a335ee5b1bb 100644 --- a/internal/operators/pm_from_file.go +++ b/internal/operators/pm_from_file.go @@ -11,8 +11,7 @@ import ( "strings" "github.com/corazawaf/coraza/v3/rules" - - "github.com/corazawaf/coraza-proxy-wasm/internal/ahocorasick" + ahocorasick "github.com/wasilibs/go-aho-corasick" ) func newPMFromFile(options rules.OperatorOptions) (rules.Operator, error) { @@ -37,5 +36,12 @@ func newPMFromFile(options rules.OperatorOptions) (rules.Operator, error) { lines = append(lines, strings.ToLower(l)) } - return &pm{m: ahocorasick.NewMatcher(lines)}, nil + builder := ahocorasick.NewAhoCorasickBuilder(ahocorasick.Opts{ + AsciiCaseInsensitive: true, + MatchOnlyWholeWords: false, + MatchKind: ahocorasick.LeftMostLongestMatch, + DFA: false, + }) + + return &pm{matcher: builder.Build(lines)}, nil } diff --git a/lib/libaho_corasick.a b/lib/libaho_corasick.a deleted file mode 100644 index a9ad4e4681485..0000000000000 Binary files a/lib/libaho_corasick.a and /dev/null differ diff --git a/magefiles/magefile.go b/magefiles/magefile.go index ff9e709a1dc0a..d1c985065b91e 100644 --- a/magefiles/magefile.go +++ b/magefiles/magefile.go @@ -203,7 +203,7 @@ tinygo build -gc=custom -opt=2 -o %s -scheduler=none -target=wasi %s`, filepath. // UpdateLibs updates the C++ filter dependencies. func UpdateLibs() error { - libs := []string{"aho-corasick", "bdwgc", "libinjection", "mimalloc"} + libs := []string{"bdwgc", "libinjection", "mimalloc"} for _, lib := range libs { if err := sh.RunV("docker", "build", "-t", "ghcr.io/corazawaf/coraza-proxy-wasm/buildtools-"+lib, filepath.Join("buildtools", lib)); err != nil { return err