Skip to content

Commit

Permalink
Replace rx, pm, and sqli libs with non-Go libraries (corazawaf#28)
Browse files Browse the repository at this point in the history
  • Loading branch information
anuraaga authored Sep 16, 2022
1 parent 7ed598b commit 368f259
Show file tree
Hide file tree
Showing 32 changed files with 806 additions and 120 deletions.
8 changes: 0 additions & 8 deletions README_RULES.md

This file was deleted.

14 changes: 14 additions & 0 deletions buildtools/aho-corasick/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Copyright 2022 The OWASP Coraza contributors
# SPDX-License-Identifier: Apache-2.0

# Builds the patched aho-corasick crate as a static library for the
# wasm32-wasi target.
FROM rust:1-alpine

# curl downloads the crate sources and patch applies aho-corasick.patch below;
# the wasm32-wasi target is the one passed to cargo build.
RUN apk add --no-cache curl patch && rustup target add wasm32-wasi

# Fetch the 0.7.19 release tarball and unpack it, without its top-level
# directory, into /aho-corasick.
RUN mkdir -p /aho-corasick && curl -L https://github.com/BurntSushi/aho-corasick/archive/refs/tags/0.7.19.tar.gz | tar -xz --strip-components 1 -C /aho-corasick
WORKDIR /aho-corasick
# The patch switches the crate type to staticlib and adds src/exports.rs with
# the C ABI entry points.
ADD aho-corasick.patch aho-corasick.patch
RUN patch -p1 < aho-corasick.patch
RUN cargo build --release --target wasm32-wasi

# Running the image copies the built archive to /out
# (presumably a directory mounted by the caller - confirm against the build scripts).
CMD ["cp", "target/wasm32-wasi/release/libaho_corasick.a", "/out/libaho_corasick.a"]
124 changes: 124 additions & 0 deletions buildtools/aho-corasick/aho-corasick.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
diff --git a/Cargo.toml b/Cargo.toml
index 610bd4d..55e2f37 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -19,6 +19,7 @@ edition = "2018"
members = ["aho-corasick-debug", "bench"]

[lib]
+crate-type = ["staticlib"]
name = "aho_corasick"

[features]
diff --git a/src/exports.rs b/src/exports.rs
new file mode 100644
index 0000000..f97de6d
--- /dev/null
+++ b/src/exports.rs
@@ -0,0 +1,93 @@
+use std::mem::MaybeUninit;
+use std::slice;
+use crate::{AhoCorasick, AhoCorasickBuilder, MatchKind};
+
+/// Registry of every automaton built by `new_matcher`. The `usize` handle that
+/// `new_matcher` returns and `matches` accepts is an index into this vector;
+/// within this file entries are only ever appended.
+static mut MATCHERS: Vec<AhoCorasick> = Vec::new();
+
+/// Builds an Aho-Corasick automaton from a space-separated pattern list and
+/// registers it in `MATCHERS`.
+///
+/// `patterns_ptr` and `patterns_len` describe the byte range in linear memory
+/// that holds the patterns, separated by single spaces. The bytes are copied,
+/// so the caller keeps ownership of that buffer.
+///
+/// The automaton is ASCII case-insensitive, compiled to a DFA, and uses
+/// leftmost-longest match semantics.
+///
+/// Returns the index of the new automaton in `MATCHERS`, to be passed as
+/// `matcher_ptr` to `matches`.
+#[no_mangle]
+pub extern "C" fn new_matcher(patterns_ptr: usize, patterns_len: usize) -> usize {
+    // `ptr_to_string` returns an owned copy that is dropped normally at the end
+    // of this function. The former `mem::forget(&patterns_str)` only forgot a
+    // reference, which does nothing, so it has been removed.
+    let patterns_str = ptr_to_string(patterns_ptr, patterns_len);
+
+    let ac = AhoCorasickBuilder::new()
+        .ascii_case_insensitive(true)
+        .dfa(true)
+        .match_kind(MatchKind::LeftmostLongest)
+        .build(patterns_str.split(' '));
+
+    // SAFETY: assumes the module is only ever entered from one thread at a
+    // time, so nothing else touches `MATCHERS` concurrently - TODO confirm.
+    unsafe {
+        MATCHERS.push(ac);
+        MATCHERS.len() - 1
+    }
+}
+
+/// Searches a value with a registered automaton and reports up to `n` matches.
+///
+/// * `matcher_ptr` - handle returned by `new_matcher`.
+/// * `value_ptr` / `value_len` - byte range of the haystack in linear memory.
+/// * `n` - maximum number of matches to report.
+/// * `matches` - output buffer with room for `2 * n` `usize` values. Match `i`
+///   is stored as its start offset at index `2 * i` and its end offset at
+///   index `2 * i + 1`, both in bytes relative to `value_ptr`.
+///
+/// Returns the number of matches written. Returns 0 without touching the
+/// buffer when `n` is 0, `matches` is null, or `matcher_ptr` does not name a
+/// registered automaton.
+#[no_mangle]
+pub extern "C" fn matches(matcher_ptr: usize, value_ptr: usize, value_len: usize, n: usize, matches: *mut usize) -> usize {
+    // Nothing can be reported without capacity or an output buffer.
+    if n == 0 || matches.is_null() {
+        return 0;
+    }
+
+    // Checked lookup: an unknown handle is reported as "no matches" instead of
+    // reading past the end of the registry.
+    // SAFETY: assumes the module is only ever entered from one thread at a
+    // time, so nothing mutates `MATCHERS` during this call - TODO confirm.
+    let ac = match unsafe { MATCHERS.get(matcher_ptr) } {
+        Some(ac) => ac,
+        None => return 0,
+    };
+
+    // Search the caller's bytes in place. The automaton works on raw bytes, so
+    // no copy and no UTF-8 assumption is needed, and the reported offsets stay
+    // relative to the caller's buffer.
+    let haystack: &[u8] = if value_len == 0 {
+        // An empty value may come with a null pointer; never dereference it.
+        &[]
+    } else {
+        // SAFETY: the caller guarantees that `value_len` bytes starting at
+        // `value_ptr` are readable and unchanged for the duration of the call.
+        unsafe { slice::from_raw_parts(value_ptr as *const u8, value_len) }
+    };
+
+    let mut num = 0;
+    for found in ac.find_iter(haystack).take(n) {
+        // SAFETY: `num < n` here and the caller guarantees `matches` has room
+        // for `2 * n` values, so both slots are in bounds.
+        unsafe {
+            *matches.add(2 * num) = found.start();
+            *matches.add(2 * num + 1) = found.end();
+        }
+        num += 1;
+    }
+
+    num
+}
+
+/// WebAssembly export that reserves `size` bytes and returns a pointer (linear
+/// memory offset) to them, for example so the host can write a string there.
+///
+/// This is an ownership transfer, which means the caller must call
+/// [`deallocate`] with the same pointer and `size` when finished.
+#[cfg_attr(all(target_arch = "wasm32"), export_name = "allocate")]
+#[no_mangle]
+pub extern "C" fn _allocate(size: usize) -> *mut u8 {
+    // `size` is already a `usize`, so it is forwarded without a cast.
+    allocate(size)
+}
+
+/// Allocates `size` bytes and leaks them, returning a pointer to the first byte.
+///
+/// The memory is uninitialized. Ownership passes to the caller, who releases
+/// it by handing the same pointer and `size` to [`deallocate`].
+fn allocate(size: usize) -> *mut u8 {
+    // The vector needs a *length* of `size`, not just that much capacity:
+    // `into_boxed_slice` drops excess capacity, so a zero-length vector would
+    // release its buffer and leave the caller with a dangling pointer.
+    let vec: Vec<MaybeUninit<u8>> = vec![MaybeUninit::uninit(); size];
+
+    // `into_raw` leaks the allocation to the caller.
+    Box::into_raw(vec.into_boxed_slice()) as *mut u8
+}
+
+
+/// WebAssembly export that deallocates a pointer of the given size (linear
+/// memory offset, byteCount) allocated by [`allocate`].
+///
+/// # Safety
+///
+/// `ptr` and `size` are forwarded unchanged to [`deallocate`], which rebuilds
+/// a `Vec` with capacity `size` around `ptr` and drops it. The caller must
+/// therefore pass a pointer obtained from [`allocate`] together with the same
+/// `size`, and must not use or free that memory again afterwards.
+#[cfg_attr(all(target_arch = "wasm32"), export_name = "deallocate")]
+#[no_mangle]
+pub unsafe extern "C" fn _deallocate(ptr: usize, size: usize) {
+ deallocate(ptr as *mut u8, size);
+}
+
+/// Takes back ownership of the `size`-byte buffer at `ptr` and frees it.
+///
+/// # Safety
+///
+/// `ptr` must point to a live allocation of exactly `size` bytes made by the
+/// global allocator, and the buffer must not be used or freed again after
+/// this call.
+unsafe fn deallocate(ptr: *mut u8, size: usize) {
+    // Rebuilding the owning `Vec` (length 0, capacity `size`) and dropping it
+    // right away returns the buffer to the allocator.
+    drop(Vec::from_raw_parts(ptr, 0, size));
+}
+
+/// Copies `len` bytes starting at the linear-memory offset `ptr` into an owned
+/// [`String`].
+///
+/// The source buffer is only read, never modified, and stays owned by the
+/// caller. A `len` of 0 yields an empty string without looking at `ptr`, so a
+/// null pointer is acceptable for empty input.
+///
+/// The bytes are not validated: the caller must pass valid UTF-8.
+fn ptr_to_string(ptr: usize, len: usize) -> String {
+    if len == 0 {
+        // `slice::from_raw_parts` requires a non-null pointer even for empty
+        // slices, so handle the empty case without building one.
+        return String::new();
+    }
+
+    // SAFETY: the caller guarantees that `len` bytes starting at `ptr` are
+    // readable and hold valid UTF-8. A shared slice suffices because the data
+    // is copied, not mutated.
+    unsafe {
+        let bytes = slice::from_raw_parts(ptr as *const u8, len);
+        std::str::from_utf8_unchecked(bytes).to_owned()
+    }
+}
\ No newline at end of file
diff --git a/src/lib.rs b/src/lib.rs
index 4465a56..9997a02 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -213,6 +213,7 @@ mod prefilter;
mod state_id;
#[cfg(test)]
mod tests;
+mod exports;

/// A representation of a match reported by an Aho-Corasick automaton.
///
14 changes: 14 additions & 0 deletions buildtools/libinjection/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Copyright 2022 The OWASP Coraza contributors
# SPDX-License-Identifier: Apache-2.0

# Builds libinjection as a static library on top of the buildtools-wasi-sdk
# image (presumably providing the wasi-sdk toolchain - confirm against that image).
FROM ghcr.io/anuraaga/coraza-wasm-filter/buildtools-wasi-sdk:main

# Refresh the package index in the same layer as the install so the build does
# not depend on a stale or missing index inherited from the base image.
RUN apt-get update && apt-get install -y patch

# Fetch libinjection pinned to an exact commit and unpack it, without its
# top-level directory, into /libinjection.
RUN mkdir -p /libinjection && curl -L https://github.com/libinjection/libinjection/archive/49904c42a6e68dc8f16c022c693e897e4010a06c.tar.gz | tar -xz --strip-components 1 -C /libinjection
WORKDIR /libinjection
# The patch turns the CFLAGS assignment in src/Makefile into `?=` so the
# flags can be overridden from the environment.
ADD libinjection.patch libinjection.patch
RUN patch -p1 < libinjection.patch
RUN make -C src libinjection.a

# Running the image copies the built archive to /out
# (presumably a directory mounted by the caller - confirm against the build scripts).
CMD ["cp", "src/libinjection.a", "/out/libinjection.a"]
13 changes: 13 additions & 0 deletions buildtools/libinjection/libinjection.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
diff --git a/src/Makefile b/src/Makefile
index 48bdc6a..4aed08c 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -5,7 +5,7 @@ AR ?= ar

CC ?= cc
LD ?= ld
-CFLAGS=-Wall -Wextra -Werror -pedantic -ansi -g -O3 -fPIC
+CFLAGS ?= -Wall -Wextra -Werror -pedantic -ansi -g -O3 -fPIC

INSTALL=install
PREFIX=/usr/local
22 changes: 22 additions & 0 deletions buildtools/re2/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Copyright 2022 The OWASP Coraza contributors
# SPDX-License-Identifier: Apache-2.0

# Builds re2 and its C wrapper cre2 as static libraries on top of the
# buildtools-wasi-sdk image (presumably providing the wasi-sdk toolchain -
# confirm against that image).
FROM ghcr.io/anuraaga/coraza-wasm-filter/buildtools-wasi-sdk:main

# Refresh the package index in the same layer as the install so the build does
# not depend on a stale or missing index inherited from the base image.
RUN apt-get update && apt-get install -y patch

# re2: fetch the 2022-06-01 release, apply re2.patch (drops the -pthread flags
# and turns the Mutex operations into no-ops) and build the static archive.
RUN mkdir -p /re2 && curl -L https://github.com/google/re2/archive/refs/tags/2022-06-01.tar.gz | tar -xz --strip-components 1 -C /re2
WORKDIR /re2
ADD re2.patch re2.patch
RUN patch -p1 < re2.patch
RUN make obj/libre2.a

# cre2: fetch 0.4.0-devel.2, apply cre2.patch (removes the try/catch around
# cre2_replace) and build it against the re2 tree built above.
RUN mkdir -p /cre2 && curl -L https://bitbucket.org/marcomaggi/cre2/downloads/cre2-0.4.0-devel.2.tar.gz | tar -xz --strip-components 1 -C /cre2
WORKDIR /cre2
ADD cre2.patch cre2.patch
RUN patch -p1 < cre2.patch
# host is required by configure but not used so set it arbitrarily
RUN RE2_CFLAGS=-I/re2 RE2_LIBS=/re2/obj ./configure --host=i686-pc-linux-gnu --enable-shared=false && \
make

# Running the image copies both archives to /out
# (presumably a directory mounted by the caller - confirm against the build scripts).
CMD ["bash", "-c", "cp /re2/obj/libre2.a /out/libre2.a && cp /cre2/.libs/libcre2.a /out/libcre2.a"]
25 changes: 25 additions & 0 deletions buildtools/re2/cre2.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
diff --git a/src/cre2.cpp b/src/cre2.cpp
index 5a63b93..12bc2ed 100644
--- a/src/cre2.cpp
+++ b/src/cre2.cpp
@@ -462,7 +462,6 @@ DEFINE_MATCH_REX_FUN2(cre2_find_and_consume_re,FindAndConsumeN)
int
cre2_replace (const char * pattern, cre2_string_t * text_and_target, cre2_string_t * rewrite)
{
- try {
std::string S(text_and_target->data, text_and_target->length);
re2::StringPiece R(rewrite->data, rewrite->length);
char * buffer; /* this exists to make GCC shut up about const */
@@ -477,12 +476,6 @@ cre2_replace (const char * pattern, cre2_string_t * text_and_target, cre2_string
} else
return -1;
return int(retval);
- } catch(const std::exception &e) {
- // e.what();
- return -1;
- } catch(...) {
- return -1;
- }
}
int
cre2_replace_re (cre2_regexp_t * rex, cre2_string_t * text_and_target, cre2_string_t * rewrite)
98 changes: 98 additions & 0 deletions buildtools/re2/re2.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
diff --git a/Makefile b/Makefile
index c24fd57..fad85a5 100644
--- a/Makefile
+++ b/Makefile
@@ -17,8 +17,8 @@ CXX?=g++
CXXFLAGS?=-O3 -g
LDFLAGS?=
# required
-RE2_CXXFLAGS?=-pthread -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers -I. $(CCICU) $(CCPCRE)
-RE2_LDFLAGS?=-pthread $(LDICU) $(LDPCRE)
+RE2_CXXFLAGS?=-Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers -I. $(CCICU) $(CCPCRE)
+RE2_LDFLAGS?=$(LDICU) $(LDPCRE)
AR?=ar
ARFLAGS?=rsc
NM?=nm
diff --git a/util/mutex.h b/util/mutex.h
index 4b6772a..10e6f57 100644
--- a/util/mutex.h
+++ b/util/mutex.h
@@ -26,17 +26,6 @@
#endif
#endif

-#if defined(MUTEX_IS_WIN32_SRWLOCK)
-typedef SRWLOCK MutexType;
-#elif defined(MUTEX_IS_PTHREAD_RWLOCK)
-#include <pthread.h>
-#include <stdlib.h>
-typedef pthread_rwlock_t MutexType;
-#else
-#include <shared_mutex>
-typedef std::shared_mutex MutexType;
-#endif
-
namespace re2 {

class Mutex {
@@ -55,8 +44,6 @@ class Mutex {
inline void WriterUnlock() { Unlock(); } // Release a lock from WriterLock()

private:
- MutexType mutex_;
-
// Catch the error of writing Mutex when intending MutexLock.
Mutex(Mutex *ignored);

@@ -66,12 +53,12 @@ class Mutex {

#if defined(MUTEX_IS_WIN32_SRWLOCK)

-Mutex::Mutex() : mutex_(SRWLOCK_INIT) { }
+Mutex::Mutex() { }
Mutex::~Mutex() { }
-void Mutex::Lock() { AcquireSRWLockExclusive(&mutex_); }
-void Mutex::Unlock() { ReleaseSRWLockExclusive(&mutex_); }
-void Mutex::ReaderLock() { AcquireSRWLockShared(&mutex_); }
-void Mutex::ReaderUnlock() { ReleaseSRWLockShared(&mutex_); }
+void Mutex::Lock() { }
+void Mutex::Unlock() { }
+void Mutex::ReaderLock() { }
+void Mutex::ReaderUnlock() { }

#elif defined(MUTEX_IS_PTHREAD_RWLOCK)

@@ -80,12 +67,12 @@ void Mutex::ReaderUnlock() { ReleaseSRWLockShared(&mutex_); }
if ((fncall) != 0) abort(); \
} while (0)

-Mutex::Mutex() { SAFE_PTHREAD(pthread_rwlock_init(&mutex_, NULL)); }
-Mutex::~Mutex() { SAFE_PTHREAD(pthread_rwlock_destroy(&mutex_)); }
-void Mutex::Lock() { SAFE_PTHREAD(pthread_rwlock_wrlock(&mutex_)); }
-void Mutex::Unlock() { SAFE_PTHREAD(pthread_rwlock_unlock(&mutex_)); }
-void Mutex::ReaderLock() { SAFE_PTHREAD(pthread_rwlock_rdlock(&mutex_)); }
-void Mutex::ReaderUnlock() { SAFE_PTHREAD(pthread_rwlock_unlock(&mutex_)); }
+Mutex::Mutex() { }
+Mutex::~Mutex() { }
+void Mutex::Lock() { }
+void Mutex::Unlock() { }
+void Mutex::ReaderLock() { }
+void Mutex::ReaderUnlock() { }

#undef SAFE_PTHREAD

@@ -93,10 +80,10 @@ void Mutex::ReaderUnlock() { SAFE_PTHREAD(pthread_rwlock_unlock(&mutex_)); }

Mutex::Mutex() { }
Mutex::~Mutex() { }
-void Mutex::Lock() { mutex_.lock(); }
-void Mutex::Unlock() { mutex_.unlock(); }
-void Mutex::ReaderLock() { mutex_.lock_shared(); }
-void Mutex::ReaderUnlock() { mutex_.unlock_shared(); }
+void Mutex::Lock() { }
+void Mutex::Unlock() { }
+void Mutex::ReaderLock() { }
+void Mutex::ReaderUnlock() { }

#endif

43 changes: 43 additions & 0 deletions internal/ahocorasick/ahocorasick.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// Copyright 2022 The OWASP Coraza contributors
// SPDX-License-Identifier: Apache-2.0

//go:build tinygo

package ahocorasick

import (
"reflect"
"runtime"
"unsafe"
)

// newMatcher has no Go body; it is bound to the symbol "new_matcher", which
// the patched aho-corasick crate (buildtools/aho-corasick/aho-corasick.patch)
// defines. It takes the address and byte length of a space-separated pattern
// string and returns a handle identifying the matcher built from it.
//
//export new_matcher
func newMatcher(patternsPtr unsafe.Pointer, patternsLen uint32) uint32

// matches has no Go body; it is bound to the symbol "matches", which the
// patched aho-corasick crate (buildtools/aho-corasick/aho-corasick.patch)
// defines. It searches the valueLen bytes at valuePtr with the matcher
// identified by matcherPtr and reports at most n matches. For match i it
// stores the start offset at index 2*i and the end offset at index 2*i+1 of
// the buffer at matchesPtr, which must therefore hold 2*n values. It returns
// the number of matches stored.
//
//export matches
func matches(matcherPtr uint32, valuePtr unsafe.Pointer, valueLen uint32, n uint32, matchesPtr unsafe.Pointer) uint32

// Matcher refers to a matcher created through NewMatcher.
type Matcher struct {
// ptr is the handle returned by newMatcher; Matches passes it back as the
// first argument of matches.
ptr uint32
}

// NewMatcher passes patternsStr to the native new_matcher function and wraps
// the handle it returns in a Matcher.
func NewMatcher(patternsStr string) Matcher {
	// View the string's backing bytes in place instead of copying them.
	hdr := (*reflect.StringHeader)(unsafe.Pointer(&patternsStr))
	handle := newMatcher(unsafe.Pointer(hdr.Data), uint32(hdr.Len))
	// The native call only received a raw address, so keep the string
	// reachable until the call has returned.
	runtime.KeepAlive(patternsStr)
	return Matcher{ptr: handle}
}

// Matches searches value with the matcher and returns the matched substrings,
// at most n of them, in the order the native matches function reports them.
//
// It returns nil when n is not positive or when nothing matches.
func (ac Matcher) Matches(value string, n int) []string {
	// Guard before taking &matchOffs[0]: with n == 0 the slice is empty and
	// indexing it panics, and a negative n makes make() panic.
	if n <= 0 {
		return nil
	}

	sh := (*reflect.StringHeader)(unsafe.Pointer(&value))
	// Each match is reported as two offsets: start and end.
	matchOffs := make([]uint32, 2*n)
	matchOffsPtr := unsafe.Pointer(&matchOffs[0])
	numMatches := int(matches(ac.ptr, unsafe.Pointer(sh.Data), uint32(sh.Len), uint32(n), matchOffsPtr))
	// The native call only received raw addresses, so keep both buffers
	// reachable until it has returned, as NewMatcher does for its argument.
	runtime.KeepAlive(value)
	runtime.KeepAlive(matchOffs)

	if numMatches == 0 {
		return nil
	}

	// Named found rather than matches so the imported function is not shadowed.
	found := make([]string, 0, numMatches)
	for i := 0; i < numMatches; i++ {
		start, end := matchOffs[2*i], matchOffs[2*i+1]
		found = append(found, value[start:end])
	}
	return found
}
Loading

0 comments on commit 368f259

Please sign in to comment.