tweak: further optimisations by changing the hash algorithm
From benchmarking it was clear that most of the time when Chainsaw is
hunting is spent accessing values from objects (hashmap lookups). The
preprocessing optimisation was a good step forward, but it only had a
noticeable impact when running single-threaded or on very large rule
sets; the lookups remained the true bottleneck. Luckily there is no
need for a cryptographically secure hashing algorithm here, so swapping
to FxHashMap makes sense, as that is exactly what it is designed for.
As noted in the code, this optimisation has not yet been applied to
search, but once it is, the same improvement will be seen there.
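As an illustration of the kind of swap this commit makes (a minimal sketch, not Chainsaw's actual code; `Document`, `fields`, and `find` are hypothetical names), FxHashMap is a drop-in alias for std's HashMap with a cheaper, non-cryptographic hasher, so the change is mostly a matter of swapping the type:

// Sketch only: illustrates the HashMap -> FxHashMap swap described above.
// `Document`, `fields`, and `find` are hypothetical, not Chainsaw's types.
use rustc_hash::FxHashMap;

struct Document {
    // Same API as std::collections::HashMap; only the hash function
    // differs. Fx is not DoS-resistant, which is fine here because the
    // keys are trusted field names, not attacker-controlled input.
    fields: FxHashMap<String, String>,
}

impl Document {
    // The hot path during hunting: repeated lookups by field name.
    fn find(&self, key: &str) -> Option<&str> {
        self.fields.get(key).map(String::as_str)
    }
}

fn main() {
    // FxHashMap has no `new()`; construct it with `default()`.
    let mut fields = FxHashMap::default();
    fields.insert("Event.System.Provider".to_owned(), "Sysmon".to_owned());
    let doc = Document { fields };
    assert_eq!(doc.find("Event.System.Provider"), Some("Sysmon"));
}

The trade-off is the usual one: Fx trades SipHash's hash-flooding resistance for raw speed, which pays off when, as the benchmarks here showed, lookups dominate the profile.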
fscc-alexkornitzer authored and alexkornitzer committed Feb 16, 2023
1 parent 296cdd9 commit 8b39037
Showing 7 changed files with 133 additions and 76 deletions.
11 changes: 9 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions Cargo.toml
@@ -26,6 +26,7 @@ prettytable-rs = "0.10"
quick-xml = { version = "0.27", features = ["serialize"] }
rayon = "1.5"
regex = "1.6"
+rustc-hash = "1.0"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
serde_yaml = "0.9"
26 changes: 11 additions & 15 deletions src/cli.rs
@@ -187,16 +187,16 @@ pub fn print_log(
let mut values = vec![];
for field in hunt.mapper.fields() {
if field.visible {
-let data: Json;
+let data: Value;
let wrapper;
let mapped = match &document.kind {
FileKind::Evtx => {
-data = Json::from(bincode::deserialize::<Value>(&document.data)?);
+data = bincode::deserialize::<Value>(&document.data)?;
wrapper = crate::evtx::Wrapper(&data);
hunt.mapper.mapped(&wrapper)
}
FileKind::Json | FileKind::Jsonl | FileKind::Mft | FileKind::Xml => {
-data = Json::from(bincode::deserialize::<Value>(&document.data)?);
+data = bincode::deserialize::<Value>(&document.data)?;
hunt.mapper.mapped(&data)
}
FileKind::Unknown => continue,
@@ -366,22 +366,18 @@ pub fn print_detections(
// What we do here is hash each row since if the fields are the same but the values
// are not then we would lose data, so in this case we split the row
for hit in &grouping.hits {
-let data: Json;
+let data: Value;
let wrapper;
let mapped = match &document.kind {
FileKind::Evtx => {
-data = Json::from(
-bincode::deserialize::<Value>(&document.data)
-.expect("could not decompress"),
-);
+data = bincode::deserialize::<Value>(&document.data)
+.expect("could not decompress");
wrapper = crate::evtx::Wrapper(&data);
hit.hunt.mapper.mapped(&wrapper)
}
FileKind::Json | FileKind::Jsonl | FileKind::Mft | FileKind::Xml => {
-data = Json::from(
-bincode::deserialize::<Value>(&document.data)
-.expect("could not decompress"),
-);
+data = bincode::deserialize::<Value>(&document.data)
+.expect("could not decompress");
hit.hunt.mapper.mapped(&data)
}
FileKind::Unknown => continue,
@@ -618,16 +614,16 @@ pub fn print_csv(
// What we do here is hash each row since if the fields are the same but the values
// are not then we would lose data, so in this case we split the row
for hit in &grouping.hits {
-let data: Json;
+let data: Value;
let wrapper;
let mapped = match &document.kind {
FileKind::Evtx => {
-data = Json::from(bincode::deserialize::<Value>(&document.data)?);
+data = bincode::deserialize::<Value>(&document.data)?;
wrapper = crate::evtx::Wrapper(&data);
hit.hunt.mapper.mapped(&wrapper)
}
FileKind::Json | FileKind::Jsonl | FileKind::Mft | FileKind::Xml => {
-data = Json::from(bincode::deserialize::<Value>(&document.data)?);
+data = bincode::deserialize::<Value>(&document.data)?;
hit.hunt.mapper.mapped(&data)
}
FileKind::Unknown => continue,
20 changes: 19 additions & 1 deletion src/file/evtx.rs
@@ -7,6 +7,7 @@ use serde_json::Value as Json;
use tau_engine::{Document, Value as Tau};

use crate::search::Searchable;
+use crate::value::Value;

pub type Evtx = SerializedEvtxRecord<Json>;

@@ -30,7 +31,7 @@ impl Parser {
}
}

-pub struct Wrapper<'a>(pub &'a Json);
+pub struct Wrapper<'a>(pub &'a Value);
impl<'a> Document for Wrapper<'a> {
fn find(&self, key: &str) -> Option<Tau<'_>> {
// As event logs can store values in a key or complex objects we do some aliasing here for
@@ -44,6 +45,23 @@ impl<'a> Document for Wrapper<'a> {
}
}
}
+// FIXME: Remove the need for this, it requires a big rethink on the data structures, as `search` is
+// the blocker here. It's actually quite easy to do, but just want to think it through first...
+// This structure means that we don't get the lookup speed improvements from using `Value`.
+pub struct WrapperLegacy<'a>(pub &'a Json);
+impl<'a> Document for WrapperLegacy<'a> {
+fn find(&self, key: &str) -> Option<Tau<'_>> {
+// As event logs can store values in a key or complex objects we do some aliasing here for
+// convenience...
+match key {
+"Event.System.Provider" => self.0.find("Event.System.Provider_attributes.Name"),
+"Event.System.TimeCreated" => self
+.0
+.find("Event.System.TimeCreated_attributes.SystemTime"),
+_ => self.0.find(key),
+}
+}
+}

impl Searchable for SerializedEvtxRecord<Json> {
fn matches(&self, regex: &RegexSet) -> bool {
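The remaining changed files, including the new value module (likely src/value.rs) that the import above refers to, are not rendered on this page. Purely as a flagged assumption (a sketch inferred from the commit message and the imports, not the actual definition), crate::value::Value is presumably a serde_json-style value tree whose object variant is backed by FxHashMap, roughly:

// ASSUMED shape of crate::value::Value, inferred from the commit message
// and the imports above; NOT the real definition from the source tree.
use rustc_hash::FxHashMap;

pub enum Value {
    Null,
    Bool(bool),
    Number(f64),
    String(String),
    Array(Vec<Value>),
    // The object variant is the point of the commit: an Fx-hashed map
    // instead of serde_json's map, so the hot key lookups during hunting
    // avoid SipHash. Per the FIXME above, `search` (and so WrapperLegacy)
    // still goes through serde_json's Value for now.
    Object(FxHashMap<String, Value>),
}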