Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Experimental LLM Support #234

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ memory_limit = [
"revm-interpreter/memory_limit",
"revm/memory_limit",
]
llm = []

[dependencies]
bytes = { version = "1.2.1", features = ["serde"] }
Expand Down
5 changes: 5 additions & 0 deletions cli/src/evm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,10 @@ pub struct EvmArgs {
/// Offchain Config File. If specified, will deploy based on offchain config file.
#[arg(long, default_value = "")]
offchain_config_file: String,

/// [Experimental] Priority of ABI functions
#[arg(long)]
priority_file: Option<String>,
}

enum EVMTargetType {
Expand Down Expand Up @@ -566,6 +570,7 @@ pub fn evm_main(args: EvmArgs) {
selfdestruct_bug: args.selfdestruct_oracle,
arbitrary_external_call: args.arbitrary_external_call_oracle,
builder,
priority_file: args.priority_file,
};

match config.fuzzer_type {
Expand Down
1 change: 1 addition & 0 deletions src/evm/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,4 +76,5 @@ pub struct Config<VS, Addr, Code, By, Loc, SlotTy, Out, I, S, CI> {
pub selfdestruct_bug: bool,
pub arbitrary_external_call: bool,
pub builder: Option<BuildJob>,
pub priority_file: Option<String>,
}
2 changes: 2 additions & 0 deletions src/evm/experimental/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
/// Signature-weighted corpus scheduling (`SigScore`, `ProbabilityABISamplingScheduler`).
pub mod priority_scoring;
/// Call-sequence preference tracking for VM states (`StateScore`, `StateScoreScheulder`).
pub mod priority_state;
143 changes: 143 additions & 0 deletions src/evm/experimental/priority_scoring.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
use std::collections::HashMap;
use std::fs::File;
use std::io::Read;
use std::marker::PhantomData;
use libafl::corpus::{Corpus, Testcase};
use libafl::{Error, impl_serdeany};
use libafl::inputs::Input;
use libafl::prelude::{HasMetadata, HasRand, Rand, Scheduler, TestcaseScore};
use libafl::prelude::probabilistic_sampling::ProbabilityMetadata;
use libafl::state::HasCorpus;
use rand::prelude::IteratorRandom;
use crate::evm::input::{EVMInputT, EVMInputTy};
use crate::evm::types::EVMFuzzState;
use serde::Deserialize;
use serde::Serialize;
use crate::evm::contract_utils::set_hash;


/// Scheduling weights keyed by 4-byte function signature hash.
///
/// An instance is stored in the fuzzer state's metadata and read back by
/// `ProbabilityABISamplingScheduler` when it adds or selects corpus entries.
#[derive(Debug, Serialize, Deserialize)]
pub struct SigScore {
/// Score registered for each signature hash.
pub scores: HashMap<[u8; 4], f64>,
/// Running sum of the scores added through `register_score`.
pub total_score: f64,
/// Corpus indices recorded for each signature hash by the scheduler's `on_add`.
pub sig_indexes: HashMap<[u8; 4], Vec<usize>>
}

// Makes `SigScore` usable with libafl's metadata map (`HasMetadata`).
impl_serdeany!(SigScore);

impl SigScore {
pub fn new() -> Self {
Self {
scores: HashMap::new(),
total_score: 0.0,
sig_indexes: Default::default(),
}
}

pub fn from_file(path: &str) -> Result<Self, Error> {
let mut data = String::new();
let mut scores = SigScore::new();
File::open(path)?.read_to_string(&mut data)?;
for line in data.lines() {
let sig = line.split("@").nth(0).unwrap();
let score = line.split("@").nth(1).unwrap();
let mut hash = [0; 4];
set_hash(sig, &mut hash);
println!("{:?}:{}", hex::encode(hash), 1.0 as f64 / score.parse::<f64>().unwrap());
scores.register_score(&hash, score.parse::<f64>().unwrap());
}
Ok(scores)
}

pub fn get_score(&self, sig: &[u8; 4]) -> Option<f64> {
self.scores.get(sig).copied()
}

pub fn register_score(&mut self, sig: &[u8; 4], score: f64) {
self.scores.insert(*sig, score);
self.total_score += score;
}
}




/// Corpus scheduler that selects the next testcase through the `SigScore`
/// metadata: it samples a function signature by score and returns a corpus
/// index recorded for that signature.
///
/// The struct holds no data of its own; `PhantomData` only ties it to the
/// input type `I` and the state type `S`.
#[derive(Debug, Clone)]
pub struct ProbabilityABISamplingScheduler<I, S>
where
I: Input,
S: HasCorpus<I> + HasMetadata + HasRand,
{
// Zero-sized marker for the otherwise unused type parameters.
phantom: PhantomData<(I, S)>,
}

impl<I, S> ProbabilityABISamplingScheduler<I, S>
where
    I: Input,
    S: HasCorpus<I> + HasMetadata + HasRand,
{
    /// Builds the scheduler. It is stateless: everything it needs at run time
    /// is read from the fuzzer state passed to the `Scheduler` methods.
    pub fn new() -> Self {
        let phantom = PhantomData;
        Self { phantom }
    }
}

impl<I, S> Scheduler<I, S> for ProbabilityABISamplingScheduler<I, S>
where
I: Input + EVMInputT,
S: HasCorpus<I> + HasMetadata + HasRand,
{
fn on_add(&self, state: &mut S, idx: usize) -> Result<(), Error> {
let key = match state.corpus().get(idx).unwrap().borrow().input().as_ref().unwrap().get_function() {
Some(sig) => {
*sig
}
None => {
[0; 4]
}
};
let meta = state.metadata_mut().get_mut::<SigScore>().unwrap();

if meta.scores.get(&key).is_none() {
meta.register_score(&key, 10.0);
}
meta.sig_indexes.entry(key).or_insert_with(Vec::new).push(idx);
Ok(())
}

fn next(&self, state: &mut S) -> Result<usize, Error> {
if state.corpus().count() == 0 {
Err(Error::empty(String::from("No entries in corpus")))
} else {
let sig = {
let rand_prob: f64 = (state.rand_mut().below(100) as f64) / 100.0;
let meta = state.metadata().get::<SigScore>().unwrap();
let threshold = meta.total_score * rand_prob;
let mut k: f64 = 0.0;
let mut ret = *meta.scores.keys().last().unwrap();
for (idx, prob) in meta.scores.iter() {
k += prob;
if k >= threshold {
ret = *idx;
break;
}
}
ret
};

let ret = *state.metadata()
.get::<SigScore>()
.unwrap()
.sig_indexes
.get(&sig)
.expect("sig not found")
.iter()
.next()
.unwrap();

*state.corpus_mut().current_mut() = Some(ret);
Ok(ret)
}
}
}
120 changes: 120 additions & 0 deletions src/evm/experimental/priority_state.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
use std::collections::{HashMap, HashSet};
use std::fmt::Debug;
use std::fs::File;
use std::io::Read;
use std::marker::PhantomData;
use libafl::{Error, impl_serdeany};
use libafl::corpus::Corpus;
use libafl::inputs::Input;
use libafl::prelude::{HasMetadata, HasRand, Rand};
use libafl::state::HasCorpus;
use serde::{Deserialize, Serialize};
use crate::evm::contract_utils::set_hash;
use crate::evm::input::EVMInputT;
use crate::evm::types::EVMStagedVMState;
use crate::scheduler::SortedDroppingSchedulerNext;
use crate::state::HasParent;

/// Call-sequence preferences and per-state signature traces, stored in the
/// state's metadata and consulted by `StateScoreScheulder`.
#[derive(Debug, Serialize, Deserialize)]
pub struct StateScore {
/// For a signature, the signatures that preceded it on a line of the
/// preference file (filled in by `from_file`).
pub preference: HashMap<[u8; 4], Vec<[u8; 4]>>,
/// Signature used to look up `preference` and appended to new traces.
/// NOTE(review): nothing in this file assigns it after construction — confirm
/// where it is updated.
pub current_sigs: [u8; 4],
/// Signature trace recorded for each state index by `before_on_add`.
pub state_satisfied: HashMap<usize, Vec<[u8; 4]>>,
}

impl StateScore {
pub fn new() -> Self {
Self {
preference: HashMap::new(),
current_sigs: [0; 4],
state_satisfied: HashMap::new(),
}
}

pub fn from_file(path: &str) -> Result<Self, Error> {
let mut data = String::new();
let mut scores = StateScore::new();
File::open(path)?.read_to_string(&mut data)?;
for line in data.lines() {
let seq = line.split("@");
let mut seq_parsed = vec![];
for sig in seq {
let mut hash = [0; 4];
set_hash(sig, &mut hash);
seq_parsed.push(hash);
}
println!("{:?}", seq_parsed);
for i in 1..seq_parsed.len() {
let mut pref = vec![];
for j in 0..i {
pref.push(seq_parsed[j]);
}
scores.preference.insert(seq_parsed[i], pref);
}
}
println!("{:?}", scores);

Ok(scores)
}
}

impl_serdeany!(StateScore);


/// Scheduler layer that biases state selection using `StateScore`.
///
/// When a state is added, its index is recorded together with the trace of
/// signatures that led to it. When the next state is requested, a state whose
/// trace ends with the preferred sequence for the current signature is
/// returned if one exists; otherwise the inner scheduler `InnerSCC` decides.
///
/// NOTE(review): the type name is misspelled ("Scheulder"); it is kept as-is
/// because renaming would break existing users of the type.
pub struct StateScoreScheulder<InnerSCC> {
// Zero-sized marker tying the type to its inner scheduler.
pub _phantom: PhantomData<InnerSCC>,
}

impl<InnerSCC, S> SortedDroppingSchedulerNext<S> for StateScoreScheulder<InnerSCC>
where S: HasCorpus<EVMStagedVMState> + HasRand + HasMetadata + HasParent,
InnerSCC: SortedDroppingSchedulerNext<S>
{
fn next(state: &mut S) -> Result<usize, Error> {
// 50% chance to use inner scheduler
if state.rand_mut().next() % 2 == 0 {
return InnerSCC::next(state);
}
let next = state.rand_mut().next();
let satisfied = {
let meta = state.metadata_mut().get_mut::<StateScore>().unwrap();
let pref = meta.preference.get(&meta.current_sigs).unwrap();
meta.state_satisfied.iter().filter(|(_, v)| {
v.ends_with(pref)
}).map(|(k, _)| k).collect::<Vec<_>>()
};
if satisfied.is_empty() {
return InnerSCC::next(state);
} else {
let idx = *satisfied[(next % satisfied.len() as u64) as usize];
return Ok(idx);
}
}

fn before_on_add(state: &mut S, idx: usize) -> Result<(), Error> {
let from_idx = state.corpus().get(idx).unwrap().borrow().input().as_ref().unwrap().trace.from_idx;
let meta = state.metadata_mut().get_mut::<StateScore>().unwrap();
let sig_trace = if let Some(idx) = from_idx {
let sig = meta.state_satisfied.get(&idx).unwrap();
let mut my_sig = sig.clone();
my_sig.push(meta.current_sigs);
if my_sig.len() > 4 {
my_sig.remove(0);
}

my_sig
} else {
vec![meta.current_sigs]
};
meta.state_satisfied.insert(idx, sig_trace);
Ok(())
}

fn before_on_remove(state: &mut S, idx: usize) -> Result<(), Error> {
let meta = state.metadata_mut().get_mut::<StateScore>().unwrap();
meta.state_satisfied.remove(&idx).unwrap();
Ok(())
}
}

10 changes: 10 additions & 0 deletions src/evm/input.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ pub trait EVMInputT {
/// Get the ABI encoded input
fn to_bytes(&self) -> Vec<u8>;

/// Get the 4-byte function identifier of this input, or `None` when the
/// input carries none.
fn get_function(&self) -> Option<&[u8; 4]>;

/// Get revm environment (block, timestamp, etc.)
fn get_vm_env(&self) -> &Env;

Expand Down Expand Up @@ -349,6 +352,13 @@ impl EVMInputT for EVMInput {
&mut self.env
}

/// Borrows the `function` field of the input's `data`, or yields `None`
/// when `data` is absent.
fn get_function(&self) -> Option<&[u8; 4]> {
    self.data.as_ref().map(|abi| &abi.function)
}

fn get_vm_env(&self) -> &Env {
&self.env
}
Expand Down
1 change: 1 addition & 0 deletions src/evm/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,4 @@ pub mod feedbacks;
pub mod cov_stage;
pub mod blaz;
pub mod bytecode_iterator;
pub mod experimental;
3 changes: 2 additions & 1 deletion src/evm/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use crate::evm::mutator::FuzzMutator;
use crate::evm::vm::EVMState;

use crate::oracle::OracleCtx;
use crate::scheduler::SortedDroppingScheduler;
use crate::scheduler::{ProbSamplingScheduler, SortedDroppingScheduler};
use crate::state::{FuzzState, InfantStateState};
use crate::state_input::StagedVMState;
use bytes::Bytes;
Expand All @@ -32,6 +32,7 @@ pub type EVMFuzzMutator<'a> = FuzzMutator<
SortedDroppingScheduler<
StagedVMState<EVMAddress, EVMAddress, EVMState, ConciseEVMInput>,
InfantStateState<EVMAddress, EVMAddress, EVMState, ConciseEVMInput>,
ProbSamplingScheduler<EVMStagedVMState>
>,
ConciseEVMInput
>;
Expand Down
16 changes: 16 additions & 0 deletions src/fuzzers/evm_fuzzer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ use crate::evm::blaz::builder::{ArtifactInfoMetadata, BuildJob};
use crate::evm::concolic::concolic_host::ConcolicHost;
use crate::evm::concolic::concolic_stage::{ConcolicFeedbackWrapper, ConcolicStage};
use crate::evm::cov_stage::CoverageStage;
use crate::evm::experimental::priority_scoring::{ProbabilityABISamplingScheduler, SigScore};
use crate::evm::feedbacks::Sha3WrappedFeedback;
use crate::evm::middlewares::call_printer::CallPrinter;
use crate::evm::middlewares::coverage::{Coverage, EVAL_COVERAGE};
Expand Down Expand Up @@ -87,6 +88,21 @@ pub fn evm_fuzzer(
let monitor = SimpleMonitor::new(|s| println!("{}", s));
let mut mgr = SimpleEventManager::new(monitor);
let infant_scheduler = SortedDroppingScheduler::new();

#[cfg(feature = "llm")]
let mut scheduler: ProbabilityABISamplingScheduler<EVMInput, EVMFuzzState> = {
let mut sig_score = match config.priority_file {
Some(path) => {
SigScore::from_file(path.as_str()).expect("Failed to load priority file")
}
None => {
SigScore::new()
}
};
state.metadata_mut().insert(sig_score);
ProbabilityABISamplingScheduler::new()
};
#[cfg(not(feature = "llm"))]
let mut scheduler = QueueScheduler::new();

let jmps = unsafe { &mut JMP_MAP };
Expand Down
Loading