Skip to content

Commit

Permalink
Find and display string literal leaks
Browse files Browse the repository at this point in the history
  • Loading branch information
ergrelet committed Aug 15, 2022
1 parent 37352b7 commit fdfc930
Show file tree
Hide file tree
Showing 2 changed files with 158 additions and 27 deletions.
2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,5 @@ clang = { version = "2.0", features = ["clang_10_0"] }
anyhow = "1.0"
structopt = "0.3"
widestring = "1.0"
log = "0.4"
env_logger = "0.9"
183 changes: 156 additions & 27 deletions src/main.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
use std::path::PathBuf;
use std::{
borrow::Cow,
fs::File,
io::Read,
path::{Path, PathBuf},
vec,
};

use anyhow::{anyhow, Result};
use clang::{Clang, Entity, EntityKind, Index};
Expand All @@ -8,10 +14,13 @@ use widestring::{encode_utf16, encode_utf32};
const PKG_NAME: &str = env!("CARGO_PKG_NAME");

#[derive(Debug, StructOpt)]
#[structopt(name = PKG_NAME, about = "TODO")]
#[structopt(name = PKG_NAME, about = "An information leak detector for C and C++ code bases")]
struct CpplumberOptions {
#[structopt(parse(from_os_str), short, long = "bin")]
binary_file_path: PathBuf,

#[structopt(parse(from_os_str))]
file_paths: Vec<PathBuf>,
source_file_paths: Vec<PathBuf>,
}

#[derive(Debug)]
Expand Down Expand Up @@ -49,13 +58,22 @@ impl TryFrom<Entity<'_>> for InformationLeakDescription {
}

fn main() -> Result<()> {
env_logger::init();
let options = CpplumberOptions::from_args();

// Initial checks before starting work
if !options.binary_file_path.is_file() {
return Err(anyhow!(
"'{}' is not a valid file path.",
options.binary_file_path.display()
));
}

let clang = Clang::new().map_err(|e| anyhow!(e))?;
let index = Index::new(&clang, false, false);

let mut potential_leaks: Vec<InformationLeakDescription> = vec![];
for file_path in options.file_paths {
for file_path in options.source_file_paths {
let translation_unit = index
.parser(file_path)
.visit_implicit_attributes(false)
Expand All @@ -70,8 +88,13 @@ fn main() -> Result<()> {
.filter_map(|literal| literal.try_into().ok()),
);
}
log::debug!("{:#?}", potential_leaks);

println!("{:?}", potential_leaks);
log::info!(
"Looking for leaks in '{}'...",
options.binary_file_path.display()
);
check_for_leaks_in_binary_file(&options.binary_file_path, &potential_leaks)?;

Ok(())
}
Expand All @@ -95,7 +118,6 @@ fn gather_entities_by_kind_rec<'tu>(
.iter()
.any(|elem| elem == &root_entity_kind)
{
println!("{}", root_entity.get_name().unwrap());
entities.push(root_entity);
}

Expand All @@ -112,51 +134,158 @@ fn gather_entities_by_kind_rec<'tu>(
}

/// We have to reimplement this ourselves since the `clang` crate doesn't
/// provide an easy to get byte representations of `StringLiteral` entities.
/// provide an easy way to get byte representations of `StringLiteral` entities.
fn string_literal_to_bytes(string_literal: &str) -> Vec<u8> {
let mut char_it = string_literal.chars();
let first_char = char_it.next();
match first_char {
None => return vec![],
Some(first_char) => match first_char {
// Ordinary string (we assume it'll be encoded to ASCII)
'"' => string_literal[1..string_literal.len() - 1]
'"' => process_escape_sequences(&string_literal[1..string_literal.len() - 1])
.unwrap()
.as_bytes()
.to_owned(),
// Wide string (we assume it'll be encoded to UTF-16LE)
'L' => encode_utf16(string_literal[2..string_literal.len() - 1].chars())
.map(u16::to_le_bytes)
.fold(Vec::new(), |mut acc: Vec<u8>, e| {
acc.extend(e);
acc
}),
'L' => encode_utf16(
process_escape_sequences(&string_literal[2..string_literal.len() - 1])
.unwrap()
.chars(),
)
.map(u16::to_le_bytes)
.fold(Vec::new(), |mut acc: Vec<u8>, e| {
acc.extend(e);
acc
}),
// UTF-32 string
'U' => encode_utf32(string_literal[2..string_literal.len() - 1].chars())
.map(u32::to_le_bytes)
.fold(Vec::new(), |mut acc: Vec<u8>, e| {
acc.extend(e);
acc
}),
'U' => encode_utf32(
process_escape_sequences(&string_literal[2..string_literal.len() - 1])
.unwrap()
.chars(),
)
.map(u32::to_le_bytes)
.fold(Vec::new(), |mut acc: Vec<u8>, e| {
acc.extend(e);
acc
}),
// UTF-8 or UTF-16LE string
'u' => {
let second_char = char_it.next().unwrap();
let third_char = char_it.next().unwrap();
if second_char == '8' && third_char == '"' {
// UTF-8
string_literal[3..string_literal.len() - 1]
process_escape_sequences(&string_literal[3..string_literal.len() - 1])
.unwrap()
.as_bytes()
.to_owned()
} else {
// UTF-16LE
encode_utf16(string_literal[2..string_literal.len() - 1].chars())
.map(u16::to_le_bytes)
.fold(Vec::new(), |mut acc: Vec<u8>, e| {
acc.extend(e);
acc
})
encode_utf16(
process_escape_sequences(&string_literal[2..string_literal.len() - 1])
.unwrap()
.chars(),
)
.map(u16::to_le_bytes)
.fold(Vec::new(), |mut acc: Vec<u8>, e| {
acc.extend(e);
acc
})
}
}
_ => unreachable!("New string literal prefix introduced in the standard?"),
},
}
}

/// Replaces the C/C++ escape sequences found in `string` (the contents of a
/// string literal, without its prefix and surrounding quotes) with the
/// characters they denote.
///
/// Supported sequences are the simple escapes (`\a`, `\b`, `\t`, `\n`, `\v`,
/// `\f`, `\r`, `\\`) and octal escapes made of one to three octal digits
/// (`\n`, `\nn`, `\nnn`). For any other sequence, the backslash is dropped and
/// the following character is kept as is, which covers `\'`, `\"` and `\?`.
/// Hexadecimal (`\x..`) and universal (`\u....`, `\U........`) escapes are not
/// decoded.
///
/// Returns `Cow::Borrowed` when `string` contains no backslash (nothing is
/// allocated), `Cow::Owned` with the decoded text otherwise, and `None` when
/// `string` ends with a dangling backslash.
fn process_escape_sequences(string: &str) -> Option<Cow<str>> {
    // Fast path: without any backslash there is nothing to decode.
    // `find` yields a *byte* offset, which is what string slicing expects.
    let first_backslash = match string.find('\\') {
        Some(byte_offset) => byte_offset,
        None => return Some(Cow::Borrowed(string)),
    };

    // The decoded text is never longer (in bytes) than the input by more than
    // a few bytes, so the input length is a good capacity estimate.
    let mut unescaped = String::with_capacity(string.len());
    unescaped.push_str(&string[..first_backslash]);

    // Walk the remainder with an iterator instead of indices so that
    // multi-byte characters can never be split.
    let mut chars = string[first_backslash..].chars().peekable();
    while let Some(current) = chars.next() {
        if current != '\\' {
            unescaped.push(current);
            continue;
        }

        // A backslash must be followed by at least one character.
        let escaped = chars.next()?;
        match escaped {
            // Simple escape sequences
            'a' => unescaped.push('\x07'),
            'b' => unescaped.push('\x08'),
            't' => unescaped.push('\t'),
            'n' => unescaped.push('\n'),
            'v' => unescaped.push('\x0b'),
            'f' => unescaped.push('\x0c'),
            'r' => unescaped.push('\r'),
            other => match other.to_digit(8) {
                // Octal escape sequence (\n, \nn or \nnn): consume up to two
                // more digits, stopping at the first non-octal character.
                Some(first_digit) => {
                    let mut value = first_digit;
                    for _ in 0..2 {
                        match chars.peek().and_then(|next| next.to_digit(8)) {
                            Some(digit) => {
                                value = value * 8 + digit;
                                chars.next();
                            }
                            None => break,
                        }
                    }
                    // TODO: Fix wrong multibyte transformations in some cases
                    // (values above 0x7f become multi-byte once the resulting
                    // string is encoded as UTF-8).
                    // `value` is at most 0o777, which is always a valid
                    // Unicode scalar value, so this cannot actually fail.
                    unescaped.push(char::from_u32(value)?);
                }
                // Everything else (`\\`, `\'`, `\"`, `\?`, `\ `, ...): keep
                // the escaped character itself.
                None => unescaped.push(other),
            },
        }
    }

    Some(Cow::Owned(unescaped))
}

/// Searches the file at `binary_file_path` for the byte pattern of every
/// entry in `leak_desc` and prints one line to stdout per pattern found.
///
/// The whole file is read into memory, then each pattern is looked up with a
/// naive sliding-window comparison. Only the first occurrence of a pattern is
/// reported. Entries with an empty byte pattern are ignored.
///
/// # Errors
///
/// Returns an error if the file cannot be opened or read.
fn check_for_leaks_in_binary_file(
    binary_file_path: &Path,
    leak_desc: &[InformationLeakDescription],
) -> Result<()> {
    let mut bin_file = File::open(binary_file_path)?;

    let mut bin_data = vec![];
    bin_file.read_to_end(&mut bin_data)?;

    for leak in leak_desc {
        // `slice::windows` panics when asked for zero-sized windows, and an
        // empty pattern (e.g. coming from an empty string literal) would
        // trivially "match" everywhere anyway, so skip it.
        if leak.bytes.is_empty() {
            continue;
        }

        if let Some(offset) = bin_data
            .windows(leak.bytes.len())
            .position(|window| window == leak.bytes)
        {
            // NOTE(review): `declaration_metadata` is presumably the
            // (source file, line) pair of the literal's declaration; confirm
            // against the `InformationLeakDescription` definition.
            println!(
                "Leak at offset 0x{:x}: {} [{}:{}]",
                offset,
                leak.leaked_information,
                leak.declaration_metadata.0.display(),
                leak.declaration_metadata.1
            );
        }
    }

    Ok(())
}

0 comments on commit fdfc930

Please sign in to comment.