Skip to content

Commit

Permalink
di: refactor the visitor
Browse files Browse the repository at this point in the history
This refactors the visitor to give it more structure, use less `unsafe`,
and generally make it more idiomatic Rust.
  • Loading branch information
alessandrod committed Jan 30, 2024
1 parent 05d4ecd commit ca6cae4
Show file tree
Hide file tree
Showing 2 changed files with 255 additions and 183 deletions.
268 changes: 95 additions & 173 deletions src/llvm/di.rs
Original file line number Diff line number Diff line change
@@ -1,20 +1,15 @@
use std::{
collections::{hash_map::DefaultHasher, HashSet},
ffi::CStr,
hash::Hasher,
};

use gimli::{DW_TAG_pointer_type, DW_TAG_structure_type, DW_TAG_variant_part};
use llvm_sys::{core::*, debuginfo::*, prelude::*};
use log::{trace, warn};

use super::{
symbol_name,
types::{
di::DIType,
ir::{MDNode, Metadata, Value},
},
Message,
use super::types::{
di::DIType,
ir::{MDNode, Metadata, Value},
};
use crate::llvm::iter::*;

Expand All @@ -27,8 +22,8 @@ pub struct DISanitizer {
context: LLVMContextRef,
module: LLVMModuleRef,
builder: LLVMDIBuilderRef,
cache: Cache,
node_stack: Vec<LLVMValueRef>,
visited_nodes: HashSet<u64>,
item_stack: Vec<Item>,
}

// Sanitize Rust type names to be valid C type names.
Expand Down Expand Up @@ -60,17 +55,17 @@ fn sanitize_type_name<T: AsRef<str>>(name: T) -> String {
}

impl DISanitizer {
pub unsafe fn new(context: LLVMContextRef, module: LLVMModuleRef) -> DISanitizer {
pub fn new(context: LLVMContextRef, module: LLVMModuleRef) -> DISanitizer {
DISanitizer {
context,
module,
builder: LLVMCreateDIBuilder(module),
cache: Cache::new(),
node_stack: Vec::new(),
builder: unsafe { LLVMCreateDIBuilder(module) },
visited_nodes: HashSet::new(),
item_stack: Vec::new(),
}
}

fn mdnode(&mut self, mdnode: MDNode) {
fn visit_mdnode(&mut self, mdnode: MDNode) {
match mdnode.try_into().expect("MDNode is not Metadata") {
Metadata::DICompositeType(mut di_composite_type) => {
#[allow(clippy::single_match)]
Expand Down Expand Up @@ -190,7 +185,6 @@ impl DISanitizer {
_ => (),
}
}
// Sanitize function (subprogram) names.
Metadata::DISubprogram(mut di_subprogram) => {
// Sanitize function names
if let Some(name) = di_subprogram.name() {
Expand All @@ -205,198 +199,126 @@ impl DISanitizer {
}

// navigate the tree of LLVMValueRefs (DFS-pre-order)
unsafe fn discover(&mut self, value: LLVMValueRef, depth: usize) {
let one = " ";

if value.is_null() {
trace!("{one:depth$}skipping null node");
return;
}
fn visit_item(&mut self, item: Item, depth: usize) {
let value_ref = item.value_ref();
let value_id = item.value_id();

let log_prefix = "";
let log_depth = depth * 4;
trace!(
"{log_prefix:log_depth$}visiting item: {item:?} id: {} value: {value_ref:?}",
item.value_id(),
);

// TODO: doing this on the pointer value is not good
let key = if is_mdnode(value) {
LLVMValueAsMetadata(value) as u64
} else {
value as u64
let value = match (value_ref, &item) {
// An operand with no value is valid and means that the operand is
// not set
(v, Item::Operand { .. }) if v.is_null() => return,
(v, _) if !v.is_null() => Value::new(v),
// All other items should have values
(_, item) => panic!("{item:?} has no value"),
};
if self.cache.hit(key) {
trace!("{one:depth$}skipping already visited node");

let first_visit = self.visited_nodes.insert(value_id);
if !first_visit {
trace!("{log_prefix:log_depth$}already visited");
return;
}

self.node_stack.push(value);
self.item_stack.push(item.clone());

if let Value::MDNode(mdnode) = Value::new(value) {
let metadata_kind = LLVMGetMetadataKind(mdnode.metadata());
trace!(
"{one:depth$}mdnode kind:{:?} n_operands:{} value: {}",
metadata_kind,
LLVMGetMDNodeNumOperands(value),
Message {
ptr: LLVMPrintValueToString(value)
}
.as_c_str()
.unwrap()
.to_str()
.unwrap()
);

self.mdnode(mdnode)
} else {
trace!(
"{one:depth$}node value: {}",
Message {
ptr: LLVMPrintValueToString(value)
}
.as_c_str()
.unwrap()
.to_str()
.unwrap()
);
if let Value::MDNode(mdnode) = value.clone() {
self.visit_mdnode(mdnode)
}

if can_get_all_metadata(value) {
for (index, (kind, metadata)) in iter_metadata_copy(value).enumerate() {
let metadata_value = LLVMMetadataAsValue(self.context, metadata);
trace!("{one:depth$}all_metadata entry: index:{}", index);
self.discover(metadata_value, depth + 1);

if is_instruction(value) {
LLVMSetMetadata(value, kind, metadata_value);
} else {
LLVMGlobalSetMetadata(value, kind, metadata);
}
if let Some(operands) = value.operands() {
for (index, operand) in operands.enumerate() {
self.visit_item(
Item::Operand(Operand {
parent: value_ref,
value: operand,
index: index as u32,
}),
depth + 1,
)
}
}

if can_get_operands(value) {
for (index, operand) in iter_operands(value).enumerate() {
trace!(
"{one:depth$}operand index:{} name:{} value:{}",
index,
symbol_name(value),
Message {
ptr: LLVMPrintValueToString(value)
}
.as_c_str()
.unwrap()
.to_str()
.unwrap()
);
self.discover(operand, depth + 1)
if let Some(entries) = value.metadata_entries() {
for (index, (metadata, kind)) in entries.iter().enumerate() {
let metadata_value = unsafe { LLVMMetadataAsValue(self.context, metadata) };
self.visit_item(Item::MetadataEntry(metadata_value, kind, index), depth + 1);
}
}

assert_eq!(self.node_stack.pop(), Some(value));
}

pub unsafe fn run(&mut self) {
for sym in self.module.named_metadata_iter() {
let mut len: usize = 0;
let name = CStr::from_ptr(LLVMGetNamedMetadataName(sym, &mut len))
.to_str()
.unwrap();
// just for debugging, we are not visiting those nodes for the moment
trace!("named metadata name:{}", name);
}

let module = self.module;
for (i, sym) in module.globals_iter().enumerate() {
trace!("global index:{} name:{}", i, symbol_name(sym));
self.discover(sym, 0);
}

for (i, sym) in module.global_aliases_iter().enumerate() {
trace!("global aliases index:{} name:{}", i, symbol_name(sym));
self.discover(sym, 0);
}

for function in module.functions_iter() {
trace!("function > name:{}", symbol_name(function));
self.discover(function, 0);

let params_count = LLVMCountParams(function);
for i in 0..params_count {
let param = LLVMGetParam(function, i);
trace!("function param name:{} index:{}", symbol_name(param), i);
self.discover(param, 1);
// If an item has sub items that are not operands nor metadata entries, we need to visit
// those too.
if let Value::Function(fun) = value {
for param in fun.params() {
self.visit_item(Item::FunctionParam(param), depth + 1);
}

for basic_block in function.basic_blocks_iter() {
trace!("function block");
for basic_block in fun.basic_blocks() {
for instruction in basic_block.instructions_iter() {
let n_operands = LLVMGetNumOperands(instruction);
trace!("function block instruction num_operands: {}", n_operands);
for index in 0..n_operands {
let operand = LLVMGetOperand(instruction, index as u32);
if is_instruction(operand) {
self.discover(operand, 2);
}
}

self.discover(instruction, 1);
self.visit_item(Item::Instruction(instruction), depth + 1);
}
}
}

LLVMDisposeDIBuilder(self.builder);
let _ = self.item_stack.pop().unwrap();
}
}

// utils

unsafe fn iter_operands(v: LLVMValueRef) -> impl Iterator<Item = LLVMValueRef> {
(0..LLVMGetNumOperands(v)).map(move |i| LLVMGetOperand(v, i as u32))
}

unsafe fn iter_metadata_copy(v: LLVMValueRef) -> impl Iterator<Item = (u32, LLVMMetadataRef)> {
let mut count = 0;
let entries = LLVMGlobalCopyAllMetadata(v, &mut count);
(0..count).map(move |index| {
(
LLVMValueMetadataEntriesGetKind(entries, index as u32),
LLVMValueMetadataEntriesGetMetadata(entries, index as u32),
)
})
}

unsafe fn is_instruction(v: LLVMValueRef) -> bool {
!LLVMIsAInstruction(v).is_null()
}

unsafe fn is_mdnode(v: LLVMValueRef) -> bool {
!LLVMIsAMDNode(v).is_null()
}
pub fn run(mut self) {
let module = self.module;

unsafe fn is_user(v: LLVMValueRef) -> bool {
!LLVMIsAUser(v).is_null()
}
for value in module.globals_iter() {
self.visit_item(Item::GlobalVariable(value), 0);
}
for value in module.global_aliases_iter() {
self.visit_item(Item::GlobalAlias(value), 0);
}

unsafe fn is_globalobject(v: LLVMValueRef) -> bool {
!LLVMIsAGlobalObject(v).is_null()
}
for function in module.functions_iter() {
self.visit_item(Item::Function(function), 0);
}

unsafe fn can_get_all_metadata(v: LLVMValueRef) -> bool {
is_globalobject(v) || is_instruction(v)
unsafe { LLVMDisposeDIBuilder(self.builder) };
}
}

unsafe fn can_get_operands(v: LLVMValueRef) -> bool {
is_mdnode(v) || is_user(v)
/// A single entry visited while walking the module.
///
/// Every variant carries the raw `LLVMValueRef` being visited; the variant
/// itself records how that value was reached.
#[derive(Clone, Debug, Eq, PartialEq)]
enum Item {
/// A value yielded by the module's `globals_iter()`.
GlobalVariable(LLVMValueRef),
/// A value yielded by the module's `global_aliases_iter()`.
GlobalAlias(LLVMValueRef),
/// A value yielded by the module's `functions_iter()`.
Function(LLVMValueRef),
/// A parameter of a function being visited.
FunctionParam(LLVMValueRef),
/// An instruction found in one of a function's basic blocks.
Instruction(LLVMValueRef),
/// An operand of another value; see `Operand` for the parent/index details.
Operand(Operand),
/// A metadata entry attached to a value, stored as
/// `(metadata converted to a value, metadata kind, entry index)`.
MetadataEntry(LLVMValueRef, u32, usize),
}

pub struct Cache {
keys: HashSet<u64>,
/// An operand reached through its parent value during traversal.
#[derive(Clone, Debug, Eq, PartialEq)]
struct Operand {
// The value whose operand list this operand was taken from.
parent: LLVMValueRef,
// The operand itself. May be null: the visitor treats a null operand as
// "operand not set" and skips it.
value: LLVMValueRef,
// Position of this operand in the parent's operand list.
index: u32,
}

impl Cache {
pub fn new() -> Self {
Cache {
keys: HashSet::new(),
impl Item {
/// Returns the raw LLVM value wrapped by this item, whichever variant it is.
fn value_ref(&self) -> LLVMValueRef {
    let raw = match self {
        // Operands keep their value inside the `Operand` struct.
        Item::Operand(Operand { value, .. }) => value,
        // Metadata entries carry the value as their first field.
        Item::MetadataEntry(value, _, _) => value,
        // All remaining variants are plain single-value wrappers.
        Item::GlobalVariable(value)
        | Item::GlobalAlias(value)
        | Item::Function(value)
        | Item::FunctionParam(value)
        | Item::Instruction(value) => value,
    };
    *raw
}

pub fn hit(&mut self, key: u64) -> bool {
!self.keys.insert(key)
/// Returns the numeric identity of the wrapped value: its raw pointer cast to
/// `u64`. The visitor stores these ids to detect already-visited values.
fn value_id(&self) -> u64 {
    let raw = self.value_ref();
    raw as u64
}
}

Expand Down
Loading

0 comments on commit ca6cae4

Please sign in to comment.