From ca6cae4b36b4ff4a62b9abe40a92def41aa194c4 Mon Sep 17 00:00:00 2001 From: Alessandro Decina Date: Sun, 28 Jan 2024 18:21:23 +1100 Subject: [PATCH] di: refactor the visitor This is a refactor of the visitor to give it more structure, use less unsafe and generally make it a bit more idiomatic rust. --- src/llvm/di.rs | 268 +++++++++++++++---------------------------- src/llvm/types/ir.rs | 170 +++++++++++++++++++++++++-- 2 files changed, 255 insertions(+), 183 deletions(-) diff --git a/src/llvm/di.rs b/src/llvm/di.rs index a6c4901c..dd7a9788 100644 --- a/src/llvm/di.rs +++ b/src/llvm/di.rs @@ -1,6 +1,5 @@ use std::{ collections::{hash_map::DefaultHasher, HashSet}, - ffi::CStr, hash::Hasher, }; @@ -8,13 +7,9 @@ use gimli::{DW_TAG_pointer_type, DW_TAG_structure_type, DW_TAG_variant_part}; use llvm_sys::{core::*, debuginfo::*, prelude::*}; use log::{trace, warn}; -use super::{ - symbol_name, - types::{ - di::DIType, - ir::{MDNode, Metadata, Value}, - }, - Message, +use super::types::{ + di::DIType, + ir::{MDNode, Metadata, Value}, }; use crate::llvm::iter::*; @@ -27,8 +22,8 @@ pub struct DISanitizer { context: LLVMContextRef, module: LLVMModuleRef, builder: LLVMDIBuilderRef, - cache: Cache, - node_stack: Vec, + visited_nodes: HashSet, + item_stack: Vec, } // Sanitize Rust type names to be valid C type names. 
@@ -60,17 +55,17 @@ fn sanitize_type_name>(name: T) -> String { } impl DISanitizer { - pub unsafe fn new(context: LLVMContextRef, module: LLVMModuleRef) -> DISanitizer { + pub fn new(context: LLVMContextRef, module: LLVMModuleRef) -> DISanitizer { DISanitizer { context, module, - builder: LLVMCreateDIBuilder(module), - cache: Cache::new(), - node_stack: Vec::new(), + builder: unsafe { LLVMCreateDIBuilder(module) }, + visited_nodes: HashSet::new(), + item_stack: Vec::new(), } } - fn mdnode(&mut self, mdnode: MDNode) { + fn visit_mdnode(&mut self, mdnode: MDNode) { match mdnode.try_into().expect("MDNode is not Metadata") { Metadata::DICompositeType(mut di_composite_type) => { #[allow(clippy::single_match)] @@ -190,7 +185,6 @@ impl DISanitizer { _ => (), } } - // Sanitize function (subprogram) names. Metadata::DISubprogram(mut di_subprogram) => { // Sanitize function names if let Some(name) = di_subprogram.name() { @@ -205,198 +199,126 @@ impl DISanitizer { } // navigate the tree of LLVMValueRefs (DFS-pre-order) - unsafe fn discover(&mut self, value: LLVMValueRef, depth: usize) { - let one = " "; - - if value.is_null() { - trace!("{one:depth$}skipping null node"); - return; - } + fn visit_item(&mut self, item: Item, depth: usize) { + let value_ref = item.value_ref(); + let value_id = item.value_id(); + + let log_prefix = ""; + let log_depth = depth * 4; + trace!( + "{log_prefix:log_depth$}visiting item: {item:?} id: {} value: {value_ref:?}", + item.value_id(), + ); - // TODO: doing this on the pointer value is not good - let key = if is_mdnode(value) { - LLVMValueAsMetadata(value) as u64 - } else { - value as u64 + let value = match (value_ref, &item) { + // An operand with no value is valid and means that the operand is + // not set + (v, Item::Operand { .. 
}) if v.is_null() => return, + (v, _) if !v.is_null() => Value::new(v), + // All other items should have values + (_, item) => panic!("{item:?} has no value"), }; - if self.cache.hit(key) { - trace!("{one:depth$}skipping already visited node"); + + let first_visit = self.visited_nodes.insert(value_id); + if !first_visit { + trace!("{log_prefix:log_depth$}already visited"); return; } - self.node_stack.push(value); + self.item_stack.push(item.clone()); - if let Value::MDNode(mdnode) = Value::new(value) { - let metadata_kind = LLVMGetMetadataKind(mdnode.metadata()); - trace!( - "{one:depth$}mdnode kind:{:?} n_operands:{} value: {}", - metadata_kind, - LLVMGetMDNodeNumOperands(value), - Message { - ptr: LLVMPrintValueToString(value) - } - .as_c_str() - .unwrap() - .to_str() - .unwrap() - ); - - self.mdnode(mdnode) - } else { - trace!( - "{one:depth$}node value: {}", - Message { - ptr: LLVMPrintValueToString(value) - } - .as_c_str() - .unwrap() - .to_str() - .unwrap() - ); + if let Value::MDNode(mdnode) = value.clone() { + self.visit_mdnode(mdnode) } - if can_get_all_metadata(value) { - for (index, (kind, metadata)) in iter_metadata_copy(value).enumerate() { - let metadata_value = LLVMMetadataAsValue(self.context, metadata); - trace!("{one:depth$}all_metadata entry: index:{}", index); - self.discover(metadata_value, depth + 1); - - if is_instruction(value) { - LLVMSetMetadata(value, kind, metadata_value); - } else { - LLVMGlobalSetMetadata(value, kind, metadata); - } + if let Some(operands) = value.operands() { + for (index, operand) in operands.enumerate() { + self.visit_item( + Item::Operand(Operand { + parent: value_ref, + value: operand, + index: index as u32, + }), + depth + 1, + ) } } - if can_get_operands(value) { - for (index, operand) in iter_operands(value).enumerate() { - trace!( - "{one:depth$}operand index:{} name:{} value:{}", - index, - symbol_name(value), - Message { - ptr: LLVMPrintValueToString(value) - } - .as_c_str() - .unwrap() - .to_str() - 
.unwrap() - ); - self.discover(operand, depth + 1) + if let Some(entries) = value.metadata_entries() { + for (index, (metadata, kind)) in entries.iter().enumerate() { + let metadata_value = unsafe { LLVMMetadataAsValue(self.context, metadata) }; + self.visit_item(Item::MetadataEntry(metadata_value, kind, index), depth + 1); } } - assert_eq!(self.node_stack.pop(), Some(value)); - } - - pub unsafe fn run(&mut self) { - for sym in self.module.named_metadata_iter() { - let mut len: usize = 0; - let name = CStr::from_ptr(LLVMGetNamedMetadataName(sym, &mut len)) - .to_str() - .unwrap(); - // just for debugging, we are not visiting those nodes for the moment - trace!("named metadata name:{}", name); - } - - let module = self.module; - for (i, sym) in module.globals_iter().enumerate() { - trace!("global index:{} name:{}", i, symbol_name(sym)); - self.discover(sym, 0); - } - - for (i, sym) in module.global_aliases_iter().enumerate() { - trace!("global aliases index:{} name:{}", i, symbol_name(sym)); - self.discover(sym, 0); - } - - for function in module.functions_iter() { - trace!("function > name:{}", symbol_name(function)); - self.discover(function, 0); - - let params_count = LLVMCountParams(function); - for i in 0..params_count { - let param = LLVMGetParam(function, i); - trace!("function param name:{} index:{}", symbol_name(param), i); - self.discover(param, 1); + // If an item has sub items that are not operands nor metadata entries, we need to visit + // those too. 
+ if let Value::Function(fun) = value { + for param in fun.params() { + self.visit_item(Item::FunctionParam(param), depth + 1); } - for basic_block in function.basic_blocks_iter() { - trace!("function block"); + for basic_block in fun.basic_blocks() { for instruction in basic_block.instructions_iter() { - let n_operands = LLVMGetNumOperands(instruction); - trace!("function block instruction num_operands: {}", n_operands); - for index in 0..n_operands { - let operand = LLVMGetOperand(instruction, index as u32); - if is_instruction(operand) { - self.discover(operand, 2); - } - } - - self.discover(instruction, 1); + self.visit_item(Item::Instruction(instruction), depth + 1); } } } - LLVMDisposeDIBuilder(self.builder); + let _ = self.item_stack.pop().unwrap(); } -} - -// utils - -unsafe fn iter_operands(v: LLVMValueRef) -> impl Iterator { - (0..LLVMGetNumOperands(v)).map(move |i| LLVMGetOperand(v, i as u32)) -} - -unsafe fn iter_metadata_copy(v: LLVMValueRef) -> impl Iterator { - let mut count = 0; - let entries = LLVMGlobalCopyAllMetadata(v, &mut count); - (0..count).map(move |index| { - ( - LLVMValueMetadataEntriesGetKind(entries, index as u32), - LLVMValueMetadataEntriesGetMetadata(entries, index as u32), - ) - }) -} - -unsafe fn is_instruction(v: LLVMValueRef) -> bool { - !LLVMIsAInstruction(v).is_null() -} -unsafe fn is_mdnode(v: LLVMValueRef) -> bool { - !LLVMIsAMDNode(v).is_null() -} + pub fn run(mut self) { + let module = self.module; -unsafe fn is_user(v: LLVMValueRef) -> bool { - !LLVMIsAUser(v).is_null() -} + for value in module.globals_iter() { + self.visit_item(Item::GlobalVariable(value), 0); + } + for value in module.global_aliases_iter() { + self.visit_item(Item::GlobalAlias(value), 0); + } -unsafe fn is_globalobject(v: LLVMValueRef) -> bool { - !LLVMIsAGlobalObject(v).is_null() -} + for function in module.functions_iter() { + self.visit_item(Item::Function(function), 0); + } -unsafe fn can_get_all_metadata(v: LLVMValueRef) -> bool { - 
is_globalobject(v) || is_instruction(v) + unsafe { LLVMDisposeDIBuilder(self.builder) }; + } } -unsafe fn can_get_operands(v: LLVMValueRef) -> bool { - is_mdnode(v) || is_user(v) +#[derive(Clone, Debug, Eq, PartialEq)] +enum Item { + GlobalVariable(LLVMValueRef), + GlobalAlias(LLVMValueRef), + Function(LLVMValueRef), + FunctionParam(LLVMValueRef), + Instruction(LLVMValueRef), + Operand(Operand), + MetadataEntry(LLVMValueRef, u32, usize), } -pub struct Cache { - keys: HashSet, +#[derive(Clone, Debug, Eq, PartialEq)] +struct Operand { + parent: LLVMValueRef, + value: LLVMValueRef, + index: u32, } -impl Cache { - pub fn new() -> Self { - Cache { - keys: HashSet::new(), +impl Item { + fn value_ref(&self) -> LLVMValueRef { + match self { + Item::GlobalVariable(value) + | Item::GlobalAlias(value) + | Item::Function(value) + | Item::FunctionParam(value) + | Item::Instruction(value) + | Item::Operand(Operand { value, .. }) + | Item::MetadataEntry(value, _, _) => *value, } } - pub fn hit(&mut self, key: u64) -> bool { - !self.keys.insert(key) + fn value_id(&self) -> u64 { + self.value_ref() as u64 } } diff --git a/src/llvm/types/ir.rs b/src/llvm/types/ir.rs index 4857ff2e..da49f1fa 100644 --- a/src/llvm/types/ir.rs +++ b/src/llvm/types/ir.rs @@ -1,18 +1,30 @@ use std::{ ffi::{CString, NulError}, marker::PhantomData, + ptr::NonNull, }; use llvm_sys::{ core::{ - LLVMIsAMDNode, LLVMMDNodeInContext2, LLVMMDStringInContext2, LLVMMetadataAsValue, - LLVMReplaceMDNodeOperandWith, LLVMValueAsMetadata, + LLVMCountParams, LLVMDisposeValueMetadataEntries, LLVMGetNumOperands, LLVMGetOperand, + LLVMGetParam, LLVMGlobalCopyAllMetadata, LLVMIsAFunction, LLVMIsAGlobalObject, + LLVMIsAInstruction, LLVMIsAMDNode, LLVMIsAUser, LLVMMDNodeInContext2, + LLVMMDStringInContext2, LLVMMetadataAsValue, LLVMPrintValueToString, + LLVMReplaceMDNodeOperandWith, LLVMValueAsMetadata, LLVMValueMetadataEntriesGetKind, + LLVMValueMetadataEntriesGetMetadata, + }, + debuginfo::{LLVMGetMetadataKind, 
LLVMGetSubprogram, LLVMMetadataKind, LLVMSetSubprogram}, + prelude::{ + LLVMBasicBlockRef, LLVMContextRef, LLVMMetadataRef, LLVMValueMetadataEntry, LLVMValueRef, }, - debuginfo::{LLVMGetMetadataKind, LLVMMetadataKind}, - prelude::{LLVMContextRef, LLVMMetadataRef, LLVMValueRef}, }; -use super::di::{DICompositeType, DIDerivedType, DISubprogram, DIType}; +use crate::llvm::{ + iter::IterBasicBlocks as _, + symbol_name, + types::di::{DICompositeType, DIDerivedType, DISubprogram, DIType}, + Message, +}; pub(crate) fn replace_name( value_ref: LLVMValueRef, @@ -26,19 +38,74 @@ pub(crate) fn replace_name( Ok(()) } +#[derive(Clone)] pub enum Value<'ctx> { MDNode(MDNode<'ctx>), + Function(Function<'ctx>), Other(LLVMValueRef), } +impl<'ctx> std::fmt::Debug for Value<'ctx> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let value_to_string = |value| { + Message { + ptr: unsafe { LLVMPrintValueToString(value) }, + } + .as_c_str() + .unwrap() + .to_str() + .unwrap() + .to_string() + }; + match self { + Self::MDNode(node) => f + .debug_struct("MDNode") + .field("value", &value_to_string(node.value_ref)) + .finish(), + Self::Function(fun) => f + .debug_struct("Function") + .field("value", &value_to_string(fun.value_ref)) + .finish(), + Self::Other(value) => f + .debug_struct("Other") + .field("value", &value_to_string(*value)) + .finish(), + } + } +} + impl<'ctx> Value<'ctx> { pub fn new(value: LLVMValueRef) -> Self { if unsafe { !LLVMIsAMDNode(value).is_null() } { let mdnode = unsafe { MDNode::from_value_ref(value) }; return Value::MDNode(mdnode); + } else if unsafe { !LLVMIsAFunction(value).is_null() } { + return Value::Function(unsafe { Function::from_value_ref(value) }); } Value::Other(value) } + + pub fn metadata_entries(&self) -> Option { + let value = match self { + Value::MDNode(node) => node.value_ref, + Value::Function(f) => f.value_ref, + Value::Other(value) => *value, + }; + MetadataEntries::new(value) + } + + pub fn operands(&self) -> Option> 
{ + let value = match self { + Value::MDNode(node) => Some(node.value_ref), + Value::Function(f) => Some(f.value_ref), + Value::Other(value) if unsafe { !LLVMIsAUser(*value).is_null() } => Some(*value), + _ => None, + }; + + value.map(|value| unsafe { + (0..LLVMGetNumOperands(value)).map(move |i| LLVMGetOperand(value, i as u32)) + }) + } } pub enum Metadata<'ctx> { @@ -120,6 +187,7 @@ impl<'ctx> TryFrom> for Metadata<'ctx> { } /// Represents a metadata node. +#[derive(Clone)] pub struct MDNode<'ctx> { pub(super) value_ref: LLVMValueRef, _marker: PhantomData<&'ctx ()>, @@ -156,11 +224,6 @@ impl<'ctx> MDNode<'ctx> { } } - /// Returns the low level `LLVMMetadataRef` corresponding to this node. - pub fn metadata(&self) -> LLVMMetadataRef { - unsafe { LLVMValueAsMetadata(self.value_ref) } - } - /// Constructs an empty metadata node. pub fn empty(context: LLVMContextRef) -> Self { let metadata = unsafe { LLVMMDNodeInContext2(context, core::ptr::null_mut(), 0) }; @@ -187,3 +250,90 @@ impl<'ctx> MDNode<'ctx> { unsafe { Self::from_metadata_ref(context, metadata) } } } + +pub struct MetadataEntries { + entries: *mut LLVMValueMetadataEntry, + count: usize, +} + +impl MetadataEntries { + pub fn new(v: LLVMValueRef) -> Option { + if unsafe { LLVMIsAGlobalObject(v).is_null() && LLVMIsAInstruction(v).is_null() } { + return None; + } + + let mut count = 0; + let entries = unsafe { LLVMGlobalCopyAllMetadata(v, &mut count) }; + if entries.is_null() { + return None; + } + + Some(MetadataEntries { entries, count }) + } + + pub fn iter(&self) -> impl Iterator + '_ { + (0..self.count).map(move |index| unsafe { + ( + LLVMValueMetadataEntriesGetMetadata(self.entries, index as u32), + LLVMValueMetadataEntriesGetKind(self.entries, index as u32), + ) + }) + } +} + +impl Drop for MetadataEntries { + fn drop(&mut self) { + unsafe { + LLVMDisposeValueMetadataEntries(self.entries); + } + } +} + +/// Represents a function.
+#[derive(Clone)] +pub struct Function<'ctx> { + pub value_ref: LLVMValueRef, + _marker: PhantomData<&'ctx ()>, +} + +impl<'ctx> Function<'ctx> { + /// Constructs a new [`Function`] from the given `value`. + /// + /// # Safety + /// + /// This method assumes that the provided `value` corresponds to a valid + /// instance of [LLVM `Function`](https://llvm.org/doxygen/classllvm_1_1Function.html). + /// It's the caller's responsibility to ensure this invariant, as this + /// method doesn't perform any validation checks. + pub(crate) unsafe fn from_value_ref(value_ref: LLVMValueRef) -> Self { + Self { + value_ref, + _marker: PhantomData, + } + } + + pub(crate) fn name(&self) -> &str { + symbol_name(self.value_ref) + } + + pub(crate) fn params(&self) -> impl Iterator { + let params_count = unsafe { LLVMCountParams(self.value_ref) }; + let value = self.value_ref; + (0..params_count).map(move |i| unsafe { LLVMGetParam(value, i) }) + } + + pub(crate) fn basic_blocks(&self) -> impl Iterator + '_ { + self.value_ref.basic_blocks_iter() + } + + pub(crate) fn sub_program(&self, context: LLVMContextRef) -> Option> { + let sub_program = unsafe { LLVMGetSubprogram(self.value_ref) }; + NonNull::new(sub_program).map(|_| unsafe { + DISubprogram::from_value_ref(LLVMMetadataAsValue(context, sub_program)) + }) + } + + pub(crate) fn set_subprogram(&mut self, sub_program: &DISubprogram) { + unsafe { LLVMSetSubprogram(self.value_ref, LLVMValueAsMetadata(sub_program.value_ref)) }; + } +}