From 65a834a8a89e24dbc2e47ce91f89038569f182a0 Mon Sep 17 00:00:00 2001 From: Alessandro Decina Date: Sun, 5 Nov 2023 22:17:07 +1100 Subject: [PATCH] di: btf: only exported symbols (bpf programs) should have linkage=global We only want exported symbols (programs marked as #[no_mangle]) to have linkage=global. This avoid issues like: Global function write() doesn't return scalar. Only those are supported. verification time 18 usec stack depth 0+0 ... The error above happens when aya-log's WriteBuf::write doesn't get inlined, and ends up having linkage=global. Global functions are verified independently from their callers, so the verifier has less context, and as a result globals are harder to verify. In clang one makes a function global by not making it `static`. That model doesn't work for rust, where all symbols exported by dependencies are non-static, and therefore end up being emitted as global. This commit implements a custom pass which marks functions as linkage=static unless they've been explicitly exported. Fixes https://github.com/aya-rs/aya/issues/808 --- src/linker.rs | 2 +- src/llvm/di.rs | 135 +++++++++++++++++- tests/btf/assembly/auxiliary/dep-exports.rs | 14 ++ .../assembly/auxiliary/loop-panic-handler.rs | 8 ++ tests/btf/assembly/exported-symbols.rs | 50 +++++++ 5 files changed, 203 insertions(+), 6 deletions(-) create mode 100644 tests/btf/assembly/auxiliary/dep-exports.rs create mode 100644 tests/btf/assembly/auxiliary/loop-panic-handler.rs create mode 100644 tests/btf/assembly/exported-symbols.rs diff --git a/src/linker.rs b/src/linker.rs index e6b214ed..4f4822e5 100644 --- a/src/linker.rs +++ b/src/linker.rs @@ -449,7 +449,7 @@ impl Linker { // programs and maps and remove dead code. unsafe { - llvm::DISanitizer::new(self.context, self.module).run(); + llvm::DISanitizer::new(self.context, self.module).run(&self.options.export_symbols); llvm::optimize( self.target_machine, diff --git a/src/llvm/di.rs b/src/llvm/di.rs index dd7a9788..73423af9 100644 --- a/src/llvm/di.rs +++ b/src/llvm/di.rs @@ -1,6 +1,8 @@ use std::{ - collections::{hash_map::DefaultHasher, HashSet}, + borrow::Cow, + collections::{hash_map::DefaultHasher, HashMap, HashSet}, hash::Hasher, + ptr, }; use gimli::{DW_TAG_pointer_type, DW_TAG_structure_type, DW_TAG_variant_part}; @@ -9,9 +11,9 @@ use log::{trace, warn}; use super::types::{ di::DIType, - ir::{MDNode, Metadata, Value}, + ir::{Function, MDNode, Metadata, Value}, }; -use crate::llvm::iter::*; +use crate::llvm::{iter::*, types::di::DISubprogram}; // KSYM_NAME_LEN from linux kernel intentionally set // to lower value found accross kernel versions to ensure @@ -24,6 +26,7 @@ pub struct DISanitizer { builder: LLVMDIBuilderRef, visited_nodes: HashSet, item_stack: Vec, + replace_operands: HashMap, } // Sanitize Rust type names to be valid C type names. @@ -62,6 +65,7 @@ impl DISanitizer { builder: unsafe { LLVMCreateDIBuilder(module) }, visited_nodes: HashSet::new(), item_stack: Vec::new(), + replace_operands: HashMap::new(), } } @@ -199,7 +203,7 @@ impl DISanitizer { } // navigate the tree of LLVMValueRefs (DFS-pre-order) - fn visit_item(&mut self, item: Item, depth: usize) { + fn visit_item(&mut self, mut item: Item, depth: usize) { let value_ref = item.value_ref(); let value_id = item.value_id(); @@ -219,6 +223,15 @@ impl DISanitizer { (_, item) => panic!("{item:?} has no value"), }; + if let Item::Operand(operand) = &mut item { + // When we have an operand to replace, we must do so regardless of whether we've already + // seen its value or not, since the same value can appear as an operand in multiple + // nodes in the tree. + if let Some(new_metadata) = self.replace_operands.get(&value_id) { + operand.replace(unsafe { LLVMMetadataAsValue(self.context, *new_metadata) }) + } + } + let first_visit = self.visited_nodes.insert(value_id); if !first_visit { trace!("{log_prefix:log_depth$}already visited"); @@ -268,9 +281,11 @@ impl DISanitizer { let _ = self.item_stack.pop().unwrap(); } - pub fn run(mut self) { + pub fn run(mut self, exported_symbols: &HashSet>) { let module = self.module; + self.replace_operands = self.fix_subprogram_linkage(exported_symbols); + for value in module.globals_iter() { self.visit_item(Item::GlobalVariable(value), 0); } @@ -284,6 +299,103 @@ impl DISanitizer { unsafe { LLVMDisposeDIBuilder(self.builder) }; } + + // Make it so that only exported symbols (programs marked as #[no_mangle]) get BTF + // linkage=global. For all other functions we want linkage=static. This avoid issues like: + // + // Global function write() doesn't return scalar. Only those are supported. + // verification time 18 usec + // stack depth 0+0 + // ... + // + // This is an error we used to get compiling aya-log. Global functions are verified + // independently from their callers, so the verifier has less context and as a result globals + // are harder to verify successfully. + // + // See tests/btf/assembly/exported-symbols.rs . + fn fix_subprogram_linkage( + &mut self, + export_symbols: &HashSet>, + ) -> HashMap { + let mut replace = HashMap::new(); + + for mut function in self + .module + .functions_iter() + .map(|value| unsafe { Function::from_value_ref(value) }) + { + if export_symbols.contains(function.name()) { + continue; + } + + // Skip functions that don't have subprograms. + let Some(mut sub_program) = function.sub_program(self.context) else { + continue; + }; + + let name = sub_program.name().unwrap(); + let linkage_name = sub_program.linkage_name(); + let ty = sub_program.ty(); + + // Create a new subprogram that has DISPFlagLocalToUnit set, so the BTF backend emits it + // with linkage=static + let mut new_program = unsafe { + let new_program = LLVMDIBuilderCreateFunction( + self.builder, + sub_program.scope().unwrap(), + name.as_ptr(), + name.len(), + linkage_name.map(|s| s.as_ptr()).unwrap_or(ptr::null()), + linkage_name.unwrap_or("").len(), + sub_program.file(), + sub_program.line(), + ty, + 1, + 1, + sub_program.line(), + sub_program.type_flags(), + 1, + ); + // Technically this must be called as part of the builder API, but effectively does + // nothing because we don't add any variables through the builder API, instead we + // replace retained nodes manually below. + LLVMDIBuilderFinalizeSubprogram(self.builder, new_program); + + DISubprogram::from_value_ref(LLVMMetadataAsValue(self.context, new_program)) + }; + + // Point the function to the new subprogram. + function.set_subprogram(&new_program); + + // There's no way to set the unit with LLVMDIBuilderCreateFunction + // so we set it after creation. + if let Some(unit) = sub_program.unit() { + new_program.set_unit(unit); + } + + // Add retained nodes from the old program. This is needed to preserve local debug + // variables, including function arguments which otherwise become "anon". See + // LLVMDIBuilderFinalizeSubprogram and + // DISubprogram::replaceRetainedNodes. + if let Some(retained_nodes) = sub_program.retained_nodes() { + new_program.set_retained_nodes(retained_nodes); + } + + // Remove retained nodes from the old program or we'll hit a debug assertion since + // its debug variables no longer point to the program. See the + // NumAbstractSubprograms assertion in DwarfDebug::endFunctionImpl in LLVM. + let empty_node = + unsafe { LLVMMDNodeInContext2(self.context, core::ptr::null_mut(), 0) }; + sub_program.set_retained_nodes(empty_node); + + let ret = replace.insert(sub_program.value_ref as u64, unsafe { + LLVMValueAsMetadata(new_program.value_ref) + }); + assert!(ret.is_none()); + } + + replace + } } #[derive(Clone, Debug, Eq, PartialEq)] @@ -304,6 +416,19 @@ struct Operand { index: u32, } +impl Operand { + fn replace(&mut self, value: LLVMValueRef) { + unsafe { + if !LLVMIsAMDNode(self.parent).is_null() { + let value = LLVMValueAsMetadata(value); + LLVMReplaceMDNodeOperandWith(self.parent, self.index, value); + } else if !LLVMIsAUser(self.parent).is_null() { + LLVMSetOperand(self.parent, self.index, value); + } + } + } +} + impl Item { fn value_ref(&self) -> LLVMValueRef { match self { diff --git a/tests/btf/assembly/auxiliary/dep-exports.rs b/tests/btf/assembly/auxiliary/dep-exports.rs new file mode 100644 index 00000000..6adcdc03 --- /dev/null +++ b/tests/btf/assembly/auxiliary/dep-exports.rs @@ -0,0 +1,14 @@ +// no-prefer-dynamic +// compile-flags: --crate-type rlib -C debuginfo=2 +#![no_std] + +#[inline(never)] +pub fn dep_public_symbol() -> u8 { + // read_volatile stops LTO inlining the function in the calling crate + unsafe { core::ptr::read_volatile(0 as *const u8) } +} + +#[no_mangle] +pub fn dep_no_mangle() -> u8 { + dep_public_symbol() +} diff --git a/tests/btf/assembly/auxiliary/loop-panic-handler.rs b/tests/btf/assembly/auxiliary/loop-panic-handler.rs new file mode 100644 index 00000000..76953359 --- /dev/null +++ b/tests/btf/assembly/auxiliary/loop-panic-handler.rs @@ -0,0 +1,8 @@ +// no-prefer-dynamic +// compile-flags: --crate-type rlib +#![no_std] + +#[panic_handler] +fn panic_impl(_: &core::panic::PanicInfo) -> ! { + loop {} +} diff --git a/tests/btf/assembly/exported-symbols.rs b/tests/btf/assembly/exported-symbols.rs new file mode 100644 index 00000000..98e84ffb --- /dev/null +++ b/tests/btf/assembly/exported-symbols.rs @@ -0,0 +1,50 @@ +// assembly-output: bpf-linker +// compile-flags: --crate-type bin -C link-arg=--emit=obj -C debuginfo=2 -C link-arg=--log-level=debug -C link-arg=--log-file=/tmp/linker-lol.log +#![no_std] +#![no_main] + +// aux-build: loop-panic-handler.rs +extern crate loop_panic_handler; + +// aux-build: dep-exports.rs +extern crate dep_exports as dep; + +pub use dep::dep_public_symbol as local_re_exported; + +#[no_mangle] +fn local_no_mangle() -> u8 { + local_public(1, 2) +} + +#[inline(never)] +pub fn local_public(_arg1: u32, _arg2: u32) -> u8 { + // bind v so we create a debug variable which needs its scope to be fixed + let v = dep::dep_public_symbol(); + // call inline functions so we get inlinedAt scopes to be fixed + inline_function_1(v) + inline_function_2(v) +} + +#[inline(always)] +fn inline_function_1(v: u8) -> u8 { + unsafe { core::ptr::read_volatile(v as *const u8) } +} + +#[inline(always)] +fn inline_function_2(v: u8) -> u8 { + inline_function_1(v) +} + +// #[no_mangle] functions keep linkage=global +// CHECK: FUNC 'local_no_mangle' type_id={{[0-9]+}} linkage=global + +// check that parameter names are preserved +// CHECK: FUNC_PROTO +// CHECK-NEXT: _arg1 +// CHECK-NEXT: _arg2 + +// public functions get static linkage +// CHECK: FUNC '{{.*}}local_public{{.*}}' type_id={{[0-9]+}} linkage=static +// CHECK: FUNC '{{.*}}dep_public_symbol{{.*}}' type_id={{[0-9]+}} linkage=static + +// #[no_mangle] is honored for dep functions +// CHECK: FUNC 'dep_no_mangle' type_id={{[0-9]+}} linkage=global