Skip to content

Commit

Permalink
di: btf: only exported symbols (bpf programs) should have linkage=global
Browse files Browse the repository at this point in the history
We only want exported symbols (programs marked as #[no_mangle]) to have
linkage=global. This avoid issues like:

    Global function write() doesn't return scalar. Only those are supported.
    verification time 18 usec
    stack depth 0+0
    ...

The error above happens when aya-log's WriteBuf::write doesn't get
inlined, and ends up having linkage=global. Global functions are
verified independently from their callers, so the verifier has less
context, and as a result globals are harder to verify.

In clang one makes a function global by not making it `static`. That
model doesn't work for rust, where all symbols exported by dependencies
are non-static, and therefore end up being emitted as global.

This commit implements a custom pass which marks functions as
linkage=static unless they've been explicitly exported.

Fixes aya-rs/aya#808
  • Loading branch information
alessandrod authored and vadorovsky committed Feb 18, 2024
1 parent d11bf49 commit 4214888
Show file tree
Hide file tree
Showing 5 changed files with 203 additions and 6 deletions.
2 changes: 1 addition & 1 deletion src/linker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -449,7 +449,7 @@ impl Linker {
// programs and maps and remove dead code.

unsafe {
llvm::DISanitizer::new(self.context, self.module).run();
llvm::DISanitizer::new(self.context, self.module).run(&self.options.export_symbols);

llvm::optimize(
self.target_machine,
Expand Down
135 changes: 130 additions & 5 deletions src/llvm/di.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
use std::{
collections::{hash_map::DefaultHasher, HashSet},
borrow::Cow,
collections::{hash_map::DefaultHasher, HashMap, HashSet},
hash::Hasher,
ptr,
};

use gimli::{DW_TAG_pointer_type, DW_TAG_structure_type, DW_TAG_variant_part};
Expand All @@ -9,9 +11,9 @@ use log::{trace, warn};

use super::types::{
di::DIType,
ir::{MDNode, Metadata, Value},
ir::{Function, MDNode, Metadata, Value},
};
use crate::llvm::iter::*;
use crate::llvm::{iter::*, types::di::DISubprogram};

// KSYM_NAME_LEN from linux kernel intentionally set
// to lower value found accross kernel versions to ensure
Expand All @@ -24,6 +26,7 @@ pub struct DISanitizer {
builder: LLVMDIBuilderRef,
visited_nodes: HashSet<u64>,
item_stack: Vec<Item>,
replace_operands: HashMap<u64, LLVMMetadataRef>,
}

// Sanitize Rust type names to be valid C type names.
Expand Down Expand Up @@ -62,6 +65,7 @@ impl DISanitizer {
builder: unsafe { LLVMCreateDIBuilder(module) },
visited_nodes: HashSet::new(),
item_stack: Vec::new(),
replace_operands: HashMap::new(),
}
}

Expand Down Expand Up @@ -199,7 +203,7 @@ impl DISanitizer {
}

// navigate the tree of LLVMValueRefs (DFS-pre-order)
fn visit_item(&mut self, item: Item, depth: usize) {
fn visit_item(&mut self, mut item: Item, depth: usize) {
let value_ref = item.value_ref();
let value_id = item.value_id();

Expand All @@ -219,6 +223,15 @@ impl DISanitizer {
(_, item) => panic!("{item:?} has no value"),
};

if let Item::Operand(operand) = &mut item {
// When we have an operand to replace, we must do so regardless of whether we've already
// seen its value or not, since the same value can appear as an operand in multiple
// nodes in the tree.
if let Some(new_metadata) = self.replace_operands.get(&value_id) {
operand.replace(unsafe { LLVMMetadataAsValue(self.context, *new_metadata) })
}
}

let first_visit = self.visited_nodes.insert(value_id);
if !first_visit {
trace!("{log_prefix:log_depth$}already visited");
Expand Down Expand Up @@ -268,9 +281,11 @@ impl DISanitizer {
let _ = self.item_stack.pop().unwrap();
}

pub fn run(mut self) {
pub fn run(mut self, exported_symbols: &HashSet<Cow<'static, str>>) {
let module = self.module;

self.replace_operands = self.fix_subprogram_linkage(exported_symbols);

for value in module.globals_iter() {
self.visit_item(Item::GlobalVariable(value), 0);
}
Expand All @@ -284,6 +299,103 @@ impl DISanitizer {

unsafe { LLVMDisposeDIBuilder(self.builder) };
}

// Make it so that only exported symbols (programs marked as #[no_mangle]) get BTF
// linkage=global. For all other functions we want linkage=static. This avoid issues like:
//
// Global function write() doesn't return scalar. Only those are supported.
// verification time 18 usec
// stack depth 0+0
// ...
//
// This is an error we used to get compiling aya-log. Global functions are verified
// independently from their callers, so the verifier has less context and as a result globals
// are harder to verify successfully.
//
// See tests/btf/assembly/exported-symbols.rs .
fn fix_subprogram_linkage(
&mut self,
export_symbols: &HashSet<Cow<'static, str>>,
) -> HashMap<u64, LLVMMetadataRef> {
let mut replace = HashMap::new();

for mut function in self
.module
.functions_iter()
.map(|value| unsafe { Function::from_value_ref(value) })
{
if export_symbols.contains(function.name()) {
continue;
}

// Skip functions that don't have subprograms.
let Some(mut sub_program) = function.sub_program(self.context) else {
continue;
};

let name = sub_program.name().unwrap();
let linkage_name = sub_program.linkage_name();
let ty = sub_program.ty();

// Create a new subprogram that has DISPFlagLocalToUnit set, so the BTF backend emits it
// with linkage=static
let mut new_program = unsafe {
let new_program = LLVMDIBuilderCreateFunction(
self.builder,
sub_program.scope().unwrap(),
name.as_ptr(),
name.len(),
linkage_name.map(|s| s.as_ptr()).unwrap_or(ptr::null()),
linkage_name.unwrap_or("").len(),
sub_program.file(),
sub_program.line(),
ty,
1,
1,
sub_program.line(),
sub_program.type_flags(),
1,
);
// Technically this must be called as part of the builder API, but effectively does
// nothing because we don't add any variables through the builder API, instead we
// replace retained nodes manually below.
LLVMDIBuilderFinalizeSubprogram(self.builder, new_program);

DISubprogram::from_value_ref(LLVMMetadataAsValue(self.context, new_program))
};

// Point the function to the new subprogram.
function.set_subprogram(&new_program);

// There's no way to set the unit with LLVMDIBuilderCreateFunction
// so we set it after creation.
if let Some(unit) = sub_program.unit() {
new_program.set_unit(unit);
}

// Add retained nodes from the old program. This is needed to preserve local debug
// variables, including function arguments which otherwise become "anon". See
// LLVMDIBuilderFinalizeSubprogram and
// DISubprogram::replaceRetainedNodes.
if let Some(retained_nodes) = sub_program.retained_nodes() {
new_program.set_retained_nodes(retained_nodes);
}

// Remove retained nodes from the old program or we'll hit a debug assertion since
// its debug variables no longer point to the program. See the
// NumAbstractSubprograms assertion in DwarfDebug::endFunctionImpl in LLVM.
let empty_node =
unsafe { LLVMMDNodeInContext2(self.context, core::ptr::null_mut(), 0) };
sub_program.set_retained_nodes(empty_node);

let ret = replace.insert(sub_program.value_ref as u64, unsafe {
LLVMValueAsMetadata(new_program.value_ref)
});
assert!(ret.is_none());
}

replace
}
}

#[derive(Clone, Debug, Eq, PartialEq)]
Expand All @@ -304,6 +416,19 @@ struct Operand {
index: u32,
}

impl Operand {
fn replace(&mut self, value: LLVMValueRef) {
unsafe {
if !LLVMIsAMDNode(self.parent).is_null() {
let value = LLVMValueAsMetadata(value);
LLVMReplaceMDNodeOperandWith(self.parent, self.index, value);
} else if !LLVMIsAUser(self.parent).is_null() {
LLVMSetOperand(self.parent, self.index, value);
}
}
}
}

impl Item {
fn value_ref(&self) -> LLVMValueRef {
match self {
Expand Down
14 changes: 14 additions & 0 deletions tests/btf/assembly/auxiliary/dep-exports.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// no-prefer-dynamic
// compile-flags: --crate-type rlib -C debuginfo=2
#![no_std]

#[inline(never)]
pub fn dep_public_symbol() -> u8 {
// read_volatile stops LTO inlining the function in the calling crate
unsafe { core::ptr::read_volatile(0 as *const u8) }
}

#[no_mangle]
pub fn dep_no_mangle() -> u8 {
dep_public_symbol()
}
8 changes: 8 additions & 0 deletions tests/btf/assembly/auxiliary/loop-panic-handler.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
// no-prefer-dynamic
// compile-flags: --crate-type rlib
#![no_std]

#[panic_handler]
fn panic_impl(_: &core::panic::PanicInfo) -> ! {
loop {}
}
50 changes: 50 additions & 0 deletions tests/btf/assembly/exported-symbols.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// assembly-output: bpf-linker
// compile-flags: --crate-type bin -C link-arg=--emit=obj -C debuginfo=2 -C link-arg=--log-level=debug -C link-arg=--log-file=/tmp/linker-lol.log
#![no_std]
#![no_main]

// aux-build: loop-panic-handler.rs
extern crate loop_panic_handler;

// aux-build: dep-exports.rs
extern crate dep_exports as dep;

pub use dep::dep_public_symbol as local_re_exported;

#[no_mangle]
fn local_no_mangle() -> u8 {
local_public(1, 2)
}

#[inline(never)]
pub fn local_public(_arg1: u32, _arg2: u32) -> u8 {
// bind v so we create a debug variable which needs its scope to be fixed
let v = dep::dep_public_symbol();
// call inline functions so we get inlinedAt scopes to be fixed
inline_function_1(v) + inline_function_2(v)
}

#[inline(always)]
fn inline_function_1(v: u8) -> u8 {
unsafe { core::ptr::read_volatile(v as *const u8) }
}

#[inline(always)]
fn inline_function_2(v: u8) -> u8 {
inline_function_1(v)
}

// #[no_mangle] functions keep linkage=global
// CHECK: FUNC 'local_no_mangle' type_id={{[0-9]+}} linkage=global

// check that parameter names are preserved
// CHECK: FUNC_PROTO
// CHECK-NEXT: _arg1
// CHECK-NEXT: _arg2

// public functions get static linkage
// CHECK: FUNC '{{.*}}local_public{{.*}}' type_id={{[0-9]+}} linkage=static
// CHECK: FUNC '{{.*}}dep_public_symbol{{.*}}' type_id={{[0-9]+}} linkage=static

// #[no_mangle] is honored for dep functions
// CHECK: FUNC 'dep_no_mangle' type_id={{[0-9]+}} linkage=global

0 comments on commit 4214888

Please sign in to comment.