Skip to content

Commit

Permalink
Support PIE in ElfReader.
Browse files Browse the repository at this point in the history
Summary:
This diff does two things:
- First, it adds support for PIE to the ElfReader via converting virtual addresses to "binary" addresses.
- Second, it uses the virtual to binary address conversion for all of our self-uprobes that break when our executables are compiled with PIE.

An alternative for the self-uprobes that was considered was to naively use `__executable_start`. However, that would only work for PIE executables and wouldn't work with non-PIE.

Test Plan: Tested with both clang 14 (non-PIE) and clang 15 (PIE).

Reviewers: #stirling, zasgar, #third_party_approvers, oazizi

Reviewed By: zasgar, #third_party_approvers

Signed-off-by: James Bartlett <jamesbartlett@pixielabs.ai>

Differential Revision: https://phab.corp.pixielabs.ai/D12651

GitOrigin-RevId: 0b7004bd21bb9d1770de62ed41daa914f8b77d1b
  • Loading branch information
JamesMBartlett authored and copybaranaut committed Dec 23, 2022
1 parent f0e3b39 commit bca73fa
Show file tree
Hide file tree
Showing 9 changed files with 169 additions and 21 deletions.
6 changes: 3 additions & 3 deletions bazel/repository_locations.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -200,9 +200,9 @@ REPOSITORY_LOCATIONS = dict(
urls = ["https://github.com/r-lyeh-archived/sole/archive/refs/tags/1.0.2.tar.gz"],
),
com_github_serge1_elfio = dict(
sha256 = "f1e2edddec556ac61705b931b5d59f1c89440442d5be522d3ae7d317b917e2d9",
strip_prefix = "ELFIO-b8d2a419b0edf185cfd7dc49a837d8d97001a7ba",
urls = ["https://github.com/pixie-io/ELFIO/archive/b8d2a419b0edf185cfd7dc49a837d8d97001a7ba.tar.gz"],
sha256 = "de0bb37885cc041a00e3b27353125361716fc4a49bd6ce6ed5782a1e8d4ee137",
strip_prefix = "ELFIO-2bdb28ae5d02db994a32cda401489e68c2887a4d",
urls = ["https://github.com/pixie-io/ELFIO/archive/2bdb28ae5d02db994a32cda401489e68c2887a4d.tar.gz"],
),
com_github_simdutf_simdutf = dict(
urls = ["https://github.com/simdutf/simdutf/archive/refs/tags/v1.0.0.tar.gz"],
Expand Down
10 changes: 9 additions & 1 deletion src/stirling/bpf_tools/bcc_wrapper_bpf_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
* SPDX-License-Identifier: Apache-2.0
*/

#include <unistd.h>
#include <chrono>
#include <thread>
#include "src/stirling/bpf_tools/bcc_wrapper.h"

#include "src/common/fs/fs_wrapper.h"
Expand Down Expand Up @@ -137,9 +140,14 @@ TEST(BCCWrapperTest, GetTGIDStartTime) {

ASSERT_OK_AND_ASSIGN(std::filesystem::path self_path, fs::ReadSymlink("/proc/self/exe"));

int64_t self_pid = getpid();
ASSERT_OK_AND_ASSIGN(auto elf_reader, obj_tools::ElfReader::Create(self_path.string(), self_pid));

// Use address instead of symbol to specify this probe,
// so that even if debug symbols are stripped, the uprobe can still attach.
uint64_t symbol_addr = reinterpret_cast<uint64_t>(&BCCWrapperTestProbeTrigger);
ASSERT_OK_AND_ASSIGN(
uint64_t symbol_addr,
elf_reader->VirtualAddrToBinaryAddr(reinterpret_cast<uint64_t>(&BCCWrapperTestProbeTrigger)));

UProbeSpec uprobe{.binary_path = self_path,
.symbol = {}, // Keep GCC happy.
Expand Down
5 changes: 4 additions & 1 deletion src/stirling/bpf_tools/task_struct_resolver.cc
Original file line number Diff line number Diff line change
Expand Up @@ -163,10 +163,13 @@ StatusOr<TaskStructOffsets> ResolveTaskStructOffsetsCore() {
::px::system::GetPIDStartTimeTicks("/proc/self"));

PL_ASSIGN_OR_RETURN(std::filesystem::path self_path, GetSelfPath());
int64_t pid = getpid();
PL_ASSIGN_OR_RETURN(auto elf_reader, obj_tools::ElfReader::Create(self_path.string(), pid));

// Use address instead of symbol to specify this probe,
// so that even if debug symbols are stripped, the uprobe can still attach.
uint64_t symbol_addr = reinterpret_cast<uint64_t>(&StirlingProbeTrigger);
PL_ASSIGN_OR_RETURN(uint64_t symbol_addr, elf_reader->VirtualAddrToBinaryAddr(
reinterpret_cast<uint64_t>(&StirlingProbeTrigger)));

UProbeSpec uprobe{.binary_path = self_path,
.symbol = {}, // Keep GCC happy.
Expand Down
94 changes: 92 additions & 2 deletions src/stirling/obj_tools/elf_reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include "src/common/base/byte_utils.h"
#include "src/common/base/utils.h"
#include "src/common/fs/fs_wrapper.h"
#include "src/common/system/proc_parser.h"
#include "src/stirling/obj_tools/init.h"

namespace px {
Expand Down Expand Up @@ -159,13 +160,15 @@ Status ElfReader::LocateDebugSymbols(const std::filesystem::path& debug_file_dir
}

// TODO(oazizi): Consider changing binary_path to std::filesystem::path.
StatusOr<std::unique_ptr<ElfReader>> ElfReader::Create(
const std::string& binary_path, const std::filesystem::path& debug_file_dir) {
StatusOr<std::unique_ptr<ElfReader>> ElfReader::Create(const std::string& binary_path,
const std::filesystem::path& debug_file_dir,
int64_t pid) {
VLOG(1) << absl::Substitute("Creating ElfReader, [binary=$0] [debug_file_dir=$1]", binary_path,
debug_file_dir.string());
auto elf_reader = std::unique_ptr<ElfReader>(new ElfReader);

elf_reader->binary_path_ = binary_path;
elf_reader->pid_ = pid;

if (!elf_reader->elf_reader_.load_header_and_sections(binary_path)) {
return error::Internal("Can't find or process ELF file $0", binary_path);
Expand Down Expand Up @@ -241,6 +244,10 @@ StatusOr<int32_t> ElfReader::FindSegmentOffsetOfSection(std::string_view section
return error::NotFound("Could not find segment offset of section '$0'", section_name);
}

// File-local, pre-built INVALID_ARGUMENT status whose message tells the caller that a
// text_start_addr is required for symbol resolution.
// NOTE(review): no use of this variable is visible in this part of the file, and its message
// refers to text_start_addr rather than a PID -- confirm it is still needed and that the wording
// is current.
static auto NoTextStartAddrError =
Status(px::statuspb::INVALID_ARGUMENT,
"Must provide text_start_addr to ELFReader to use Symbol resolution functions");

StatusOr<std::vector<ElfReader::SymbolInfo>> ElfReader::SearchSymbols(
std::string_view search_symbol, SymbolMatchType match_type, std::optional<int> symbol_type,
bool stop_at_first_match) {
Expand Down Expand Up @@ -325,6 +332,8 @@ StatusOr<std::optional<std::string>> ElfReader::AddrToSymbol(size_t sym_addr) {

const ELFIO::symbol_section_accessor symbols(elf_reader_, symtab_section);

PL_ASSIGN_OR_RETURN(sym_addr, VirtualAddrToBinaryAddr(sym_addr));

// Call ELFIO to get symbol by address.
// ELFIO looks up the symbol and then populates name, size, type, etc.
// We only care about the name, but need to declare the other variables as well.
Expand All @@ -349,6 +358,8 @@ StatusOr<std::optional<std::string>> ElfReader::AddrToSymbol(size_t sym_addr) {
StatusOr<std::optional<std::string>> ElfReader::InstrAddrToSymbol(size_t sym_addr) {
PL_ASSIGN_OR_RETURN(ELFIO::section * symtab_section, SymtabSection());

PL_ASSIGN_OR_RETURN(sym_addr, VirtualAddrToBinaryAddr(sym_addr));

const ELFIO::symbol_section_accessor symbols(elf_reader_, symtab_section);
for (unsigned int j = 0; j < symbols.get_symbols_num(); ++j) {
// Call ELFIO to get symbol by index.
Expand Down Expand Up @@ -391,6 +402,7 @@ StatusOr<std::unique_ptr<ElfReader::Symbolizer>> ElfReader::GetSymbolizer() {
symbols.get_symbol(j, name, addr, size, bind, type, section_index, other);

if (type == ELFIO::STT_FUNC) {
PL_ASSIGN_OR_RETURN(addr, BinaryAddrToVirtualAddr(addr));
symbolizer->AddEntry(addr, size, llvm::demangle(name));
}
}
Expand Down Expand Up @@ -552,6 +564,84 @@ StatusOr<utils::u8string> ElfReader::SymbolByteCode(std::string_view section,
return byte_code;
}

/**
 * Converts a virtual address (e.g. the value of a function pointer in the running process) into
 * the corresponding "binary" address by adding the cached virtual-to-binary offset.
 *
 * @param virtual_addr Address as seen in the process's virtual address space.
 * @return The binary address, or the error reported by EnsureVirtToBinaryCalculated() if the
 *         offset could not be determined.
 */
StatusOr<uint64_t> ElfReader::VirtualAddrToBinaryAddr(uint64_t virtual_addr) {
  // The offset is computed on first use; bail out if that computation fails.
  PL_RETURN_IF_ERROR(EnsureVirtToBinaryCalculated());
  const uint64_t offset = *virtual_to_binary_addr_offset_;
  return virtual_addr + offset;
}

/**
 * Converts a "binary" address into the corresponding virtual address by subtracting the cached
 * virtual-to-binary offset. This is the inverse of VirtualAddrToBinaryAddr().
 *
 * @param binary_addr Address expressed in the ELF file's own address space.
 * @return The virtual address, or the error reported by EnsureVirtToBinaryCalculated() if the
 *         offset could not be determined.
 */
StatusOr<uint64_t> ElfReader::BinaryAddrToVirtualAddr(uint64_t binary_addr) {
  // The offset is computed on first use; bail out if that computation fails.
  PL_RETURN_IF_ERROR(EnsureVirtToBinaryCalculated());
  const uint64_t offset = *virtual_to_binary_addr_offset_;
  return binary_addr - offset;
}

/**
 * Makes sure virtual_to_binary_addr_offset_ holds a value, computing it on demand.
 *
 * @return OK if the offset was already set; otherwise the result of
 *         CalculateVirtToBinaryAddrConversion().
 */
Status ElfReader::EnsureVirtToBinaryCalculated() {
  if (!virtual_to_binary_addr_offset_.has_value()) {
    // Not computed yet: do the work now and propagate its outcome.
    return CalculateVirtToBinaryAddrConversion();
  }
  return Status::OK();
}

/**
 * Computes virtual_to_binary_addr_offset_, the value that is added to a virtual address (e.g. one
 * obtained from a function pointer) to obtain the "binary" address (i.e. the address `nm` would
 * display for the same function).
 *
 * Two pieces of information are combined:
 *  - From /proc/PID/maps: the start address and file offset of the first mapping. Their
 *    difference is where file offset 0 sits in the process's address space.
 *  - From the ELF file: the virtual address and file offset of the first PT_LOAD segment. Their
 *    difference is where file offset 0 sits in the ELF file's own address space.
 *
 * The stored offset is (ELF base - mapped base). For a non-PIE executable the loader uses the ELF
 * virtual addresses directly, so the conversion is trivial. For a PIE executable the loader may
 * place the image anywhere, but it keeps the relative layout of the segments, so one offset is
 * valid for every address.
 *
 * NOTE(review): only the first entry of /proc/PID/maps is consulted, which presumes that entry
 * belongs to this binary -- confirm this holds for all callers.
 *
 * @return OK on success.
 *         INVALID_ARGUMENT if no PID was supplied (pid_ == -1).
 *         INTERNAL if the maps file has no entries, its offset field cannot be parsed as hex, or
 *         the ELF file has no loadable segment.
 *         Any error returned by ProcParser::ParseProcPIDMaps().
 **/
Status ElfReader::CalculateVirtToBinaryAddrConversion() {
  if (pid_ == -1) {
    return Status(statuspb::INVALID_ARGUMENT,
                  "Must specify PID to use symbol resolution functions in ElfReader");
  }

  // --- Runtime side: where did the loader put the image? ---
  system::ProcParser proc_parser;
  std::vector<system::ProcParser::ProcessSMaps> maps;
  // This is a little inefficient as we only need the first entry.
  PL_RETURN_IF_ERROR(proc_parser.ParseProcPIDMaps(pid_, &maps));
  if (maps.empty()) {
    return Status(statuspb::INTERNAL,
                  "Failed to parse /proc/$pid/maps to work out address conversion");
  }

  auto runtime_virt_addr = maps[0].vmem_start;
  uint64_t runtime_file_offset;
  if (!absl::SimpleHexAtoi(maps[0].offset, &runtime_file_offset)) {
    return Status(statuspb::INTERNAL,
                  "Failed to parse offset in /proc/$pid/maps to work out address conversion");
  }
  uint64_t runtime_base = runtime_virt_addr - runtime_file_offset;

  // --- ELF side: where does the file itself say the image starts? ---
  const ELFIO::segment* load_segment = nullptr;
  for (int idx = 0; load_segment == nullptr && idx < elf_reader_.segments.size(); ++idx) {
    ELFIO::segment* candidate = elf_reader_.segments[idx];
    if (candidate->get_type() == ELFIO::PT_LOAD) {
      load_segment = candidate;
    }
  }
  if (load_segment == nullptr) {
    return Status(
        statuspb::INTERNAL,
        "Calculating virtual to binary offset failed because there are no loadable segments in "
        "elf file");
  }

  uint64_t file_virt_addr = load_segment->get_virtual_address();
  uint64_t file_offset = load_segment->get_offset();
  uint64_t file_base = file_virt_addr - file_offset;

  // Unsigned arithmetic: the difference may wrap, and adding it back later wraps the same way.
  virtual_to_binary_addr_offset_ = file_base - runtime_base;
  return Status::OK();
}

} // namespace obj_tools
} // namespace stirling
} // namespace px
36 changes: 32 additions & 4 deletions src/stirling/obj_tools/elf_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,26 @@ class ElfReader {
*/
static StatusOr<std::unique_ptr<ElfReader>> Create(
const std::string& binary_path,
const std::filesystem::path& debug_file_dir = "/usr/lib/debug");
const std::filesystem::path& debug_file_dir = "/usr/lib/debug", int64_t pid = -1);

/**
 * Convenience overload: creates an ElfReader for the given binary and PID, using
 * "/usr/lib/debug" as the debug file directory.
 *
 * @param binary_path Path to the ELF file.
 * @param pid PID forwarded to the three-argument Create().
 */
static StatusOr<std::unique_ptr<ElfReader>> Create(const std::string& binary_path, int64_t pid) {
  const std::filesystem::path debug_file_dir = "/usr/lib/debug";
  return Create(binary_path, debug_file_dir, pid);
}
// Tag type selecting the test-only Create() overload below.
// NOTE(review): this is an alias of bool, so any bool argument selects that overload -- confirm
// that is intended.
using TestOnlyUseZeroOffset = bool;

/**
 * Test-only factory: creates an ElfReader without a PID and sets the virtual-to-binary offset
 * to zero, so the address conversion functions return their input unchanged.
 *
 * @param binary_path Path to the ELF file.
 * @return The reader, or the error from the underlying Create() call.
 */
static StatusOr<std::unique_ptr<ElfReader>> Create(const std::string& binary_path,
                                                   TestOnlyUseZeroOffset) {
  PL_ASSIGN_OR_RETURN(auto reader, Create(binary_path));
  // Pre-populating the offset means the lazy calculation is never triggered.
  reader->virtual_to_binary_addr_offset_.emplace(0);
  return reader;
}

std::filesystem::path& debug_symbols_path() { return debug_symbols_path_; }

struct SymbolInfo {
std::string name;
int type = -1;
// SymbolInfo always contains the so called "binary" address of the symbol (i.e. what `nm` would
// return for the symbol).
uint64_t address = -1;
uint64_t size = -1;

Expand Down Expand Up @@ -98,7 +111,7 @@ class ElfReader {
/**
* Looks up the symbol for an address.
*
* @param addr The symbol address to lookup.
* @param addr The symbol address to lookup. This should be the virtual address of the symbol.
* @return Symbol name if address was found in the symbol table.
* std::nullopt if search completed but address was not found.
* Error if search failed to run as expected.
Expand All @@ -111,7 +124,7 @@ class ElfReader {
* Unlike AddrToSymbol, this function covers the entirety of the function body.
* Any address in the body of the function is resolved, not just where the symbol is located.
*
* @param addr The symbol address to lookup.
* @param addr The symbol address to lookup. This should be the virtual address of the symbol.
* @return Symbol name if address was found in the symbol table.
* std::nullopt if search completed but address was not found.
* Error if search failed to run as expected.
Expand All @@ -127,7 +140,8 @@ class ElfReader {
void AddEntry(uintptr_t addr, size_t size, std::string name);

/**
* Lookup the symbol for the specified address.
* Lookup the symbol for the specified address. The address should be a virtual address (as
* opposed to a "binary" address).
*/
std::string_view Lookup(uintptr_t addr) const;

Expand All @@ -153,6 +167,9 @@ class ElfReader {
*/
StatusOr<px::utils::u8string> SymbolByteCode(std::string_view section, const SymbolInfo& symbol);

StatusOr<uint64_t> VirtualAddrToBinaryAddr(uint64_t virtual_addr);
StatusOr<uint64_t> BinaryAddrToVirtualAddr(uint64_t binary_addr);

private:
ElfReader() = default;

Expand All @@ -177,12 +194,23 @@ class ElfReader {
*/
StatusOr<px::utils::u8string> FuncByteCode(const SymbolInfo& func_symbol);

/**
* Calculates the offset between virtual and binary addresses and stores it in
* virtual_to_binary_addr_offset_, such that:
*   binary_addr = virtual_addr + virtual_to_binary_addr_offset_
*/
Status CalculateVirtToBinaryAddrConversion();
Status EnsureVirtToBinaryCalculated();

std::string binary_path_;

std::filesystem::path debug_symbols_path_;

// Set up an elf reader, so we can extract debug symbols.
ELFIO::elfio elf_reader_;

int64_t pid_;
std::optional<uint64_t> virtual_to_binary_addr_offset_ = std::nullopt;
};

} // namespace obj_tools
Expand Down
6 changes: 4 additions & 2 deletions src/stirling/obj_tools/elf_reader_symbolizer_bpf_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,8 @@ TEST(SymbolizerTest, InstrAddrToSymbol) {

// Create an ELF reader to symbolize the addresses.
ASSERT_OK_AND_ASSIGN(std::filesystem::path self_path, fs::ReadSymlink("/proc/self/exe"));
ASSERT_OK_AND_ASSIGN(auto elf_reader, ElfReader::Create(self_path.string()));
int64_t self_pid = getpid();
ASSERT_OK_AND_ASSIGN(auto elf_reader, ElfReader::Create(self_path.string(), self_pid));

// Use the ELF reader to symbolize the stack trace addresses.
std::vector<std::string> symbols;
Expand Down Expand Up @@ -157,7 +158,8 @@ TEST(SymbolizerTest, GetSymbolizer) {

// Create an ELF reader to symbolize the addresses.
ASSERT_OK_AND_ASSIGN(std::filesystem::path self_path, fs::ReadSymlink("/proc/self/exe"));
ASSERT_OK_AND_ASSIGN(auto elf_reader, ElfReader::Create(self_path.string()));
int64_t self_pid = getpid();
ASSERT_OK_AND_ASSIGN(auto elf_reader, ElfReader::Create(self_path.string(), self_pid));

// Use the ELF reader to symbolize the stack trace addresses.
ASSERT_OK_AND_ASSIGN(auto symbolizer, elf_reader->GetSymbolizer());
Expand Down
6 changes: 4 additions & 2 deletions src/stirling/obj_tools/elf_reader_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,8 @@ TEST(ElfReaderTest, AddrToSymbol) {
const std::string kSymbolName = "CanYouFindThis";
ASSERT_OK_AND_ASSIGN(const int64_t symbol_addr, NmSymbolNameToAddr(path, kSymbolName));

ASSERT_OK_AND_ASSIGN(std::unique_ptr<ElfReader> elf_reader, ElfReader::Create(path));
ASSERT_OK_AND_ASSIGN(std::unique_ptr<ElfReader> elf_reader,
ElfReader::Create(path, ElfReader::TestOnlyUseZeroOffset{}));

{
ASSERT_OK_AND_ASSIGN(std::optional<std::string> symbol_name,
Expand All @@ -159,7 +160,8 @@ TEST(ElfReaderTest, InstrAddrToSymbol) {
const std::string kSymbolName = "CanYouFindThis";
ASSERT_OK_AND_ASSIGN(const int64_t kSymbolAddr, NmSymbolNameToAddr(path, kSymbolName));

ASSERT_OK_AND_ASSIGN(std::unique_ptr<ElfReader> elf_reader, ElfReader::Create(path));
ASSERT_OK_AND_ASSIGN(std::unique_ptr<ElfReader> elf_reader,
ElfReader::Create(path, ElfReader::TestOnlyUseZeroOffset{}));

{
ASSERT_OK_AND_ASSIGN(std::optional<std::string> symbol_name,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include "src/common/testing/testing.h"
#include "src/stirling/bpf_tools/bcc_wrapper.h"
#include "src/stirling/bpf_tools/macros.h"
#include "src/stirling/obj_tools/elf_reader.h"
#include "src/stirling/source_connectors/perf_profiler/bcc_bpf_intf/stack_event.h"
#include "src/stirling/source_connectors/perf_profiler/shared/symbolization.h"
#include "src/stirling/source_connectors/perf_profiler/stringifier.h"
Expand Down Expand Up @@ -179,10 +180,14 @@ class StringifierTest : public ::testing::Test {
};

TEST_F(StringifierTest, MemoizationTest) {
// Values used in creating the [u|k] probe specs.
const uint64_t foo_addr = reinterpret_cast<uint64_t>(&::test::Foo);
const uint64_t bar_addr = reinterpret_cast<uint64_t>(&::test::Bar);
const std::filesystem::path self_path = GetSelfPath().ValueOrDie();
int64_t self_pid = getpid();
ASSERT_OK_AND_ASSIGN(auto elf_reader, obj_tools::ElfReader::Create(self_path.string(), self_pid));
// Values used in creating the [u|k] probe specs.
ASSERT_OK_AND_ASSIGN(const uint64_t foo_addr, elf_reader->VirtualAddrToBinaryAddr(
reinterpret_cast<uint64_t>(&::test::Foo)));
ASSERT_OK_AND_ASSIGN(const uint64_t bar_addr, elf_reader->VirtualAddrToBinaryAddr(
reinterpret_cast<uint64_t>(&::test::Bar)));

// uprobe specs, for Foo() and Bar(). We invoke our BPF program,
// stack_trace_sampler, when Foo() or Bar() is called.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,21 @@ using px::system::ProcPidPath;
ConnInfoMapManager::ConnInfoMapManager(bpf_tools::BCCWrapper* bcc)
: conn_info_map_(bcc->GetHashTable<uint64_t, struct conn_info_t>("conn_info_map")),
conn_disabled_map_(bcc->GetHashTable<uint64_t, uint64_t>("conn_disabled_map")) {
std::filesystem::path self_path = GetSelfPath().ValueOrDie();
int64_t pid = getpid();
auto elf_reader_or_s = obj_tools::ElfReader::Create(self_path.string(), pid);
if (!elf_reader_or_s.ok()) {
LOG(FATAL) << "Failed to create ElfReader for self probe";
}
auto elf_reader = elf_reader_or_s.ConsumeValueOrDie();
// Use address instead of symbol to specify this probe,
// so that even if debug symbols are stripped, the uprobe can still attach.
uint64_t symbol_addr = reinterpret_cast<uint64_t>(&ConnInfoMapCleanupTrigger);

std::filesystem::path self_path = GetSelfPath().ValueOrDie();
auto symbol_addr_or_s =
elf_reader->VirtualAddrToBinaryAddr(reinterpret_cast<uint64_t>(&ConnInfoMapCleanupTrigger));
if (!symbol_addr_or_s.ok()) {
LOG(FATAL) << "Failed to convert virtual address to binary address for self probe";
}
uint64_t symbol_addr = symbol_addr_or_s.ConsumeValueOrDie();

bpf_tools::UProbeSpec uprobe{.binary_path = self_path,
.symbol = {}, // Keep GCC happy.
Expand Down

0 comments on commit bca73fa

Please sign in to comment.