Skip to content

Commit

Permalink
read/elf: hash support
Browse files Browse the repository at this point in the history
  • Loading branch information
philipc committed Jul 21, 2021
1 parent f6193d8 commit bc43a6f
Show file tree
Hide file tree
Showing 6 changed files with 496 additions and 3 deletions.
66 changes: 65 additions & 1 deletion examples/readobj.rs
Original file line number Diff line number Diff line change
Expand Up @@ -540,8 +540,9 @@ mod elf {
SHT_RELA => print_section_rela(p, endian, data, elf, sections, section),
SHT_NOTE => print_section_notes(p, endian, data, elf, section),
SHT_GROUP => print_section_group(p, endian, data, elf, sections, section),
SHT_HASH => print_hash(p, endian, data, elf, sections, section),
SHT_GNU_HASH => print_gnu_hash(p, endian, data, elf, sections, section),
// TODO:
//SHT_HASH =>
//SHT_DYNAMIC =>
//SHT_SHLIB =>
//SHT_INIT_ARRAY =>
Expand Down Expand Up @@ -792,6 +793,69 @@ mod elf {
}
}

/// Print the header fields of a `SHT_HASH` section.
///
/// Nothing is printed if the section header does not yield a hash header
/// (either because reading it failed or because none was returned).
fn print_hash<Elf: FileHeader>(
    p: &mut Printer<impl Write>,
    endian: Elf::Endian,
    data: &[u8],
    _elf: &Elf,
    _sections: &SectionTable<Elf>,
    section: &Elf::SectionHeader,
) {
    match section.hash_header(endian, data) {
        Ok(Some(header)) => {
            p.group("Hash", |p| {
                p.field("BucketCount", header.bucket_count.get(endian));
                p.field("ChainCount", header.chain_count.get(endian));
            });
        }
        // Unreadable or absent header: there is nothing to show.
        _ => {}
    }
    /* TODO: add this in a test somewhere
    if let Ok(Some(hash_table)) = section.hash(endian, data) {
        if let Ok(symbols) = _sections.symbols(endian, data, SHT_DYNSYM) {
            for symbol in symbols.symbols() {
                let name = symbols.symbol_name(endian, symbol).unwrap();
                if !symbol.is_definition(endian) {
                    continue;
                }
                let hash = hash(name);
                let hash_symbol = hash_table.find(endian, name, hash, &symbols).unwrap();
                let hash_name = symbols.symbol_name(endian, hash_symbol).unwrap();
                assert_eq!(name, hash_name);
            }
        }
    }
    */
}

/// Print the header fields of a `SHT_GNU_HASH` section.
///
/// Nothing is printed if the section header does not yield a GNU hash header
/// (either because reading it failed or because none was returned).
fn print_gnu_hash<Elf: FileHeader>(
    p: &mut Printer<impl Write>,
    endian: Elf::Endian,
    data: &[u8],
    _elf: &Elf,
    _sections: &SectionTable<Elf>,
    section: &Elf::SectionHeader,
) {
    match section.gnu_hash_header(endian, data) {
        Ok(Some(header)) => {
            p.group("GnuHash", |p| {
                p.field("BucketCount", header.bucket_count.get(endian));
                p.field("SymbolBase", header.symbol_base.get(endian));
                p.field("BloomCount", header.bloom_count.get(endian));
                p.field("BloomShift", header.bloom_shift.get(endian));
            });
        }
        // Unreadable or absent header: there is nothing to show.
        _ => {}
    }
    /* TODO: add this in a test somewhere
    if let Ok(Some(hash_table)) = section.gnu_hash(endian, data) {
        if let Ok(symbols) = _sections.symbols(endian, data, SHT_DYNSYM) {
            for symbol in &symbols.symbols()[hash_table.symbol_base() as usize..] {
                let name = symbols.symbol_name(endian, symbol).unwrap();
                let hash = gnu_hash(name);
                let hash_symbol = hash_table.find(endian, name, hash, &symbols).unwrap();
                let hash_name = symbols.symbol_name(endian, hash_symbol).unwrap();
                assert_eq!(name, hash_name);
            }
        }
    }
    */
}

static FLAGS_EI_CLASS: &[Flag<u8>] = &flags!(ELFCLASSNONE, ELFCLASS32, ELFCLASS64);
static FLAGS_EI_DATA: &[Flag<u8>] = &flags!(ELFDATANONE, ELFDATA2LSB, ELFDATA2MSB);
static FLAGS_EV: &[Flag<u8>] = &flags!(EV_NONE, EV_CURRENT);
Expand Down
61 changes: 61 additions & 0 deletions src/elf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1796,6 +1796,65 @@ pub const NT_GNU_PROPERTY_TYPE_0: u32 = 5;
// TODO: GNU_PROPERTY_*
// TODO: Elf*_Move

/// Header of `SHT_HASH` section.
///
/// In the section data this header is followed by two arrays of `U32<E>`
/// words: first `bucket_count` bucket entries, then `chain_count` chain
/// entries. Those arrays are not fields of this struct.
#[derive(Debug, Clone, Copy)]
#[repr(C)]
pub struct HashHeader<E: Endian> {
/// The number of hash buckets.
pub bucket_count: U32<E>,
/// The number of chain values.
pub chain_count: U32<E>,
// Layout of the data that follows the header (documentation only):
// Array of hash bucket start indices.
// buckets: U32<E>[bucket_count]
// Array of hash chain links. An index of 0 terminates the chain.
// chains: U32<E>[chain_count]
}

/// Calculate the SysV hash for a symbol name.
///
/// Used for `SHT_HASH`.
///
/// Each byte is mixed in by shifting the accumulator left by four bits,
/// adding the byte, and folding the top nibble back into bits 4..8. Only the
/// low 28 bits of the final accumulator are returned.
pub fn hash(name: &[u8]) -> u32 {
    let mixed = name.iter().fold(0u32, |acc, &byte| {
        // Bits shifted out of the top are discarded, same as a wrapping
        // multiplication by 16.
        let shifted = (acc << 4).wrapping_add(u32::from(byte));
        shifted ^ ((shifted >> 24) & 0xf0)
    });
    mixed & 0x0fff_ffff
}

/// Header of `SHT_GNU_HASH` section.
///
/// In the section data this header is followed by three arrays: the bloom
/// filter words, the bucket entries, and the per-symbol hash values. Those
/// arrays are not fields of this struct.
#[derive(Debug, Clone, Copy)]
#[repr(C)]
pub struct GnuHashHeader<E: Endian> {
/// The number of hash buckets.
pub bucket_count: U32<E>,
/// The symbol table index of the first symbol in the hash.
pub symbol_base: U32<E>,
/// The number of words in the bloom filter.
///
/// Must be a non-zero power of 2.
pub bloom_count: U32<E>,
/// The bit shift count for the bloom filter.
pub bloom_shift: U32<E>,
// Layout of the data that follows the header (documentation only):
// Array of bloom filter words.
// bloom_filters: U32<E>[bloom_count] or U64<E>[bloom_count]
// Array of hash bucket start indices.
// buckets: U32<E>[bucket_count]
// Array of hash values, one for each symbol starting at symbol_base.
// values: U32<E>[symbol_count]
}

/// Calculate the GNU hash for a symbol name.
///
/// Used for `SHT_GNU_HASH`.
///
/// Starts from 5381 and, for every byte, multiplies the accumulator by 33
/// and adds the byte, wrapping on 32-bit overflow.
pub fn gnu_hash(name: &[u8]) -> u32 {
    name.iter().fold(5381u32, |acc, &byte| {
        acc.wrapping_mul(33).wrapping_add(u32::from(byte))
    })
}

// Motorola 68k specific definitions.

// m68k values for `Rel*::r_type`.
Expand Down Expand Up @@ -6097,4 +6156,6 @@ unsafe_impl_endian_pod!(
Dyn64,
NoteHeader32,
NoteHeader64,
HashHeader,
GnuHashHeader,
);
211 changes: 211 additions & 0 deletions src/read/elf/hash.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,211 @@
use std::mem;

use crate::elf;
use crate::read::{ReadError, ReadRef, Result};
use crate::{U32, U64};

use super::{FileHeader, Sym, SymbolTable};

/// A SysV symbol hash table in an ELF file.
///
/// Both slices borrow directly from the data the table was parsed from.
#[derive(Debug)]
pub struct HashTable<'data, Elf: FileHeader> {
// One entry per bucket: the symbol index at which that bucket's chain starts.
buckets: &'data [U32<Elf::Endian>],
// Chain links, indexed by symbol index: each entry holds the next symbol
// index to try, and an index of 0 ends the walk.
chains: &'data [U32<Elf::Endian>],
}

impl<'data, Elf: FileHeader> HashTable<'data, Elf> {
    /// Parse a SysV hash table.
    ///
    /// `data` should be from a `SHT_HASH` section, or from a
    /// segment pointed to via the `DT_HASH` entry.
    ///
    /// The header is read at offset 0 in the given `data`. It is followed by
    /// the bucket array and then the chain array, whose lengths are taken
    /// from the header.
    ///
    /// # Errors
    ///
    /// Returns an error if `data` is too short to hold the header, the
    /// buckets, or the chains.
    pub fn parse(endian: Elf::Endian, data: &'data [u8]) -> Result<Self> {
        let mut offset = 0;
        let header = data
            .read::<elf::HashHeader<Elf::Endian>>(&mut offset)
            .read_error("Invalid hash header")?;
        let buckets = data
            .read_slice(&mut offset, header.bucket_count.get(endian) as usize)
            .read_error("Invalid hash buckets")?;
        let chains = data
            .read_slice(&mut offset, header.chain_count.get(endian) as usize)
            .read_error("Invalid hash chains")?;
        Ok(HashTable { buckets, chains })
    }

    /// Return the symbol table length.
    ///
    /// This is the number of chain entries in the hash table.
    pub fn symbol_table_length(&self) -> u32 {
        // The chain slice length was read from a 32-bit header field in
        // `parse`, so this cast cannot truncate.
        self.chains.len() as u32
    }

    /// Use the hash table to find the symbol table entry with the given name and hash.
    ///
    /// `hash` is expected to be the SysV hash of `name`; it is only used to
    /// select the bucket and is not recomputed here.
    ///
    /// Returns `None` if no symbol with that name is found in the selected
    /// chain, or if the table is malformed (no buckets, or a chain link that
    /// points outside the chain array).
    pub fn find<R: ReadRef<'data>>(
        &self,
        endian: Elf::Endian,
        name: &[u8],
        hash: u32,
        symbols: &SymbolTable<'data, Elf, R>,
    ) -> Option<&'data Elf::Sym> {
        // A table without buckets cannot contain any symbol. Checking this
        // up front also keeps the modulo below from dividing by zero on a
        // malformed file.
        if self.buckets.is_empty() {
            return None;
        }

        // Get the chain start from the bucket for this hash.
        let mut index = self.buckets[(hash as usize) % self.buckets.len()].get(endian) as usize;
        let strings = symbols.strings();

        // Bound the walk by the number of chain entries so that a cyclic
        // chain in a malformed file cannot cause an infinite loop.
        for _ in 0..self.chains.len() {
            // An index of 0 terminates the chain.
            if index == 0 {
                return None;
            }
            if let Ok(symbol) = symbols.symbol(index) {
                if symbol.name(endian, strings) == Ok(name) {
                    return Some(symbol);
                }
            }
            index = self.chains.get(index)?.get(endian) as usize;
        }
        None
    }
}

/// A GNU symbol hash table in an ELF file.
///
/// All slices borrow directly from the data the table was parsed from.
#[derive(Debug)]
pub struct GnuHashTable<'data, Elf: FileHeader> {
// Symbol table index of the first symbol covered by the hash table.
symbol_base: u32,
// Shift applied to the hash to derive the second bloom filter bit.
bloom_shift: u32,
// Raw bytes of the bloom filter words; the word size is that of `Elf::Word`.
bloom_filters: &'data [u8],
// One entry per bucket: the symbol index at which that bucket's chain
// starts, with 0 treated as an empty bucket.
buckets: &'data [U32<Elf::Endian>],
// Hash values, one per symbol starting at `symbol_base`; a set low bit marks
// the last entry of a chain.
values: &'data [U32<Elf::Endian>],
}

impl<'data, Elf: FileHeader> GnuHashTable<'data, Elf> {
    /// Parse a GNU hash table.
    ///
    /// `data` should be from a `SHT_GNU_HASH` section, or from a
    /// segment pointed to via the `DT_GNU_HASH` entry.
    ///
    /// The header is read at offset 0 in the given `data`.
    ///
    /// The header does not contain a length field, and so all of `data`
    /// will be used as the hash table values. It does not matter if this
    /// is longer than needed, and this will often be the case when accessing
    /// the hash table via the `DT_GNU_HASH` entry.
    ///
    /// # Errors
    ///
    /// Returns an error if `data` is too short to hold the header, the
    /// bloom filter words, or the buckets.
    pub fn parse(endian: Elf::Endian, data: &'data [u8]) -> Result<Self> {
        let mut offset = 0;
        let header = data
            .read::<elf::GnuHashHeader<Elf::Endian>>(&mut offset)
            .read_error("Invalid GNU hash header")?;
        // Computed in 64 bits so that a large `bloom_count` cannot overflow.
        let bloom_len =
            u64::from(header.bloom_count.get(endian)) * mem::size_of::<Elf::Word>() as u64;
        let bloom_filters = data
            .read_bytes(&mut offset, bloom_len)
            .read_error("Invalid GNU hash bloom filters")?;
        let buckets = data
            .read_slice(&mut offset, header.bucket_count.get(endian) as usize)
            .read_error("Invalid GNU hash buckets")?;
        // Everything after the buckets is treated as 32-bit hash values.
        let chain_count = (data.len() - offset as usize) / 4;
        let values = data
            .read_slice(&mut offset, chain_count)
            .read_error("Invalid GNU hash values")?;
        Ok(GnuHashTable {
            symbol_base: header.symbol_base.get(endian),
            bloom_shift: header.bloom_shift.get(endian),
            bloom_filters,
            buckets,
            values,
        })
    }

    /// Return the symbol table index of the first symbol in the hash table.
    pub fn symbol_base(&self) -> u32 {
        self.symbol_base
    }

    /// Determine the symbol table length by finding the last entry in the hash table.
    ///
    /// Returns `None` if the hash table is empty or invalid.
    pub fn symbol_table_length(&self, endian: Elf::Endian) -> Option<u32> {
        // Ensure we find a non-empty bucket.
        if self.symbol_base == 0 {
            return None;
        }

        // Find the highest chain index in a bucket.
        let mut max_symbol = self
            .buckets
            .iter()
            .map(|bucket| bucket.get(endian))
            .max()
            .unwrap_or(0);

        // Find the end of the chain.
        let chain = self
            .values
            .get(max_symbol.checked_sub(self.symbol_base)? as usize..)?;
        for value in chain {
            // A malformed file can place the chain at the very top of the
            // 32-bit index range; report that as invalid instead of
            // overflowing.
            max_symbol = max_symbol.checked_add(1)?;
            // The low bit marks the final entry of a chain.
            if value.get(endian) & 1 != 0 {
                return Some(max_symbol);
            }
        }

        None
    }

    /// Use the hash table to find the symbol table entry with the given name and hash.
    ///
    /// `hash` is expected to be the GNU hash of `name`; it is used for the
    /// bloom filter test, bucket selection and chain comparison, and is not
    /// recomputed here.
    ///
    /// Returns `None` if the bloom filter rejects the hash, if no symbol with
    /// that name is found in the selected chain, or if the table is malformed
    /// (no bloom filter words, no buckets, or indices outside the table).
    pub fn find<R: ReadRef<'data>>(
        &self,
        endian: Elf::Endian,
        name: &[u8],
        hash: u32,
        symbols: &SymbolTable<'data, Elf, R>,
    ) -> Option<&'data Elf::Sym> {
        let word_size = mem::size_of::<Elf::Word>();
        let word_bits = word_size as u32 * 8;

        // Test against bloom filter.
        let bloom_count = self.bloom_filters.len() / word_size;
        // The word count must be non-zero for the mask below to be
        // meaningful; an empty filter means the table is malformed, and
        // subtracting 1 from it would underflow.
        if bloom_count == 0 {
            return None;
        }
        let offset = ((hash / word_bits) & (bloom_count as u32 - 1)) * word_size as u32;
        let filter = if word_bits == 64 {
            self.bloom_filters
                .read_at::<U64<Elf::Endian>>(offset.into())
                .ok()?
                .get(endian)
        } else {
            self.bloom_filters
                .read_at::<U32<Elf::Endian>>(offset.into())
                .ok()?
                .get(endian)
                .into()
        };
        if filter & (1 << (hash % word_bits)) == 0 {
            return None;
        }
        // `bloom_shift` comes straight from the file; use a wrapping shift so
        // that an out-of-range value cannot trigger a shift-overflow panic.
        if filter & (1 << (hash.wrapping_shr(self.bloom_shift) % word_bits)) == 0 {
            return None;
        }

        // A table without buckets cannot contain any symbol. Checking this
        // also keeps the modulo below from dividing by zero.
        if self.buckets.is_empty() {
            return None;
        }

        // Get the chain start from the bucket for this hash.
        let index = self.buckets[(hash as usize) % self.buckets.len()].get(endian) as usize;
        if index == 0 {
            return None;
        }

        // Test symbols in the chain.
        let strings = symbols.strings();
        let symbols = symbols.symbols().get(index..)?;
        let values = self
            .values
            .get(index.checked_sub(self.symbol_base as usize)?..)?;
        for (symbol, value) in symbols.iter().zip(values.iter()) {
            let value = value.get(endian);
            // The low bit is the end-of-chain marker, so ignore it when
            // comparing hashes.
            if (value | 1) == (hash | 1) && symbol.name(endian, strings) == Ok(name) {
                return Some(symbol);
            }
            if value & 1 != 0 {
                break;
            }
        }
        None
    }
}
3 changes: 3 additions & 0 deletions src/read/elf/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,6 @@ pub use compression::*;

mod note;
pub use note::*;

mod hash;
pub use hash::*;
Loading

0 comments on commit bc43a6f

Please sign in to comment.