Skip to content

Add a simple file header to binary files created by measureme. #41

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 10, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 103 additions & 0 deletions measureme/src/file_header.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
//! All binary files generated by measureme have a simple file header that
//! consists of a 4 byte file magic string and a 4 byte little-endian version
//! number.

use byteorder::{ByteOrder, LittleEndian};
use crate::serialization::SerializationSink;
use std::error::Error;

/// The file format version that `write_file_header` stores in the second
/// half of every header it emits.
pub const CURRENT_FILE_FORMAT_VERSION: u32 = 0;
/// File magic identifying the event stream file.
pub const FILE_MAGIC_EVENT_STREAM: &[u8; 4] = b"MMES";
/// File magic identifying the string table data file.
pub const FILE_MAGIC_STRINGTABLE_DATA: &[u8; 4] = b"MMSD";
/// File magic identifying the string table index file.
pub const FILE_MAGIC_STRINGTABLE_INDEX: &[u8; 4] = b"MMSI";

/// The size of the file header in bytes. Note that functions in this module
/// rely on this size to be `8`.
pub const FILE_HEADER_SIZE: usize = 8;

/// Emits the file header into the sink `s`: the four bytes of `file_magic`
/// followed by `CURRENT_FILE_FORMAT_VERSION` encoded as a little-endian `u32`.
///
/// The whole header is handed to the sink in a single `write_atomic` call of
/// `FILE_HEADER_SIZE` bytes.
pub fn write_file_header<S: SerializationSink>(s: &S, file_magic: &[u8; 4]) {
    // The magic/version split below hard-codes an 8 byte layout. Fail loudly
    // if somebody changes FILE_HEADER_SIZE without updating this function.
    assert_eq!(FILE_HEADER_SIZE, 8);

    s.write_atomic(FILE_HEADER_SIZE, |header| {
        // First half: file magic. Second half: format version.
        let (magic_part, version_part) = header[.. 8].split_at_mut(4);
        magic_part.copy_from_slice(file_magic);
        LittleEndian::write_u32(version_part, CURRENT_FILE_FORMAT_VERSION);
    });
}

pub fn read_file_header(
bytes: &[u8],
expected_magic: &[u8; 4]
) -> Result<u32, Box<dyn Error>> {
// The implementation here relies on FILE_HEADER_SIZE to have the value 8.
// Let's make sure this assumption cannot be violated without being noticed.
assert_eq!(FILE_HEADER_SIZE, 8);

let actual_magic = &bytes[0 .. 4];

if actual_magic != expected_magic {
// FIXME: The error message should mention the file path in order to be
// more useful.
let msg = format!(
"Unexpected file magic `{:?}`. Expected `{:?}`",
actual_magic,
expected_magic,
);

return Err(From::from(msg));
}

Ok(LittleEndian::read_u32(&bytes[4..8]))
}

/// Returns the part of `data` that follows the file header, i.e. everything
/// after the first `FILE_HEADER_SIZE` bytes.
///
/// # Panics
///
/// Panics if `data` is shorter than `FILE_HEADER_SIZE` bytes.
pub fn strip_file_header(data: &[u8]) -> &[u8] {
    let (_header, payload) = data.split_at(FILE_HEADER_SIZE);
    payload
}


#[cfg(test)]
mod tests {
    use super::*;
    use crate::serialization::test::TestSink;

    /// A freshly written header parses and reports the current version.
    #[test]
    fn roundtrip() {
        let sink = TestSink::new();
        write_file_header(&sink, FILE_MAGIC_EVENT_STREAM);
        let header = sink.into_bytes();

        let version = read_file_header(&header, FILE_MAGIC_EVENT_STREAM).unwrap();
        assert_eq!(version, CURRENT_FILE_FORMAT_VERSION);
    }

    /// A header whose magic has been corrupted is rejected.
    #[test]
    fn invalid_magic() {
        let sink = TestSink::new();
        write_file_header(&sink, FILE_MAGIC_STRINGTABLE_DATA);
        let mut header = sink.into_bytes();

        // Overwrite one byte of the file magic so it no longer matches.
        header[2] = 0;

        let result = read_file_header(&header, FILE_MAGIC_STRINGTABLE_DATA);
        assert!(result.is_err());
    }

    /// The version field is returned as stored, even if it is not the
    /// current version.
    #[test]
    fn other_version() {
        let sink = TestSink::new();
        write_file_header(&sink, FILE_MAGIC_STRINGTABLE_INDEX);
        let mut header = sink.into_bytes();

        // Overwrite all four bytes of the version field.
        for byte in &mut header[4 .. 8] {
            *byte = 0xFF;
        }

        let version = read_file_header(&header, FILE_MAGIC_STRINGTABLE_INDEX).unwrap();
        assert_eq!(version, 0xFFFF_FFFF);
    }
}
1 change: 1 addition & 0 deletions measureme/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
mod event;
mod file_header;
mod file_serialization_sink;
mod mmap_serialization_sink;
mod profiler;
Expand Down
5 changes: 5 additions & 0 deletions measureme/src/profiler.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use crate::file_header::{write_file_header, FILE_MAGIC_EVENT_STREAM};
use crate::raw_event::{RawEvent, Timestamp, TimestampKind};
use crate::serialization::SerializationSink;
use crate::stringtable::{SerializableString, StringId, StringTableBuilder};
Expand Down Expand Up @@ -32,6 +33,10 @@ impl<S: SerializationSink> Profiler<S> {
pub fn new(path_stem: &Path) -> Result<Profiler<S>, Box<dyn Error>> {
let paths = ProfilerFiles::new(path_stem);
let event_sink = Arc::new(S::from_path(&paths.events_file)?);

// The first thing in every file we generate must be the file header.
write_file_header(&*event_sink, FILE_MAGIC_EVENT_STREAM);

let string_table = StringTableBuilder::new(
Arc::new(S::from_path(&paths.string_data_file)?),
Arc::new(S::from_path(&paths.string_index_file)?),
Expand Down
19 changes: 11 additions & 8 deletions measureme/src/profiling_data.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
use crate::file_header::FILE_HEADER_SIZE;
use crate::event::Event;
use crate::{ProfilerFiles, RawEvent, StringTable, TimestampKind};
use std::error::Error;
use std::fs;
use std::mem;
use std::path::Path;
Expand All @@ -11,19 +13,19 @@ pub struct ProfilingData {
}

impl ProfilingData {
pub fn new(path_stem: &Path) -> ProfilingData {
pub fn new(path_stem: &Path) -> Result<ProfilingData, Box<dyn Error>> {
let paths = ProfilerFiles::new(path_stem);

let string_data = fs::read(paths.string_data_file).expect("couldn't read string_data file");
let index_data = fs::read(paths.string_index_file).expect("couldn't read string_index file");
let event_data = fs::read(paths.events_file).expect("couldn't read events file");

let string_table = StringTable::new(string_data, index_data);
let string_table = StringTable::new(string_data, index_data)?;

ProfilingData {
Ok(ProfilingData {
string_table,
event_data,
}
})
}

pub fn iter(&self) -> impl Iterator<Item = Event<'_>> {
Expand Down Expand Up @@ -53,15 +55,16 @@ impl<'a> Iterator for ProfilerEventIterator<'a> {
type Item = Event<'a>;

fn next(&mut self) -> Option<Event<'a>> {
let raw_idx = self.curr_event_idx * mem::size_of::<RawEvent>();
let raw_idx_end = raw_idx + mem::size_of::<RawEvent>();
if raw_idx_end > self.data.event_data.len() {
let event_start_addr = FILE_HEADER_SIZE +
self.curr_event_idx * mem::size_of::<RawEvent>();
let event_end_addr = event_start_addr + mem::size_of::<RawEvent>();
if event_end_addr > self.data.event_data.len() {
return None;
}

self.curr_event_idx += 1;

let raw_event_bytes = &self.data.event_data[raw_idx..raw_idx_end];
let raw_event_bytes = &self.data.event_data[event_start_addr..event_end_addr];

let mut raw_event = RawEvent::default();
unsafe {
Expand Down
35 changes: 29 additions & 6 deletions measureme/src/stringtable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,13 @@
//! UTF-8 bytes. The content of a `TAG_STR_REF` is the contents of the entry
//! it references.

use crate::file_header::{write_file_header, read_file_header, strip_file_header,
FILE_MAGIC_STRINGTABLE_DATA, FILE_MAGIC_STRINGTABLE_INDEX};
use crate::serialization::{Addr, SerializationSink};
use byteorder::{ByteOrder, LittleEndian};
use rustc_hash::FxHashMap;
use std::borrow::Cow;
use std::error::Error;
use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::Arc;

Expand Down Expand Up @@ -117,6 +120,11 @@ fn deserialize_index_entry(bytes: &[u8]) -> (StringId, Addr) {

impl<S: SerializationSink> StringTableBuilder<S> {
pub fn new(data_sink: Arc<S>, index_sink: Arc<S>) -> StringTableBuilder<S> {

// The first thing in every file we generate must be the file header.
write_file_header(&*data_sink, FILE_MAGIC_STRINGTABLE_DATA);
write_file_header(&*index_sink, FILE_MAGIC_STRINGTABLE_INDEX);

StringTableBuilder {
data_sink,
index_sink,
Expand Down Expand Up @@ -230,12 +238,27 @@ pub struct StringTable {
}

impl<'data> StringTable {
pub fn new(string_data: Vec<u8>, index_data: Vec<u8>) -> StringTable {
assert!(index_data.len() % 8 == 0);
pub fn new(string_data: Vec<u8>, index_data: Vec<u8>) -> Result<StringTable, Box<dyn Error>> {

let index: FxHashMap<_, _> = index_data.chunks(8).map(deserialize_index_entry).collect();
let string_data_format = read_file_header(&string_data, FILE_MAGIC_STRINGTABLE_DATA)?;
let index_data_format = read_file_header(&index_data, FILE_MAGIC_STRINGTABLE_INDEX)?;

if string_data_format != index_data_format {
Err("Mismatch between StringTable DATA and INDEX format version")?;
}

if string_data_format != 0 {
Err(format!("StringTable file format version '{}' is not supported
by this version of `measureme`.", string_data_format))?;
}

assert!(index_data.len() % 8 == 0);
let index: FxHashMap<_, _> = strip_file_header(&index_data)
.chunks(8)
.map(deserialize_index_entry)
.collect();

StringTable { string_data, index }
Ok(StringTable { string_data, index })
}

#[inline]
Expand All @@ -245,7 +268,7 @@ impl<'data> StringTable {
}

#[cfg(test)]
mod test {
mod tests {
use super::*;

#[test]
Expand Down Expand Up @@ -278,7 +301,7 @@ mod test {
let data_bytes = Arc::try_unwrap(data_sink).unwrap().into_bytes();
let index_bytes = Arc::try_unwrap(index_sink).unwrap().into_bytes();

let string_table = StringTable::new(data_bytes, index_bytes);
let string_table = StringTable::new(data_bytes, index_bytes).unwrap();

for (&id, &expected_string) in string_ids.iter().zip(expected_strings.iter()) {
let str_ref = string_table.get(id);
Expand Down
2 changes: 1 addition & 1 deletion measureme/src/testing_common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ fn generate_profiling_data<S: SerializationSink>(filestem: &Path) -> Vec<Event>
// Process some profiling data. This is the part that would run in a
// post processing tool.
fn process_profiling_data(filestem: &Path, expected_events: &[Event]) {
let profiling_data = ProfilingData::new(filestem);
let profiling_data = ProfilingData::new(filestem).unwrap();

let mut count = 0;

Expand Down
7 changes: 5 additions & 2 deletions mmview/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use std::error::Error;
use std::path::PathBuf;
use measureme::ProfilingData;

Expand All @@ -8,12 +9,14 @@ struct Opt {
file_prefix: PathBuf,
}

fn main() {
fn main() -> Result<(), Box<dyn Error>> {
let opt = Opt::from_args();

let data = ProfilingData::new(&opt.file_prefix);
let data = ProfilingData::new(&opt.file_prefix)?;

for event in data.iter() {
println!("{:?}", event);
}

Ok(())
}
5 changes: 3 additions & 2 deletions stack_collapse/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use std::error::Error;
use std::fs::File;
use std::io::{BufWriter, Write};
use std::path::PathBuf;
Expand All @@ -20,10 +21,10 @@ struct Opt {
interval: u64,
}

fn main() -> Result<(), Box<std::error::Error>> {
fn main() -> Result<(), Box<dyn Error>> {
let opt = Opt::from_args();

let profiling_data = ProfilingData::new(&opt.file_prefix);
let profiling_data = ProfilingData::new(&opt.file_prefix)?;

let first_event_time = {
let current_time = profiling_data.iter().next().unwrap().timestamp;
Expand Down
5 changes: 3 additions & 2 deletions summarize/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#[macro_use]
extern crate prettytable;

use std::error::Error;
use std::fs::File;
use std::io::BufWriter;
use std::path::PathBuf;
Expand All @@ -24,10 +25,10 @@ struct Opt {
percent_above: f64,
}

fn main() -> Result<(), Box<std::error::Error>> {
fn main() -> Result<(), Box<dyn Error>> {
let opt = Opt::from_args();

let data = ProfilingData::new(&opt.file_prefix);
let data = ProfilingData::new(&opt.file_prefix)?;

let mut results = analysis::perform_analysis(data);

Expand Down