Skip to content

v9 format: Increase StringId and Addr size to u64, fixing ICEs during self-profiling #216

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Changelog

## [11.0.0] - 2023-12-14

### Changed
- `measureme`: Update StringId and Addr sizes from u32 to u64 ([GH-216])
- `analyzeme`: v9 file format, which uses larger events ([GH-216])

## [10.1.2] - 2023-12-14

### Changed
Expand Down Expand Up @@ -232,3 +238,4 @@
[GH-208]: https://github.com/rust-lang/measureme/pull/208
[GH-209]: https://github.com/rust-lang/measureme/pull/209
[GH-211]: https://github.com/rust-lang/measureme/pull/211
[GH-216]: https://github.com/rust-lang/measureme/pull/216
8 changes: 7 additions & 1 deletion analyzeme/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "analyzeme"
version = "10.1.2"
version = "11.0.0"
authors = ["Wesley Wiser <wwiser@gmail.com>", "Michael Woerister <michaelwoerister@posteo>"]
edition = "2018"
license = "MIT OR Apache-2.0"
Expand All @@ -14,7 +14,13 @@ serde = { version = "1.0", features = ["derive"] }

# Depending on older versions of this crate allows us to keep supporting older
# file formats.

# File format: v7
analyzeme_9_2_0 = { package = "analyzeme", git = "https://github.com/rust-lang/measureme", tag = "9.2.0" }

# File format: v8
decodeme_10_1_2 = { package = "decodeme", git = "https://github.com/rust-lang/measureme", tag = "10.1.2" }
measureme_10_1_2 = { package = "measureme", git = "https://github.com/rust-lang/measureme", tag = "10.1.2" }

[dev-dependencies]
flate2 = "1.0"
5 changes: 3 additions & 2 deletions analyzeme/src/file_formats/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@ use std::fmt::Debug;

pub mod v7;
pub mod v8;
pub mod v9;

pub use v8 as current;
pub use v9 as current;

/// The [EventDecoder] knows how to decode events for a specific file format.
pub trait EventDecoder: Debug + Send + Sync {
fn num_events(&self) -> usize;
fn metadata(&self) -> &Metadata;
fn metadata(&self) -> Metadata;
fn decode_full_event<'a>(&'a self, event_index: usize) -> Event<'a>;
fn decode_lightweight_event<'a>(&'a self, event_index: usize) -> LightweightEvent;
}
4 changes: 2 additions & 2 deletions analyzeme/src/file_formats/v7.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ impl super::EventDecoder for EventDecoder {
self.legacy_profiling_data.num_events()
}

fn metadata(&self) -> &Metadata {
&self.metadata
fn metadata(&self) -> Metadata {
self.metadata.clone()
}

fn decode_full_event(&self, event_index: usize) -> Event<'_> {
Expand Down
74 changes: 66 additions & 8 deletions analyzeme/src/file_formats/v8.rs
Original file line number Diff line number Diff line change
@@ -1,26 +1,84 @@
//! This module implements file loading for the v8 file format used until
//! crate version 10.0.0
//! crate version 10.0.0.
//!
//! The difference from v8 to v9 copes with the expansion of StringId and Addr
//! types from u32 to u64. Most of the EventDecoder interface is actually
//! unchanged, but the construction of "EventDecoder::new", which parses
//! the stream of events, varies based on these sizes.
//!
//! This file provides conversions to current interfaces, relying on an
//! old version of this crate to parse the u32-based v8 version.

use crate::{Event, LightweightEvent};
pub use decodeme::EventDecoder;
use crate::{Event, EventPayload, LightweightEvent, Timestamp};
use decodeme::Metadata;
use decodeme_10_1_2::event_payload::EventPayload as OldEventPayload;
use decodeme_10_1_2::event_payload::Timestamp as OldTimestamp;
use decodeme_10_1_2::lightweight_event::LightweightEvent as OldLightweightEvent;
pub use decodeme_10_1_2::EventDecoder;
use decodeme_10_1_2::Metadata as OldMetadata;

pub const FILE_FORMAT: u32 = decodeme::CURRENT_FILE_FORMAT_VERSION;
pub const FILE_FORMAT: u32 = measureme_10_1_2::file_header::CURRENT_FILE_FORMAT_VERSION;

// NOTE: These are functionally a hand-rolled "impl From<Old> -> New", but
// given orphan rules, it seems undesirable to spread version-specific
// converters around the codebase.
//
// In lieu of an idiomatic type conversion, we at least centralize compatibility
// with the old "v8" version to this file.

fn v8_metadata_as_current(old: &OldMetadata) -> Metadata {
Metadata {
start_time: old.start_time,
process_id: old.process_id,
cmd: old.cmd.clone(),
}
}

fn v8_timestamp_as_current(old: OldTimestamp) -> Timestamp {
match old {
OldTimestamp::Interval { start, end } => Timestamp::Interval { start, end },
OldTimestamp::Instant(t) => Timestamp::Instant(t),
}
}

fn v8_event_payload_as_current(old: OldEventPayload) -> EventPayload {
match old {
OldEventPayload::Timestamp(t) => EventPayload::Timestamp(v8_timestamp_as_current(t)),
OldEventPayload::Integer(t) => EventPayload::Integer(t),
}
}

fn v8_lightweightevent_as_current(old: OldLightweightEvent) -> LightweightEvent {
LightweightEvent {
event_index: old.event_index,
thread_id: old.thread_id,
payload: v8_event_payload_as_current(old.payload),
}
}

impl super::EventDecoder for EventDecoder {
fn num_events(&self) -> usize {
self.num_events()
}

fn metadata(&self) -> &Metadata {
self.metadata()
fn metadata(&self) -> Metadata {
let old = self.metadata();
v8_metadata_as_current(&old)
}

fn decode_full_event(&self, event_index: usize) -> Event<'_> {
self.decode_full_event(event_index)
let old = self.decode_full_event(event_index);

Event {
event_kind: old.event_kind,
label: old.label,
additional_data: old.additional_data,
payload: v8_event_payload_as_current(old.payload),
thread_id: old.thread_id,
}
}

fn decode_lightweight_event(&self, event_index: usize) -> LightweightEvent {
self.decode_lightweight_event(event_index)
v8_lightweightevent_as_current(self.decode_lightweight_event(event_index))
}
}
25 changes: 25 additions & 0 deletions analyzeme/src/file_formats/v9.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
//! This module implements file loading for the v9 file format

use crate::{Event, LightweightEvent};
pub use decodeme::EventDecoder;
use decodeme::Metadata;

pub const FILE_FORMAT: u32 = decodeme::CURRENT_FILE_FORMAT_VERSION;

impl super::EventDecoder for EventDecoder {
fn num_events(&self) -> usize {
self.num_events()
}

fn metadata(&self) -> Metadata {
self.metadata()
}

fn decode_full_event(&self, event_index: usize) -> Event<'_> {
self.decode_full_event(event_index)
}

fn decode_lightweight_event(&self, event_index: usize) -> LightweightEvent {
self.decode_lightweight_event(event_index)
}
}
98 changes: 93 additions & 5 deletions analyzeme/src/profiling_data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use measureme::file_header::{
use measureme::{
EventId, PageTag, RawEvent, SerializationSink, SerializationSinkBuilder, StringTableBuilder,
};
use std::cell::OnceCell;
use std::fs;
use std::path::Path;
use std::sync::Arc;
Expand All @@ -15,6 +16,7 @@ use std::{error::Error, path::PathBuf};
#[derive(Debug)]
pub struct ProfilingData {
event_decoder: Box<dyn EventDecoder>,
metadata: OnceCell<Metadata>,
}

impl ProfilingData {
Expand Down Expand Up @@ -50,9 +52,6 @@ impl ProfilingData {
data: Vec<u8>,
diagnostic_file_path: Option<&Path>,
) -> Result<ProfilingData, Box<dyn Error + Send + Sync>> {
// let event_decoder = EventDecoder::new(data, diagnostic_file_path)?;
// Ok(ProfilingData { event_decoder })

let file_format_version = read_file_header(
&data,
FILE_MAGIC_TOP_LEVEL,
Expand All @@ -66,6 +65,10 @@ impl ProfilingData {
data,
diagnostic_file_path,
)?),
file_formats::v9::FILE_FORMAT => Box::new(file_formats::v9::EventDecoder::new(
data,
diagnostic_file_path,
)?),
unsupported_version => {
let msg = if unsupported_version > file_formats::current::FILE_FORMAT {
format!(
Expand All @@ -83,11 +86,15 @@ impl ProfilingData {
}
};

Ok(ProfilingData { event_decoder })
Ok(ProfilingData {
event_decoder,
metadata: OnceCell::new(),
})
}

pub fn metadata(&self) -> &Metadata {
self.event_decoder.metadata()
// Cache the metadata during the first access
self.metadata.get_or_init(|| self.event_decoder.metadata())
}

pub fn iter<'a>(&'a self) -> ProfilerEventIterator<'a> {
Expand Down Expand Up @@ -301,6 +308,7 @@ impl ProfilingDataBuilder {
)
.unwrap(),
),
metadata: OnceCell::new(),
}
}

Expand Down Expand Up @@ -641,6 +649,86 @@ mod tests {
);
}

// To generate this revision, a v9 revision of the rust toolchain was
// created, and "rustup toolchain link" was used to name it "bespoke".
// Then, the following commands were executed:
//
// # Make a small test binary and profile it.
// cargo new --bin testbinary
// cargo +bespoke rustc --bin testbinary -- -Zself-profile
//
// # Gzip the output profdata.
// gzip testbinary-...mm_profdata
// mv testbinary-...mm_profdata.gz v9.mm_profdata.gz
#[test]
fn can_read_v9_profdata_files() {
let (data, file_format_version) =
read_data_and_version("tests/profdata/v9.mm_profdata.gz");
assert_eq!(file_format_version, file_formats::v9::FILE_FORMAT);
let profiling_data = ProfilingData::from_paged_buffer(data, None)
.expect("Creating the profiling data failed");
let grouped_events = group_events(&profiling_data);
let event_kinds = grouped_events
.keys()
.map(|k| k.as_str())
.collect::<HashSet<_>>();
let expect_event_kinds = vec![
"GenericActivity",
"IncrementalResultHashing",
"Query",
"ArtifactSize",
]
.into_iter()
.collect::<HashSet<_>>();
assert_eq!(event_kinds, expect_event_kinds);

let generic_activity_len = 5125;
let incremental_hashing_len = 1844;
let query_len = 1877;
let artifact_size_len = 24;
assert_eq!(
grouped_events["GenericActivity"].len(),
generic_activity_len
);
assert_eq!(
grouped_events["IncrementalResultHashing"].len(),
incremental_hashing_len
);
assert_eq!(grouped_events["Query"].len(), query_len);
assert_eq!(grouped_events["ArtifactSize"].len(), artifact_size_len);

assert_eq!(
grouped_events["GenericActivity"][generic_activity_len / 2].label,
"metadata_decode_entry_item_attrs"
);
assert_eq!(
grouped_events["GenericActivity"][generic_activity_len / 2].duration(),
Some(Duration::from_nanos(376))
);

assert_eq!(
grouped_events["IncrementalResultHashing"][incremental_hashing_len - 1].label,
"crate_hash"
);
assert_eq!(
grouped_events["IncrementalResultHashing"][incremental_hashing_len - 1].duration(),
Some(Duration::from_nanos(461))
);

assert_eq!(grouped_events["Query"][0].label, "registered_tools");
assert_eq!(
grouped_events["Query"][0].duration(),
Some(Duration::from_nanos(45077))
);

assert_eq!(
grouped_events["ArtifactSize"][0].label,
"codegen_unit_size_estimate"
);
assert_eq!(grouped_events["ArtifactSize"][0].duration(), None);
assert_eq!(grouped_events["ArtifactSize"][0].integer(), Some(3));
}

fn read_data_and_version(file_path: &str) -> (Vec<u8>, u32) {
let data = std::fs::read(file_path).expect("Test data not found");
let mut gz = flate2::read::GzDecoder::new(&data[..]);
Expand Down
Binary file added analyzeme/tests/profdata/v9.mm_profdata.gz
Binary file not shown.
2 changes: 1 addition & 1 deletion crox/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "crox"
version = "10.1.2"
version = "11.0.0"
authors = ["Wesley Wiser <wwiser@gmail.com>"]
edition = "2018"

Expand Down
2 changes: 1 addition & 1 deletion decodeme/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "decodeme"
version = "10.1.2"
version = "11.0.0"
edition = "2018"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
Expand Down
18 changes: 12 additions & 6 deletions decodeme/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ where
.expect("a time that can be represented as SystemTime"))
}

#[derive(Debug, Deserialize)]
#[derive(Clone, Debug, Deserialize)]
pub struct Metadata {
#[serde(deserialize_with = "system_time_from_nanos")]
pub start_time: SystemTime,
Expand Down Expand Up @@ -113,9 +113,15 @@ impl EventDecoder {

let mut split_data = measureme::split_streams(&entire_file_data[FILE_HEADER_SIZE..]);

let string_data = split_data.remove(&PageTag::StringData).expect("Invalid file: No string data found");
let index_data = split_data.remove(&PageTag::StringIndex).expect("Invalid file: No string index data found");
let event_data = split_data.remove(&PageTag::Events).expect("Invalid file: No event data found");
let string_data = split_data
.remove(&PageTag::StringData)
.expect("Invalid file: No string data found");
let index_data = split_data
.remove(&PageTag::StringIndex)
.expect("Invalid file: No string index data found");
let event_data = split_data
.remove(&PageTag::Events)
.expect("Invalid file: No event data found");

Self::from_separate_buffers(string_data, index_data, event_data, diagnostic_file_path)
}
Expand Down Expand Up @@ -151,8 +157,8 @@ impl EventDecoder {
event_byte_count / RAW_EVENT_SIZE
}

pub fn metadata(&self) -> &Metadata {
&self.metadata
pub fn metadata(&self) -> Metadata {
self.metadata.clone()
}

pub fn decode_full_event<'a>(&'a self, event_index: usize) -> Event<'a> {
Expand Down
Loading