Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support non-utf8 data in collapse by doing lossy conversion #196

Merged
merged 4 commits into from
Nov 30, 2020
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions src/collapse/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -740,18 +740,19 @@ pub(crate) mod testing {

fn count_lines_and_stacks(bytes: &[u8]) -> (usize, usize) {
let mut reader = io::BufReader::new(bytes);
let mut line = String::new();
let mut line = Vec::new();

let (mut nlines, mut nstacks) = (0, 0);
loop {
line.clear();
let n = reader.read_line(&mut line).unwrap();
let n = reader.read_until(0x0A, &mut line).unwrap();
if n == 0 {
nstacks += 1;
break;
}
let l = String::from_utf8_lossy(&line);
nlines += 1;
if line.trim().is_empty() {
if l.trim().is_empty() {
nstacks += 1;
}
}
Expand Down
30 changes: 9 additions & 21 deletions src/collapse/dtrace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,15 +77,15 @@ impl CollapsePrivate for Folder {
R: io::BufRead,
{
// Consume the header...
let mut line = String::new();
let mut line = Vec::new();
loop {
line.clear();
if reader.read_line(&mut line)? == 0 {
if reader.read_until(0x0A, &mut line)? == 0 {
// We reached the end :( this should not happen.
warn!("File ended while skipping headers");
return Ok(());
};
if line.trim().is_empty() {
if String::from_utf8_lossy(&line).trim().is_empty() {
return Ok(());
}
}
Expand All @@ -99,13 +99,14 @@ impl CollapsePrivate for Folder {
where
R: io::BufRead,
{
let mut line = String::new();
let mut line = Vec::new();
loop {
line.clear();
if reader.read_line(&mut line)? == 0 {
if reader.read_until(0x0A, &mut line)? == 0 {
break;
}
let line = line.trim();
let s = String::from_utf8_lossy(&line);
let line = s.trim();
if line.is_empty() {
continue;
} else if let Ok(count) = line.parse::<usize>() {
Expand Down Expand Up @@ -455,7 +456,7 @@ mod tests {
}

#[test]
fn test_collapse_multi_dtrace_stop_early() {
fn test_collapse_multi_dtrace_non_utf8() {
let invalid_utf8 = unsafe { std::str::from_utf8_unchecked(&[0xf0, 0x28, 0x8c, 0xbc]) };
let invalid_stack = format!("genunix`cv_broadcast+0x1{}\n1\n\n", invalid_utf8);
let valid_stack = "genunix`cv_broadcast+0x1\n1\n\n";
Expand All @@ -472,20 +473,7 @@ mod tests {
let mut folder = Folder::default();
folder.nstacks_per_job = 1;
folder.opt.nthreads = 12;
match <Folder as Collapse>::collapse(&mut folder, &input[..], io::sink()) {
Ok(_) => panic!("collapse should have return error, but instead returned Ok."),
Err(e) => match e.kind() {
io::ErrorKind::InvalidData => assert_eq!(
&format!("{}", e),
"stream did not contain valid UTF-8",
"error message is incorrect.",
),
k => panic!(
"collapse should have returned `InvalidData` error but instead returned {:?}",
k
),
},
}
<Folder as Collapse>::collapse(&mut folder, &input[..], io::sink()).unwrap();
}

#[test]
Expand Down
13 changes: 7 additions & 6 deletions src/collapse/perf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ impl CollapsePrivate for Folder {
// the first stack to figure it out (the worker threads need this
// information to get started). Only read one stack, however, as we would
// like the remaining stacks to be processed on the worker threads.
let mut line_buffer = String::new();
let mut line_buffer = Vec::new();
let eof = self.process_single_stack(&mut line_buffer, reader, occurrences)?;

if eof {
Expand All @@ -184,7 +184,7 @@ impl CollapsePrivate for Folder {
R: io::BufRead,
{
// While there are still stacks left to process, process them...
let mut line_buffer = String::new();
let mut line_buffer = Vec::new();
while !self.process_single_stack(&mut line_buffer, &mut reader, occurrences)? {}

// Reset state...
Expand Down Expand Up @@ -273,7 +273,7 @@ impl Folder {
/// Processes a stack. On success, returns `true` if at end of data; `false` otherwise.
fn process_single_stack<R>(
&mut self,
line_buffer: &mut String,
line_buffer: &mut Vec<u8>,
reader: &mut R,
occurrences: &mut Occurrences,
) -> io::Result<bool>
Expand All @@ -282,16 +282,17 @@ impl Folder {
{
loop {
line_buffer.clear();
if reader.read_line(line_buffer)? == 0 {
if reader.read_until(0x0A, line_buffer)? == 0 {
if !self.stack.is_empty() {
self.after_event(occurrences);
}
return Ok(true);
}
if line_buffer.starts_with('#') {
let line = String::from_utf8_lossy(line_buffer);
if line.starts_with('#') {
continue;
}
let line = line_buffer.trim_end();
let line = line.trim_end();
if line.is_empty() {
self.after_event(occurrences);
return Ok(false);
Expand Down
12 changes: 7 additions & 5 deletions src/collapse/sample.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,14 +76,15 @@ impl Collapse for Folder {
W: io::Write,
{
// Consume the header...
let mut line = String::new();
let mut line = Vec::new();
loop {
line.clear();
if reader.read_line(&mut line)? == 0 {
if reader.read_until(0x0A, &mut line)? == 0 {
warn!("File ended before start of call graph");
return Ok(());
};
if line.starts_with(START_LINE) {
let l = String::from_utf8_lossy(&line);
if l.starts_with(START_LINE) {
break;
}
}
Expand All @@ -92,10 +93,11 @@ impl Collapse for Folder {
let mut occurrences = Occurrences::new(1);
loop {
line.clear();
if reader.read_line(&mut line)? == 0 {
if reader.read_until(0x0A, &mut line)? == 0 {
return invalid_data_error!("File ended before end of call graph");
}
let line = line.trim_end();
let l = String::from_utf8_lossy(&line);
let line = l.trim_end();
if line.is_empty() {
continue;
} else if line.starts_with(" ") {
Expand Down
12 changes: 7 additions & 5 deletions src/collapse/vtune.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,15 @@ impl Collapse for Folder {
W: io::Write,
{
// Consume the header...
let mut line = String::new();
let mut line = Vec::new();
loop {
line.clear();
if reader.read_line(&mut line)? == 0 {
if reader.read_until(0x0A, &mut line)? == 0 {
warn!("File ended before header");
return Ok(());
};
if line.starts_with(HEADER) {
let l = String::from_utf8_lossy(&line);
if l.starts_with(HEADER) {
break;
}
}
Expand All @@ -68,10 +69,11 @@ impl Collapse for Folder {
let mut occurrences = Occurrences::new(1);
loop {
line.clear();
if reader.read_line(&mut line)? == 0 {
if reader.read_until(0x0A, &mut line)? == 0 {
break;
}
let line = line.trim_end();
let l = String::from_utf8_lossy(&line);
let line = l.trim_end();
if line.is_empty() {
continue;
} else {
Expand Down
9 changes: 5 additions & 4 deletions src/differential/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,17 +90,18 @@ where
R: BufRead,
{
let mut total = 0;
let mut line = String::new();
let mut line = Vec::new();
let mut stripped_fractional_samples = false;
loop {
line.clear();

if reader.read_line(&mut line)? == 0 {
if reader.read_until(0x0A, &mut line)? == 0 {
break;
}

let l = String::from_utf8_lossy(&line);
if let Some((stack, count)) =
parse_line(&line, opt.strip_hex, &mut stripped_fractional_samples)
parse_line(&l, opt.strip_hex, &mut stripped_fractional_samples)
{
let mut counts = stack_counts.entry(stack).or_default();
if is_first {
Expand All @@ -110,7 +111,7 @@ where
}
total += count;
} else {
warn!("Unable to parse line: {}", line);
warn!("Unable to parse line: {}", l);
}
}

Expand Down
7 changes: 4 additions & 3 deletions src/flamegraph/attrs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,15 +38,16 @@ impl FuncFrameAttrsMap {
/// tab-separated `name=value` pairs.
pub fn from_reader<R: BufRead>(mut reader: R) -> io::Result<FuncFrameAttrsMap> {
let mut funcattr_map = FuncFrameAttrsMap::default();
let mut line = String::new();
let mut line = Vec::new();
loop {
line.clear();

if reader.read_line(&mut line)? == 0 {
if reader.read_until(0x0A, &mut line)? == 0 {
break;
}

let mut line = line.trim().splitn(2, '\t');
let l = String::from_utf8_lossy(&line);
let mut line = l.trim().splitn(2, '\t');
let func = unwrap_or_continue!(line.next());
if func.is_empty() {
continue;
Expand Down