Skip to content

Commit

Permalink
Remove Result wrappers on some methods
Browse files Browse the repository at this point in the history
`analyze` never had a case where it could fail. Introducing a failure
would be a breaking change in behavior, so I'd rather alter the type
if that ever needs to happen.

The TextData methods could all return Err in the event of a programming
error, but these have been changed to panic instead. There wasn't a
reasonable case where they'd return Err due to user input.
  • Loading branch information
Jacob Peddicord committed Jan 17, 2019
1 parent 11c7f81 commit 8d11161
Show file tree
Hide file tree
Showing 9 changed files with 54 additions and 73 deletions.
4 changes: 2 additions & 2 deletions cli/src/identify.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ pub fn identify_data(
want_diff: bool,
) -> Result<Identification, Error> {
let inst = Instant::now();
let matched = store.analyze(&text_data)?;
let matched = store.analyze(&text_data);

info!(
"{:?} in {} ms",
Expand Down Expand Up @@ -126,7 +126,7 @@ pub fn identify_data(
// try again, optimizing for the current best match
if optimize {
let inst = Instant::now();
let (opt, score) = text_data.optimize_bounds(matched.data)?;
let (opt, score) = text_data.optimize_bounds(matched.data);
let (lower, upper) = opt.lines_view();

info!(
Expand Down
81 changes: 33 additions & 48 deletions src/license.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

use std::{collections::HashMap, fmt};

use failure::{format_err, Error};
use serde_derive::{Deserialize, Serialize};

use crate::{
Expand Down Expand Up @@ -69,7 +68,7 @@ impl fmt::Display for LicenseType {
/// # fn main() -> Result<(), Box<Error>> {
/// # let license = TextData::from("My First License");
/// let sample = TextData::from("copyright 20xx me irl\n// My First License\nfn hello() {\n ...");
/// let (optimized, score) = sample.optimize_bounds(&license)?;
/// let (optimized, score) = sample.optimize_bounds(&license);
/// assert_eq!((1, 2), optimized.lines_view());
/// assert!(score > 0.99f32, "license within text matches");
/// # Ok(())
Expand Down Expand Up @@ -107,11 +106,6 @@ impl TextData {
}
}

// impl specialization might be nice to indicate that this type
// is lacking stored text; perhaps there's another way to indicate that?
// maybe an impl on an enum variant if/when that's available:
// https://github.com/rust-lang/rfcs/pull/1450

/// Consume this `TextData`, returning one without normalized/processed
/// text stored.
///
Expand Down Expand Up @@ -148,19 +142,16 @@ impl TextData {
///
/// Other methods on `TextView` respect this boundary, so it's not needed
/// outside this struct.
pub fn with_view(&self, start: usize, end: usize) -> Result<Self, Error> {
let view = match self.lines_normalized {
Some(ref lines) => &lines[start..end],
None => return Err(format_err!("TextData does not have original text")),
};
pub fn with_view(&self, start: usize, end: usize) -> Self {
let view = &self.lines_normalized.as_ref().expect("TextData does not have original text")[start..end];
let view_joined = view.join("\n");
let processed = apply_aggressive(&view_joined);
Ok(TextData {
TextData {
match_data: NgramSet::from_str(&processed, 2),
lines_view: (start, end),
lines_normalized: self.lines_normalized.clone(),
text_processed: Some(processed),
})
}
}

/// "Erase" the current lines in view and restore the view to its original
Expand All @@ -170,12 +161,12 @@ impl TextData {
/// (and located) with `optimize_bounds`. Now you want to find the other:
/// white-out the matched lines, and re-run the overall search to find a
/// new high score.
pub fn white_out(&self) -> Result<Self, Error> {
pub fn white_out(&self) -> Self {
// note that we're not using the view here...
let lines = self
.lines_normalized
.as_ref()
.ok_or_else(|| format_err!("TextData does not have original text"))?;
.expect("TextData does not have original text");

// ...because it's used here to exclude lines
let new_normalized: Vec<String> = lines
Expand All @@ -191,22 +182,17 @@ impl TextData {
.collect();

let processed = apply_aggressive(&new_normalized.join("\n"));
Ok(TextData {
TextData {
match_data: NgramSet::from_str(&processed, 2),
lines_view: (0, new_normalized.len()),
lines_normalized: Some(new_normalized),
text_processed: Some(processed),
})
}
}

/// Get a slice of the normalized lines in this `TextData`.
///
/// If the text was discarded with `without_text`, this returns `None`.
pub fn lines(&self) -> Option<&[String]> {
match self.lines_normalized {
Some(ref lines) => Some(&lines[self.lines_view.0..self.lines_view.1]),
None => None,
}
pub fn lines(&self) -> &[String] {
&self.lines_normalized.as_ref().expect("TextData does not have original text")[self.lines_view.0..self.lines_view.1]
}

#[doc(hidden)]
Expand Down Expand Up @@ -239,17 +225,17 @@ impl TextData {
///
/// You should check the value of `lines_view` on the returned struct to
/// find the line ranges.
pub fn optimize_bounds(&self, other: &TextData) -> Result<(Self, f32), Error> {
pub fn optimize_bounds(&self, other: &TextData) -> (Self, f32) {
if self.lines_normalized.is_none() {
return Err(format_err!("TextData does not have original text"));
panic!("TextData does not have original text");
}

let view = self.lines_view;

// optimize the ending bounds of the text match
let (end_optimized, _) = self.search_optimize(
&|end| self.with_view(view.0, end).unwrap().match_score(other),
&|end| self.with_view(view.0, end).unwrap(),
&|end| self.with_view(view.0, end).match_score(other),
&|end| self.with_view(view.0, end),
);
let new_end = end_optimized.lines_view.1;

Expand All @@ -258,12 +244,11 @@ impl TextData {
&|start| {
end_optimized
.with_view(start, new_end)
.unwrap()
.match_score(other)
},
&|start| end_optimized.with_view(start, new_end).unwrap(),
&|start| end_optimized.with_view(start, new_end),
);
Ok((optimized, score))
(optimized, score)
}

fn search_optimize(
Expand Down Expand Up @@ -327,25 +312,25 @@ mod tests {
let license = TextData::from(license_text).without_text();
let sample = TextData::from(sample_text);

let (optimized, _) = sample.optimize_bounds(&license).unwrap();
let (optimized, _) = sample.optimize_bounds(&license);
println!("{:?}", optimized.lines_view);
println!("{:?}", optimized.lines_normalized.clone().unwrap());
println!("{:?}", optimized.lines_normalized.clone());
assert_eq!((0, 3), optimized.lines_view);

// add more to the string, try again (avoid int trunc screwups)
let sample_text = format!("{}\none more line", sample_text);
let sample = TextData::from(sample_text.as_str());
let (optimized, _) = sample.optimize_bounds(&license).unwrap();
let (optimized, _) = sample.optimize_bounds(&license);
println!("{:?}", optimized.lines_view);
println!("{:?}", optimized.lines_normalized.clone().unwrap());
println!("{:?}", optimized.lines_normalized.clone());
assert_eq!((0, 3), optimized.lines_view);

// add to the beginning too
let sample_text = format!("some content\nat\n\nthe beginning\n{}", sample_text);
let sample = TextData::from(sample_text.as_str());
let (optimized, _) = sample.optimize_bounds(&license).unwrap();
let (optimized, _) = sample.optimize_bounds(&license);
println!("{:?}", optimized.lines_view);
println!("{:?}", optimized.lines_normalized.clone().unwrap());
println!("{:?}", optimized.lines_normalized.clone());
// end bounds at 7 and 8 have the same score, since they're empty lines (not
// counted). askalono is not smart enough to trim this as close as it
// can.
Expand All @@ -365,22 +350,22 @@ mod tests {
let license = TextData::from(license_text).without_text();

// sanity: the optimized bounds should be at (3, 7)
let (optimized, _) = sample.optimize_bounds(&license).unwrap();
let (optimized, _) = sample.optimize_bounds(&license);
assert_eq!((3, 7), optimized.lines_view);

// this should still work
let sample = sample.with_view(3, 7).unwrap();
let (optimized, _) = sample.optimize_bounds(&license).unwrap();
let sample = sample.with_view(3, 7);
let (optimized, _) = sample.optimize_bounds(&license);
assert_eq!((3, 7), optimized.lines_view);

// but if we shrink the view further, it shouldn't be outside that range
let sample = sample.with_view(4, 6).unwrap();
let (optimized, _) = sample.optimize_bounds(&license).unwrap();
let sample = sample.with_view(4, 6);
let (optimized, _) = sample.optimize_bounds(&license);
assert_eq!((4, 6), optimized.lines_view);

// restoring the view should still be OK too
let sample = sample.with_view(0, 9).unwrap();
let (optimized, _) = sample.optimize_bounds(&license).unwrap();
let sample = sample.with_view(0, 9);
let (optimized, _) = sample.optimize_bounds(&license);
assert_eq!((3, 7), optimized.lines_view);
}

Expand Down Expand Up @@ -413,11 +398,11 @@ mod tests {
let a = TextData::from("aaa\nbbb\nccc\nddd");
assert_eq!(Some("aaa bbb ccc ddd"), a.text_processed());

let b = a.with_view(1, 3).expect("with_view must be ok");
assert_eq!(2, b.lines().unwrap().len());
let b = a.with_view(1, 3);
assert_eq!(2, b.lines().len());
assert_eq!(Some("bbb ccc"), b.text_processed());

let c = b.white_out().expect("white_out must be ok");
let c = b.white_out();
assert_eq!(Some("aaa ddd"), c.text_processed());
}
}
8 changes: 3 additions & 5 deletions src/store/analyze.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@

use std::{cmp::Ordering, fmt};

use failure::Error;

use crate::{license::LicenseType, license::TextData, store::base::Store};

/// Information about text that was compared against licenses in the store.
Expand Down Expand Up @@ -99,7 +97,7 @@ impl Store {
/// This parallelizes the search as much as it can to find the best match.
/// Once a match is obtained, it can be optimized further; see methods on
/// `TextData` for more information.
pub fn analyze(&self, text: &TextData) -> Result<Match<'_>, Error> {
pub fn analyze(&self, text: &TextData) -> Match<'_> {
let mut res: Vec<PartialMatch<'_>>;

// parallel analysis
Expand Down Expand Up @@ -135,11 +133,11 @@ impl Store {
}

let m = &res[0];
Ok(Match {
Match {
score: m.score,
name: m.name.to_string(),
license_type: m.license_type,
data: m.data,
})
}
}
}
2 changes: 1 addition & 1 deletion src/store/base.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ pub(crate) struct LicenseEntry {
///
/// # fn main() -> Result<(), Box<Error>> {
/// let store = Store::from_cache(File::open("askalono-cache.bin.gz")?)?;
/// let result = store.analyze(&TextData::from("what's this"))?;
/// let result = store.analyze(&TextData::from("what's this"));
/// # Ok(())
/// # }
/// ```
Expand Down
18 changes: 8 additions & 10 deletions src/strategy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ impl<'a> ScanStrategy<'a> {
}

fn scan_elimination(&self, text: &TextData) -> Result<ScanResult, Error> {
let mut analysis = self.store.analyze(text)?;
let mut analysis = self.store.analyze(text);
let score = analysis.score;
let mut license = None;
let mut containing = Vec::new();
Expand Down Expand Up @@ -222,8 +222,7 @@ impl<'a> ScanStrategy<'a> {
let mut current_text: Cow<'_, TextData> = Cow::Borrowed(text);
for _n in 0..self.max_passes {
let (optimized, optimized_score) = current_text
.optimize_bounds(analysis.data)
.expect("optimize_bounds failed in elimination loop");
.optimize_bounds(analysis.data);

// stop if we didn't find anything acceptable
if optimized_score < self.confidence_threshold {
Expand All @@ -241,8 +240,8 @@ impl<'a> ScanStrategy<'a> {
});

// and white-out + reanalyze for next iteration
current_text = Cow::Owned(optimized.white_out().expect("optimized must have text"));
analysis = self.store.analyze(&current_text)?;
current_text = Cow::Owned(optimized.white_out());
analysis = self.store.analyze(&current_text);
}
}

Expand Down Expand Up @@ -304,8 +303,8 @@ impl<'a> ScanStrategy<'a> {
'start: for start in (starting_at..text_end).step_by(self.step_size) {
// ...and also the end of window to find high scores.
for end in (start..=text_end).step_by(self.step_size) {
let view = text.with_view(start, end).expect("view missing text");
let analysis = self.store.analyze(&view)?;
let view = text.with_view(start, end);
let analysis = self.store.analyze(&view);

// just getting a feel for the data at this point, not yet
// optimizing the view.
Expand Down Expand Up @@ -346,10 +345,9 @@ impl<'a> ScanStrategy<'a> {
None => return Ok(None),
};
let check = matched.data;
let view = text.with_view(found.0, found.1).expect("view missing text");
let view = text.with_view(found.0, found.1);
let (optimized, optimized_score) = view
.optimize_bounds(check)
.expect("optimize_bounds failed narrowing top-down result");
.optimize_bounds(check);

trace!(
"optimized {} {} at ({:?})",
Expand Down
4 changes: 2 additions & 2 deletions tests/introspection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,14 @@ fn self_apache_header() {
let mut text = String::new();
f.read_to_string(&mut text).unwrap();
let text_data: TextData = text.into();
let matched = store.analyze(&text_data).unwrap();
let matched = store.analyze(&text_data);

// check that it looked apache-2.0-ish
assert_eq!("Apache-2.0", &matched.name);
assert_eq!(askalono::LicenseType::Header, matched.license_type);

// now try to find the bounds of the license header
let (optimized, _) = text_data.optimize_bounds(&matched.data).unwrap();
let (optimized, _) = text_data.optimize_bounds(&matched.data);

// the license is from (0-indexed) lines 3 thru 12 of this file, excluding
// that copyright statement on line 1.
Expand Down
2 changes: 1 addition & 1 deletion tests/real_world.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ fn assert_license(
f.read_to_string(&mut text).unwrap();
let text_data: TextData = text.into();

let matched = store.analyze(&text_data).unwrap();
let matched = store.analyze(&text_data);
assert_eq!(
license_id, matched.name,
"{} was identified as {} but should have been {}",
Expand Down
4 changes: 2 additions & 2 deletions tests/store_sanity.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ fn self_licenses() {
let mut text = String::new();
f.read_to_string(&mut text).unwrap();
let text_data: TextData = text.into();
let matched = store.analyze(&text_data).unwrap();
let matched = store.analyze(&text_data);

assert_eq!(license, &matched.name);
assert_eq!(
Expand All @@ -46,7 +46,7 @@ fn self_licenses() {
fn empty_match() {
let store = common::load_store();
let text = TextData::from("");
let matched = store.analyze(&text).unwrap();
let matched = store.analyze(&text);

assert_eq!(0f32, matched.score);
}
4 changes: 2 additions & 2 deletions wasm/pkg/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ impl MatchResult {
#[wasm_bindgen]
pub fn normalize_text(text: &str) -> String {
let data = TextData::new(text);
data.lines().unwrap().join("\n")
data.lines().join("\n")
}

#[wasm_bindgen]
Expand All @@ -50,7 +50,7 @@ impl AskalonoStore {
}

pub fn identify(&self, text: &str) -> MatchResult {
let matched = self.store.analyze(&text.into()).unwrap();
let matched = self.store.analyze(&text.into());
MatchResult {
name: matched.name,
score: matched.score,
Expand Down

0 comments on commit 8d11161

Please sign in to comment.