-
-
Notifications
You must be signed in to change notification settings - Fork 27
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Change checkers to become objects similar to decode (#29)
* Releasing 0.0.1 * switching branches to add file input * switching branches to add file input * checker works! * comments * feat: added test for DefaultChecker and fixed clippy warnings * formatted code * Updated english checker and lemmeknow to have valid values * Fix small issue around not linking to the right thing Co-authored-by: bee <bee@skerritt.blog> Co-authored-by: swanandx <73115739+swanandx@users.noreply.github.com>
- Loading branch information
1 parent
2382baa
commit c615b3e
Showing
13 changed files
with
290 additions
and
62 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
/// import general checker | ||
use crate::checkers::{ | ||
checker_type::{Check, Checker}, | ||
default_checker::DefaultChecker, | ||
}; | ||
use lemmeknow::Identify; | ||
|
||
/// The default input to the library API. | ||
/// The CLI turns its arguments into a `LibraryInput` struct. | ||
/// `Type` is the marker type parameter handed on to `Checker<Type>`. | ||
pub struct LibraryInput<Type> { | ||
/// The input to be decoded. | ||
/// Given to us by the user. | ||
pub encoded_text: String, | ||
/// The verbosity level, which determines | ||
/// how much we print in logs. | ||
pub verbose: i32, | ||
/// The checker to use | ||
pub checker: Checker<Type>, | ||
/// The lemmeknow config to use | ||
pub lemmeknow_config: Identify, | ||
} | ||
|
||
const LEMMEKNOW_DEFAULT_CONFIG: Identify = Identify { | ||
min_rarity: None, | ||
max_rarity: None, | ||
tags: vec![], | ||
exclude_tags: vec![], | ||
file_support: false, | ||
boundaryless: false, | ||
}; | ||
|
||
impl Default for LibraryInput<DefaultChecker> { | ||
fn default() -> Self { | ||
LibraryInput { | ||
encoded_text: String::new(), | ||
// this will be of type Checker<DefaultChecker> | ||
checker: Checker::new(), | ||
verbose: 0, | ||
lemmeknow_config: LEMMEKNOW_DEFAULT_CONFIG, | ||
} | ||
} | ||
} |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
use super::checker_type::Checker; | ||
|
||
/// The outcome of running a checker over a piece of text.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so that results can be
/// logged, copied and compared directly (for example with `assert_eq!`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CheckResult {
    /// If our checkers return success, we change this bool to true.
    pub is_identified: bool,
    /// The text before we check it.
    // We could make this `&'text str`,
    // but then crack requires lifetime annotations.
    pub text: String,
    /// Description of the checked text.
    pub description: String,
    /// Name of the checker we are using.
    pub checker_name: &'static str,
    /// Description of the checker we are using.
    pub checker_description: &'static str,
    /// Link to more info about the checker.
    pub link: &'static str,
}
|
||
/// To save time we have a default | ||
/// for checkResult in case we fail | ||
/// I do not believe the checker is important if failed | ||
/// as we will not use it. To save time we will return a default | ||
/// checker. | ||
impl CheckResult { | ||
pub fn new<Type>(checker_used: &Checker<Type>) -> CheckResult { | ||
CheckResult { | ||
is_identified: false, | ||
text: "".to_string(), | ||
checker_name: checker_used.name, | ||
checker_description: checker_used.description, | ||
description: "".to_string(), | ||
link: checker_used.link, | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
/// Checker_type is a type used to define checkers | ||
/// This means that we can standardise the way we check for plaintext | ||
use crate::checkers::checker_result::CheckResult; | ||
use lemmeknow::Identify; | ||
|
||
/// Every checker is of type `Checker<Type>`, where `Type` is a marker | ||
/// type naming the concrete checker (e.g. `Checker<DefaultChecker>`). | ||
/// This will let us pick & choose which checkers to use | ||
/// at runtime. | ||
pub struct Checker<Type> { | ||
/// The name of the checker | ||
pub name: &'static str, | ||
/// The description of the checker | ||
/// you can take the first line from Wikipedia | ||
/// Sometimes our checkers do not exist on Wikipedia so we write our own. | ||
pub description: &'static str, | ||
/// The link to the checker's website | ||
/// Wikipedia link, articles, github etc | ||
pub link: &'static str, | ||
/// The tags of the checker | ||
pub tags: Vec<&'static str>, | ||
/// The expected runtime of the checker | ||
/// We get this by bench marking the code | ||
pub expected_runtime: f32, | ||
/// The popularity of the checker | ||
pub popularity: f32, | ||
/// lemmeknow config object | ||
pub lemmeknow_config: Identify, | ||
/// Zero-sized marker that carries the `Type` parameter; it stores no data. | ||
/// NOTE(review): the name looks like a typo of `_phantom`, but the field is | ||
/// public, so renaming it would break every place that builds a `Checker`. | ||
pub _phatom: std::marker::PhantomData<Type>, | ||
} | ||
|
||
/// Every checker must implement this trait, | ||
/// which checks the given text to see if it's plaintext | ||
/// and returns CheckResult, which is our results object. | ||
pub trait Check { | ||
/// Builds the checker; takes no arguments and returns the implementing type. | ||
fn new() -> Self; | ||
/// Checks `text` and returns a `CheckResult` describing the outcome. | ||
fn check(&self, text: &str) -> CheckResult; | ||
} | ||
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
use lemmeknow::Identify; | ||
|
||
use super::{ | ||
checker_result::CheckResult, | ||
checker_type::{Check, Checker}, | ||
}; | ||
|
||
/// The default checker is used to check if the text is plaintext | ||
/// Based on what the Ares team has found to be the best checker. | ||
/// This is a unit marker type: it holds no data and is used as the | ||
/// `Type` parameter in `Checker<DefaultChecker>`. | ||
|
||
pub struct DefaultChecker; | ||
|
||
impl Check for Checker<DefaultChecker> { | ||
fn new() -> Self { | ||
Checker { | ||
name: "Template checker", | ||
description: "This is a default template checker. If you're seeing this, it's an error. Please contact us on Discord http://discord.skerritt.blog", | ||
link: "http://discord.skerritt.blog", | ||
tags: vec![], | ||
expected_runtime: 0.0, | ||
popularity: 0.0, | ||
lemmeknow_config: Identify::default(), | ||
_phatom: std::marker::PhantomData, | ||
} | ||
} | ||
|
||
fn check(&self, _text: &str) -> CheckResult { | ||
CheckResult::new(self) | ||
} | ||
} | ||
|
||
#[cfg(test)]
mod tests {
    use crate::checkers::checker_result::CheckResult;
    use crate::checkers::checker_type::{Check, Checker};
    use crate::checkers::default_checker::DefaultChecker;

    /// A result built from the default checker starts out unidentified.
    #[test]
    fn default_checker_works() {
        let result = CheckResult::new(&Checker::<DefaultChecker>::new());
        assert!(!result.is_identified);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,48 +1,84 @@ | ||
// import storage | ||
use crate::checkers::checker_object::CheckObject; | ||
use crate::checkers::checker_result::CheckResult; | ||
use crate::storage; | ||
use lemmeknow::Identify; | ||
// use log::{debug, info, trace}; unused imports | ||
|
||
// given an input, check every item in the array and return true if any of them match | ||
pub fn check_english(input: &str) -> Option<CheckObject> { | ||
if let Some(result) = storage::DICTIONARIES | ||
.iter() | ||
.find(|(_, words)| words.contains(input)) | ||
{ | ||
// result.0 is filename | ||
return Some(CheckObject { | ||
is_identified: true, | ||
text: input, | ||
checker: "Dictionary", | ||
description: result.0.to_string(), | ||
use crate::checkers::checker_type::{Check, Checker}; | ||
|
||
pub struct EnglishChecker; | ||
|
||
/// given an input, check every item in the array and return true if any of them match | ||
impl Check for Checker<EnglishChecker> { | ||
fn new() -> Self { | ||
Checker { | ||
name: "English Checker", | ||
description: "Checks for english words", | ||
link: "https://en.wikipedia.org/wiki/List_of_English_words", | ||
}); | ||
tags: vec!["english"], | ||
expected_runtime: 0.1, | ||
/// English is the most popular language | ||
popularity: 1.0, | ||
lemmeknow_config: Identify::default(), | ||
_phatom: std::marker::PhantomData, | ||
} | ||
} | ||
|
||
fn check(&self, input: &str) -> CheckResult { | ||
let mut plaintext_found = false; | ||
let mut filename = ""; | ||
if let Some(result) = storage::DICTIONARIES | ||
.iter() | ||
.find(|(_, words)| words.contains(input)) | ||
{ | ||
plaintext_found = true; | ||
filename = result.0; // result.0 is the filename | ||
} | ||
|
||
CheckResult { | ||
is_identified: plaintext_found, | ||
text: input.to_string(), | ||
checker_name: self.name, | ||
checker_description: self.description, | ||
description: filename.to_string(), | ||
link: self.link, | ||
} | ||
} | ||
None | ||
} | ||
|
||
#[cfg(test)] | ||
mod tests { | ||
use crate::checkers::english::check_english; | ||
use crate::checkers::{ | ||
checker_type::{Check, Checker}, | ||
english::EnglishChecker, | ||
}; | ||
|
||
#[test] | ||
fn test_check_basic() { | ||
assert!(check_english("preinterview").is_some()); | ||
let checker = Checker::<EnglishChecker>::new(); | ||
assert!(checker.check("preinterview").is_identified); | ||
} | ||
|
||
#[test] | ||
fn test_check_basic2() { | ||
assert!(check_english("and").is_some()); | ||
let checker = Checker::<EnglishChecker>::new(); | ||
assert!(checker.check("and").is_identified); | ||
} | ||
|
||
#[test] | ||
fn test_check_multiple_words() { | ||
assert!(check_english("and woody").is_none()); | ||
let checker = Checker::<EnglishChecker>::new(); | ||
assert_eq!(checker.check("and woody").is_identified, false); | ||
} | ||
|
||
#[test] | ||
fn test_check_non_dictionary_word() { | ||
assert!( | ||
check_english("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaBabyShark").is_none() | ||
let checker = Checker::<EnglishChecker>::new(); | ||
assert_eq!( | ||
checker | ||
.check("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaBabyShark") | ||
.is_identified, | ||
false | ||
); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.