Skip to content

Commit

Permalink
Change checkers to become objects similar to decode (#29)
Browse files Browse the repository at this point in the history
* Releasing 0.0.1

* switching branches to add file input

* switching branches to add file input

* checker works!

* comments

* feat: added test for DefaultChecker and fixed clippy warnings

* formatted code

* Updated english checker and lemmeknow to have valid values

* Fix small issue around not linking to the right thing

Co-authored-by: bee <bee@skerritt.blog>
Co-authored-by: swanandx <73115739+swanandx@users.noreply.github.com>
  • Loading branch information
3 people authored Jul 25, 2022
1 parent 2382baa commit c615b3e
Show file tree
Hide file tree
Showing 13 changed files with 290 additions and 62 deletions.
42 changes: 42 additions & 0 deletions src/api_library_input_struct.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/// import general checker
use crate::checkers::{
checker_type::{Check, Checker},
default_checker::DefaultChecker,
};
use lemmeknow::Identify;

/// Library input is the default API input.
/// The CLI turns its arguments into a `LibraryInput` struct.
///
/// `Type` is the type parameter handed on to the [`Checker`] stored in
/// the `checker` field.
pub struct LibraryInput<Type> {
/// The input to be decoded.
/// Given to us by the user.
pub encoded_text: String,
/// Verbosity level; determines how much we print in logs.
pub verbose: i32,
/// The checker to use
pub checker: Checker<Type>,
/// The lemmeknow config to use
pub lemmeknow_config: Identify,
}

/// Lemmeknow configuration used by `LibraryInput::default`:
/// no rarity bounds, empty `tags` / `exclude_tags` lists, and both
/// `file_support` and `boundaryless` switched off.
const LEMMEKNOW_DEFAULT_CONFIG: Identify = Identify {
min_rarity: None,
max_rarity: None,
tags: vec![],
exclude_tags: vec![],
file_support: false,
boundaryless: false,
};

impl Default for LibraryInput<DefaultChecker> {
fn default() -> Self {
LibraryInput {
encoded_text: String::new(),
// this will be of type Checker<DefaultChecker>
checker: Checker::new(),
verbose: 0,
lemmeknow_config: LEMMEKNOW_DEFAULT_CONFIG,
}
}
}
12 changes: 0 additions & 12 deletions src/checkers/checker_object.rs

This file was deleted.

36 changes: 36 additions & 0 deletions src/checkers/checker_result.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
use super::checker_type::Checker;

/// Outcome of running a checker over a piece of text.
pub struct CheckResult {
/// `true` once a checker has identified the text; `false` otherwise.
pub is_identified: bool,
/// text is the text before we check it.
// we can make this &'text str
// but then crack requires lifetime annotations.
pub text: String,
/// Description of the checked text.
pub description: String,
/// Name of the Checker we are using
pub checker_name: &'static str,
/// Description of the Checker we are using
pub checker_description: &'static str,
/// Link to more info about checker
pub link: &'static str,
}

impl CheckResult {
    /// Builds the "nothing identified" result for `checker_used`.
    ///
    /// The checker's `name`, `description` and `link` are copied over, while
    /// `is_identified` is `false` and both `text` and `description` are empty.
    /// This gives failing checks a ready-made value to return, so they do not
    /// have to fill in every field by hand.
    pub fn new<Type>(checker_used: &Checker<Type>) -> CheckResult {
        let (checker_name, checker_description, link) = (
            checker_used.name,
            checker_used.description,
            checker_used.link,
        );
        CheckResult {
            is_identified: false,
            text: String::new(),
            description: String::new(),
            checker_name,
            checker_description,
            link,
        }
    }
}
37 changes: 37 additions & 0 deletions src/checkers/checker_type.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/// Checker_type is a type used to define checkers
/// This means that we can standardise the way we check for plaintext
use crate::checkers::checker_result::CheckResult;
use lemmeknow::Identify;

/// Every checker is a `Checker<Type>`, where `Type` names the concrete checker.
/// This will let us pick & choose which checkers to use
/// at runtime.
pub struct Checker<Type> {
/// The name of the checker
pub name: &'static str,
/// The description of the checker
/// you can take the first line from Wikipedia
/// Sometimes our checkers do not exist on Wikipedia so we write our own.
pub description: &'static str,
/// The link to the checker's website
/// Wikipedia link, articles, github etc
pub link: &'static str,
/// The tags of the checker
pub tags: Vec<&'static str>,
/// The expected runtime of the checker
/// We get this by bench marking the code
pub expected_runtime: f32,
/// The popularity of the checker
pub popularity: f32,
/// lemmeknow config object
pub lemmeknow_config: Identify,
/// Marker that ties this `Checker` to its `Type` parameter without storing a `Type` value.
// NOTE(review): the name looks like a typo of `_phantom`; it is a public
// field set by every constructor, so renaming it needs a coordinated change.
pub _phatom: std::marker::PhantomData<Type>,
}

/// Every checker must implement this trait
/// Which checks the given text to see if its plaintext
/// and returns CheckResult, which is our results object.
pub trait Check {
/// Constructs the checker.
fn new() -> Self;
/// Checks `text` and reports the outcome as a `CheckResult`.
fn check(&self, text: &str) -> CheckResult;
}
46 changes: 46 additions & 0 deletions src/checkers/default_checker.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
use lemmeknow::Identify;

use super::{
checker_result::CheckResult,
checker_type::{Check, Checker},
};

/// The default checker is used to check if the text is plaintext
/// Based on what the Ares team has found to be the best checker.
///
/// NOTE(review): the `Check` impl for `Checker<DefaultChecker>` in this file is
/// a placeholder whose `check` always returns an unidentified result.

pub struct DefaultChecker;

/// Placeholder `Check` implementation backing `Checker<DefaultChecker>`.
impl Check for Checker<DefaultChecker> {
    /// Creates the template checker: placeholder name, description and link,
    /// no tags, zeroed runtime and popularity, and the default lemmeknow config.
    fn new() -> Self {
        Self {
            name: "Template checker",
            description: "This is a default template checker. If you're seeing this, it's an error. Please contact us on Discord http://discord.skerritt.blog",
            link: "http://discord.skerritt.blog",
            tags: Vec::new(),
            expected_runtime: 0.0,
            popularity: 0.0,
            lemmeknow_config: Identify::default(),
            _phatom: std::marker::PhantomData,
        }
    }

    /// Ignores `_text` and returns the result built by `CheckResult::new`
    /// for this checker.
    fn check(&self, _text: &str) -> CheckResult {
        CheckResult::new::<DefaultChecker>(self)
    }
}

#[cfg(test)]
mod tests {
    use crate::checkers::{
        checker_result::CheckResult,
        checker_type::{Check, Checker},
        default_checker::DefaultChecker,
    };

    /// A result built straight from the default checker starts out unidentified.
    #[test]
    fn default_checker_works() {
        let checker = Checker::<DefaultChecker>::new();
        let checker_result = CheckResult::new(&checker);
        // `assert!(!x)` rather than `assert_eq!(x, false)`
        // (clippy::bool_assert_comparison).
        assert!(!checker_result.is_identified);
    }

    /// Going through `Check::check` must give the same unidentified outcome.
    #[test]
    fn default_checker_check_is_unidentified() {
        let checker = Checker::<DefaultChecker>::new();
        let checker_result = checker.check("any text");
        assert!(!checker_result.is_identified);
    }
}
78 changes: 57 additions & 21 deletions src/checkers/english.rs
Original file line number Diff line number Diff line change
@@ -1,48 +1,84 @@
// import storage
use crate::checkers::checker_object::CheckObject;
use crate::checkers::checker_result::CheckResult;
use crate::storage;
use lemmeknow::Identify;
// use log::{debug, info, trace}; unused imports

// given an input, check every item in the array and return true if any of them match
pub fn check_english(input: &str) -> Option<CheckObject> {
if let Some(result) = storage::DICTIONARIES
.iter()
.find(|(_, words)| words.contains(input))
{
// result.0 is filename
return Some(CheckObject {
is_identified: true,
text: input,
checker: "Dictionary",
description: result.0.to_string(),
use crate::checkers::checker_type::{Check, Checker};

/// Marker type: `Checker<EnglishChecker>` is the checker that looks the input
/// up in the dictionaries held in `storage::DICTIONARIES`.
pub struct EnglishChecker;

/// given an input, check every item in the array and return true if any of them match
impl Check for Checker<EnglishChecker> {
fn new() -> Self {
Checker {
name: "English Checker",
description: "Checks for english words",
link: "https://en.wikipedia.org/wiki/List_of_English_words",
});
tags: vec!["english"],
expected_runtime: 0.1,
/// English is the most popular language
popularity: 1.0,
lemmeknow_config: Identify::default(),
_phatom: std::marker::PhantomData,
}
}

fn check(&self, input: &str) -> CheckResult {
let mut plaintext_found = false;
let mut filename = "";
if let Some(result) = storage::DICTIONARIES
.iter()
.find(|(_, words)| words.contains(input))
{
plaintext_found = true;
filename = result.0; // result.0 is the filename
}

CheckResult {
is_identified: plaintext_found,
text: input.to_string(),
checker_name: self.name,
checker_description: self.description,
description: filename.to_string(),
link: self.link,
}
}
None
}

#[cfg(test)]
mod tests {
use crate::checkers::english::check_english;
use crate::checkers::{
checker_type::{Check, Checker},
english::EnglishChecker,
};

#[test]
fn test_check_basic() {
assert!(check_english("preinterview").is_some());
let checker = Checker::<EnglishChecker>::new();
assert!(checker.check("preinterview").is_identified);
}

#[test]
fn test_check_basic2() {
assert!(check_english("and").is_some());
let checker = Checker::<EnglishChecker>::new();
assert!(checker.check("and").is_identified);
}

#[test]
fn test_check_multiple_words() {
assert!(check_english("and woody").is_none());
let checker = Checker::<EnglishChecker>::new();
assert_eq!(checker.check("and woody").is_identified, false);
}

#[test]
fn test_check_non_dictionary_word() {
assert!(
check_english("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaBabyShark").is_none()
let checker = Checker::<EnglishChecker>::new();
assert_eq!(
checker
.check("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaBabyShark")
.is_identified,
false
);
}
}
6 changes: 3 additions & 3 deletions src/checkers/human_checker.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
use crate::checkers::checker_object::CheckObject;
use crate::checkers::checker_result::CheckResult;

#[cfg(not(test))]
use text_io::read;

// compile this if we are not running tests
#[cfg(not(test))]
pub fn human_checker(input: &CheckObject) -> bool {
pub fn human_checker(input: &CheckResult) -> bool {
let output_string = format!(
"Is the plaintext '{}' which is {}. [Y/n]? ",
input.text, input.description
Expand All @@ -21,6 +21,6 @@ pub fn human_checker(input: &CheckObject) -> bool {

// use this human_checker for tests
#[cfg(test)]
pub fn human_checker(_input: &CheckObject) -> bool {
pub fn human_checker(_input: &CheckResult) -> bool {
true
}
54 changes: 38 additions & 16 deletions src/checkers/lemmeknow_checker.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use crate::checkers::checker_object::CheckObject;
use lemmeknow::Data;
use lemmeknow::Identify;
use crate::checkers::checker_result::CheckResult;
use lemmeknow::{Data, Identify};

use super::checker_type::{Check, Checker};

const IDENTIFIER: Identify = Identify {
min_rarity: None,
Expand All @@ -11,21 +12,42 @@ const IDENTIFIER: Identify = Identify {
boundaryless: false,
};

pub fn check_lemmeknow(input: &str) -> Option<CheckObject> {
// Uses lemmeknow to check if any regexes match
let lemmeknow_result = IDENTIFIER.identify(input);
if !lemmeknow_result.is_empty() {
let return_object = CheckObject {
is_identified: true,
text: input,
checker: "LemmeKnow",
// Returns a vector of matches
description: format_data_result(&lemmeknow_result[0].data),
/// Marker type: `Checker<LemmeKnow>` is the checker that runs the input
/// through lemmeknow's `Identify::identify`.
pub struct LemmeKnow;

impl Check for Checker<LemmeKnow> {
fn new() -> Self {
Checker {
// TODO: Update fields with proper values
name: "LemmeKnow Checker",
description: "Uses LemmeKnow to check for regex matches",
link: "https://swanandx.github.io/lemmeknow-frontend/",
};
return Some(return_object);
tags: vec!["lemmeknow", "regex"],
expected_runtime: 0.01,
popularity: 1.0,
lemmeknow_config: Identify::default(),
_phatom: std::marker::PhantomData,
}
}

fn check(&self, text: &str) -> CheckResult {
let lemmeknow_result = IDENTIFIER.identify(text);
let mut is_identified = false;
let mut description = "".to_string();
if !lemmeknow_result.is_empty() {
is_identified = true;
description = format_data_result(&lemmeknow_result[0].data)
}

CheckResult {
is_identified,
text: text.to_owned(),
checker_name: self.name,
checker_description: self.description,
// Returns a vector of matches
description,
link: self.link,
}
}
None
}

fn format_data_result(input: &Data) -> String {
Expand Down
Loading

0 comments on commit c615b3e

Please sign in to comment.