Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Change checkers to become objects similar to decode #29

Merged
merged 10 commits into from
Jul 25, 2022
42 changes: 42 additions & 0 deletions src/api_library_input_struct.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/// import general checker
use crate::checkers::{
checker_type::{Check, Checker},
default_checker::DefaultChecker,
};
use lemmeknow::Identify;

/// Library input is the default API input.
///
/// The CLI turns its arguments into a `LibraryInput` struct.
/// `Type` parameterises the `Checker` stored in the `checker` field.
pub struct LibraryInput<Type> {
/// The input to be decoded.
/// Given to us by the user.
pub encoded_text: String,
/// The level of verbosity, which determines
/// how much we print in logs.
pub verbose: i32,
/// The checker to use.
pub checker: Checker<Type>,
/// The lemmeknow config to use.
pub lemmeknow_config: Identify,
}

/// Lemmeknow configuration used by `LibraryInput::default()`:
/// no rarity bounds, empty tag lists, and both flags switched off.
const LEMMEKNOW_DEFAULT_CONFIG: Identify = Identify {
    min_rarity: None,
    max_rarity: None,
    tags: Vec::new(),
    exclude_tags: Vec::new(),
    file_support: false,
    boundaryless: false,
};

impl Default for LibraryInput<DefaultChecker> {
fn default() -> Self {
LibraryInput {
encoded_text: String::new(),
// this will be of type Checker<DefaultChecker>
checker: Checker::new(),
verbose: 0,
lemmeknow_config: LEMMEKNOW_DEFAULT_CONFIG,
}
}
}
12 changes: 0 additions & 12 deletions src/checkers/checker_object.rs

This file was deleted.

36 changes: 36 additions & 0 deletions src/checkers/checker_result.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
use super::checker_type::Checker;

/// The outcome of running a checker against a piece of text.
pub struct CheckResult {
/// Whether the checker identified the text.
/// `CheckResult::new` starts this as `false`; checkers set it to `true` on success.
pub is_identified: bool,
/// text is the text before we check it.
// we can make this &'text str
// but then crack requires lifetime annotations.
pub text: String,
/// Description of the checked text.
pub description: String,
/// Name of the Checker we are using
pub checker_name: &'static str,
/// Description of the Checker we are using
pub checker_description: &'static str,
/// Link to more info about checker
pub link: &'static str,
}

/// To save time we have a default
/// for checkResult in case we fail
/// I do not believe the checker is important if failed
/// as we will not use it. To save time we will return a default
/// checker.
impl CheckResult {
pub fn new<Type>(checker_used: &Checker<Type>) -> CheckResult {
CheckResult {
is_identified: false,
text: "".to_string(),
checker_name: checker_used.name,
checker_description: checker_used.description,
description: "".to_string(),
link: checker_used.link,
}
}
}
37 changes: 37 additions & 0 deletions src/checkers/checker_type.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
//! `checker_type` defines the types used to describe checkers.
//! This means that we can standardise the way we check for plaintext.
use crate::checkers::checker_result::CheckResult;
use lemmeknow::Identify;

/// Every checker is a `Checker<Type>`, where `Type` is a marker struct
/// such as `DefaultChecker`.
/// This will let us pick & choose which checkers to use
/// at runtime.
pub struct Checker<Type> {
/// The name of the checker
pub name: &'static str,
/// The description of the checker
/// you can take the first line from Wikipedia
/// Sometimes our checkers do not exist on Wikipedia so we write our own.
pub description: &'static str,
/// The link to the checker's website
/// Wikipedia link, articles, github etc
pub link: &'static str,
/// The tags of the checker
pub tags: Vec<&'static str>,
/// The expected runtime of the checker
/// We get this by bench marking the code
// NOTE(review): the unit of this value is not stated anywhere in this file — confirm.
pub expected_runtime: f32,
/// The popularity of the checker
// NOTE(review): the valid range is not stated in this file — confirm.
pub popularity: f32,
/// lemmeknow config object
pub lemmeknow_config: Identify,
/// Zero-sized marker that ties this checker to `Type` without storing a value of it.
/// The field name is a misspelling of "phantom"; it is public, so renaming it
/// would break every place that constructs a `Checker`.
pub _phatom: std::marker::PhantomData<Type>,
}

/// Every checker must implement this trait
/// Which checks the given text to see if its plaintext
/// and returns CheckResult, which is our results object.
pub trait Check {
/// Constructs the checker.
fn new() -> Self;
/// Checks `text` and reports the outcome as a `CheckResult`.
fn check(&self, text: &str) -> CheckResult;
}
46 changes: 46 additions & 0 deletions src/checkers/default_checker.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
use lemmeknow::Identify;

use super::{
checker_result::CheckResult,
checker_type::{Check, Checker},
};

/// The default checker is used to check if the text is plaintext
/// Based on what the Ares team has found to be the best checker.
///
/// Marker type only: it is used as the `Type` parameter in `Checker<DefaultChecker>`.
pub struct DefaultChecker;

impl Check for Checker<DefaultChecker> {
    /// Creates the template checker: placeholder metadata, no tags, zero
    /// expected runtime and popularity, and the default lemmeknow config.
    fn new() -> Self {
        Self {
            name: "Template checker",
            description: "This is a default template checker. If you're seeing this, it's an error. Please contact us on Discord http://discord.skerritt.blog",
            link: "http://discord.skerritt.blog",
            tags: Vec::new(),
            expected_runtime: 0.0,
            popularity: 0.0,
            lemmeknow_config: Identify::default(),
            _phatom: std::marker::PhantomData,
        }
    }

    /// Ignores the text and returns `CheckResult::new(self)`, so the result
    /// is never marked as identified.
    fn check(&self, _text: &str) -> CheckResult {
        CheckResult::new(self)
    }
}

#[cfg(test)]
mod tests {
    use crate::checkers::{
        checker_result::CheckResult,
        checker_type::{Check, Checker},
        default_checker::DefaultChecker,
    };

    /// A result built from the default checker starts out unidentified.
    #[test]
    fn default_checker_works() {
        let default_checker = Checker::<DefaultChecker>::new();
        let result = CheckResult::new(&default_checker);
        assert!(!result.is_identified);
    }
}
78 changes: 57 additions & 21 deletions src/checkers/english.rs
Original file line number Diff line number Diff line change
@@ -1,48 +1,84 @@
// import storage
use crate::checkers::checker_object::CheckObject;
use crate::checkers::checker_result::CheckResult;
use crate::storage;
use lemmeknow::Identify;
// use log::{debug, info, trace}; unused imports

// given an input, check every item in the array and return true if any of them match
pub fn check_english(input: &str) -> Option<CheckObject> {
if let Some(result) = storage::DICTIONARIES
.iter()
.find(|(_, words)| words.contains(input))
{
// result.0 is filename
return Some(CheckObject {
is_identified: true,
text: input,
checker: "Dictionary",
description: result.0.to_string(),
use crate::checkers::checker_type::{Check, Checker};

/// Marker type for the dictionary-backed English checker; used as `Checker<EnglishChecker>`.
pub struct EnglishChecker;

/// given an input, check every item in the array and return true if any of them match
impl Check for Checker<EnglishChecker> {
fn new() -> Self {
Checker {
name: "English Checker",
description: "Checks for english words",
link: "https://en.wikipedia.org/wiki/List_of_English_words",
});
tags: vec!["english"],
expected_runtime: 0.1,
/// English is the most popular language
popularity: 1.0,
lemmeknow_config: Identify::default(),
_phatom: std::marker::PhantomData,
}
}

/// Looks `input` up in `storage::DICTIONARIES`.
///
/// The first entry whose word collection `contains` the input marks the
/// result as identified, and that entry's filename becomes the result's
/// description. With no match the result is unidentified and the
/// description is empty.
fn check(&self, input: &str) -> CheckResult {
    let hit = storage::DICTIONARIES
        .iter()
        .find(|(_, words)| words.contains(input));

    // Element 0 of a dictionary entry is the filename it was loaded from.
    let (is_identified, description) = match hit {
        Some(entry) => (true, entry.0.to_string()),
        None => (false, String::new()),
    };

    CheckResult {
        is_identified,
        text: input.to_owned(),
        checker_name: self.name,
        checker_description: self.description,
        description,
        link: self.link,
    }
}
None
}

#[cfg(test)]
mod tests {
use crate::checkers::english::check_english;
use crate::checkers::{
checker_type::{Check, Checker},
english::EnglishChecker,
};

#[test]
fn test_check_basic() {
assert!(check_english("preinterview").is_some());
let checker = Checker::<EnglishChecker>::new();
assert!(checker.check("preinterview").is_identified);
}

#[test]
fn test_check_basic2() {
assert!(check_english("and").is_some());
let checker = Checker::<EnglishChecker>::new();
assert!(checker.check("and").is_identified);
}

#[test]
fn test_check_multiple_words() {
assert!(check_english("and woody").is_none());
let checker = Checker::<EnglishChecker>::new();
assert_eq!(checker.check("and woody").is_identified, false);
}

#[test]
fn test_check_non_dictionary_word() {
assert!(
check_english("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaBabyShark").is_none()
let checker = Checker::<EnglishChecker>::new();
assert_eq!(
checker
.check("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaBabyShark")
.is_identified,
false
);
}
}
6 changes: 3 additions & 3 deletions src/checkers/human_checker.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
use crate::checkers::checker_object::CheckObject;
use crate::checkers::checker_result::CheckResult;

#[cfg(not(test))]
use text_io::read;

// compile this if we are not running tests
#[cfg(not(test))]
pub fn human_checker(input: &CheckObject) -> bool {
pub fn human_checker(input: &CheckResult) -> bool {
let output_string = format!(
"Is the plaintext '{}' which is {}. [Y/n]? ",
input.text, input.description
Expand All @@ -21,6 +21,6 @@ pub fn human_checker(input: &CheckObject) -> bool {

// use this human_checker for tests
#[cfg(test)]
pub fn human_checker(_input: &CheckObject) -> bool {
pub fn human_checker(_input: &CheckResult) -> bool {
true
}
52 changes: 37 additions & 15 deletions src/checkers/lemmeknow_checker.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use crate::checkers::checker_object::CheckObject;
use lemmeknow::Data;
use lemmeknow::Identify;
use crate::checkers::checker_result::CheckResult;
use lemmeknow::{Data, Identify};

use super::checker_type::{Check, Checker};

const IDENTIFIER: Identify = Identify {
min_rarity: None,
Expand All @@ -11,21 +12,42 @@ const IDENTIFIER: Identify = Identify {
boundaryless: false,
};

pub fn check_lemmeknow(input: &str) -> Option<CheckObject> {
// Uses lemmeknow to check if any regexes match
let lemmeknow_result = IDENTIFIER.identify(input);
if !lemmeknow_result.is_empty() {
let return_object = CheckObject {
is_identified: true,
text: input,
checker: "LemmeKnow",
/// Marker type for the lemmeknow-backed checker; used as `Checker<LemmeKnow>`.
pub struct LemmeKnow;

impl Check for Checker<LemmeKnow> {
fn new() -> Self {
Checker {
// TODO: Update fields with proper values
name: "LemmeKnow Checker",
description: "Uses LemmeKnow to check for regex matches",
link: "https://swanandx.github.io/lemmeknow-frontend/",
tags: vec!["lemmeknow", "regex"],
expected_runtime: 0.01,
popularity: 1.0,
lemmeknow_config: Identify::default(),
_phatom: std::marker::PhantomData,
}
}

fn check(&self, text: &str) -> CheckResult {
let lemmeknow_result = IDENTIFIER.identify(text);
let mut is_identified = false;
let mut description = "".to_string();
if !lemmeknow_result.is_empty() {
is_identified = true;
description = format_data_result(&lemmeknow_result[0].data)
}

CheckResult {
is_identified,
text: text.to_owned(),
checker_name: self.name,
checker_description: self.description,
// Returns a vector of matches
description: format_data_result(&lemmeknow_result[0].data),
description,
link: "https://swanandx.github.io/lemmeknow-frontend/",
bee-san marked this conversation as resolved.
Show resolved Hide resolved
};
return Some(return_object);
}
}
None
}

fn format_data_result(input: &Data) -> String {
Expand Down
Loading