-
-
Notifications
You must be signed in to change notification settings - Fork 12
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Feat: Improve fuzzy find command, insource Fuzzy as Huzzy
- Loading branch information
Showing
9 changed files
with
402 additions
and
37 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
# Huzzy | ||
|
||
Fuzzy string search library in Haskell. | ||
Uses `TextualMonoid` from | ||
[monoid-subclasses](https://hackage.haskell.org/package/monoid-subclasses) | ||
to be able to run on different types of strings. | ||
|
||
This is a fork of Joomy Korkut's [fuzzy](https://github.com/joom/fuzzy), | ||
which itselft was a port of the JavaScript library | ||
[mattyork/fuzzy](https://github.com/mattyork/fuzzy). | ||
|
||
|
||
## Usage | ||
|
||
```haskell | ||
> import Text.Fuzzy | ||
|
||
> match "fnt" "infinite" ("", "") id HandleCase | ||
Just (Fuzzy | ||
{ original = "infinite" | ||
, rendered = "infinite" | ||
, score = 3 | ||
}) | ||
|
||
> match "hsk" ("Haskell",1995) ("<", ">") fst IgnoreCase | ||
Just (Fuzzy | ||
{ original = ("Haskell", 1995) | ||
, rendered = "<h>a<s><k>ell" | ||
, score = 5 | ||
}) | ||
|
||
> langs = [("Standard ML", 1990), ("OCaml", 1996), ("Scala", 2003)] | ||
> filter "ML" langs ("<", ">") fst IgnoreCase | ||
[ Fuzzy | ||
{ original = ("Standard ML", 1990) | ||
, rendered = "standard <m><l>" | ||
, score = 4 | ||
} | ||
, Fuzzy | ||
{ original = ("OCaml", 1996) | ||
, rendered = "oca<m><l>" | ||
, score = 4 | ||
} | ||
] | ||
|
||
> simpleFilter "vm" ["vim", "emacs", "virtual machine"] | ||
["vim","virtual machine"] | ||
|
||
> test "brd" "bread" | ||
True | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
import Distribution.Simple | ||
main = defaultMain |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
name: huzzy | ||
version: 0.1.0.0 | ||
synopsis: Filters a list based on a fuzzy string search | ||
homepage: https://github.com/ad-si/TaskLite/tree/master/huzzy | ||
license: AGPL-3.0-or-later | ||
author: Adrian Sieber | ||
maintainer: mail@adriansieber.com | ||
copyright: Adrian Sieber | ||
category: Text, Fuzzy | ||
|
||
description: | | ||
Please check out the readme on GitHub | ||
at https://github.com/ad-si/TaskLite/tree/master/huzzy#readme | ||
extra-source-files: | ||
- README.md | ||
|
||
dependencies: | ||
- base >= 4.7 && < 5 | ||
- monoid-subclasses | ||
- protolude | ||
|
||
default-extensions: | ||
- NoImplicitPrelude | ||
|
||
ghc-options: | ||
- -Wall | ||
- -Wcompat | ||
- -Wincomplete-record-updates | ||
- -Wincomplete-uni-patterns | ||
- -Wredundant-constraints | ||
- -fno-warn-orphans | ||
|
||
library: | ||
source-dirs: src | ||
|
||
tests: | ||
huzzy-test: | ||
main: tests.hs | ||
source-dirs: tests | ||
dependencies: | ||
- base | ||
- HUnit | ||
- huzzy |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,160 @@ | ||
-- | Fuzzy string search in Haskell. | ||
-- Uses 'TextualMonoid' to be able to run on different types of strings. | ||
module Text.Huzzy where | ||
|
||
import Protolude as P hiding (filter, null) | ||
|
||
import Data.Char (toLower) | ||
import Data.Maybe (isJust, mapMaybe) | ||
import Data.Monoid (mempty, (<>)) | ||
|
||
import qualified Data.Monoid.Textual as T | ||
|
||
|
||
-- | Included in the return type of `match` and `filter`. | ||
-- Contains the original value given, the rendered string | ||
-- and the matching score. | ||
data Fuzzy val prettyText = Fuzzy | ||
{ original :: val | ||
, rendered :: prettyText | ||
, score :: Int | ||
} deriving (Show, Eq) | ||
|
||
|
||
data CaseSensitivity | ||
= IgnoreCase | ||
| HandleCase | ||
deriving (Show, Eq) | ||
|
||
|
||
null :: T.TextualMonoid s => s -> Bool | ||
null = | ||
not . T.any (const True) | ||
|
||
|
||
-- | Returns the rendered output and the | ||
-- matching score for a pattern and a text. | ||
-- Two examples are given below: | ||
-- | ||
-- >>> match HandleCase ("", "") identity "fnt" "infinite" | ||
-- Just (Fuzzy | ||
-- { original = "infinite" | ||
-- , rendered = "infinite" | ||
-- , score = 3 | ||
-- }) | ||
-- | ||
-- >>> match IgnoreCase ("<", ">") fst "hsk" ("Haskell", 1995) | ||
-- Just (Fuzzy | ||
-- { original = ("Haskell", 1995) | ||
-- , rendered = "<h>a<s><k>ell" | ||
-- , score = 5 | ||
-- }) | ||
-- | ||
match | ||
:: (T.TextualMonoid text) | ||
=> CaseSensitivity -- ^ Handle or ignore case of search text | ||
-> (text, text) -- ^ Text to add before and after each match | ||
-> (value -> text) -- ^ Function to extract the text from the container | ||
-> text -- ^ Pattern | ||
-> value -- ^ Value containing the text to search in | ||
-> Maybe (Fuzzy value text) -- ^ Original value, rendered string, and score | ||
match caseSensitivity (pre, post) extractFunc pattern value = | ||
let | ||
searchText = extractFunc value | ||
(searchTextNorm, patternNorm) = | ||
let mapToLower = T.map toLower | ||
in | ||
if caseSensitivity == HandleCase | ||
then (searchText, pattern) | ||
else (mapToLower searchText, pattern) | ||
|
||
(totalScore, _, result, patternFromFold) = | ||
T.foldl_' | ||
(\(tot, cur, res, pat) c -> | ||
case T.splitCharacterPrefix pat of | ||
Nothing -> (tot | ||
, 0 | ||
, res <> T.singleton c | ||
, pat | ||
) | ||
Just (x, xs) -> | ||
if x == c | ||
then let cur' = cur * 2 + 1 | ||
in ( tot + cur' | ||
, cur' | ||
, res <> pre <> T.singleton c <> post | ||
, xs | ||
) | ||
else ( tot | ||
, 0 | ||
, res <> T.singleton c | ||
, pat | ||
) | ||
) | ||
(0, 0, mempty, patternNorm) | ||
searchTextNorm | ||
in | ||
if null patternFromFold | ||
then Just (Fuzzy value result totalScore) | ||
else Nothing | ||
|
||
|
||
-- | The function to filter a list of values | ||
-- by fuzzy search on the text extracted from them. | ||
-- | ||
-- >>> langs = [("Standard ML", 1990), ("OCaml", 1996), ("Scala", 2003)] | ||
-- >>> filter "ML" langs ("<", ">") fst IgnoreCase | ||
-- [ Fuzzy | ||
-- { original = ("Standard ML", 1990) | ||
-- , rendered = "standard <m><l>" | ||
-- , score = 4} | ||
-- , Fuzzy | ||
-- { original = ("OCaml", 1996) | ||
-- , rendered = "oca<m><l>" | ||
-- , score = 4 | ||
-- } | ||
-- ] | ||
filter | ||
:: (T.TextualMonoid text) | ||
=> CaseSensitivity -- ^ Handle or ignore case of search text | ||
-> (text, text) -- ^ Text to add before and after each match | ||
-> (value -> text) -- ^ Function to extract the text from the container | ||
-> text -- ^ Pattern | ||
-> [value] -- ^ List of values containing the text to search in | ||
-> [Fuzzy value text] -- ^ List of results, sorted, highest score first | ||
filter caseSen (pre, post) extractFunc pattern texts = | ||
sortOn | ||
(Down . score) | ||
(mapMaybe | ||
(\t -> match caseSen (pre, post) extractFunc pattern t) | ||
texts | ||
) | ||
|
||
|
||
-- | Return all elements of the list that have a fuzzy | ||
-- match against the pattern. Runs with default settings where | ||
-- nothing is added around the matches, as case insensitive. | ||
-- | ||
-- >>> simpleFilter "vm" ["vim", "emacs", "virtual machine"] | ||
-- ["vim","virtual machine"] | ||
simpleFilter | ||
:: (T.TextualMonoid text) | ||
=> text -- ^ Pattern to look for. | ||
-> [text] -- ^ List of texts to check. | ||
-> [text] -- ^ The ones that match. | ||
simpleFilter pattern xs = | ||
map | ||
original | ||
(filter IgnoreCase (mempty, mempty) identity pattern xs) | ||
|
||
|
||
-- | Returns false if the pattern and the text do not match at all. | ||
-- Returns true otherwise. | ||
-- | ||
-- >>> test "brd" "bread" | ||
-- True | ||
test | ||
:: (T.TextualMonoid text) | ||
=> text -> text -> Bool | ||
test pattern text = | ||
isJust (match IgnoreCase (mempty, mempty) identity pattern text) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
module Main where | ||
|
||
import Protolude hiding (from) | ||
|
||
import Test.HUnit | ||
|
||
import Text.Huzzy as Hu | ||
|
||
|
||
from :: [Assertion] -> Test | ||
from xs = TestList (map TestCase xs) | ||
|
||
tests :: Test | ||
tests = TestList | ||
[ | ||
TestLabel "test" $ TestList [ | ||
TestLabel "should return true when fuzzy match" $ from [ | ||
Hu.test "back" "imaback" @?= True | ||
, Hu.test "back" "bakck" @?= True | ||
, Hu.test "shig" "osh kosh modkhigow" @?= True | ||
, Hu.test "" "osh kosh modkhigow" @?= True | ||
] | ||
, TestLabel "should return false when no fuzzy match" $ from [ | ||
Hu.test "back" "abck" @?= False | ||
, Hu.test "okmgk" "osh kosh modkhigow" @?= False | ||
] | ||
] | ||
, TestLabel "match" $ TestList | ||
[ TestLabel | ||
"should return a greater score for consecutive matches of pattern" | ||
$ from [ | ||
(>) (Hu.score <$> Hu.match IgnoreCase ("", "") identity "abcd" "zabcd") | ||
(Hu.score <$> Hu.match IgnoreCase ("", "") identity "abcd" "azbcd") | ||
@?= True | ||
] | ||
, TestLabel | ||
"should return the string as is if no pre/post and case sensitive" | ||
$ from [ | ||
Hu.rendered <$> Hu.match | ||
HandleCase | ||
("", "") | ||
identity | ||
"ab" | ||
"ZaZbZ" | ||
@?= Just "ZaZbZ" | ||
] | ||
, TestLabel "should return Nothing on no match" $ from [ | ||
Hu.match IgnoreCase ("", "") identity "ZEBRA!" "ZaZbZ" @?= Nothing | ||
] | ||
, TestLabel "should be case sensitive is specified" $ from [ | ||
Hu.match HandleCase ("", "") identity "hask" "Haskell" @?= Nothing | ||
] | ||
, TestLabel "should be wrap pre and post around matches" $ from [ | ||
Hu.rendered <$> Hu.match | ||
HandleCase | ||
("<", ">") | ||
identity | ||
"brd" | ||
"bread" | ||
@?= Just "<b><r>ea<d>" | ||
] | ||
] | ||
, TestLabel "filter" $ TestList [ | ||
TestLabel "should return list untouched when given empty pattern" $ from [ | ||
map | ||
Hu.original | ||
(Hu.filter HandleCase ("", "") identity "" ["abc", "def"]) | ||
@?= ["abc", "def"] | ||
] | ||
, TestLabel "should return the highest score first" $ from [ | ||
(@?=) (head (Hu.filter HandleCase ("", "") identity "cb" ["cab", "acb"]) ) | ||
(head (Hu.filter HandleCase ("", "") identity "cb" ["acb"])) | ||
] | ||
] | ||
] | ||
|
||
|
||
runTests :: IO () | ||
runTests = do | ||
_ <- runTestTT tests | ||
return () | ||
|
||
|
||
-- | For now, main will run our tests. | ||
main :: IO () | ||
main = runTests |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,7 @@ | ||
resolver: lts-14.27 | ||
packages: | ||
# - tasklite-api | ||
- huzzy | ||
- tasklite-app | ||
- tasklite-core | ||
# - tasklite-web | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.