Skip to content

Commit

Permalink
Feat: Improve fuzzy find command, insource Fuzzy as Huzzy
Browse files Browse the repository at this point in the history
  • Loading branch information
ad-si committed Apr 9, 2020
1 parent b5193dd commit 3c5be4f
Show file tree
Hide file tree
Showing 9 changed files with 402 additions and 37 deletions.
51 changes: 51 additions & 0 deletions huzzy/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Huzzy

Fuzzy string search library in Haskell.
Uses `TextualMonoid` from
[monoid-subclasses](https://hackage.haskell.org/package/monoid-subclasses)
to be able to run on different types of strings.

This is a fork of Joomy Korkut's [fuzzy](https://github.com/joom/fuzzy),
which itselft was a port of the JavaScript library
[mattyork/fuzzy](https://github.com/mattyork/fuzzy).


## Usage

```haskell
> import Text.Fuzzy

> match "fnt" "infinite" ("", "") id HandleCase
Just (Fuzzy
{ original = "infinite"
, rendered = "infinite"
, score = 3
})

> match "hsk" ("Haskell",1995) ("<", ">") fst IgnoreCase
Just (Fuzzy
{ original = ("Haskell", 1995)
, rendered = "<h>a<s><k>ell"
, score = 5
})

> langs = [("Standard ML", 1990), ("OCaml", 1996), ("Scala", 2003)]
> filter "ML" langs ("<", ">") fst IgnoreCase
[ Fuzzy
{ original = ("Standard ML", 1990)
, rendered = "standard <m><l>"
, score = 4
}
, Fuzzy
{ original = ("OCaml", 1996)
, rendered = "oca<m><l>"
, score = 4
}
]

> simpleFilter "vm" ["vim", "emacs", "virtual machine"]
["vim","virtual machine"]

> test "brd" "bread"
True
```
2 changes: 2 additions & 0 deletions huzzy/Setup.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
import Distribution.Simple
main = defaultMain
44 changes: 44 additions & 0 deletions huzzy/package.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
name: huzzy
version: 0.1.0.0
synopsis: Filters a list based on a fuzzy string search
homepage: https://github.com/ad-si/TaskLite/tree/master/huzzy
license: AGPL-3.0-or-later
author: Adrian Sieber
maintainer: mail@adriansieber.com
copyright: Adrian Sieber
category: Text, Fuzzy

description: |
Please check out the readme on GitHub
at https://github.com/ad-si/TaskLite/tree/master/huzzy#readme
extra-source-files:
- README.md

dependencies:
- base >= 4.7 && < 5
- monoid-subclasses
- protolude

default-extensions:
- NoImplicitPrelude

ghc-options:
- -Wall
- -Wcompat
- -Wincomplete-record-updates
- -Wincomplete-uni-patterns
- -Wredundant-constraints
- -fno-warn-orphans

library:
source-dirs: src

tests:
huzzy-test:
main: tests.hs
source-dirs: tests
dependencies:
- base
- HUnit
- huzzy
160 changes: 160 additions & 0 deletions huzzy/src/Text/Huzzy.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
-- | Fuzzy string search in Haskell.
-- Uses 'TextualMonoid' to be able to run on different types of strings.
module Text.Huzzy where

import Protolude as P hiding (filter, null)

import Data.Char (toLower)
import Data.Maybe (isJust, mapMaybe)
import Data.Monoid (mempty, (<>))

import qualified Data.Monoid.Textual as T


-- | Included in the return type of `match` and `filter`.
-- Contains the original value given, the rendered string
-- and the matching score.
data Fuzzy val prettyText = Fuzzy
{ original :: val
, rendered :: prettyText
, score :: Int
} deriving (Show, Eq)


data CaseSensitivity
= IgnoreCase
| HandleCase
deriving (Show, Eq)


null :: T.TextualMonoid s => s -> Bool
null =
not . T.any (const True)


-- | Returns the rendered output and the
-- matching score for a pattern and a text.
-- Two examples are given below:
--
-- >>> match HandleCase ("", "") identity "fnt" "infinite"
-- Just (Fuzzy
-- { original = "infinite"
-- , rendered = "infinite"
-- , score = 3
-- })
--
-- >>> match IgnoreCase ("<", ">") fst "hsk" ("Haskell", 1995)
-- Just (Fuzzy
-- { original = ("Haskell", 1995)
-- , rendered = "<h>a<s><k>ell"
-- , score = 5
-- })
--
match
:: (T.TextualMonoid text)
=> CaseSensitivity -- ^ Handle or ignore case of search text
-> (text, text) -- ^ Text to add before and after each match
-> (value -> text) -- ^ Function to extract the text from the container
-> text -- ^ Pattern
-> value -- ^ Value containing the text to search in
-> Maybe (Fuzzy value text) -- ^ Original value, rendered string, and score
match caseSensitivity (pre, post) extractFunc pattern value =
let
searchText = extractFunc value
(searchTextNorm, patternNorm) =
let mapToLower = T.map toLower
in
if caseSensitivity == HandleCase
then (searchText, pattern)
else (mapToLower searchText, pattern)

(totalScore, _, result, patternFromFold) =
T.foldl_'
(\(tot, cur, res, pat) c ->
case T.splitCharacterPrefix pat of
Nothing -> (tot
, 0
, res <> T.singleton c
, pat
)
Just (x, xs) ->
if x == c
then let cur' = cur * 2 + 1
in ( tot + cur'
, cur'
, res <> pre <> T.singleton c <> post
, xs
)
else ( tot
, 0
, res <> T.singleton c
, pat
)
)
(0, 0, mempty, patternNorm)
searchTextNorm
in
if null patternFromFold
then Just (Fuzzy value result totalScore)
else Nothing


-- | The function to filter a list of values
-- by fuzzy search on the text extracted from them.
--
-- >>> langs = [("Standard ML", 1990), ("OCaml", 1996), ("Scala", 2003)]
-- >>> filter "ML" langs ("<", ">") fst IgnoreCase
-- [ Fuzzy
-- { original = ("Standard ML", 1990)
-- , rendered = "standard <m><l>"
-- , score = 4}
-- , Fuzzy
-- { original = ("OCaml", 1996)
-- , rendered = "oca<m><l>"
-- , score = 4
-- }
-- ]
filter
:: (T.TextualMonoid text)
=> CaseSensitivity -- ^ Handle or ignore case of search text
-> (text, text) -- ^ Text to add before and after each match
-> (value -> text) -- ^ Function to extract the text from the container
-> text -- ^ Pattern
-> [value] -- ^ List of values containing the text to search in
-> [Fuzzy value text] -- ^ List of results, sorted, highest score first
filter caseSen (pre, post) extractFunc pattern texts =
sortOn
(Down . score)
(mapMaybe
(\t -> match caseSen (pre, post) extractFunc pattern t)
texts
)


-- | Return all elements of the list that have a fuzzy
-- match against the pattern. Runs with default settings where
-- nothing is added around the matches, as case insensitive.
--
-- >>> simpleFilter "vm" ["vim", "emacs", "virtual machine"]
-- ["vim","virtual machine"]
simpleFilter
:: (T.TextualMonoid text)
=> text -- ^ Pattern to look for.
-> [text] -- ^ List of texts to check.
-> [text] -- ^ The ones that match.
simpleFilter pattern xs =
map
original
(filter IgnoreCase (mempty, mempty) identity pattern xs)


-- | Returns false if the pattern and the text do not match at all.
-- Returns true otherwise.
--
-- >>> test "brd" "bread"
-- True
test
:: (T.TextualMonoid text)
=> text -> text -> Bool
test pattern text =
isJust (match IgnoreCase (mempty, mempty) identity pattern text)
86 changes: 86 additions & 0 deletions huzzy/tests/tests.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
module Main where

import Protolude hiding (from)

import Test.HUnit

import Text.Huzzy as Hu


from :: [Assertion] -> Test
from xs = TestList (map TestCase xs)

tests :: Test
tests = TestList
[
TestLabel "test" $ TestList [
TestLabel "should return true when fuzzy match" $ from [
Hu.test "back" "imaback" @?= True
, Hu.test "back" "bakck" @?= True
, Hu.test "shig" "osh kosh modkhigow" @?= True
, Hu.test "" "osh kosh modkhigow" @?= True
]
, TestLabel "should return false when no fuzzy match" $ from [
Hu.test "back" "abck" @?= False
, Hu.test "okmgk" "osh kosh modkhigow" @?= False
]
]
, TestLabel "match" $ TestList
[ TestLabel
"should return a greater score for consecutive matches of pattern"
$ from [
(>) (Hu.score <$> Hu.match IgnoreCase ("", "") identity "abcd" "zabcd")
(Hu.score <$> Hu.match IgnoreCase ("", "") identity "abcd" "azbcd")
@?= True
]
, TestLabel
"should return the string as is if no pre/post and case sensitive"
$ from [
Hu.rendered <$> Hu.match
HandleCase
("", "")
identity
"ab"
"ZaZbZ"
@?= Just "ZaZbZ"
]
, TestLabel "should return Nothing on no match" $ from [
Hu.match IgnoreCase ("", "") identity "ZEBRA!" "ZaZbZ" @?= Nothing
]
, TestLabel "should be case sensitive is specified" $ from [
Hu.match HandleCase ("", "") identity "hask" "Haskell" @?= Nothing
]
, TestLabel "should be wrap pre and post around matches" $ from [
Hu.rendered <$> Hu.match
HandleCase
("<", ">")
identity
"brd"
"bread"
@?= Just "<b><r>ea<d>"
]
]
, TestLabel "filter" $ TestList [
TestLabel "should return list untouched when given empty pattern" $ from [
map
Hu.original
(Hu.filter HandleCase ("", "") identity "" ["abc", "def"])
@?= ["abc", "def"]
]
, TestLabel "should return the highest score first" $ from [
(@?=) (head (Hu.filter HandleCase ("", "") identity "cb" ["cab", "acb"]) )
(head (Hu.filter HandleCase ("", "") identity "cb" ["acb"]))
]
]
]


runTests :: IO ()
runTests = do
_ <- runTestTT tests
return ()


-- | For now, main will run our tests.
main :: IO ()
main = runTests
1 change: 1 addition & 0 deletions stack.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
resolver: lts-14.27
packages:
# - tasklite-api
- huzzy
- tasklite-app
- tasklite-core
# - tasklite-web
Expand Down
7 changes: 0 additions & 7 deletions stack.yaml.lock
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,6 @@
# https://docs.haskellstack.org/en/stable/lock_files

packages:
- completed:
hackage: fuzzy-0.1.0.0@sha256:123b9581c587ae5422b67bbad7e81186b1ecbf86941a8049f921ddf136f2234e,833
pantry-tree:
size: 261
sha256: 97019c97e95f2a91089acec705243e1fa5b8529ae112658757e0ab3574905f3c
original:
hackage: fuzzy-0.1.0.0
- completed:
hackage: gi-gtk-declarative-0.4.2@sha256:8f6a0c3fe61143b07b857d28ec4354c6e5540971ffbaf39837009e81c288a0bd,3719
pantry-tree:
Expand Down
Loading

0 comments on commit 3c5be4f

Please sign in to comment.