|
| 1 | +{-| |
| 2 | +Module : Text.Regex.TDFA.Text.Lazy |
| 3 | +Copyright : Chris Kuklewicz 2007-2009, shelarcy 2012 |
| 4 | +License : BSD-style (see the file LICENSE) |
| 5 | +
|
| 6 | +Maintainer : shelarcy <shelarcy@gmail.com> |
| 7 | +Stability : experimental |
| 8 | +Portability : GHC (uses text) |
| 9 | +
|
| 10 | +This module provides 'RegexMaker' and 'RegexLike' instances for using |
| 11 | +'Text' with the TDFA backend ("Text.Regex.TDFA.NewDFA.Engine" and |
| 12 | +"Text.Regex.TDFA.NewDFA.Tester"). |
| 13 | +
|
| 14 | +This exports instances of the high level API and the medium level |
| 15 | +API of 'compile','execute', and 'regexec'. |
| 16 | +-} |
| 17 | +module Text.Regex.TDFA.Text.Lazy( |
| 18 | + Regex |
| 19 | + ,CompOption |
| 20 | + ,ExecOption |
| 21 | + ,compile |
| 22 | + ,execute |
| 23 | + ,regexec |
| 24 | + ) where |
| 25 | + |
| 26 | +import Data.Array.IArray(Array,(!),elems,amap) |
| 27 | +import qualified Data.Text.Lazy as L(Text,empty,take,drop,uncons,unpack) |
| 28 | + |
| 29 | +import Text.Regex.Base(MatchArray,RegexContext(..),Extract(..),RegexMaker(..),RegexLike(..)) |
| 30 | +import Text.Regex.Base.Impl(polymatch,polymatchM) |
| 31 | +import Text.Regex.TDFA.ReadRegex(parseRegex) |
| 32 | +import Text.Regex.TDFA.String() -- piggyback on RegexMaker for String |
| 33 | +import Text.Regex.TDFA.TDFA(patternToRegex) |
| 34 | +import Text.Regex.TDFA.Common(Regex(..),CompOption,ExecOption(captureGroups),Position) |
| 35 | + |
| 36 | +import Data.Maybe(listToMaybe) |
| 37 | +import Text.Regex.TDFA.NewDFA.Uncons(Uncons(uncons)) |
| 38 | +import qualified Text.Regex.TDFA.NewDFA.Engine as Engine(execMatch) |
| 39 | +import qualified Text.Regex.TDFA.NewDFA.Tester as Tester(matchTest) |
| 40 | + |
-- | Slicing primitives for lazy 'L.Text', addressed with 'Int' counts.
-- Each 'Int' is converted with 'toEnum' before being handed to 'L.take'
-- or 'L.drop'.
instance Extract L.Text where
  empty = L.empty
  before count = L.take (toEnum count)
  after count = L.drop (toEnum count)
| 43 | + |
-- | Route the polymorphic 'match' and 'matchM' entry points for a lazy
-- 'L.Text' source and target through the generic 'polymatch' and
-- 'polymatchM' helpers.
instance RegexContext Regex L.Text L.Text where
  match regex input = polymatch regex input
  matchM regex input = polymatchM regex input
| 47 | + |
-- | Let the TDFA engine consume lazy 'L.Text' one character at a time by
-- delegating to 'L.uncons'.
instance Uncons L.Text where
  -- The pragma needs the opening @{-#@; written as a plain @{-@ it is an
  -- ordinary block comment and the compiler never sees the INLINE request.
  {-# INLINE uncons #-}
  uncons = L.uncons
| 51 | + |
-- | Build a 'Regex' from a lazy 'L.Text' pattern by unpacking it to a
-- 'String' and reusing the 'String' instance of 'RegexMaker'.
instance RegexMaker Regex CompOption ExecOption L.Text where
  -- Defined explicitly rather than left to the class default, so that a
  -- malformed pattern is reported by the 'String' instance itself.
  -- NOTE(review): in regex-base the default 'makeRegexOpts' appears to run
  -- 'makeRegexOptsM' at 'Maybe', which would discard the failure message;
  -- confirm against the regex-base version in use.
  makeRegexOpts c e source = makeRegexOpts c e (L.unpack source)
  -- Monadic variant: failures are reported through the caller's monad by
  -- the 'String' instance.
  makeRegexOptsM c e source = makeRegexOptsM c e (L.unpack source)
| 54 | + |
{-# SPECIALIZE execMatch :: Regex -> Position -> Char -> L.Text -> [MatchArray] #-}
-- | Local alias for 'Engine.execMatch', declared here so that the
-- SPECIALIZE pragma above can request a copy specialised to lazy 'L.Text'.
-- All four arguments are passed through unchanged.
execMatch :: Uncons text => Regex -> Position -> Char -> text -> [MatchArray]
execMatch regex pos ch input = Engine.execMatch regex pos ch input
| 58 | + |
{-# SPECIALIZE myMatchTest :: Regex -> L.Text -> Bool #-}
-- | Local alias for 'Tester.matchTest', declared here so that the
-- SPECIALIZE pragma above can request a copy specialised to lazy 'L.Text'.
myMatchTest :: Uncons text => Regex -> text -> Bool
myMatchTest regex input = Tester.matchTest regex input
| 62 | + |
-- | Matching operations for lazy 'L.Text', built on the locally
-- specialised 'execMatch' and 'myMatchTest'.
instance RegexLike Regex L.Text where
  -- The first result of 'matchAll', if there is one.
  matchOnce regex input = listToMaybe (matchAll regex input)

  -- Run the engine with position 0 and '\n' as its 'Char' argument.
  matchAll regex input = execMatch regex 0 '\n' input

  -- Count the matches using a copy of the regex whose exec options have
  -- 'captureGroups' switched off.
  matchCount regex input = length (matchAll countingRegex input)
    where
      opts = regex_execOptions regex
      countingRegex = regex { regex_execOptions = opts { captureGroups = False } }

  matchTest regex input = myMatchTest regex input

  -- Split the source around the first match: the text before it, the match
  -- array with each (offset, length) pair paired with its extracted text,
  -- and the text after it.
  matchOnceText regex source = fmap split (matchOnce regex source)
    where
      split ma = (before start source, fmap withText ma, after (start + size) source)
        where
          (start, size) = ma ! 0
      withText range = (extract range source, range)

  -- Pair every (offset, length) entry of every match with its text.
  -- 'consumed' is how many characters of the original source have already
  -- been dropped from 'remaining', so absolute offsets are rebased by
  -- subtracting it before extraction.
  matchAllText regex source = walk 0 source (matchAll regex source)
    where
      walk :: Int -> L.Text -> [Array Int (Int, Int)] -> [Array Int (L.Text, (Int, Int))]
      walk consumed remaining matches =
        -- Force the running offset on every step, including the final one.
        consumed `seq` case matches of
          [] -> []
          (m : ms) ->
            let (start, size) = m ! 0
                label range@(off, len) = (extract (off - consumed, len) remaining, range)
                remaining' = after (start + (size - consumed)) remaining
            in fmap label m : seq remaining' (walk (start + size) remaining' ms)
| 86 | + |
-- | Parse a lazy 'L.Text' pattern with 'parseRegex' and turn the result
-- into a 'Regex' with 'patternToRegex'.
--
-- When parsing fails the result is 'Left' with a message that embeds the
-- shown parser error.
compile :: CompOption -- ^ Flags (summed together)
        -> ExecOption -- ^ Flags (summed together)
        -> L.Text -- ^ The regular expression to compile
        -> Either String Regex -- ^ Returns: the compiled regular expression
compile compOpt execOpt txt =
  either (Left . describe) (Right . build) (parseRegex (L.unpack txt))
  where
    describe err = "parseRegex for Text.Regex.TDFA.Text.Lazy failed:" ++ show err
    build parsed = patternToRegex parsed compOpt execOpt
| 95 | + |
-- | Look for the first match of the regex in the text.
--
-- The result is always 'Right': it wraps whatever 'matchOnce' returns.
execute :: Regex -- ^ Compiled regular expression
        -> L.Text -- ^ Text to match against
        -> Either String (Maybe MatchArray)
execute r = Right . matchOnce r
| 100 | + |
-- | Match once and split the input around the match.
--
-- On a match the result holds, in order: the text before the match, the
-- matched text (entry 0 of the match array), the text after the match, and
-- the texts of all remaining entries of the match array.
-- Yields @Right Nothing@ when there is no match; this function never
-- produces 'Left'.
regexec :: Regex -- ^ Compiled regular expression
        -> L.Text -- ^ Text to match against
        -> Either String (Maybe (L.Text, L.Text, L.Text, [L.Text]))
regexec r txt =
  case matchOnceText r txt of
    Nothing -> Right Nothing
    Just (pre, mt, post) ->
      let whole = fst (mt ! 0)
          -- 'drop 1' rather than the partial 'tail': identical on a
          -- non-empty list, and it cannot throw on an empty one.
          groups = map fst (drop 1 (elems mt))
      in Right (Just (pre, whole, post, groups))
0 commit comments