{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeSynonymInstances #-}
module Hledger.Utils.Regex (
Regexp(reString)
,toRegex
,toRegexCI
,toRegex'
,toRegexCI'
,Replacement
,RegexError
,regexMatch
,regexReplace
,regexReplaceUnmemo
,regexReplaceAllBy
)
where
import Control.Monad (foldM)
import Data.Aeson (ToJSON(..), Value(String))
import Data.Array ((!), elems, indices)
import Data.Char (isDigit)
import Data.List (foldl')
import Data.MemoUgly (memo)
import qualified Data.Text as T
import Text.Regex.TDFA (
Regex, CompOption(..), defaultCompOpt, defaultExecOpt,
makeRegexOptsM, AllMatches(getAllMatches), match, MatchText,
RegexLike(..), RegexMaker(..), RegexOptions(..), RegexContext(..)
)
import Hledger.Utils.UTF8IOCompat (error')
data Regexp
= Regexp { reString :: String, reCompiled :: Regex }
| RegexpCI { reString :: String, reCompiled :: Regex }
instance Eq Regexp where
Regexp s1 _ == Regexp s2 _ = s1 == s2
RegexpCI s1 _ == RegexpCI s2 _ = s1 == s2
_ == _ = False
instance Ord Regexp where
Regexp s1 _ `compare` Regexp s2 _ = s1 `compare` s2
RegexpCI s1 _ `compare` RegexpCI s2 _ = s1 `compare` s2
Regexp _ _ `compare` RegexpCI _ _ = LT
RegexpCI _ _ `compare` Regexp _ _ = GT
instance Show Regexp where
showsPrec d r = showParen (d > app_prec) $ reCons . showsPrec (app_prec+1) (reString r)
where app_prec = 10
reCons = case r of Regexp _ _ -> showString "Regexp "
RegexpCI _ _ -> showString "RegexpCI "
instance Read Regexp where
readsPrec d r = readParen (d > app_prec) (\r -> [(toRegexCI' m,t) |
("RegexCI",s) <- lex r,
(m,t) <- readsPrec (app_prec+1) s]) r
++ readParen (d > app_prec) (\r -> [(toRegex' m, t) |
("Regex",s) <- lex r,
(m,t) <- readsPrec (app_prec+1) s]) r
where app_prec = 10
instance ToJSON Regexp where
toJSON (Regexp s _) = String . T.pack $ "Regexp " ++ s
toJSON (RegexpCI s _) = String . T.pack $ "RegexpCI " ++ s
instance RegexLike Regexp String where
matchOnce = matchOnce . reCompiled
matchAll = matchAll . reCompiled
matchCount = matchCount . reCompiled
matchTest = matchTest . reCompiled
matchAllText = matchAllText . reCompiled
matchOnceText = matchOnceText . reCompiled
instance RegexContext Regexp String String where
match = match . reCompiled
matchM = matchM . reCompiled
toRegex :: String -> Either RegexError Regexp
toRegex = memo $ \s -> mkRegexErr s (Regexp s <$> makeRegexM s)
toRegexCI :: String -> Either RegexError Regexp
toRegexCI = memo $ \s -> mkRegexErr s (RegexpCI s <$> makeRegexOptsM defaultCompOpt{caseSensitive=False} defaultExecOpt s)
mkRegexErr :: String -> Maybe a -> Either RegexError a
mkRegexErr s = maybe (Left errmsg) Right
where errmsg = "this regular expression could not be compiled: " ++ s
toRegex' :: String -> Regexp
toRegex' = either error' id . toRegex
toRegexCI' :: String -> Regexp
toRegexCI' = either error' id . toRegexCI
type Replacement = String
type RegexError = String
regexMatch :: Regexp -> String -> Bool
regexMatch = matchTest
regexReplace :: Regexp -> Replacement -> String -> Either RegexError String
regexReplace re repl = memo $ regexReplaceUnmemo re repl
regexReplaceUnmemo :: Regexp -> Replacement -> String -> Either RegexError String
regexReplaceUnmemo re repl s = foldM (replaceMatch repl) s (reverse $ match (reCompiled re) s :: [MatchText String])
where
replaceMatch :: Replacement -> String -> MatchText String -> Either RegexError String
replaceMatch replpat s matchgroups =
erepl >>= \repl -> Right $ pre ++ repl ++ post
where
((_,(off,len)):_) = elems matchgroups
(pre, post') = splitAt off s
post = drop len post'
erepl = regexReplaceAllByM backrefRegex (lookupMatchGroup matchgroups) replpat
where
lookupMatchGroup :: MatchText String -> String -> Either RegexError String
lookupMatchGroup grps ('\\':s@(_:_)) | all isDigit s =
case read s of n | n `elem` indices grps -> Right $ fst (grps ! n)
_ -> Left $ "no match group exists for backreference \"\\"++s++"\""
lookupMatchGroup _ s = Left $ "lookupMatchGroup called on non-numeric-backreference \""++s++"\", shouldn't happen"
backrefRegex = toRegex' "\\\\[0-9]+"
regexReplaceAllBy :: Regexp -> (String -> String) -> String -> String
regexReplaceAllBy re transform s = prependdone rest
where
(_, rest, prependdone) = foldl' go (0, s, id) matches
where
matches = getAllMatches $ match (reCompiled re) s :: [(Int, Int)]
go :: (Int,String,String->String) -> (Int,Int) -> (Int,String,String->String)
go (pos,todo,prepend) (off,len) =
let (prematch, matchandrest) = splitAt (off - pos) todo
(matched, rest) = splitAt len matchandrest
in (off + len, rest, prepend . (prematch++) . (transform matched ++))
regexReplaceAllByM :: forall m. Monad m => Regexp -> (String -> m String) -> String -> m String
regexReplaceAllByM re transform s =
foldM go (0, s, id) matches >>= \(_, rest, prependdone) -> pure $ prependdone rest
where
matches = getAllMatches $ match (reCompiled re) s :: [(Int, Int)]
go :: (Int,String,String->String) -> (Int,Int) -> m (Int,String,String->String)
go (pos,todo,prepend) (off,len) =
let (prematch, matchandrest) = splitAt (off - pos) todo
(matched, rest) = splitAt len matchandrest
in transform matched >>= \matched' -> pure (off + len, rest, prepend . (prematch++) . (matched' ++))