{-# LANGUAGE ScopedTypeVariables #-}
-- | Regular expression helpers built on "Text.Regex.TDFA".
--
-- Exports string-typed regexps and replacement patterns, match tests, and
-- replace-all functions. Each operation comes in a case-sensitive form and a
-- case-insensitive (@CI@) form; the @Memo@ replace variants additionally wrap
-- the resulting replacer function in 'memo'.
module Hledger.Utils.Regex (
Regexp
,Replacement
,regexMatches
,regexMatchesCI
,regexReplace
,regexReplaceCI
,regexReplaceMemo
,regexReplaceCIMemo
,regexReplaceBy
,regexReplaceByCI
)
where
import Data.Array
import Data.Char
import Data.List (foldl')
import Data.MemoUgly (memo)
import Text.Regex.TDFA (
Regex, CompOption(..), ExecOption(..), defaultCompOpt, defaultExecOpt,
makeRegexOpts, AllMatches(getAllMatches), match, (=~), MatchText
)
import Hledger.Utils.UTF8IOCompat (error')
-- | A regular expression pattern in string form. It is compiled to a
-- 'Regex' by 'toRegex' or 'toRegexCI' before use.
type Regexp = String
-- | A replacement pattern in string form. Numeric backreferences (a
-- backslash followed by digits) in it are expanded by 'replaceMatch' to the
-- text of the corresponding match group.
type Replacement = String
-- | Compile a pattern to a case-sensitive 'Regex' using 'compOpt' and
-- 'execOpt'. The compiler function is wrapped in 'memo'.
-- NOTE(review): this is deliberately a zero-argument binding, so presumably
-- one memo table is shared by all callers; keep it in this form unless that
-- is confirmed not to matter.
toRegex :: Regexp -> Regex
toRegex = memo (makeRegexOpts compOpt execOpt)
-- | Like 'toRegex', but compiles with @caseSensitive=False@, giving a
-- case-insensitive 'Regex'. The compiler function is wrapped in 'memo'.
-- NOTE(review): kept as a zero-argument binding, presumably so that one memo
-- table is shared by all callers.
toRegexCI :: Regexp -> Regex
toRegexCI = memo (makeRegexOpts compOpt{caseSensitive=False} execOpt)
-- | Compile-time options used by 'toRegex' (and, with case sensitivity
-- switched off, by 'toRegexCI'): the library defaults.
compOpt :: CompOption
compOpt = defaultCompOpt
-- | Execution-time options used by 'toRegex' and 'toRegexCI': the library
-- defaults.
execOpt :: ExecOption
execOpt = defaultExecOpt
-- | Test whether the (case-sensitive) regexp matches the string.
--
-- The pattern is compiled through the memoised 'toRegex', exactly as
-- 'regexMatchesCI' does with 'toRegexCI', instead of being recompiled by
-- '=~' on every call. 'toRegex' uses the same default compile and execution
-- options that '=~' does, so the match results are unchanged.
regexMatches :: Regexp -> String -> Bool
regexMatches r = match (toRegex r)
-- | Test whether the regexp matches the string, ignoring case.
-- The pattern is compiled with 'toRegexCI'.
regexMatchesCI :: Regexp -> String -> Bool
regexMatchesCI = match . toRegexCI
-- | Replace every occurrence of the (case-sensitive) regexp in a string
-- with the result of applying the given function to the matched text.
regexReplaceBy :: Regexp -> (String -> String) -> String -> String
regexReplaceBy = replaceAllBy . toRegex
-- | Case-insensitive counterpart of 'regexReplaceBy': replace every
-- occurrence of the regexp with the given function applied to the matched
-- text.
regexReplaceByCI :: Regexp -> (String -> String) -> String -> String
regexReplaceByCI = replaceAllBy . toRegexCI
-- | Replace every occurrence of the (case-sensitive) regexp in a string
-- with the replacement pattern. Numeric backreferences in the replacement
-- are expanded to the corresponding match groups (see 'replaceMatch').
regexReplace :: Regexp -> Replacement -> String -> String
regexReplace = replaceRegex . toRegex
-- | Case-insensitive counterpart of 'regexReplace': replace every
-- occurrence of the regexp with the replacement pattern, expanding numeric
-- backreferences.
regexReplaceCI :: Regexp -> Replacement -> String -> String
regexReplaceCI = replaceRegex . toRegexCI
-- | Like 'regexReplace', but the @String -> String@ replacer obtained for a
-- given regexp and replacement is wrapped in 'memo'.
-- NOTE(review): 'memo' is applied after the first two arguments, so the memo
-- table presumably belongs to each partial application
-- @regexReplaceMemo re repl@; callers likely need to share that partial
-- application to benefit. Confirm against "Data.MemoUgly".
regexReplaceMemo :: Regexp -> Replacement -> String -> String
regexReplaceMemo re repl = memo (regexReplace re repl)
-- | Like 'regexReplaceCI', but the @String -> String@ replacer obtained for
-- a given regexp and replacement is wrapped in 'memo'.
-- NOTE(review): 'memo' is applied after the first two arguments, so the memo
-- table presumably belongs to each partial application
-- @regexReplaceCIMemo re repl@; callers likely need to share that partial
-- application to benefit. Confirm against "Data.MemoUgly".
regexReplaceCIMemo :: Regexp -> Replacement -> String -> String
regexReplaceCIMemo re repl = memo (regexReplaceCI re repl)
-- | Replace every match of the compiled regex in the string with the
-- replacement pattern, expanding backreferences via 'replaceMatch'.
--
-- Matches are found once against the original string and then applied from
-- the last to the first, so the offsets of the not-yet-processed matches
-- (which lie further left) stay valid while the string is being rewritten.
-- A strict left fold is used so no chain of suspended replacements builds up.
replaceRegex :: Regex -> Replacement -> String -> String
replaceRegex re repl s = foldl' (replaceMatch repl) s (reverse matches)
  where
    -- every match, each with its groups' text, offset and length
    matches = match re s :: [MatchText String]
-- | Replace a single match within the string.
--
-- The span to replace is taken from the first element of the match array
-- (conventionally the whole match). The text put in its place is the
-- replacement pattern with every backslash-plus-digits backreference
-- expanded by 'replaceBackReference'.
replaceMatch :: Replacement -> String -> MatchText String -> String
replaceMatch replpat s matchgroups = before ++ expanded ++ after
  where
    -- offset and length of the span being replaced
    ((_, (start, size)) : _) = elems matchgroups
    (before, fromMatch) = splitAt start s
    after = drop size fromMatch
    -- the replacement pattern with its backreferences filled in
    expanded = replaceAllBy backrefRegex (replaceBackReference matchgroups) replpat
    backrefRegex = toRegex "\\\\[0-9]+"
-- | Expand one numeric backreference (a backslash followed by one or more
-- digits) to the text captured by the match group with that number.
--
-- Calls 'error'' if the match has no group with that number, or if the
-- argument is not a backslash followed by digits.
replaceBackReference :: MatchText String -> String -> String
replaceBackReference grps ('\\':s@(_:_))
  | all isDigit s =
      -- Parse as Integer rather than Int: an over-long digit string would
      -- otherwise wrap around silently and could select an unrelated group.
      -- The guard above guarantees that 'read' cannot fail here.
      let n = read s :: Integer
          (lo, hi) = bounds grps
      in if toInteger lo <= n && n <= toInteger hi
           then fst (grps ! fromInteger n)
           else error' $ "no match group exists for backreference \"\\"++s++"\""
replaceBackReference _ s = error' $ "replaceBackReference called on non-numeric-backreference \""++s++"\", shouldn't happen"
-- | Replace every match of the compiled regex in the string with the result
-- of applying the function to the matched text.
--
-- A single left-to-right pass over the (offset, length) pairs of all
-- matches. The fold state is: the position in the original string consumed
-- so far, the still-unread remainder, and a function that prepends the
-- output built so far. After the last match that function is applied to the
-- unread remainder.
replaceAllBy :: Regex -> (String -> String) -> String -> String
replaceAllBy re f s = emitAll leftover
  where
    spans = getAllMatches (match re s) :: [(Int, Int)]
    (_, leftover, emitAll) = foldl' step (0, s, id) spans
    step (pos, unread, emit) (off, len) =
      let (gap, fromMatch) = splitAt (off - pos) unread   -- text before this match
          (hit, unread')   = splitAt len fromMatch        -- the matched text itself
      in (off + len, unread', emit . (gap ++) . (f hit ++))