#if __GLASGOW_HASKELL__ < 710
#endif
module Text.Regex.PCRE.Heavy (
(=~)
, (≈)
, scan
, scanO
, scanRanges
, scanRangesO
, sub
, subO
, gsub
, gsubO
, split
, splitO
, re
, mkRegexQQ
, Regex
, PCREOption
, PCRE.compileM
, rawMatch
, rawSub
) where
import Prelude.Compat
import Language.Haskell.TH hiding (match)
import Language.Haskell.TH.Quote
import Language.Haskell.TH.Syntax
import qualified Text.Regex.PCRE.Light as PCRE
import Text.Regex.PCRE.Light.Base
import Data.Maybe (isJust, fromMaybe)
import Data.List (unfoldr, mapAccumL)
import qualified Data.List.NonEmpty as NE
import Data.String.Conversions
import Data.String.Conversions.Monomorphic
import qualified Data.ByteString.Char8 as BS
import qualified Data.ByteString.Internal as BS
import System.IO.Unsafe (unsafePerformIO)
import Foreign (withForeignPtr, allocaBytes, nullPtr, plusPtr, peekElemOff)
substr ∷ SBS → (Int, Int) → SBS
substr s (f, t) = BS.take (t f) . BS.drop f $ s
behead ∷ NE.NonEmpty a → (a, [a])
behead l = (NE.head l, NE.tail l)
reMatch ∷ (ConvertibleStrings SBS a, ConvertibleStrings a SBS) ⇒ Regex → a → Bool
reMatch r s = isJust $ PCRE.match r (cs s) []
(=~), (≈) ∷ (ConvertibleStrings SBS a, ConvertibleStrings a SBS) ⇒ a → Regex → Bool
(=~) = flip reMatch
(≈) = (=~)
rawMatch ∷ Regex → SBS → Int → [PCREExecOption] → Maybe [(Int, Int)]
rawMatch r@(Regex pcreFp _) s offset opts = unsafePerformIO $ do
withForeignPtr pcreFp $ \pcrePtr → do
let nCapt = PCRE.captureCount r
ovecSize = (nCapt + 1) * 3
ovecBytes = ovecSize * size_of_cint
allocaBytes ovecBytes $ \ovec → do
let (strFp, off, len) = BS.toForeignPtr s
withForeignPtr strFp $ \strPtr → do
results ← c_pcre_exec pcrePtr nullPtr (strPtr `plusPtr` off) (fromIntegral len) (fromIntegral offset)
(combineExecOptions opts) ovec (fromIntegral ovecSize)
if results < 0 then return Nothing
else
let loop n o acc =
if n == results then return $ Just $ reverse acc
else do
i ← peekElemOff ovec $! o
j ← peekElemOff ovec (o + 1)
loop (n + 1) (o + 2) ((fromIntegral i, fromIntegral j) : acc)
in loop 0 0 []
nextMatch ∷ Regex → [PCREExecOption] → SBS → Int → Maybe (NE.NonEmpty (Int, Int), Int)
nextMatch r opts str offset =
rawMatch r str offset opts >>= NE.nonEmpty >>= \ms → return (ms, maximum $ fmap snd ms)
scan ∷ (ConvertibleStrings SBS a, ConvertibleStrings a SBS) ⇒ Regex → a → [(a, [a])]
scan r s = scanO r [] s
scanO ∷ (ConvertibleStrings SBS a, ConvertibleStrings a SBS) ⇒ Regex → [PCREExecOption] → a → [(a, [a])]
scanO r opts s = map behead $ fmap (cs . substr str) <$> unfoldr (nextMatch r opts str) 0
where str = toSBS s
scanRanges ∷ (ConvertibleStrings SBS a, ConvertibleStrings a SBS) ⇒ Regex → a → [((Int, Int), [(Int, Int)])]
scanRanges r s = scanRangesO r [] s
scanRangesO ∷ (ConvertibleStrings SBS a, ConvertibleStrings a SBS) ⇒ Regex → [PCREExecOption] → a → [((Int, Int), [(Int, Int)])]
scanRangesO r opts s = map behead $ unfoldr (nextMatch r opts str) 0
where str = toSBS s
class RegexReplacement a where
performReplacement ∷ SBS → [SBS] → a → SBS
instance ConvertibleStrings a SBS ⇒ RegexReplacement a where
performReplacement _ _ to = cs to
instance (ConvertibleStrings SBS a, ConvertibleStrings a SBS) ⇒ RegexReplacement (a → [a] → a) where
performReplacement from groups replacer = cs $ replacer (cs from) (map cs groups)
instance (ConvertibleStrings SBS a, ConvertibleStrings a SBS) ⇒ RegexReplacement (a → a) where
performReplacement from _ replacer = cs $ replacer (cs from)
instance (ConvertibleStrings SBS a, ConvertibleStrings a SBS) ⇒ RegexReplacement ([a] → a) where
performReplacement _ groups replacer = cs $ replacer (map cs groups)
rawSub ∷ RegexReplacement r ⇒ Regex → r → SBS → Int → [PCREExecOption] → Maybe (SBS, Int)
rawSub r t s offset opts =
case rawMatch r s offset opts of
Just ((begin, end):groups) →
let replacement = performReplacement (substr s (begin, end)) (map (substr s) groups) t in
Just (BS.concat [ substr s (0, begin)
, replacement
, substr s (end, BS.length s)], begin + BS.length replacement)
_ → Nothing
sub ∷ (ConvertibleStrings SBS a, ConvertibleStrings a SBS, RegexReplacement r) ⇒ Regex → r → a → a
sub r t s = subO r [] t s
subO ∷ (ConvertibleStrings SBS a, ConvertibleStrings a SBS, RegexReplacement r) ⇒ Regex → [PCREExecOption] → r → a → a
subO r opts t s = fromMaybe s $ cs <$> fst <$> rawSub r t (cs s) 0 opts
gsub ∷ (ConvertibleStrings SBS a, ConvertibleStrings a SBS, RegexReplacement r) ⇒ Regex → r → a → a
gsub r t s = gsubO r [] t s
gsubO ∷ (ConvertibleStrings SBS a, ConvertibleStrings a SBS, RegexReplacement r) ⇒ Regex → [PCREExecOption] → r → a → a
gsubO r opts t s = cs $ loop 0 str
where str = toSBS s
loop offset acc
| offset >= l = acc
| otherwise = case rawSub r t acc offset opts of
Just (result, newOffset) →
if newOffset == offset && l == BS.length result
then acc
else loop newOffset result
_ → acc
where l = BS.length acc
split ∷ (ConvertibleStrings SBS a, ConvertibleStrings a SBS) ⇒ Regex → a → [a]
split r s = splitO r [] s
splitO ∷ (ConvertibleStrings SBS a, ConvertibleStrings a SBS) ⇒ Regex → [PCREExecOption] → a → [a]
splitO r opts s = map cs $ map' (substr str) partRanges
where map' f = foldr ((:) . f) [f (lastL, BS.length str)]
(lastL, partRanges) = mapAccumL invRange 0 ranges
invRange acc (xl, xr) = (xr, (acc, xl))
ranges = map fst $ scanRangesO r opts str
str = toSBS s
instance Lift PCREOption where
lift o = let o' = show o in [| read o' ∷ PCREOption |]
quoteExpRegex ∷ [PCREOption] → String → ExpQ
quoteExpRegex opts txt = [| PCRE.compile (cs (txt ∷ String)) opts |]
where !_ = PCRE.compile (cs txt) opts
mkRegexQQ ∷ [PCREOption] → QuasiQuoter
mkRegexQQ opts = QuasiQuoter
{ quoteExp = quoteExpRegex opts
, quotePat = undefined
, quoteType = undefined
, quoteDec = undefined }
re ∷ QuasiQuoter
re = mkRegexQQ [utf8]