module Text.Regex.PCRE.Heavy (
(=~)
, (≈)
, scan
, scanO
, scanRanges
, scanRangesO
, sub
, subO
, gsub
, gsubO
, split
, splitO
, re
, mkRegexQQ
, Regex
, PCREOption
, PCRE.compileM
, rawMatch
, rawSub
) where
import Language.Haskell.TH hiding (match)
import Language.Haskell.TH.Quote
import Language.Haskell.TH.Syntax
import qualified Text.Regex.PCRE.Light as PCRE
import Text.Regex.PCRE.Light.Base
import Control.Applicative ((<$>))
import Data.Maybe (isJust, fromMaybe)
import Data.List (unfoldr, mapAccumL)
import Data.Stringable
import qualified Data.ByteString.Char8 as BS
import qualified Data.ByteString.Internal as BS
import System.IO.Unsafe (unsafePerformIO)
import Foreign
substr ∷ BS.ByteString → (Int, Int) → BS.ByteString
substr s (f, t) = BS.take (t f) . BS.drop f $ s
behead ∷ [a] → (a, [a])
behead (h:t) = (h, t)
behead [] = error "no head to behead"
reMatch ∷ Stringable a ⇒ Regex → a → Bool
reMatch r s = isJust $ PCRE.match r (toByteString s) []
(=~) ∷ Stringable a ⇒ a → Regex → Bool
(=~) = flip reMatch
(≈) ∷ Stringable a ⇒ a → Regex → Bool
(≈) = (=~)
rawMatch ∷ Regex → BS.ByteString → Int → [PCREExecOption] → Maybe [(Int, Int)]
rawMatch r@(Regex pcreFp _) s offset opts = unsafePerformIO $ do
withForeignPtr pcreFp $ \pcrePtr → do
let nCapt = PCRE.captureCount r
ovecSize = (nCapt + 1) * 3
ovecBytes = ovecSize * size_of_cint
allocaBytes ovecBytes $ \ovec → do
let (strFp, off, len) = BS.toForeignPtr s
withForeignPtr strFp $ \strPtr → do
results ← c_pcre_exec pcrePtr nullPtr (strPtr `plusPtr` off) (fromIntegral len) (fromIntegral offset)
(combineExecOptions opts) ovec (fromIntegral ovecSize)
if results < 0 then return Nothing
else
let loop n o acc =
if n == results then return $ Just $ reverse acc
else do
i ← peekElemOff ovec $! o
j ← peekElemOff ovec (o + 1)
loop (n + 1) (o + 2) ((fromIntegral i, fromIntegral j) : acc)
in loop 0 0 []
nextMatch ∷ Regex → [PCREExecOption] → BS.ByteString → Int → Maybe ([(Int, Int)], Int)
nextMatch r opts str offset =
case rawMatch r str offset opts of
Nothing → Nothing
Just [] → Nothing
Just ms → Just (ms, maximum $ map snd ms)
scan ∷ (Stringable a) ⇒ Regex → a → [(a, [a])]
scan r s = scanO r [] s
scanO ∷ (Stringable a) ⇒ Regex → [PCREExecOption] → a → [(a, [a])]
scanO r opts s = map behead $ map (fromByteString . substr str) <$> unfoldr (nextMatch r opts str) 0
where str = toByteString s
scanRanges ∷ (Stringable a) ⇒ Regex → a → [((Int, Int), [(Int, Int)])]
scanRanges r s = scanRangesO r [] s
scanRangesO ∷ Stringable a ⇒ Regex → [PCREExecOption] → a → [((Int, Int), [(Int, Int)])]
scanRangesO r opts s = map behead $ unfoldr (nextMatch r opts str) 0
where str = toByteString s
class RegexReplacement a where
performReplacement ∷ BS.ByteString → [BS.ByteString] → a → BS.ByteString
instance Stringable a ⇒ RegexReplacement a where
performReplacement _ _ to = toByteString to
instance Stringable a ⇒ RegexReplacement (a → [a] → a) where
performReplacement from groups replacer = toByteString $ replacer (fromByteString from) (map fromByteString groups)
instance Stringable a ⇒ RegexReplacement (a → a) where
performReplacement from _ replacer = toByteString $ replacer (fromByteString from)
instance Stringable a ⇒ RegexReplacement ([a] → a) where
performReplacement _ groups replacer = toByteString $ replacer (map fromByteString groups)
rawSub ∷ RegexReplacement r ⇒ Regex → r → BS.ByteString → Int → [PCREExecOption] → Maybe (BS.ByteString, Int)
rawSub r t s offset opts =
case rawMatch r s offset opts of
Just ((begin, end):groups) →
Just (BS.concat [ substr s (0, begin)
, performReplacement (substr s (begin, end)) (map (substr s) groups) t
, substr s (end, BS.length s)], end)
_ → Nothing
sub ∷ (Stringable a, RegexReplacement r) ⇒ Regex → r → a → a
sub r t s = subO r [] t s
subO ∷ (Stringable a, RegexReplacement r) ⇒ Regex → [PCREExecOption] → r → a → a
subO r opts t s = fromMaybe s $ fromByteString <$> fst <$> rawSub r t (toByteString s) 0 opts
gsub ∷ (Stringable a, RegexReplacement r) ⇒ Regex → r → a → a
gsub r t s = gsubO r [] t s
gsubO ∷ (Stringable a, RegexReplacement r) ⇒ Regex → [PCREExecOption] → r → a → a
gsubO r opts t s = fromByteString $ loop 0 str
where str = toByteString s
loop offset acc =
case rawSub r t acc offset opts of
Just (result, newOffset) →
if newOffset == offset then acc else loop newOffset result
_ → acc
split ∷ Stringable a ⇒ Regex → a → [a]
split r s = splitO r [] s
splitO ∷ Stringable a ⇒ Regex → [PCREExecOption] → a → [a]
splitO r opts s = map fromByteString $ map' (substr str) partRanges
where map' f = foldr ((:) . f) [f (lastL, BS.length str)]
(lastL, partRanges) = mapAccumL invRange 0 ranges
invRange acc (xl, xr) = (xr, (acc, xl))
ranges = map fst $ scanRangesO r opts str
str = toByteString s
instance Lift PCREOption where
lift o = let o' = show o in [| read o' ∷ PCREOption |]
quoteExpRegex ∷ [PCREOption] → String → ExpQ
quoteExpRegex opts txt = [| PCRE.compile (toByteString (txt ∷ String)) opts |]
where !_ = PCRE.compile (toByteString txt) opts
mkRegexQQ ∷ [PCREOption] → QuasiQuoter
mkRegexQQ opts = QuasiQuoter
{ quoteExp = quoteExpRegex opts
, quotePat = undefined
, quoteType = undefined
, quoteDec = undefined }
re ∷ QuasiQuoter
re = mkRegexQQ [utf8]