{-# OPTIONS_GHC -fno-warn-orphans #-}
module Text.Regex.PCRE.ByteString(
Regex,
MatchOffset,
MatchLength,
CompOption(CompOption),
ExecOption(ExecOption),
ReturnCode,
WrapError,
unusedOffset,
getVersion,
compile,
execute,
regexec,
compBlank,
compAnchored,
compAutoCallout,
compCaseless,
compDollarEndOnly,
compDotAll,
compExtended,
compExtra,
compFirstLine,
compMultiline,
compNoAutoCapture,
compUngreedy,
compUTF8,
compNoUTF8Check,
execBlank,
execAnchored,
execNotBOL,
execNotEOL,
execNotEmpty,
execNoUTF8Check,
execPartial
) where
import Prelude hiding (fail)
import Control.Monad.Fail (MonadFail(fail))
import Text.Regex.PCRE.Wrap
import Data.Array(Array,listArray)
import Data.ByteString(ByteString)
import qualified Data.ByteString as B(empty,useAsCString,last,take,drop,null,pack)
import qualified Data.ByteString.Unsafe as B(unsafeUseAsCString,unsafeUseAsCStringLen)
import System.IO.Unsafe(unsafePerformIO)
import Text.Regex.Base.RegexLike(RegexContext(..),RegexMaker(..),RegexLike(..),MatchOffset,MatchLength)
import Text.Regex.Base.Impl(polymatch,polymatchM)
import Foreign.C.String(CStringLen)
import Foreign(nullPtr)
instance RegexContext Regex ByteString ByteString where
match = polymatch
matchM = polymatchM
unwrap :: (Show e) => Either e v -> IO v
unwrap x = case x of Left err -> fail ("Text.Regex.PCRE.ByteString died: "++ show err)
Right v -> return v
{-# INLINE asCStringLen #-}
asCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
asCStringLen s op = B.unsafeUseAsCStringLen s checked
where checked cs@(ptr,_) | ptr == nullPtr = B.unsafeUseAsCStringLen myEmpty (op . trim)
| otherwise = op cs
myEmpty = B.pack [0]
trim (ptr,_) = (ptr,0)
instance RegexMaker Regex CompOption ExecOption ByteString where
makeRegexOpts c e pattern = unsafePerformIO $
compile c e pattern >>= unwrap
makeRegexOptsM c e pattern = either (fail.show) return $ unsafePerformIO $
compile c e pattern
instance RegexLike Regex ByteString where
matchTest regex bs = unsafePerformIO $
asCStringLen bs (wrapTest 0 regex) >>= unwrap
matchOnce regex bs = unsafePerformIO $
execute regex bs >>= unwrap
matchAll regex bs = unsafePerformIO $
asCStringLen bs (wrapMatchAll regex) >>= unwrap
matchCount regex bs = unsafePerformIO $
asCStringLen bs (wrapCount regex) >>= unwrap
compile :: CompOption
-> ExecOption
-> ByteString
-> IO (Either (MatchOffset,String) Regex)
compile c e pattern = do
let asCString bs = if (not (B.null bs)) && (0==B.last bs)
then B.unsafeUseAsCString bs
else B.useAsCString bs
asCString pattern (wrapCompile c e)
execute :: Regex
-> ByteString
-> IO (Either WrapError (Maybe (Array Int (MatchOffset,MatchLength))))
execute regex bs = do
maybeStartEnd <- asCStringLen bs (wrapMatch 0 regex)
case maybeStartEnd of
Right Nothing -> return (Right Nothing)
Right (Just parts) ->
return . Right . Just . listArray (0,pred (length parts))
. map (\(s,e)->(fromIntegral s, fromIntegral (e-s))) $ parts
Left err -> return (Left err)
regexec :: Regex
-> ByteString
-> IO (Either WrapError (Maybe (ByteString, ByteString, ByteString, [ByteString])))
regexec regex bs = do
let getSub (start,stop) | start == unusedOffset = B.empty
| otherwise = B.take (stop-start) . B.drop start $ bs
matchedParts [] = (B.empty,B.empty,bs,[])
matchedParts (matchedStartStop@(start,stop):subStartStop) =
(B.take start bs
,getSub matchedStartStop
,B.drop stop bs
,map getSub subStartStop)
maybeStartEnd <- asCStringLen bs (wrapMatch 0 regex)
case maybeStartEnd of
Right Nothing -> return (Right Nothing)
Right (Just parts) -> return . Right . Just . matchedParts $ parts
Left err -> return (Left err)