{-# OPTIONS_GHC -fno-warn-orphans #-}
module Text.Regex.PCRE.Text
(
Regex
, MatchOffset
, MatchLength
, CompOption(CompOption)
, ExecOption(ExecOption)
, ReturnCode
, WrapError
, unusedOffset
, getVersion
, compile
, execute
, regexec
, compBlank
, compAnchored
, compAutoCallout
, compCaseless
, compDollarEndOnly
, compDotAll
, compExtended
, compExtra
, compFirstLine
, compMultiline
, compNoAutoCapture
, compUngreedy
, compUTF8
, compNoUTF8Check
, execBlank
, execAnchored
, execNotBOL
, execNotEOL
, execNotEmpty
, execNoUTF8Check
, execPartial
) where
import Data.Array(Array,listArray)
import Data.Char(ord)
import Control.Monad.Fail (fail)
import Prelude hiding (fail)
import qualified Data.ByteString as B
import qualified Data.ByteString.Unsafe as B
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import Foreign.C.String(CStringLen,CString)
import Foreign(nullPtr)
import System.IO.Unsafe (unsafePerformIO)
import Text.Regex.Base.Impl
import Text.Regex.Base.RegexLike
import Text.Regex.PCRE.Wrap
instance RegexContext Regex T.Text T.Text where
match = polymatch
matchM = polymatchM
instance RegexMaker Regex CompOption ExecOption T.Text where
makeRegexOpts c e pat = unsafePerformIO $
compile c e pat >>= unwrap
makeRegexOptsM c e pat = either (fail.show) return $ unsafePerformIO $
compile c e pat
instance RegexLike Regex T.Text where
matchTest re tx = unsafePerformIO $
asCStringLen tx (wrapTest 0 re) >>= unwrap
matchOnce re tx = unsafePerformIO $
execute re tx >>= unwrap
matchAll re tx = unsafePerformIO $
asCStringLen tx (wrapMatchAll re) >>= unwrap
matchCount re tx = unsafePerformIO $
asCStringLen tx (wrapCount re) >>= unwrap
compile :: CompOption
-> ExecOption
-> T.Text
-> IO (Either (MatchOffset,String) Regex)
compile c e pat =
asCString pat $ wrapCompile c e
execute :: Regex
-> T.Text
-> IO (Either WrapError (Maybe (Array Int (MatchOffset,MatchLength))))
execute re tx = do
maybeStartEnd <- asCStringLen tx (wrapMatch 0 re)
case maybeStartEnd of
Right Nothing -> return $ Right Nothing
Right (Just parts) ->
return $ Right $ Just $ listArray (0,pred $ length parts)
[ (s,e-s) | (s,e) <- parts ]
Left err -> return $ Left err
regexec :: Regex
-> T.Text
-> IO (Either WrapError (Maybe (T.Text, T.Text, T.Text, [T.Text])))
regexec re tx = do
mb <- asCStringLen tx $ wrapMatch 0 re
case mb of
Right Nothing -> return (Right Nothing)
Right (Just parts) -> return . Right . Just . matchedParts $ parts
Left err -> return (Left err)
where
matchedParts [] = (T.empty,T.empty,tx,[])
matchedParts (mtchd@(start,stop):rst) =
( T.take start tx
, getSub mtchd
, T.drop stop tx
, map getSub rst
)
getSub (start,stop)
| start == unusedOffset = T.empty
| otherwise = T.take (stop-start) . T.drop start $ tx
unwrap :: (Show e) => Either e v -> IO v
unwrap x = case x of
Left e -> fail $ "Text.Regex.PCRE.Text died: " ++ show e
Right v -> return v
{-# INLINE asCString #-}
asCString :: T.Text -> (CString->IO a) -> IO a
asCString t
| T.null t || (ord (T.last t) /= 0) = B.useAsCString $ T.encodeUtf8 t
| otherwise = B.unsafeUseAsCString $ T.encodeUtf8 t
{-# INLINE asCStringLen #-}
asCStringLen :: T.Text -> (CStringLen->IO a) -> IO a
asCStringLen s op = B.unsafeUseAsCStringLen (T.encodeUtf8 s) checked
where
checked cs@(ptr,_)
| ptr == nullPtr = B.unsafeUseAsCStringLen myEmpty $ op . trim
| otherwise = op cs
trim (ptr,_) = (ptr,0)
myEmpty :: B.ByteString
myEmpty = B.pack [0]