{-# OPTIONS_GHC -optc-D__GLASGOW_HASKELL__=606 #-} {-# OPTIONS_GHC -optc-DHAVE_PCRE_H #-} {-# INCLUDE #-} {-# INCLUDE #-} {-# LINE 1 "Text/Regex/PCRE/Wrap.hsc" #-} {-# OPTIONS_GHC -fglasgow-exts -fffi #-} {-# LINE 2 "Text/Regex/PCRE/Wrap.hsc" #-} -- | This will fail or error only if allocation fails or a nullPtr is passed in. -- TODO :: Consider wrapMatchAll using list of start/end offsets and not MatchArray -- -- The exported symbols are the same whether HAVE_PCRE_H is defined, -- but when if it is not defined then @getVersion == Nothing@ and all -- other exported values will call error or fail. module Text.Regex.PCRE.Wrap( -- ** High-level interface Regex, CompOption(CompOption), ExecOption(ExecOption), (=~), (=~~), -- ** Low-level interface StartOffset, EndOffset, ReturnCode(ReturnCode), WrapError, wrapCompile, wrapTest, wrapMatch, wrapMatchAll, wrapCount, -- ** Miscellaneous getVersion, configUTF8, getNumSubs, unusedOffset, -- ** CompOption values compBlank, compAnchored, compAutoCallout, compCaseless, compDollarEndOnly, compDotAll, compExtended, compExtra, compFirstLine, compMultiline, compNoAutoCapture, compUngreedy, compUTF8, compNoUTF8Check, -- ** ExecOption values execBlank, execAnchored, execNotBOL, execNotEOL, execNotEmpty, execNoUTF8Check, execPartial, -- ** ReturnCode values retOk, retNoMatch, retNull, retBadOption, retBadMagic, retUnknownNode, retNoMemory, retNoSubstring ) where {-# LINE 70 "Text/Regex/PCRE/Wrap.hsc" #-} import Control.Monad(when) import Data.Array(Array,accumArray) import Data.Bits(Bits((.|.))) -- ((.&.),(.|.),complement)) import Foreign(unsafePerformIO ,Ptr,ForeignPtr,FinalizerPtr -- ,FunPtr ,alloca,allocaBytes,nullPtr ,peek,peekElemOff ,newForeignPtr,withForeignPtr) import Foreign.C(CInt,CChar) import Foreign.C.String(CString,CStringLen,peekCString) import Text.Regex.Base.RegexLike(RegexOptions(..),RegexMaker(..),RegexContext(..),MatchArray,MatchOffset) {-# LINE 89 "Text/Regex/PCRE/Wrap.hsc" #-} -- | return version of pcre used or Nothing if pcre is not available. getVersion :: Maybe String type PCRE = () type StartOffset = MatchOffset type EndOffset = MatchOffset type WrapError = (ReturnCode,String) newtype CompOption = CompOption CInt deriving (Eq,Show,Num,Bits) newtype ExecOption = ExecOption CInt deriving (Eq,Show,Num,Bits) newtype ReturnCode = ReturnCode CInt deriving (Eq,Show) -- | A compiled regular expression data Regex = Regex (ForeignPtr PCRE) CompOption ExecOption compBlank :: CompOption execBlank :: ExecOption unusedOffset :: MatchOffset retOk :: ReturnCode wrapCompile :: CompOption -- ^ Flags (summed together) -> ExecOption -- ^ Flags (summed together) -> CString -- ^ The regular expression to compile -> IO (Either (MatchOffset,String) Regex) -- ^ Returns: an error offset and string or the compiled regular expression wrapTest :: StartOffset -- ^ Starting index in CStringLen -> Regex -- ^ Compiled regular expression -> CStringLen -- ^ String to match against and length in bytes -> IO (Either WrapError Bool) wrapMatch :: StartOffset -- ^ Starting index in CStringLen -> Regex -- ^ Compiled regular expression -> CStringLen -- ^ String to match against and length in bytes -> IO (Either WrapError (Maybe [(StartOffset,EndOffset)])) -- ^ Returns: 'Right Nothing' if the regex did not match the -- string, or: -- 'Right Just' an array of (offset,length) pairs where index 0 is whole match, and the rest are the captured subexpressions, or: -- 'Left ReturnCode' if there is some strange error wrapMatchAll :: Regex -> CStringLen -> IO (Either WrapError [ MatchArray ]) wrapCount :: Regex -> CStringLen -> IO (Either WrapError Int) getNumSubs :: Regex -> Int configUTF8 :: Bool (=~) :: (RegexMaker Regex CompOption ExecOption source,RegexContext Regex source1 target) => source1 -> source -> target (=~~) :: (RegexMaker Regex CompOption ExecOption source,RegexContext Regex source1 target,Monad m) => source1 -> source -> m target {-# LINE 139 "Text/Regex/PCRE/Wrap.hsc" #-} {-# LINE 140 "Text/Regex/PCRE/Wrap.hsc" #-} {-# LINE 141 "Text/Regex/PCRE/Wrap.hsc" #-} instance RegexOptions Regex CompOption ExecOption where blankCompOpt = compBlank blankExecOpt = execBlank defaultCompOpt = compMultiline defaultExecOpt = execBlank setExecOpts e' (Regex r c _) = Regex r c e' getExecOpts (Regex _ _ e) = e -- (=~) :: (RegexMaker Regex CompOption ExecOption source,RegexContext Regex source1 target) => source1 -> source -> target (=~) x r = let q :: Regex q = makeRegex r in match q x -- (=~~) ::(RegexMaker Regex CompOption ExecOption source,RegexContext Regex source1 target,Monad m) => source1 -> source -> m target (=~~) x r = let q :: Regex q = makeRegex r in matchM q x type PCRE_Extra = () fi :: (Integral i,Num n ) => i -> n fi x = fromIntegral x compBlank = CompOption 0 execBlank = ExecOption 0 unusedOffset = (-1) retOk = ReturnCode 0 retNeededMoreSpace :: ReturnCode retNeededMoreSpace = ReturnCode 0 newtype InfoWhat = InfoWhat CInt deriving (Eq,Show) newtype ConfigWhat = ConfigWhat CInt deriving (Eq,Show) nullTest' :: Ptr a -> String -> IO (Either (MatchOffset,String) b) -> IO (Either (MatchOffset,String) b) {-# INLINE nullTest' #-} nullTest' ptr msg io = do if nullPtr == ptr then return (Left (0,"Ptr parameter was nullPtr in Text.Regex.PCRE.Wrap."++msg)) else io nullTest :: Ptr a -> String -> IO (Either WrapError b) -> IO (Either WrapError b) {-# INLINE nullTest #-} nullTest ptr msg io = do if nullPtr == ptr then return (Left (retOk,"Ptr parameter was nullPtr in Text.Regex.PCRE.Wrap."++msg)) else io wrapRC :: ReturnCode -> IO (Either WrapError b) {-# INLINE wrapRC #-} wrapRC r = return (Left (r,"Error in Text.Regex.PCRE.Wrap: "++show r)) -- | Compiles a regular expression wrapCompile flags e pattern = do nullTest' pattern "wrapCompile pattern" $ do alloca $ \errOffset -> alloca $ \errPtr -> do nullTest' errPtr "wrapCompile errPtr" $ do pcre_ptr <- c_pcre_compile pattern flags errPtr errOffset nullPtr if pcre_ptr == nullPtr then do -- No need to use c_ptr_free in the error case (e.g. pcredemo.c) offset <- peek errOffset string <- peekCString =<< peek errPtr return (Left (fi offset,string)) else do regex <- newForeignPtr c_ptr_free pcre_ptr return . Right $ Regex regex flags e getNumSubs (Regex pcre_fptr _ _) = fi . unsafePerformIO $ withForeignPtr pcre_fptr getNumSubs' getNumSubs' :: Ptr PCRE -> IO CInt {-# INLINE getNumSubs' #-} getNumSubs' pcre_ptr = alloca $ \st -> do -- (st :: Ptr CInt) when (st == nullPtr) (fail "Text.Regex.PCRE.Wrap.getNumSubs' could not allocate a CInt!!!") c_pcre_fullinfo pcre_ptr nullPtr pcreInfoCapturecount st peek st wrapTest startOffset (Regex pcre_fptr _ flags) (cstr,len) = do nullTest cstr "wrapTest cstr" $ do withForeignPtr pcre_fptr $ \pcre_ptr -> do r@(ReturnCode r') <- c_pcre_exec pcre_ptr nullPtr cstr (fi len) (fi startOffset) flags nullPtr 0 if r == retNoMatch then return (Right False) else if r' < 0 then wrapRC r else return (Right True) -- | Matches a regular expression against a string -- -- Should never return (Right (Just [])) wrapMatch startOffset (Regex pcre_fptr _ flags) (cstr,len) = do nullTest cstr "wrapMatch cstr" $ do withForeignPtr pcre_fptr $ \pcre_ptr -> do nsub <- getNumSubs' pcre_ptr let nsub_int :: Int nsub_int = fi nsub ovec_size :: CInt ovec_size = ((nsub + 1) * 3) -- "man pcreapi" for explanation ovec_bytes :: Int ovec_bytes = (fi ovec_size) * (4) {-# LINE 242 "Text/Regex/PCRE/Wrap.hsc" #-} allocaBytes ovec_bytes $ \ovec -> do nullTest ovec "wrapMatch ovec" $ do r@(ReturnCode r') <- c_pcre_exec pcre_ptr nullPtr cstr (fi len) (fi startOffset) flags ovec ovec_size if r == retNoMatch then return (Right Nothing) else if r' < 0 then wrapRC r else do let pairsSet :: Int pairsSet = if r == retNeededMoreSpace -- if r == ReturnCode 0 then nsub_int + 1 -- should not happen else fi r' -- implies pairsSet > 0 extraPairs :: [(Int,Int)] extraPairs = replicate (nsub_int + 1 - pairsSet) (unusedOffset,unusedOffset) pairs <- return . toPairs =<< mapM (peekElemOff ovec) [0 .. ((pairsSet*2)-1)] return . Right . Just $ (pairs ++ extraPairs) -- | wrapMatchAll is an improvement over wrapMatch since it only -- allocates memory with allocaBytes once at the start. -- -- wrapMatchAll (Regex pcre_fptr _ flags) (cstr,len) = do nullTest cstr "wrapMatchAll cstr" $ do withForeignPtr pcre_fptr $ \regex -> do nsub <- getNumSubs' regex let nsub_int :: Int nsub_int = fi nsub ovec_size :: CInt ovec_size = ((nsub + 1) * 3) -- "man pcreapi" for explanation ovec_bytes :: Int ovec_bytes = (fi ovec_size) * (4) {-# LINE 274 "Text/Regex/PCRE/Wrap.hsc" #-} clen = fi len flags' = (execNotEmpty .|. execAnchored .|. flags) allocaBytes ovec_bytes $ \ovec -> nullTest ovec "wrapMatchAll ovec" $ let loop acc flags_in_use pos = do r@(ReturnCode r') <- c_pcre_exec regex nullPtr cstr clen (fi pos) flags_in_use ovec ovec_size if r == retNoMatch then return (Right (acc [])) else if r' < 0 then wrapRC r else do let pairsSet = if r == retNeededMoreSpace then nsub_int+1 else fi r' pairs <- return . toPairs =<< mapM (peekElemOff ovec) [0 .. ((pairsSet*2)-1)] let acc' = acc . (toMatchArray nsub_int pairs:) case pairs of [] -> return (Right (acc' [])) ((s,e):_) | s==e -> if s == len then return (Right (acc' [])) else loop acc' flags' e | otherwise -> loop acc' flags e in loop id flags 0 toMatchArray :: Int -> [(Int,Int)] -> Array Int (Int,Int) toMatchArray n pairs = accumArray (\_ (s,e) -> (s,(e-s))) (-1,0) (0,n) (zip [0..] pairs) toPairs :: [CInt] -> [(Int,Int)] toPairs [] = [] toPairs (a:b:rest) = (fi a,fi b):toPairs rest toPairs [_] = error "Should not have just one element in WrapPCRE.wrapMatchAll.toPairs" wrapCount (Regex pcre_fptr _ flags) (cstr,len) = do nullTest cstr "wrapCount cstr" $ do withForeignPtr pcre_fptr $ \pcre_ptr -> do nsub <- getNumSubs' pcre_ptr let ovec_size :: CInt ovec_size = ((nsub + 1) * 3) -- "man pcreapi" for explanation ovec_bytes :: Int ovec_bytes = (fi ovec_size) * (4) {-# LINE 311 "Text/Regex/PCRE/Wrap.hsc" #-} clen = fi len allocaBytes ovec_bytes $ \ovec -> nullTest ovec "wrapCount ovec" $ let act pos = c_pcre_exec pcre_ptr nullPtr cstr clen (fi pos) flags ovec ovec_size loop acc pos | acc `seq` pos `seq` False = undefined | otherwise = do r@(ReturnCode r') <- act pos if r == retNoMatch then return (Right acc) else if r' < 0 then wrapRC r else do pairs <- return . toPairs =<< mapM (peekElemOff ovec) [0,1] case pairs of [] -> return (Right (succ acc)) ((s,e):_) | s==e -> return (Right (succ acc)) | otherwise -> loop (succ acc) e in loop 0 0 getVersion = unsafePerformIO $ do version <- c_pcre_version if version == nullPtr then return (Just "pcre_version was null") else return . Just =<< peekCString version configUTF8 = unsafePerformIO $ alloca $ \ptrVal -> do -- (ptrVal :: Ptr CInt) when (ptrVal == nullPtr) (fail "Text.Regex.PCRE.Wrap.configUTF8 could not alloca CInt!!!") c_pcre_config pcreConfigUtf8 ptrVal val <- peek ptrVal case val of (1 :: CInt) -> return True 0 -> return False _ -> return False -- should not happen foreign import ccall unsafe "pcre.h pcre_compile" c_pcre_compile :: CString -> CompOption -> Ptr CString -> Ptr CInt -> CString -> IO (Ptr PCRE) foreign import ccall unsafe "&free" c_ptr_free :: FinalizerPtr a -- FunPtr (Ptr a -> IO ()) foreign import ccall unsafe "pcre.h pcre_exec" c_pcre_exec :: Ptr PCRE -> Ptr PCRE_Extra -> CString -> CInt -> CInt -> ExecOption -> Ptr CInt -> CInt -> IO ReturnCode foreign import ccall unsafe "pcre.h pcre_fullinfo" c_pcre_fullinfo :: Ptr PCRE -> Ptr PCRE_Extra -> InfoWhat -> Ptr a -> IO CInt foreign import ccall unsafe "pcre.h pcre_version" c_pcre_version :: IO (Ptr CChar) foreign import ccall unsafe "pcre.h pcre_config" c_pcre_config :: ConfigWhat -> Ptr a -> IO CInt compAnchored :: CompOption compAnchored = CompOption 16 compAutoCallout :: CompOption compAutoCallout = CompOption 16384 compCaseless :: CompOption compCaseless = CompOption 1 compDollarEndOnly :: CompOption compDollarEndOnly = CompOption 32 compDotAll :: CompOption compDotAll = CompOption 4 compExtended :: CompOption compExtended = CompOption 8 compExtra :: CompOption compExtra = CompOption 64 compFirstLine :: CompOption compFirstLine = CompOption 262144 compMultiline :: CompOption compMultiline = CompOption 2 compNoAutoCapture :: CompOption compNoAutoCapture = CompOption 4096 compUngreedy :: CompOption compUngreedy = CompOption 512 compUTF8 :: CompOption compUTF8 = CompOption 2048 compNoUTF8Check :: CompOption compNoUTF8Check = CompOption 8192 {-# LINE 374 "Text/Regex/PCRE/Wrap.hsc" #-} execAnchored :: ExecOption execAnchored = ExecOption 16 execNotBOL :: ExecOption execNotBOL = ExecOption 128 execNotEOL :: ExecOption execNotEOL = ExecOption 256 execNotEmpty :: ExecOption execNotEmpty = ExecOption 1024 execNoUTF8Check :: ExecOption execNoUTF8Check = ExecOption 8192 execPartial :: ExecOption execPartial = ExecOption 32768 {-# LINE 382 "Text/Regex/PCRE/Wrap.hsc" #-} retNoMatch :: ReturnCode retNoMatch = ReturnCode (-1) retNull :: ReturnCode retNull = ReturnCode (-2) retBadOption :: ReturnCode retBadOption = ReturnCode (-3) retBadMagic :: ReturnCode retBadMagic = ReturnCode (-4) retUnknownNode :: ReturnCode retUnknownNode = ReturnCode (-5) retNoMemory :: ReturnCode retNoMemory = ReturnCode (-6) retNoSubstring :: ReturnCode retNoSubstring = ReturnCode (-7) {-# LINE 391 "Text/Regex/PCRE/Wrap.hsc" #-} -- Comment out most of these to avoid unused binding warnings -- PCRE_INFO_FIRSTCHAR is deprecated, use PCRE_INFO_FIRSTBYTE instead. pcreInfoCapturecount :: InfoWhat pcreInfoCapturecount = InfoWhat 2 {-# LINE 397 "Text/Regex/PCRE/Wrap.hsc" #-} {- PCRE_INFO_BACKREFMAX, \ PCRE_INFO_DEFAULT_TABLES, \ PCRE_INFO_FIRSTBYTE, \ PCRE_INFO_FIRSTCHAR, \ PCRE_INFO_FIRSTTABLE, \ PCRE_INFO_LASTLITERAL, \ PCRE_INFO_NAMECOUNT, \ PCRE_INFO_NAMEENTRYSIZE, \ PCRE_INFO_NAMETABLE, \ PCRE_INFO_OPTIONS, \ PCRE_INFO_SIZE, \ PCRE_INFO_STUDYSIZE -} pcreConfigUtf8 :: ConfigWhat pcreConfigUtf8 = ConfigWhat 0 {-# LINE 413 "Text/Regex/PCRE/Wrap.hsc" #-} {- PCRE_CONFIG_UNICODE_PROPERTIES, \ PCRE_CONFIG_NEWLINE, \ PCRE_CONFIG_LINK_SIZE, \ PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, \ PCRE_CONFIG_MATCH_LIMIT, \ PCRE_CONFIG_MATCH_LIMIT_RECURSION, \ PCRE_CONFIG_STACKRECURSE -} {-# LINE 489 "Text/Regex/PCRE/Wrap.hsc" #-}