{-# LANGUAGE BangPatterns, EmptyDataDecls, MagicHash, RecordWildCards,
    ScopedTypeVariables #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}

-- |
-- Module      : Data.Text.ICU.Regex
-- Copyright   : (c) 2010 Bryan O'Sullivan
--
-- License     : BSD-style
-- Maintainer  : bos@serpentine.com
-- Stability   : experimental
-- Portability : GHC
--
-- Regular expression support for Unicode, implemented as bindings to
-- the International Components for Unicode (ICU) libraries.
--
-- The syntax and behaviour of ICU regular expressions are Perl-like.
-- For complete details, see the ICU User Guide entry at
-- <http://userguide.icu-project.org/strings/regexp>.
--
-- /Note/: The functions in this module are not thread safe.  For
-- thread safe use, see 'clone' below, or use the pure functions in
-- "Data.Text.ICU".

module Data.Text.ICU.Regex
    (
    -- * Types
      MatchOption(..)
    , ParseError(errError, errLine, errOffset)
    , Regex
    -- * Functions
    -- ** Construction
    , regex
    , regex'
    , clone
    -- ** Managing text to search
    , setText
    , getText
    -- ** Inspection
    , pattern
    -- ** Searching
    , find
    , findNext
    -- ** Match groups
    -- $groups
    , groupCount
    , start
    , end
    , start_
    , end_
    ) where

import Data.Text.ICU.Regex.Internal
import qualified Control.Exception as E
import Data.IORef (newIORef, readIORef, writeIORef)
import Data.Text (Text)
import qualified Data.Text.Foreign as T
import Data.Text.Foreign (I16)
import Data.Text.ICU.Internal (asBool)
import Data.Text.ICU.Error.Internal (ParseError(..), handleError)
import Data.Word (Word16)
import Foreign.ForeignPtr (ForeignPtr, newForeignPtr, withForeignPtr)
import Foreign.Marshal.Alloc (alloca)
import Foreign.Storable (peek)
import System.IO.Unsafe (unsafePerformIO)

instance Show Regex where
    show :: Regex -> String
show Regex
re = String
"Regex " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Text -> String
forall a. Show a => a -> String
show (Regex -> Text
pattern Regex
re)

-- $groups
--
-- Capturing groups are numbered starting from zero.  Group zero is
-- always the entire matching text.  Groups greater than zero contain
-- the text matching each capturing group in a regular expression.

-- | Compile a regular expression with the given options.  This is
-- safest to use when the pattern is constructed at run time.
regex' :: [MatchOption] -> Text -> IO (Either ParseError Regex)
regex' :: [MatchOption] -> Text -> IO (Either ParseError Regex)
regex' [MatchOption]
opts Text
pat = (Regex -> Either ParseError Regex
forall a b. b -> Either a b
Right (Regex -> Either ParseError Regex)
-> IO Regex -> IO (Either ParseError Regex)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
`fmap` [MatchOption] -> Text -> IO Regex
regex [MatchOption]
opts Text
pat) IO (Either ParseError Regex)
-> (ParseError -> IO (Either ParseError Regex))
-> IO (Either ParseError Regex)
forall e a. Exception e => IO a -> (e -> IO a) -> IO a
`E.catch` \(ParseError
err::ParseError) ->
                  Either ParseError Regex -> IO (Either ParseError Regex)
forall (m :: * -> *) a. Monad m => a -> m a
return (ParseError -> Either ParseError Regex
forall a b. a -> Either a b
Left ParseError
err)

-- | Set the subject text string upon which the regular expression
-- will look for matches.  This function may be called any number of
-- times, allowing the regular expression pattern to be applied to
-- different strings.
setText :: Regex -> Text -> IO ()
setText :: Regex -> Text -> IO ()
setText Regex{ForeignPtr URegularExpression
IORef Haystack
reText :: Regex -> IORef Haystack
reRe :: Regex -> ForeignPtr URegularExpression
reText :: IORef Haystack
reRe :: ForeignPtr URegularExpression
..} Text
t = do
  (ForeignPtr Word16
hayfp, I16
hayLen) <- Text -> IO (ForeignPtr Word16, I16)
T.asForeignPtr Text
t
  ForeignPtr URegularExpression
-> (Ptr URegularExpression -> IO ()) -> IO ()
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr URegularExpression
reRe ((Ptr URegularExpression -> IO ()) -> IO ())
-> (Ptr URegularExpression -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \Ptr URegularExpression
rePtr ->
    ForeignPtr Word16 -> (Ptr Word16 -> IO ()) -> IO ()
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr Word16
hayfp ((Ptr Word16 -> IO ()) -> IO ()) -> (Ptr Word16 -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \Ptr Word16
hayPtr -> (Ptr UErrorCode -> IO ()) -> IO ()
forall a. (Ptr UErrorCode -> IO a) -> IO a
handleError ((Ptr UErrorCode -> IO ()) -> IO ())
-> (Ptr UErrorCode -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$
      Ptr URegularExpression
-> Ptr Word16 -> Int32 -> Ptr UErrorCode -> IO ()
uregex_setText Ptr URegularExpression
rePtr Ptr Word16
hayPtr (I16 -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral I16
hayLen)
  IORef Haystack -> Haystack -> IO ()
forall a. IORef a -> a -> IO ()
writeIORef IORef Haystack
reText (Haystack -> IO ()) -> Haystack -> IO ()
forall a b. (a -> b) -> a -> b
$! ForeignPtr Word16 -> I16 -> Haystack
H ForeignPtr Word16
hayfp I16
hayLen

-- | Get the subject text that is currently associated with this
-- regular expression object.
getText :: Regex -> IO (ForeignPtr Word16, I16)
getText :: Regex -> IO (ForeignPtr Word16, I16)
getText Regex{ForeignPtr URegularExpression
IORef Haystack
reText :: IORef Haystack
reRe :: ForeignPtr URegularExpression
reText :: Regex -> IORef Haystack
reRe :: Regex -> ForeignPtr URegularExpression
..} = do
  H ForeignPtr Word16
fp I16
len <- IORef Haystack -> IO Haystack
forall a. IORef a -> IO a
readIORef IORef Haystack
reText
  (ForeignPtr Word16, I16) -> IO (ForeignPtr Word16, I16)
forall (m :: * -> *) a. Monad m => a -> m a
return (ForeignPtr Word16
fp, I16
len)

-- | Return the source form of the pattern used to construct this
-- regular expression or match.
pattern :: Regex -> Text
pattern :: Regex -> Text
pattern Regex{ForeignPtr URegularExpression
IORef Haystack
reText :: IORef Haystack
reRe :: ForeignPtr URegularExpression
reText :: Regex -> IORef Haystack
reRe :: Regex -> ForeignPtr URegularExpression
..} = IO Text -> Text
forall a. IO a -> a
unsafePerformIO (IO Text -> Text)
-> ((Ptr URegularExpression -> IO Text) -> IO Text)
-> (Ptr URegularExpression -> IO Text)
-> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ForeignPtr URegularExpression
-> (Ptr URegularExpression -> IO Text) -> IO Text
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr URegularExpression
reRe ((Ptr URegularExpression -> IO Text) -> Text)
-> (Ptr URegularExpression -> IO Text) -> Text
forall a b. (a -> b) -> a -> b
$ \Ptr URegularExpression
rePtr ->
  (Ptr Int32 -> IO Text) -> IO Text
forall a b. Storable a => (Ptr a -> IO b) -> IO b
alloca ((Ptr Int32 -> IO Text) -> IO Text)
-> (Ptr Int32 -> IO Text) -> IO Text
forall a b. (a -> b) -> a -> b
$ \Ptr Int32
lenPtr -> do
    Ptr Word16
textPtr <- (Ptr UErrorCode -> IO (Ptr Word16)) -> IO (Ptr Word16)
forall a. (Ptr UErrorCode -> IO a) -> IO a
handleError ((Ptr UErrorCode -> IO (Ptr Word16)) -> IO (Ptr Word16))
-> (Ptr UErrorCode -> IO (Ptr Word16)) -> IO (Ptr Word16)
forall a b. (a -> b) -> a -> b
$ Ptr URegularExpression
-> Ptr Int32 -> Ptr UErrorCode -> IO (Ptr Word16)
uregex_pattern Ptr URegularExpression
rePtr Ptr Int32
lenPtr
    (Ptr Word16 -> I16 -> IO Text
T.fromPtr Ptr Word16
textPtr (I16 -> IO Text) -> (Int32 -> I16) -> Int32 -> IO Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int32 -> I16
forall a b. (Integral a, Num b) => a -> b
fromIntegral) (Int32 -> IO Text) -> IO Int32 -> IO Text
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< Ptr Int32 -> IO Int32
forall a. Storable a => Ptr a -> IO a
peek Ptr Int32
lenPtr

-- | Find the first matching substring of the input string that
-- matches the pattern.
--
-- If /n/ is non-negative, the search for a match begins at the
-- specified index, and any match region is reset.
--
-- If /n/ is -1, the search begins at the start of the input region,
-- or at the start of the full string if no region has been specified.
--
-- If a match is found, 'start', 'end', and 'group' will provide more
-- information regarding the match.
find :: Regex -> I16 -> IO Bool
find :: Regex -> I16 -> IO Bool
find Regex{ForeignPtr URegularExpression
IORef Haystack
reText :: IORef Haystack
reRe :: ForeignPtr URegularExpression
reText :: Regex -> IORef Haystack
reRe :: Regex -> ForeignPtr URegularExpression
..} I16
n =
    (UBool -> Bool) -> IO UBool -> IO Bool
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap UBool -> Bool
forall a. Integral a => a -> Bool
asBool (IO UBool -> IO Bool)
-> ((Ptr URegularExpression -> IO UBool) -> IO UBool)
-> (Ptr URegularExpression -> IO UBool)
-> IO Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ForeignPtr URegularExpression
-> (Ptr URegularExpression -> IO UBool) -> IO UBool
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr URegularExpression
reRe ((Ptr URegularExpression -> IO UBool) -> IO Bool)
-> (Ptr URegularExpression -> IO UBool) -> IO Bool
forall a b. (a -> b) -> a -> b
$ \Ptr URegularExpression
rePtr -> (Ptr UErrorCode -> IO UBool) -> IO UBool
forall a. (Ptr UErrorCode -> IO a) -> IO a
handleError ((Ptr UErrorCode -> IO UBool) -> IO UBool)
-> (Ptr UErrorCode -> IO UBool) -> IO UBool
forall a b. (a -> b) -> a -> b
$
    Ptr URegularExpression -> Int32 -> Ptr UErrorCode -> IO UBool
uregex_find Ptr URegularExpression
rePtr (I16 -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral I16
n)

-- | Find the next pattern match in the input string.  Begin searching
-- the input at the location following the end of he previous match,
-- or at the start of the string (or region) if there is no previous
-- match.
--
-- If a match is found, 'start', 'end', and 'group' will provide more
-- information regarding the match.
findNext :: Regex -> IO Bool
findNext :: Regex -> IO Bool
findNext Regex{ForeignPtr URegularExpression
IORef Haystack
reText :: IORef Haystack
reRe :: ForeignPtr URegularExpression
reText :: Regex -> IORef Haystack
reRe :: Regex -> ForeignPtr URegularExpression
..} =
    (UBool -> Bool) -> IO UBool -> IO Bool
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap UBool -> Bool
forall a. Integral a => a -> Bool
asBool (IO UBool -> IO Bool)
-> ((Ptr URegularExpression -> IO UBool) -> IO UBool)
-> (Ptr URegularExpression -> IO UBool)
-> IO Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ForeignPtr URegularExpression
-> (Ptr URegularExpression -> IO UBool) -> IO UBool
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr URegularExpression
reRe ((Ptr URegularExpression -> IO UBool) -> IO Bool)
-> (Ptr URegularExpression -> IO UBool) -> IO Bool
forall a b. (a -> b) -> a -> b
$ (Ptr UErrorCode -> IO UBool) -> IO UBool
forall a. (Ptr UErrorCode -> IO a) -> IO a
handleError ((Ptr UErrorCode -> IO UBool) -> IO UBool)
-> (Ptr URegularExpression -> Ptr UErrorCode -> IO UBool)
-> Ptr URegularExpression
-> IO UBool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Ptr URegularExpression -> Ptr UErrorCode -> IO UBool
uregex_findNext

-- | Make a copy of a compiled regular expression.  Cloning a regular
-- expression is faster than opening a second instance from the source
-- form of the expression, and requires less memory.
--
-- Note that the current input string and the position of any matched
-- text within it are not cloned; only the pattern itself and and the
-- match mode flags are copied.
--
-- Cloning can be particularly useful to threaded applications that
-- perform multiple match operations in parallel.  Each concurrent RE
-- operation requires its own instance of a 'Regex'.
clone :: Regex -> IO Regex
{-# INLINE clone #-}
clone :: Regex -> IO Regex
clone Regex{ForeignPtr URegularExpression
IORef Haystack
reText :: IORef Haystack
reRe :: ForeignPtr URegularExpression
reText :: Regex -> IORef Haystack
reRe :: Regex -> ForeignPtr URegularExpression
..} = do
  ForeignPtr URegularExpression
fp <- FinalizerPtr URegularExpression
-> Ptr URegularExpression -> IO (ForeignPtr URegularExpression)
forall a. FinalizerPtr a -> Ptr a -> IO (ForeignPtr a)
newForeignPtr FinalizerPtr URegularExpression
uregex_close (Ptr URegularExpression -> IO (ForeignPtr URegularExpression))
-> IO (Ptr URegularExpression)
-> IO (ForeignPtr URegularExpression)
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< ForeignPtr URegularExpression
-> (Ptr URegularExpression -> IO (Ptr URegularExpression))
-> IO (Ptr URegularExpression)
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr URegularExpression
reRe ((Ptr UErrorCode -> IO (Ptr URegularExpression))
-> IO (Ptr URegularExpression)
forall a. (Ptr UErrorCode -> IO a) -> IO a
handleError ((Ptr UErrorCode -> IO (Ptr URegularExpression))
 -> IO (Ptr URegularExpression))
-> (Ptr URegularExpression
    -> Ptr UErrorCode -> IO (Ptr URegularExpression))
-> Ptr URegularExpression
-> IO (Ptr URegularExpression)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Ptr URegularExpression
-> Ptr UErrorCode -> IO (Ptr URegularExpression)
uregex_clone)
  ForeignPtr URegularExpression -> IORef Haystack -> Regex
Regex ForeignPtr URegularExpression
fp (IORef Haystack -> Regex) -> IO (IORef Haystack) -> IO Regex
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
`fmap` Haystack -> IO (IORef Haystack)
forall a. a -> IO (IORef a)
newIORef (ForeignPtr Word16 -> I16 -> Haystack
H ForeignPtr Word16
emptyForeignPtr I16
0)

-- | Return the number of capturing groups in this regular
-- expression's pattern.
groupCount :: Regex -> IO Int
groupCount :: Regex -> IO Int
groupCount Regex{ForeignPtr URegularExpression
IORef Haystack
reText :: IORef Haystack
reRe :: ForeignPtr URegularExpression
reText :: Regex -> IORef Haystack
reRe :: Regex -> ForeignPtr URegularExpression
..} =
    (Int32 -> Int) -> IO Int32 -> IO Int
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Int32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (IO Int32 -> IO Int)
-> ((Ptr URegularExpression -> IO Int32) -> IO Int32)
-> (Ptr URegularExpression -> IO Int32)
-> IO Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ForeignPtr URegularExpression
-> (Ptr URegularExpression -> IO Int32) -> IO Int32
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr URegularExpression
reRe ((Ptr URegularExpression -> IO Int32) -> IO Int)
-> (Ptr URegularExpression -> IO Int32) -> IO Int
forall a b. (a -> b) -> a -> b
$ (Ptr UErrorCode -> IO Int32) -> IO Int32
forall a. (Ptr UErrorCode -> IO a) -> IO a
handleError ((Ptr UErrorCode -> IO Int32) -> IO Int32)
-> (Ptr URegularExpression -> Ptr UErrorCode -> IO Int32)
-> Ptr URegularExpression
-> IO Int32
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Ptr URegularExpression -> Ptr UErrorCode -> IO Int32
uregex_groupCount

-- | Returns the index in the input string of the start of the text
-- matched by the specified capture group during the previous match
-- operation.  Returns @-1@ if the capture group was not part of the
-- last match.
start_ :: Regex -> Int -> IO I16
start_ :: Regex -> Int -> IO I16
start_ Regex{ForeignPtr URegularExpression
IORef Haystack
reText :: IORef Haystack
reRe :: ForeignPtr URegularExpression
reText :: Regex -> IORef Haystack
reRe :: Regex -> ForeignPtr URegularExpression
..} Int
n =
    (Int32 -> I16) -> IO Int32 -> IO I16
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Int32 -> I16
forall a b. (Integral a, Num b) => a -> b
fromIntegral (IO Int32 -> IO I16)
-> ((Ptr URegularExpression -> IO Int32) -> IO Int32)
-> (Ptr URegularExpression -> IO Int32)
-> IO I16
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ForeignPtr URegularExpression
-> (Ptr URegularExpression -> IO Int32) -> IO Int32
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr URegularExpression
reRe ((Ptr URegularExpression -> IO Int32) -> IO I16)
-> (Ptr URegularExpression -> IO Int32) -> IO I16
forall a b. (a -> b) -> a -> b
$ \Ptr URegularExpression
rePtr -> (Ptr UErrorCode -> IO Int32) -> IO Int32
forall a. (Ptr UErrorCode -> IO a) -> IO a
handleError ((Ptr UErrorCode -> IO Int32) -> IO Int32)
-> (Ptr UErrorCode -> IO Int32) -> IO Int32
forall a b. (a -> b) -> a -> b
$
    Ptr URegularExpression -> Int32 -> Ptr UErrorCode -> IO Int32
uregex_start Ptr URegularExpression
rePtr (Int -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
n)

-- | Returns the index in the input string of the end of the text
-- matched by the specified capture group during the previous match
-- operation.  Returns @-1@ if the capture group was not part of
-- the last match.
end_ :: Regex -> Int -> IO I16
end_ :: Regex -> Int -> IO I16
end_ Regex{ForeignPtr URegularExpression
IORef Haystack
reText :: IORef Haystack
reRe :: ForeignPtr URegularExpression
reText :: Regex -> IORef Haystack
reRe :: Regex -> ForeignPtr URegularExpression
..} Int
n =
  (Int32 -> I16) -> IO Int32 -> IO I16
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Int32 -> I16
forall a b. (Integral a, Num b) => a -> b
fromIntegral (IO Int32 -> IO I16)
-> ((Ptr URegularExpression -> IO Int32) -> IO Int32)
-> (Ptr URegularExpression -> IO Int32)
-> IO I16
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ForeignPtr URegularExpression
-> (Ptr URegularExpression -> IO Int32) -> IO Int32
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr URegularExpression
reRe ((Ptr URegularExpression -> IO Int32) -> IO I16)
-> (Ptr URegularExpression -> IO Int32) -> IO I16
forall a b. (a -> b) -> a -> b
$ \Ptr URegularExpression
rePtr -> (Ptr UErrorCode -> IO Int32) -> IO Int32
forall a. (Ptr UErrorCode -> IO a) -> IO a
handleError ((Ptr UErrorCode -> IO Int32) -> IO Int32)
-> (Ptr UErrorCode -> IO Int32) -> IO Int32
forall a b. (a -> b) -> a -> b
$
  Ptr URegularExpression -> Int32 -> Ptr UErrorCode -> IO Int32
uregex_end Ptr URegularExpression
rePtr (Int -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
n)

-- | Returns the index in the input string of the start of the text
-- matched by the specified capture group during the previous match
-- operation.  Returns 'Nothing' if the capture group was not part of
-- the last match.
start :: Regex -> Int -> IO (Maybe I16)
start :: Regex -> Int -> IO (Maybe I16)
start Regex
r Int
n = I16 -> Maybe I16
check (I16 -> Maybe I16) -> IO I16 -> IO (Maybe I16)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
`fmap` Regex -> Int -> IO I16
start_ Regex
r Int
n

-- | Returns the index in the input string of the end of the text
-- matched by the specified capture group during the previous match
-- operation.  Returns 'Nothing' if the capture group was not part of
-- the last match.
end :: Regex -> Int -> IO (Maybe I16)
end :: Regex -> Int -> IO (Maybe I16)
end Regex
r Int
n = I16 -> Maybe I16
check (I16 -> Maybe I16) -> IO I16 -> IO (Maybe I16)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
`fmap` Regex -> Int -> IO I16
end_ Regex
r Int
n

check :: I16 -> Maybe I16
check :: I16 -> Maybe I16
check (-1) = Maybe I16
forall a. Maybe a
Nothing
check I16
k    = I16 -> Maybe I16
forall a. a -> Maybe a
Just (I16 -> Maybe I16) -> I16 -> Maybe I16
forall a b. (a -> b) -> a -> b
$! I16 -> I16
forall a b. (Integral a, Num b) => a -> b
fromIntegral I16
k