{-# LANGUAGE BangPatterns, EmptyDataDecls, MagicHash, RecordWildCards,
    ScopedTypeVariables #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}

-- |
-- Module      : Data.Text.ICU.Regex
-- Copyright   : (c) 2010 Bryan O'Sullivan
--
-- License     : BSD-style
-- Maintainer  : bos@serpentine.com
-- Stability   : experimental
-- Portability : GHC
--
-- Regular expression support for Unicode, implemented as bindings to
-- the International Components for Unicode (ICU) libraries.
--
-- The syntax and behaviour of ICU regular expressions are Perl-like.
-- For complete details, see the ICU User Guide entry at
-- <http://userguide.icu-project.org/strings/regexp>.
--
-- /Note/: The functions in this module are not thread safe.  For
-- thread safe use, see 'clone' below, or use the pure functions in
-- "Data.Text.ICU".

module Data.Text.ICU.Regex
    (
    -- * Types
      MatchOption(..)
    , ParseError(errError, errLine, errOffset)
    , Regex
    -- * Functions
    -- ** Construction
    , regex
    , regex'
    , clone
    -- ** Managing text to search
    , setText
    , getUTextPtr
    -- ** Inspection
    , pattern
    -- ** Searching
    , find
    , findNext
    -- ** Match groups
    -- $groups
    , groupCount
    , start
    , end
    , start_
    , end_
    ) where

import Data.Text.ICU.Regex.Internal
import qualified Control.Exception as E
import Data.IORef (newIORef, readIORef, writeIORef)
import Data.Text (Text)
import Data.Text.ICU.Internal (asBool, UTextPtr, asUTextPtr, emptyUTextPtr, TextI, withUTextPtr, fromUCharPtr, newICUPtr)
import Data.Text.ICU.Error.Internal (ParseError(..), handleError)
import Foreign.ForeignPtr (withForeignPtr)
import Foreign.Marshal.Alloc (alloca)
import Foreign.Storable (peek)
import System.IO.Unsafe (unsafePerformIO)

instance Show Regex where
    show :: Regex -> String
show Regex
re = String
"Regex " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Text -> String
forall a. Show a => a -> String
show (Regex -> Text
pattern Regex
re)

-- $groups
--
-- Capturing groups are numbered starting from zero.  Group zero is
-- always the entire matching text.  Groups greater than zero contain
-- the text matching each capturing group in a regular expression.

-- | Compile a regular expression with the given options.  This is
-- safest to use when the pattern is constructed at run time.
regex' :: [MatchOption] -> Text -> IO (Either ParseError Regex)
regex' :: [MatchOption] -> Text -> IO (Either ParseError Regex)
regex' [MatchOption]
opts Text
pat = (Regex -> Either ParseError Regex
forall a b. b -> Either a b
Right (Regex -> Either ParseError Regex)
-> IO Regex -> IO (Either ParseError Regex)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
`fmap` [MatchOption] -> Text -> IO Regex
regex [MatchOption]
opts Text
pat) IO (Either ParseError Regex)
-> (ParseError -> IO (Either ParseError Regex))
-> IO (Either ParseError Regex)
forall e a. Exception e => IO a -> (e -> IO a) -> IO a
`E.catch` \(ParseError
err::ParseError) ->
                  Either ParseError Regex -> IO (Either ParseError Regex)
forall (m :: * -> *) a. Monad m => a -> m a
return (ParseError -> Either ParseError Regex
forall a b. a -> Either a b
Left ParseError
err)

-- | Set the subject text string upon which the regular expression
-- will look for matches.  This function may be called any number of
-- times, allowing the regular expression pattern to be applied to
-- different strings.
setText :: Regex -> Text -> IO ()
setText :: Regex -> Text -> IO ()
setText Regex{ForeignPtr URegularExpression
IORef UTextPtr
reText :: Regex -> IORef UTextPtr
reRe :: Regex -> ForeignPtr URegularExpression
reText :: IORef UTextPtr
reRe :: ForeignPtr URegularExpression
..} Text
t = do
  UTextPtr
hayfp <- Text -> IO UTextPtr
asUTextPtr Text
t
  ForeignPtr URegularExpression
-> (Ptr URegularExpression -> IO ()) -> IO ()
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr URegularExpression
reRe ((Ptr URegularExpression -> IO ()) -> IO ())
-> (Ptr URegularExpression -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \Ptr URegularExpression
rePtr ->
    UTextPtr -> (Ptr UText -> IO ()) -> IO ()
forall a. UTextPtr -> (Ptr UText -> IO a) -> IO a
withUTextPtr UTextPtr
hayfp ((Ptr UText -> IO ()) -> IO ()) -> (Ptr UText -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \Ptr UText
hayPtr -> (Ptr UErrorCode -> IO ()) -> IO ()
forall a. (Ptr UErrorCode -> IO a) -> IO a
handleError ((Ptr UErrorCode -> IO ()) -> IO ())
-> (Ptr UErrorCode -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$
      Ptr URegularExpression -> Ptr UText -> Ptr UErrorCode -> IO ()
uregex_setUText Ptr URegularExpression
rePtr Ptr UText
hayPtr
  IORef UTextPtr -> UTextPtr -> IO ()
forall a. IORef a -> a -> IO ()
writeIORef IORef UTextPtr
reText UTextPtr
hayfp

-- | Get the subject text that is currently associated with this
-- regular expression object.
getUTextPtr :: Regex -> IO UTextPtr
getUTextPtr :: Regex -> IO UTextPtr
getUTextPtr Regex{ForeignPtr URegularExpression
IORef UTextPtr
reText :: IORef UTextPtr
reRe :: ForeignPtr URegularExpression
reText :: Regex -> IORef UTextPtr
reRe :: Regex -> ForeignPtr URegularExpression
..} = IORef UTextPtr -> IO UTextPtr
forall a. IORef a -> IO a
readIORef IORef UTextPtr
reText

-- | Return the source form of the pattern used to construct this
-- regular expression or match.
pattern :: Regex -> Text
pattern :: Regex -> Text
pattern Regex{ForeignPtr URegularExpression
IORef UTextPtr
reText :: IORef UTextPtr
reRe :: ForeignPtr URegularExpression
reText :: Regex -> IORef UTextPtr
reRe :: Regex -> ForeignPtr URegularExpression
..} = IO Text -> Text
forall a. IO a -> a
unsafePerformIO (IO Text -> Text)
-> ((Ptr URegularExpression -> IO Text) -> IO Text)
-> (Ptr URegularExpression -> IO Text)
-> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ForeignPtr URegularExpression
-> (Ptr URegularExpression -> IO Text) -> IO Text
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr URegularExpression
reRe ((Ptr URegularExpression -> IO Text) -> Text)
-> (Ptr URegularExpression -> IO Text) -> Text
forall a b. (a -> b) -> a -> b
$ \Ptr URegularExpression
rePtr ->
  (Ptr Int32 -> IO Text) -> IO Text
forall a b. Storable a => (Ptr a -> IO b) -> IO b
alloca ((Ptr Int32 -> IO Text) -> IO Text)
-> (Ptr Int32 -> IO Text) -> IO Text
forall a b. (a -> b) -> a -> b
$ \Ptr Int32
lenPtr -> do
    Ptr UChar
textPtr <- (Ptr UErrorCode -> IO (Ptr UChar)) -> IO (Ptr UChar)
forall a. (Ptr UErrorCode -> IO a) -> IO a
handleError ((Ptr UErrorCode -> IO (Ptr UChar)) -> IO (Ptr UChar))
-> (Ptr UErrorCode -> IO (Ptr UChar)) -> IO (Ptr UChar)
forall a b. (a -> b) -> a -> b
$ Ptr URegularExpression
-> Ptr Int32 -> Ptr UErrorCode -> IO (Ptr UChar)
uregex_pattern Ptr URegularExpression
rePtr Ptr Int32
lenPtr
    (Ptr UChar -> I16 -> IO Text
fromUCharPtr Ptr UChar
textPtr (I16 -> IO Text) -> (Int32 -> I16) -> Int32 -> IO Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int32 -> I16
forall a b. (Integral a, Num b) => a -> b
fromIntegral) (Int32 -> IO Text) -> IO Int32 -> IO Text
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< Ptr Int32 -> IO Int32
forall a. Storable a => Ptr a -> IO a
peek Ptr Int32
lenPtr

-- | Find the first matching substring of the input string that
-- matches the pattern.
--
-- If /n/ is non-negative, the search for a match begins at the
-- specified index, and any match region is reset.
--
-- If /n/ is -1, the search begins at the start of the input region,
-- or at the start of the full string if no region has been specified.
--
-- If a match is found, 'start', 'end', and 'group' will provide more
-- information regarding the match.
find :: Regex -> TextI -> IO Bool
find :: Regex -> TextI -> IO Bool
find Regex{ForeignPtr URegularExpression
IORef UTextPtr
reText :: IORef UTextPtr
reRe :: ForeignPtr URegularExpression
reText :: Regex -> IORef UTextPtr
reRe :: Regex -> ForeignPtr URegularExpression
..} TextI
n =
    (UBool -> Bool) -> IO UBool -> IO Bool
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap UBool -> Bool
forall a. Integral a => a -> Bool
asBool (IO UBool -> IO Bool)
-> ((Ptr URegularExpression -> IO UBool) -> IO UBool)
-> (Ptr URegularExpression -> IO UBool)
-> IO Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ForeignPtr URegularExpression
-> (Ptr URegularExpression -> IO UBool) -> IO UBool
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr URegularExpression
reRe ((Ptr URegularExpression -> IO UBool) -> IO Bool)
-> (Ptr URegularExpression -> IO UBool) -> IO Bool
forall a b. (a -> b) -> a -> b
$ \Ptr URegularExpression
rePtr -> (Ptr UErrorCode -> IO UBool) -> IO UBool
forall a. (Ptr UErrorCode -> IO a) -> IO a
handleError ((Ptr UErrorCode -> IO UBool) -> IO UBool)
-> (Ptr UErrorCode -> IO UBool) -> IO UBool
forall a b. (a -> b) -> a -> b
$
    Ptr URegularExpression -> Int32 -> Ptr UErrorCode -> IO UBool
uregex_find Ptr URegularExpression
rePtr (TextI -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral TextI
n)

-- | Find the next pattern match in the input string.  Begin searching
-- the input at the location following the end of he previous match,
-- or at the start of the string (or region) if there is no previous
-- match.
--
-- If a match is found, 'start', 'end', and 'group' will provide more
-- information regarding the match.
findNext :: Regex -> IO Bool
findNext :: Regex -> IO Bool
findNext Regex{ForeignPtr URegularExpression
IORef UTextPtr
reText :: IORef UTextPtr
reRe :: ForeignPtr URegularExpression
reText :: Regex -> IORef UTextPtr
reRe :: Regex -> ForeignPtr URegularExpression
..} =
    (UBool -> Bool) -> IO UBool -> IO Bool
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap UBool -> Bool
forall a. Integral a => a -> Bool
asBool (IO UBool -> IO Bool)
-> ((Ptr URegularExpression -> IO UBool) -> IO UBool)
-> (Ptr URegularExpression -> IO UBool)
-> IO Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ForeignPtr URegularExpression
-> (Ptr URegularExpression -> IO UBool) -> IO UBool
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr URegularExpression
reRe ((Ptr URegularExpression -> IO UBool) -> IO Bool)
-> (Ptr URegularExpression -> IO UBool) -> IO Bool
forall a b. (a -> b) -> a -> b
$ (Ptr UErrorCode -> IO UBool) -> IO UBool
forall a. (Ptr UErrorCode -> IO a) -> IO a
handleError ((Ptr UErrorCode -> IO UBool) -> IO UBool)
-> (Ptr URegularExpression -> Ptr UErrorCode -> IO UBool)
-> Ptr URegularExpression
-> IO UBool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Ptr URegularExpression -> Ptr UErrorCode -> IO UBool
uregex_findNext

-- | Make a copy of a compiled regular expression.  Cloning a regular
-- expression is faster than opening a second instance from the source
-- form of the expression, and requires less memory.
--
-- Note that the current input string and the position of any matched
-- text within it are not cloned; only the pattern itself and and the
-- match mode flags are copied.
--
-- Cloning can be particularly useful to threaded applications that
-- perform multiple match operations in parallel.  Each concurrent RE
-- operation requires its own instance of a 'Regex'.
clone :: Regex -> IO Regex
{-# INLINE clone #-}
clone :: Regex -> IO Regex
clone Regex{ForeignPtr URegularExpression
IORef UTextPtr
reText :: IORef UTextPtr
reRe :: ForeignPtr URegularExpression
reText :: Regex -> IORef UTextPtr
reRe :: Regex -> ForeignPtr URegularExpression
..} = do
  (ForeignPtr URegularExpression -> IORef UTextPtr -> Regex)
-> FinalizerPtr URegularExpression
-> IO (Ptr URegularExpression)
-> IO (IORef UTextPtr -> Regex)
forall a i.
(ForeignPtr a -> i) -> FinalizerPtr a -> IO (Ptr a) -> IO i
newICUPtr ForeignPtr URegularExpression -> IORef UTextPtr -> Regex
Regex FinalizerPtr URegularExpression
uregex_close
    (ForeignPtr URegularExpression
-> (Ptr URegularExpression -> IO (Ptr URegularExpression))
-> IO (Ptr URegularExpression)
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr URegularExpression
reRe ((Ptr UErrorCode -> IO (Ptr URegularExpression))
-> IO (Ptr URegularExpression)
forall a. (Ptr UErrorCode -> IO a) -> IO a
handleError ((Ptr UErrorCode -> IO (Ptr URegularExpression))
 -> IO (Ptr URegularExpression))
-> (Ptr URegularExpression
    -> Ptr UErrorCode -> IO (Ptr URegularExpression))
-> Ptr URegularExpression
-> IO (Ptr URegularExpression)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Ptr URegularExpression
-> Ptr UErrorCode -> IO (Ptr URegularExpression)
uregex_clone))
    IO (IORef UTextPtr -> Regex) -> IO (IORef UTextPtr) -> IO Regex
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*>
    UTextPtr -> IO (IORef UTextPtr)
forall a. a -> IO (IORef a)
newIORef UTextPtr
emptyUTextPtr

-- | Return the number of capturing groups in this regular
-- expression's pattern.
groupCount :: Regex -> IO Int
groupCount :: Regex -> IO Int
groupCount Regex{ForeignPtr URegularExpression
IORef UTextPtr
reText :: IORef UTextPtr
reRe :: ForeignPtr URegularExpression
reText :: Regex -> IORef UTextPtr
reRe :: Regex -> ForeignPtr URegularExpression
..} =
    (Int32 -> Int) -> IO Int32 -> IO Int
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Int32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (IO Int32 -> IO Int)
-> ((Ptr URegularExpression -> IO Int32) -> IO Int32)
-> (Ptr URegularExpression -> IO Int32)
-> IO Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ForeignPtr URegularExpression
-> (Ptr URegularExpression -> IO Int32) -> IO Int32
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr URegularExpression
reRe ((Ptr URegularExpression -> IO Int32) -> IO Int)
-> (Ptr URegularExpression -> IO Int32) -> IO Int
forall a b. (a -> b) -> a -> b
$ (Ptr UErrorCode -> IO Int32) -> IO Int32
forall a. (Ptr UErrorCode -> IO a) -> IO a
handleError ((Ptr UErrorCode -> IO Int32) -> IO Int32)
-> (Ptr URegularExpression -> Ptr UErrorCode -> IO Int32)
-> Ptr URegularExpression
-> IO Int32
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Ptr URegularExpression -> Ptr UErrorCode -> IO Int32
uregex_groupCount

-- | Returns the index in the input string of the start of the text
-- matched by the specified capture group during the previous match
-- operation.  Returns @-1@ if the capture group was not part of the
-- last match.
start_ :: Regex -> Int -> IO TextI
start_ :: Regex -> Int -> IO TextI
start_ Regex{ForeignPtr URegularExpression
IORef UTextPtr
reText :: IORef UTextPtr
reRe :: ForeignPtr URegularExpression
reText :: Regex -> IORef UTextPtr
reRe :: Regex -> ForeignPtr URegularExpression
..} Int
n =
    (Int32 -> TextI) -> IO Int32 -> IO TextI
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Int32 -> TextI
forall a b. (Integral a, Num b) => a -> b
fromIntegral (IO Int32 -> IO TextI)
-> ((Ptr URegularExpression -> IO Int32) -> IO Int32)
-> (Ptr URegularExpression -> IO Int32)
-> IO TextI
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ForeignPtr URegularExpression
-> (Ptr URegularExpression -> IO Int32) -> IO Int32
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr URegularExpression
reRe ((Ptr URegularExpression -> IO Int32) -> IO TextI)
-> (Ptr URegularExpression -> IO Int32) -> IO TextI
forall a b. (a -> b) -> a -> b
$ \Ptr URegularExpression
rePtr -> (Ptr UErrorCode -> IO Int32) -> IO Int32
forall a. (Ptr UErrorCode -> IO a) -> IO a
handleError ((Ptr UErrorCode -> IO Int32) -> IO Int32)
-> (Ptr UErrorCode -> IO Int32) -> IO Int32
forall a b. (a -> b) -> a -> b
$
    Ptr URegularExpression -> Int32 -> Ptr UErrorCode -> IO Int32
uregex_start Ptr URegularExpression
rePtr (Int -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
n)

-- | Returns the index in the input string of the end of the text
-- matched by the specified capture group during the previous match
-- operation.  Returns @-1@ if the capture group was not part of
-- the last match.
end_ :: Regex -> Int -> IO TextI
end_ :: Regex -> Int -> IO TextI
end_ Regex{ForeignPtr URegularExpression
IORef UTextPtr
reText :: IORef UTextPtr
reRe :: ForeignPtr URegularExpression
reText :: Regex -> IORef UTextPtr
reRe :: Regex -> ForeignPtr URegularExpression
..} Int
n =
  (Int32 -> TextI) -> IO Int32 -> IO TextI
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Int32 -> TextI
forall a b. (Integral a, Num b) => a -> b
fromIntegral (IO Int32 -> IO TextI)
-> ((Ptr URegularExpression -> IO Int32) -> IO Int32)
-> (Ptr URegularExpression -> IO Int32)
-> IO TextI
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ForeignPtr URegularExpression
-> (Ptr URegularExpression -> IO Int32) -> IO Int32
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr URegularExpression
reRe ((Ptr URegularExpression -> IO Int32) -> IO TextI)
-> (Ptr URegularExpression -> IO Int32) -> IO TextI
forall a b. (a -> b) -> a -> b
$ \Ptr URegularExpression
rePtr -> (Ptr UErrorCode -> IO Int32) -> IO Int32
forall a. (Ptr UErrorCode -> IO a) -> IO a
handleError ((Ptr UErrorCode -> IO Int32) -> IO Int32)
-> (Ptr UErrorCode -> IO Int32) -> IO Int32
forall a b. (a -> b) -> a -> b
$
  Ptr URegularExpression -> Int32 -> Ptr UErrorCode -> IO Int32
uregex_end Ptr URegularExpression
rePtr (Int -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
n)

-- | Returns the index in the input string of the start of the text
-- matched by the specified capture group during the previous match
-- operation.  Returns 'Nothing' if the capture group was not part of
-- the last match.
start :: Regex -> Int -> IO (Maybe TextI)
start :: Regex -> Int -> IO (Maybe TextI)
start Regex
r Int
n = TextI -> Maybe TextI
check (TextI -> Maybe TextI) -> IO TextI -> IO (Maybe TextI)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
`fmap` Regex -> Int -> IO TextI
start_ Regex
r Int
n

-- | Returns the index in the input string of the end of the text
-- matched by the specified capture group during the previous match
-- operation.  Returns 'Nothing' if the capture group was not part of
-- the last match.
end :: Regex -> Int -> IO (Maybe TextI)
end :: Regex -> Int -> IO (Maybe TextI)
end Regex
r Int
n = TextI -> Maybe TextI
check (TextI -> Maybe TextI) -> IO TextI -> IO (Maybe TextI)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
`fmap` Regex -> Int -> IO TextI
end_ Regex
r Int
n

check :: TextI -> Maybe TextI
check :: TextI -> Maybe TextI
check (-1) = Maybe TextI
forall a. Maybe a
Nothing
check TextI
k    = TextI -> Maybe TextI
forall a. a -> Maybe a
Just (TextI -> Maybe TextI) -> TextI -> Maybe TextI
forall a b. (a -> b) -> a -> b
$! TextI -> TextI
forall a b. (Integral a, Num b) => a -> b
fromIntegral TextI
k