{-# LANGUAGE BangPatterns, EmptyDataDecls, MagicHash, RecordWildCards,
    ScopedTypeVariables #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}

-- |
-- Module      : Data.Text.ICU.Regex
-- Copyright   : (c) 2010 Bryan O'Sullivan
--
-- License     : BSD-style
-- Maintainer  : bos@serpentine.com
-- Stability   : experimental
-- Portability : GHC
--
-- Regular expression support for Unicode, implemented as bindings to
-- the International Components for Unicode (ICU) libraries.
--
-- The syntax and behaviour of ICU regular expressions are Perl-like.
-- For complete details, see the ICU User Guide entry at
-- <http://userguide.icu-project.org/strings/regexp>.
--
-- /Note/: The functions in this module are not thread safe.  For
-- thread safe use, see 'clone' below, or use the pure functions in
-- "Data.Text.ICU".

module Data.Text.ICU.Regex
    (
    -- * Types
      MatchOption(..)
    , ParseError(errError, errLine, errOffset)
    , Regex
    -- * Functions
    -- ** Construction
    , regex
    , regex'
    , clone
    -- ** Managing text to search
    , setText
    , getUTextPtr
    -- ** Inspection
    , pattern
    -- ** Searching
    , find
    , findNext
    -- ** Match groups
    -- $groups
    , groupCount
    , start
    , end
    , start_
    , end_
    ) where

import Data.Text.ICU.Regex.Internal
import qualified Control.Exception as E
import Data.IORef (newIORef, readIORef, writeIORef)
import Data.Text (Text)
import Data.Text.ICU.Internal (asBool, UTextPtr, asUTextPtr, emptyUTextPtr, TextI, withUTextPtr, fromUCharPtr, newICUPtr)
import Data.Text.ICU.Error.Internal (ParseError(..), handleError)
import Foreign.ForeignPtr (withForeignPtr)
import Foreign.Marshal.Alloc (alloca)
import Foreign.Storable (peek)
import System.IO.Unsafe (unsafePerformIO)

instance Show Regex where
    show :: Regex -> String
show Regex
re = String
"Regex " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show (Regex -> Text
pattern Regex
re)

-- $groups
--
-- Capturing groups are numbered starting from zero.  Group zero is
-- always the entire matching text.  Groups greater than zero contain
-- the text matching each capturing group in a regular expression.

-- | Compile a regular expression with the given options.  This is
-- safest to use when the pattern is constructed at run time.
regex' :: [MatchOption] -> Text -> IO (Either ParseError Regex)
regex' :: [MatchOption] -> Text -> IO (Either ParseError Regex)
regex' [MatchOption]
opts Text
pat = (forall a b. b -> Either a b
Right forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
`fmap` [MatchOption] -> Text -> IO Regex
regex [MatchOption]
opts Text
pat) forall e a. Exception e => IO a -> (e -> IO a) -> IO a
`E.catch` \(ParseError
err::ParseError) ->
                  forall (m :: * -> *) a. Monad m => a -> m a
return (forall a b. a -> Either a b
Left ParseError
err)

-- | Set the subject text string upon which the regular expression
-- will look for matches.  This function may be called any number of
-- times, allowing the regular expression pattern to be applied to
-- different strings.
setText :: Regex -> Text -> IO ()
setText :: Regex -> Text -> IO ()
setText Regex{ForeignPtr URegularExpression
IORef UTextPtr
reText :: Regex -> IORef UTextPtr
reRe :: Regex -> ForeignPtr URegularExpression
reText :: IORef UTextPtr
reRe :: ForeignPtr URegularExpression
..} Text
t = do
  UTextPtr
hayfp <- Text -> IO UTextPtr
asUTextPtr Text
t
  forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr URegularExpression
reRe forall a b. (a -> b) -> a -> b
$ \Ptr URegularExpression
rePtr ->
    forall a. UTextPtr -> (Ptr UText -> IO a) -> IO a
withUTextPtr UTextPtr
hayfp forall a b. (a -> b) -> a -> b
$ \Ptr UText
hayPtr -> forall a. (Ptr UErrorCode -> IO a) -> IO a
handleError forall a b. (a -> b) -> a -> b
$
      Ptr URegularExpression -> Ptr UText -> Ptr UErrorCode -> IO ()
uregex_setUText Ptr URegularExpression
rePtr Ptr UText
hayPtr
  forall a. IORef a -> a -> IO ()
writeIORef IORef UTextPtr
reText UTextPtr
hayfp

-- | Get the subject text that is currently associated with this
-- regular expression object.
getUTextPtr :: Regex -> IO UTextPtr
getUTextPtr :: Regex -> IO UTextPtr
getUTextPtr Regex{ForeignPtr URegularExpression
IORef UTextPtr
reText :: IORef UTextPtr
reRe :: ForeignPtr URegularExpression
reText :: Regex -> IORef UTextPtr
reRe :: Regex -> ForeignPtr URegularExpression
..} = forall a. IORef a -> IO a
readIORef IORef UTextPtr
reText

-- | Return the source form of the pattern used to construct this
-- regular expression or match.
pattern :: Regex -> Text
pattern :: Regex -> Text
pattern Regex{ForeignPtr URegularExpression
IORef UTextPtr
reText :: IORef UTextPtr
reRe :: ForeignPtr URegularExpression
reText :: Regex -> IORef UTextPtr
reRe :: Regex -> ForeignPtr URegularExpression
..} = forall a. IO a -> a
unsafePerformIO forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr URegularExpression
reRe forall a b. (a -> b) -> a -> b
$ \Ptr URegularExpression
rePtr ->
  forall a b. Storable a => (Ptr a -> IO b) -> IO b
alloca forall a b. (a -> b) -> a -> b
$ \Ptr Int32
lenPtr -> do
    Ptr UChar
textPtr <- forall a. (Ptr UErrorCode -> IO a) -> IO a
handleError forall a b. (a -> b) -> a -> b
$ Ptr URegularExpression
-> Ptr Int32 -> Ptr UErrorCode -> IO (Ptr UChar)
uregex_pattern Ptr URegularExpression
rePtr Ptr Int32
lenPtr
    (Ptr UChar -> TextI -> IO Text
fromUCharPtr Ptr UChar
textPtr forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (Integral a, Num b) => a -> b
fromIntegral) forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< forall a. Storable a => Ptr a -> IO a
peek Ptr Int32
lenPtr

-- | Find the first matching substring of the input string that
-- matches the pattern.
--
-- If /n/ is non-negative, the search for a match begins at the
-- specified index, and any match region is reset.
--
-- If /n/ is -1, the search begins at the start of the input region,
-- or at the start of the full string if no region has been specified.
--
-- If a match is found, 'start', 'end', and 'group' will provide more
-- information regarding the match.
find :: Regex -> TextI -> IO Bool
find :: Regex -> TextI -> IO Bool
find Regex{ForeignPtr URegularExpression
IORef UTextPtr
reText :: IORef UTextPtr
reRe :: ForeignPtr URegularExpression
reText :: Regex -> IORef UTextPtr
reRe :: Regex -> ForeignPtr URegularExpression
..} TextI
n =
    forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall a. Integral a => a -> Bool
asBool forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr URegularExpression
reRe forall a b. (a -> b) -> a -> b
$ \Ptr URegularExpression
rePtr -> forall a. (Ptr UErrorCode -> IO a) -> IO a
handleError forall a b. (a -> b) -> a -> b
$
    Ptr URegularExpression -> Int32 -> Ptr UErrorCode -> IO UBool
uregex_find Ptr URegularExpression
rePtr (forall a b. (Integral a, Num b) => a -> b
fromIntegral TextI
n)

-- | Find the next pattern match in the input string.  Begin searching
-- the input at the location following the end of he previous match,
-- or at the start of the string (or region) if there is no previous
-- match.
--
-- If a match is found, 'start', 'end', and 'group' will provide more
-- information regarding the match.
findNext :: Regex -> IO Bool
findNext :: Regex -> IO Bool
findNext Regex{ForeignPtr URegularExpression
IORef UTextPtr
reText :: IORef UTextPtr
reRe :: ForeignPtr URegularExpression
reText :: Regex -> IORef UTextPtr
reRe :: Regex -> ForeignPtr URegularExpression
..} =
    forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall a. Integral a => a -> Bool
asBool forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr URegularExpression
reRe forall a b. (a -> b) -> a -> b
$ forall a. (Ptr UErrorCode -> IO a) -> IO a
handleError forall b c a. (b -> c) -> (a -> b) -> a -> c
. Ptr URegularExpression -> Ptr UErrorCode -> IO UBool
uregex_findNext

-- | Make a copy of a compiled regular expression.  Cloning a regular
-- expression is faster than opening a second instance from the source
-- form of the expression, and requires less memory.
--
-- Note that the current input string and the position of any matched
-- text within it are not cloned; only the pattern itself and and the
-- match mode flags are copied.
--
-- Cloning can be particularly useful to threaded applications that
-- perform multiple match operations in parallel.  Each concurrent RE
-- operation requires its own instance of a 'Regex'.
clone :: Regex -> IO Regex
{-# INLINE clone #-}
clone :: Regex -> IO Regex
clone Regex{ForeignPtr URegularExpression
IORef UTextPtr
reText :: IORef UTextPtr
reRe :: ForeignPtr URegularExpression
reText :: Regex -> IORef UTextPtr
reRe :: Regex -> ForeignPtr URegularExpression
..} = do
  forall a i.
(ForeignPtr a -> i) -> FinalizerPtr a -> IO (Ptr a) -> IO i
newICUPtr ForeignPtr URegularExpression -> IORef UTextPtr -> Regex
Regex FunPtr (Ptr URegularExpression -> IO ())
uregex_close
    (forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr URegularExpression
reRe (forall a. (Ptr UErrorCode -> IO a) -> IO a
handleError forall b c a. (b -> c) -> (a -> b) -> a -> c
. Ptr URegularExpression
-> Ptr UErrorCode -> IO (Ptr URegularExpression)
uregex_clone))
    forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*>
    forall a. a -> IO (IORef a)
newIORef UTextPtr
emptyUTextPtr

-- | Return the number of capturing groups in this regular
-- expression's pattern.
groupCount :: Regex -> IO Int
groupCount :: Regex -> IO Int
groupCount Regex{ForeignPtr URegularExpression
IORef UTextPtr
reText :: IORef UTextPtr
reRe :: ForeignPtr URegularExpression
reText :: Regex -> IORef UTextPtr
reRe :: Regex -> ForeignPtr URegularExpression
..} =
    forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall a b. (Integral a, Num b) => a -> b
fromIntegral forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr URegularExpression
reRe forall a b. (a -> b) -> a -> b
$ forall a. (Ptr UErrorCode -> IO a) -> IO a
handleError forall b c a. (b -> c) -> (a -> b) -> a -> c
. Ptr URegularExpression -> Ptr UErrorCode -> IO Int32
uregex_groupCount

-- | Returns the index in the input string of the start of the text
-- matched by the specified capture group during the previous match
-- operation.  Returns @-1@ if the capture group was not part of the
-- last match.
start_ :: Regex -> Int -> IO TextI
start_ :: Regex -> Int -> IO TextI
start_ Regex{ForeignPtr URegularExpression
IORef UTextPtr
reText :: IORef UTextPtr
reRe :: ForeignPtr URegularExpression
reText :: Regex -> IORef UTextPtr
reRe :: Regex -> ForeignPtr URegularExpression
..} Int
n =
    forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall a b. (Integral a, Num b) => a -> b
fromIntegral forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr URegularExpression
reRe forall a b. (a -> b) -> a -> b
$ \Ptr URegularExpression
rePtr -> forall a. (Ptr UErrorCode -> IO a) -> IO a
handleError forall a b. (a -> b) -> a -> b
$
    Ptr URegularExpression -> Int32 -> Ptr UErrorCode -> IO Int32
uregex_start Ptr URegularExpression
rePtr (forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
n)

-- | Returns the index in the input string of the end of the text
-- matched by the specified capture group during the previous match
-- operation.  Returns @-1@ if the capture group was not part of
-- the last match.
end_ :: Regex -> Int -> IO TextI
end_ :: Regex -> Int -> IO TextI
end_ Regex{ForeignPtr URegularExpression
IORef UTextPtr
reText :: IORef UTextPtr
reRe :: ForeignPtr URegularExpression
reText :: Regex -> IORef UTextPtr
reRe :: Regex -> ForeignPtr URegularExpression
..} Int
n =
  forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall a b. (Integral a, Num b) => a -> b
fromIntegral forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr URegularExpression
reRe forall a b. (a -> b) -> a -> b
$ \Ptr URegularExpression
rePtr -> forall a. (Ptr UErrorCode -> IO a) -> IO a
handleError forall a b. (a -> b) -> a -> b
$
  Ptr URegularExpression -> Int32 -> Ptr UErrorCode -> IO Int32
uregex_end Ptr URegularExpression
rePtr (forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
n)

-- | Returns the index in the input string of the start of the text
-- matched by the specified capture group during the previous match
-- operation.  Returns 'Nothing' if the capture group was not part of
-- the last match.
start :: Regex -> Int -> IO (Maybe TextI)
start :: Regex -> Int -> IO (Maybe TextI)
start Regex
r Int
n = TextI -> Maybe TextI
check forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
`fmap` Regex -> Int -> IO TextI
start_ Regex
r Int
n

-- | Returns the index in the input string of the end of the text
-- matched by the specified capture group during the previous match
-- operation.  Returns 'Nothing' if the capture group was not part of
-- the last match.
end :: Regex -> Int -> IO (Maybe TextI)
end :: Regex -> Int -> IO (Maybe TextI)
end Regex
r Int
n = TextI -> Maybe TextI
check forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
`fmap` Regex -> Int -> IO TextI
end_ Regex
r Int
n

check :: TextI -> Maybe TextI
check :: TextI -> Maybe TextI
check (-1) = forall a. Maybe a
Nothing
check TextI
k    = forall a. a -> Maybe a
Just forall a b. (a -> b) -> a -> b
$! forall a b. (Integral a, Num b) => a -> b
fromIntegral TextI
k