{-# LANGUAGE BangPatterns #-}
module Regex.Internal.Text
  (
    TextToken
  , REText

  , token
  , satisfy
  , char
  , charIgnoreCase
  , anyChar
  , oneOf
  , text
  , textIgnoreCase
  , manyText
  , someText
  , manyTextMin
  , someTextMin
  , manyTextOf
  , someTextOf
  , manyTextOfMin
  , someTextOfMin

  , naturalDec
  , integerDec
  , naturalHex
  , integerHex
  , wordRangeDec
  , intRangeDec
  , wordRangeHex
  , intRangeHex
  , wordDecN
  , wordHexN

  , toMatch
  , withMatch

  , reParse
  , ParserText
  , parse
  , parseSure

  , find
  , findAll
  , splitOn
  , replace
  , replaceAll
  ) where

import Control.Applicative
import Data.Char
import Data.Foldable (foldl')
import Data.Maybe (fromMaybe)
import Numeric.Natural
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Array as TArray
import qualified Data.Text.Internal as TInternal
import qualified Data.Text.Unsafe as TUnsafe
import qualified Data.Text.Internal.Encoding.Utf8 as TInternalUtf8

import Data.CharSet (CharSet)
import qualified Data.CharSet as CS
import Regex.Internal.Parser (Parser)
import qualified Regex.Internal.Parser as P
import Regex.Internal.Regex (RE(..), Greediness(..), Strictness(..))
import qualified Regex.Internal.Regex as R
import qualified Regex.Internal.Num as RNum
import qualified Regex.Internal.Generated.CaseFold as CF

----------------------
-- Token and Text REs
----------------------

-- | The token type used for parsing @Text@.

-- This module uses RE TextToken for Text regexes instead of simply RE Char to
-- support Text slicing. It does mean that use cases not using slicing pay a
-- small cost, but it is not worth having two separate Text regex APIs.
data TextToken = TextToken
  { TextToken -> Array
tArr     :: {-# UNPACK #-} !TArray.Array
  , TextToken -> Int
tOffset  :: {-# UNPACK #-} !Int
  , TextToken -> Char
tChar    :: {-# UNPACK #-} !Char
  }

-- | A type alias for convenience.
--
-- A function which accepts a @RE c a@ will accept a @REText a@.
type REText = RE TextToken

-- | A type alias for convenience.
--
-- A function which accepts a @Parser c a@ will accept a @ParserText a@.
type ParserText = Parser TextToken

-- | Parse a @Char@ into an @a@ if the given function returns @Just@.
token :: (Char -> Maybe a) -> REText a
token :: forall a. (Char -> Maybe a) -> REText a
token Char -> Maybe a
t = (TextToken -> Maybe a) -> RE TextToken a
forall c a. (c -> Maybe a) -> RE c a
R.token (\ !TextToken
tok -> Char -> Maybe a
t (TextToken -> Char
tChar TextToken
tok))
{-# INLINE token #-}

-- | Parse a @Char@ if it satisfies the given predicate.
satisfy :: (Char -> Bool) -> REText Char
satisfy :: (Char -> Bool) -> REText Char
satisfy Char -> Bool
p = (Char -> Maybe Char) -> REText Char
forall a. (Char -> Maybe a) -> REText a
token ((Char -> Maybe Char) -> REText Char)
-> (Char -> Maybe Char) -> REText Char
forall a b. (a -> b) -> a -> b
$ \Char
c -> if Char -> Bool
p Char
c then Char -> Maybe Char
forall a. a -> Maybe a
Just Char
c else Maybe Char
forall a. Maybe a
Nothing
{-# INLINE satisfy #-}

-- | Parse the given @Char@.
char :: Char -> REText Char
char :: Char -> REText Char
char !Char
c = (Char -> Bool) -> REText Char
satisfy (Char
cChar -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==)

-- | Parse the given @Char@, ignoring case.
--
-- Comparisons are performed after applying
-- [simple case folding](https://www.unicode.org/reports/tr44/#Simple_Case_Folding)
-- as described by the Unicode standard.
charIgnoreCase :: Char -> REText Char
charIgnoreCase :: Char -> REText Char
charIgnoreCase Char
c = (Char -> Bool) -> REText Char
satisfy ((Char -> Bool) -> REText Char) -> (Char -> Bool) -> REText Char
forall a b. (a -> b) -> a -> b
$ (Char
c'Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==) (Char -> Bool) -> (Char -> Char) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Char
CF.caseFoldSimple
  where
    !c' :: Char
c' = Char -> Char
CF.caseFoldSimple Char
c
-- See Note [Why simple case fold]

-- | Parse any @Char@.
anyChar :: REText Char
anyChar :: REText Char
anyChar = (Char -> Maybe Char) -> REText Char
forall a. (Char -> Maybe a) -> REText a
token Char -> Maybe Char
forall a. a -> Maybe a
Just

-- | Parse a @Char@ if it is a member of the @CharSet@.
oneOf :: CharSet -> REText Char
oneOf :: CharSet -> REText Char
oneOf !CharSet
cs = (Char -> Bool) -> REText Char
satisfy (Char -> CharSet -> Bool
`CS.member` CharSet
cs)

-- | Parse the given @Text@.
text :: Text -> REText Text
text :: Text -> REText Text
text Text
t = Text
t Text -> RE TextToken () -> REText Text
forall a b. a -> RE TextToken b -> RE TextToken a
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ (Char -> RE TextToken () -> RE TextToken ())
-> RE TextToken () -> Text -> RE TextToken ()
forall a. (Char -> a -> a) -> a -> Text -> a
T.foldr' (REText Char -> RE TextToken () -> RE TextToken ()
forall a b. RE TextToken a -> RE TextToken b -> RE TextToken b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
(*>) (REText Char -> RE TextToken () -> RE TextToken ())
-> (Char -> REText Char)
-> Char
-> RE TextToken ()
-> RE TextToken ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> REText Char
char) (() -> RE TextToken ()
forall a. a -> RE TextToken a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()) Text
t

-- | Parse the given @Text@, ignoring case.
--
-- Comparisons are performed after applying
-- [simple case folding](https://www.unicode.org/reports/tr44/#Simple_Case_Folding)
-- as described by the Unicode standard.
textIgnoreCase :: Text -> REText Text
textIgnoreCase :: Text -> REText Text
textIgnoreCase Text
t =
  (Char -> REText Text -> REText Text)
-> REText Text -> Text -> REText Text
forall a. (Char -> a -> a) -> a -> Text -> a
T.foldr' (\Char
c REText Text
cs -> (Text -> Text -> Text) -> REText Text -> REText Text -> REText Text
forall a1 a2 b c. (a1 -> a2 -> b) -> RE c a1 -> RE c a2 -> RE c b
R.liftA2' Text -> Text -> Text
adjacentAppend (Char -> REText Text
ignoreCaseTokenMatch Char
c) REText Text
cs)
           (Text -> REText Text
forall a. a -> RE TextToken a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Text
T.empty)
           Text
t
-- See Note [Why simple case fold]

-- | Parse any @Text@. Biased towards matching more.
manyText :: REText Text
manyText :: REText Text
manyText = (Text -> Text -> Text) -> Text -> REText Text -> REText Text
forall b a c. (b -> a -> b) -> b -> RE c a -> RE c b
R.foldlMany' Text -> Text -> Text
adjacentAppend Text
T.empty REText Text
anyTokenMatch

-- | Parse any non-empty @Text@. Biased towards matching more.
someText :: REText Text
someText :: REText Text
someText = (Text -> Text -> Text) -> REText Text -> REText Text -> REText Text
forall a1 a2 b c. (a1 -> a2 -> b) -> RE c a1 -> RE c a2 -> RE c b
R.liftA2' Text -> Text -> Text
adjacentAppend REText Text
anyTokenMatch REText Text
manyText

-- | Parse any @Text@. Minimal, i.e. biased towards matching less.
manyTextMin :: REText Text
manyTextMin :: REText Text
manyTextMin = (Text -> Text -> Text) -> Text -> REText Text -> REText Text
forall b a c. (b -> a -> b) -> b -> RE c a -> RE c b
R.foldlManyMin' Text -> Text -> Text
adjacentAppend Text
T.empty REText Text
anyTokenMatch

-- | Parse any non-empty @Text@. Minimal, i.e. biased towards matching less.
someTextMin :: REText Text
someTextMin :: REText Text
someTextMin = (Text -> Text -> Text) -> REText Text -> REText Text -> REText Text
forall a1 a2 b c. (a1 -> a2 -> b) -> RE c a1 -> RE c a2 -> RE c b
R.liftA2' Text -> Text -> Text
adjacentAppend REText Text
anyTokenMatch REText Text
manyTextMin

-- | Parse any @Text@ containing members of the @CharSet@.
-- Biased towards matching more.
manyTextOf :: CharSet -> REText Text
manyTextOf :: CharSet -> REText Text
manyTextOf !CharSet
cs = (Text -> Text -> Text) -> Text -> REText Text -> REText Text
forall b a c. (b -> a -> b) -> b -> RE c a -> RE c b
R.foldlMany' Text -> Text -> Text
adjacentAppend Text
T.empty (CharSet -> REText Text
oneOfTokenMatch CharSet
cs)

-- | Parse any non-empty @Text@ containing members of the @CharSet@.
-- Biased towards matching more.
someTextOf :: CharSet -> REText Text
someTextOf :: CharSet -> REText Text
someTextOf !CharSet
cs = (Text -> Text -> Text) -> REText Text -> REText Text -> REText Text
forall a1 a2 b c. (a1 -> a2 -> b) -> RE c a1 -> RE c a2 -> RE c b
R.liftA2' Text -> Text -> Text
adjacentAppend (CharSet -> REText Text
oneOfTokenMatch CharSet
cs) (CharSet -> REText Text
manyTextOf CharSet
cs)

-- | Parse any @Text@ containing members of the @CharSet@.
-- Minimal, i.e. biased towards matching less.
manyTextOfMin :: CharSet -> REText Text
manyTextOfMin :: CharSet -> REText Text
manyTextOfMin !CharSet
cs = (Text -> Text -> Text) -> Text -> REText Text -> REText Text
forall b a c. (b -> a -> b) -> b -> RE c a -> RE c b
R.foldlManyMin' Text -> Text -> Text
adjacentAppend Text
T.empty (CharSet -> REText Text
oneOfTokenMatch CharSet
cs)

-- | Parse any non-empty @Text@ containing members of the @CharSet@.
-- Minimal, i.e. biased towards matching less.
someTextOfMin :: CharSet -> REText Text
someTextOfMin :: CharSet -> REText Text
someTextOfMin !CharSet
cs =
  (Text -> Text -> Text) -> REText Text -> REText Text -> REText Text
forall a1 a2 b c. (a1 -> a2 -> b) -> RE c a1 -> RE c a2 -> RE c b
R.liftA2' Text -> Text -> Text
adjacentAppend (CharSet -> REText Text
oneOfTokenMatch CharSet
cs) (CharSet -> REText Text
manyTextOfMin CharSet
cs)

-----------------
-- Numeric REs
-----------------

-- | Parse a decimal @Natural@.
-- Leading zeros are not accepted. Biased towards matching more.
naturalDec :: REText Natural
naturalDec :: REText Natural
naturalDec = (Word -> Word -> RE TextToken Word) -> REText Natural
forall c. (Word -> Word -> RE c Word) -> RE c Natural
RNum.mkNaturalDec Word -> Word -> RE TextToken Word
digitRange

-- | Parse a decimal @Integer@. Parse an optional sign, @\'-\'@ or @\'+\'@,
-- followed by the given @RE@, followed by the absolute value of the integer.
-- Leading zeros are not accepted. Biased towards matching more.
integerDec :: REText a -> REText Integer
integerDec :: forall a. REText a -> REText Integer
integerDec REText a
sep = RE TextToken ()
-> RE TextToken () -> REText Natural -> REText Integer
forall c minus plus.
RE c minus -> RE c plus -> RE c Natural -> RE c Integer
RNum.mkSignedInteger RE TextToken ()
minus RE TextToken ()
plus (REText a
sep REText a -> REText Natural -> REText Natural
forall a b. RE TextToken a -> RE TextToken b -> RE TextToken b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> REText Natural
naturalDec)

-- | Parse a hexadecimal @Natural@. Both uppercase @\'A\'..\'F\'@ and lowercase
-- @\'a\'..\'f\'@ are accepted.
-- Leading zeros are not accepted. Biased towards matching more.
naturalHex :: REText Natural
naturalHex :: REText Natural
naturalHex = (Word -> Word -> RE TextToken Word) -> REText Natural
forall c. (Word -> Word -> RE c Word) -> RE c Natural
RNum.mkNaturalHex Word -> Word -> RE TextToken Word
hexDigitRange

-- | Parse a hexadecimal @Integer@. Parse an optional sign, @\'-\'@ or @\'+\'@,
-- followed by the given @RE@, followed by the absolute value of the integer.
-- Both uppercase @\'A\'..\'F\'@ and lowercase @\'a\'..\'f\'@ are accepted.
-- Leading zeros are not accepted. Biased towards matching more.
integerHex :: REText a -> REText Integer
integerHex :: forall a. REText a -> REText Integer
integerHex REText a
sep = RE TextToken ()
-> RE TextToken () -> REText Natural -> REText Integer
forall c minus plus.
RE c minus -> RE c plus -> RE c Natural -> RE c Integer
RNum.mkSignedInteger RE TextToken ()
minus RE TextToken ()
plus (REText a
sep REText a -> REText Natural -> REText Natural
forall a b. RE TextToken a -> RE TextToken b -> RE TextToken b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> REText Natural
naturalHex)

-- | Parse a decimal @Word@ in the range @[low..high]@.
-- Leading zeros are not accepted. Biased towards matching more.
wordRangeDec :: (Word, Word) -> REText Word
wordRangeDec :: (Word, Word) -> RE TextToken Word
wordRangeDec (Word, Word)
lh = (Word -> Word -> RE TextToken Word)
-> (Word, Word) -> RE TextToken Word
forall c. (Word -> Word -> RE c Word) -> (Word, Word) -> RE c Word
RNum.mkWordRangeDec Word -> Word -> RE TextToken Word
digitRange (Word, Word)
lh

-- | Parse a decimal @Int@ in the range @[low..high]@. Parse an optional sign,
-- @\'-\'@ or @\'+\'@, followed by the given @RE@, followed by the absolute
-- value of the integer.
-- Leading zeros are not accepted. Biased towards matching more.
intRangeDec :: REText a -> (Int, Int) -> REText Int
intRangeDec :: forall a. REText a -> (Int, Int) -> REText Int
intRangeDec REText a
sep (Int, Int)
lh =
  RE TextToken ()
-> RE TextToken ()
-> ((Word, Word) -> RE TextToken Word)
-> (Int, Int)
-> REText Int
forall c minus plus.
RE c minus
-> RE c plus
-> ((Word, Word) -> RE c Word)
-> (Int, Int)
-> RE c Int
RNum.mkSignedIntRange RE TextToken ()
minus RE TextToken ()
plus ((REText a
sep REText a -> RE TextToken Word -> RE TextToken Word
forall a b. RE TextToken a -> RE TextToken b -> RE TextToken b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*>) (RE TextToken Word -> RE TextToken Word)
-> ((Word, Word) -> RE TextToken Word)
-> (Word, Word)
-> RE TextToken Word
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Word, Word) -> RE TextToken Word
wordRangeDec) (Int, Int)
lh

-- | Parse a hexadecimal @Word@ in the range @[low..high]@. Both uppercase
-- @\'A\'..\'F\'@ and lowercase @\'a\'..\'f\'@ are accepted.
-- Leading zeros are not accepted. Biased towards matching more.
wordRangeHex :: (Word, Word) -> REText Word
wordRangeHex :: (Word, Word) -> RE TextToken Word
wordRangeHex (Word, Word)
lh = (Word -> Word -> RE TextToken Word)
-> (Word, Word) -> RE TextToken Word
forall c. (Word -> Word -> RE c Word) -> (Word, Word) -> RE c Word
RNum.mkWordRangeHex Word -> Word -> RE TextToken Word
hexDigitRange (Word, Word)
lh

-- | Parse a hexadecimal @Int@ in the range @[low..high]@. Parse an optional
-- sign, @\'-\'@ or @\'+\'@, followed by the given @RE@, followed by the
-- absolute value of the integer.
-- Both uppercase @\'A\'..\'F\'@ and lowercase @\'a\'..\'f\'@ are accepted.
-- Leading zeros are not accepted. Biased towards matching more.
intRangeHex :: REText a -> (Int, Int) -> REText Int
intRangeHex :: forall a. REText a -> (Int, Int) -> REText Int
intRangeHex REText a
sep (Int, Int)
lh =
  RE TextToken ()
-> RE TextToken ()
-> ((Word, Word) -> RE TextToken Word)
-> (Int, Int)
-> REText Int
forall c minus plus.
RE c minus
-> RE c plus
-> ((Word, Word) -> RE c Word)
-> (Int, Int)
-> RE c Int
RNum.mkSignedIntRange RE TextToken ()
minus RE TextToken ()
plus ((REText a
sep REText a -> RE TextToken Word -> RE TextToken Word
forall a b. RE TextToken a -> RE TextToken b -> RE TextToken b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*>) (RE TextToken Word -> RE TextToken Word)
-> ((Word, Word) -> RE TextToken Word)
-> (Word, Word)
-> RE TextToken Word
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Word, Word) -> RE TextToken Word
wordRangeHex) (Int, Int)
lh

-- | Parse a @Word@ of exactly n decimal digits, including any leading zeros.
-- Will not parse values that do not fit in a @Word@.
-- Biased towards matching more.
wordDecN :: Int -> REText Word
wordDecN :: Int -> RE TextToken Word
wordDecN Int
n = (Word -> Word -> RE TextToken Word) -> Int -> RE TextToken Word
forall c. (Word -> Word -> RE c Word) -> Int -> RE c Word
RNum.mkWordDecN Word -> Word -> RE TextToken Word
digitRange Int
n

-- | Parse a @Word@ of exactly n hexadecimal digits, including any leading
-- zeros. Both uppercase @\'A\'..\'F\'@ and lowercase @\'a\'..\'f\'@ are
-- accepted. Will not parse values that do not fit in a @Word@.
-- Biased towards matching more.
wordHexN :: Int -> REText Word
wordHexN :: Int -> RE TextToken Word
wordHexN Int
n = (Word -> Word -> RE TextToken Word) -> Int -> RE TextToken Word
forall c. (Word -> Word -> RE c Word) -> Int -> RE c Word
RNum.mkWordHexN Word -> Word -> RE TextToken Word
hexDigitRange Int
n

minus, plus :: REText ()
minus :: RE TextToken ()
minus = (Char -> Maybe ()) -> RE TextToken ()
forall a. (Char -> Maybe a) -> REText a
token ((Char -> Maybe ()) -> RE TextToken ())
-> (Char -> Maybe ()) -> RE TextToken ()
forall a b. (a -> b) -> a -> b
$ \Char
c -> if Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'-' then () -> Maybe ()
forall a. a -> Maybe a
Just () else Maybe ()
forall a. Maybe a
Nothing
plus :: RE TextToken ()
plus = (Char -> Maybe ()) -> RE TextToken ()
forall a. (Char -> Maybe a) -> REText a
token ((Char -> Maybe ()) -> RE TextToken ())
-> (Char -> Maybe ()) -> RE TextToken ()
forall a b. (a -> b) -> a -> b
$ \Char
c -> if Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'+' then () -> Maybe ()
forall a. a -> Maybe a
Just () else Maybe ()
forall a. Maybe a
Nothing

-- l and h must be in [0..9]
digitRange :: Word -> Word -> REText Word
digitRange :: Word -> Word -> RE TextToken Word
digitRange !Word
l !Word
h = (Char -> Maybe Word) -> RE TextToken Word
forall a. (Char -> Maybe a) -> REText a
token ((Char -> Maybe Word) -> RE TextToken Word)
-> (Char -> Maybe Word) -> RE TextToken Word
forall a b. (a -> b) -> a -> b
$ \Char
c ->
  let d :: Word
d = Int -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Char -> Int
ord Char
c Int -> Int -> Int
forall a. Num a => a -> a -> a
- Char -> Int
ord Char
'0')
  in if Word
l Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
<= Word
d Bool -> Bool -> Bool
&& Word
d Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
<= Word
h then Word -> Maybe Word
forall a. a -> Maybe a
Just Word
d else Maybe Word
forall a. Maybe a
Nothing

-- l and h must be in [0..15]
hexDigitRange :: Word -> Word -> REText Word
hexDigitRange :: Word -> Word -> RE TextToken Word
hexDigitRange !Word
l !Word
h = (Char -> Maybe Word) -> RE TextToken Word
forall a. (Char -> Maybe a) -> REText a
token ((Char -> Maybe Word) -> RE TextToken Word)
-> (Char -> Maybe Word) -> RE TextToken Word
forall a b. (a -> b) -> a -> b
$ \Char
c ->
  let dec :: Word
dec = Int -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Char -> Int
ord Char
c Int -> Int -> Int
forall a. Num a => a -> a -> a
- Char -> Int
ord Char
'0')
      hexl :: Word
hexl = Int -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Char -> Int
ord Char
c Int -> Int -> Int
forall a. Num a => a -> a -> a
- Char -> Int
ord Char
'a')
      hexu :: Word
hexu = Int -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Char -> Int
ord Char
c Int -> Int -> Int
forall a. Num a => a -> a -> a
- Char -> Int
ord Char
'A')
  in do
    Word
d <- case () of
      ()
_ | Word
dec Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
<= Word
9 -> Word -> Maybe Word
forall a. a -> Maybe a
Just Word
dec
        | Word
hexl Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
<= Word
5 -> Word -> Maybe Word
forall a. a -> Maybe a
Just (Word -> Maybe Word) -> Word -> Maybe Word
forall a b. (a -> b) -> a -> b
$! Word
10 Word -> Word -> Word
forall a. Num a => a -> a -> a
+ Word
hexl
        | Word
hexu Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
<= Word
5 -> Word -> Maybe Word
forall a. a -> Maybe a
Just (Word -> Maybe Word) -> Word -> Maybe Word
forall a b. (a -> b) -> a -> b
$! Word
10 Word -> Word -> Word
forall a. Num a => a -> a -> a
+ Word
hexu
        | Bool
otherwise -> Maybe Word
forall a. Maybe a
Nothing
    if Word
l Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
<= Word
d Bool -> Bool -> Bool
&& Word
d Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
<= Word
h then Word -> Maybe Word
forall a. a -> Maybe a
Just Word
d else Maybe Word
forall a. Maybe a
Nothing
-- TODO: This can surely be optimized

----------------
-- Match stuff
----------------

tokenToSlice :: TextToken -> Text
tokenToSlice :: TextToken -> Text
tokenToSlice TextToken
t =
  Array -> Int -> Int -> Text
TInternal.Text (TextToken -> Array
tArr TextToken
t) (TextToken -> Int
tOffset TextToken
t) (Char -> Int
TInternalUtf8.utf8Length (TextToken -> Char
tChar TextToken
t))

tokenMatch :: (TextToken -> Maybe a) -> REText Text
tokenMatch :: forall a. (TextToken -> Maybe a) -> REText Text
tokenMatch TextToken -> Maybe a
t = (TextToken -> Maybe Text) -> REText Text
forall c a. (c -> Maybe a) -> RE c a
R.token (\ !TextToken
tok -> TextToken -> Text
tokenToSlice TextToken
tok Text -> Maybe a -> Maybe Text
forall a b. a -> Maybe b -> Maybe a
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ TextToken -> Maybe a
t TextToken
tok)

tokenWithMatch :: (TextToken -> Maybe a) -> REText (WithMatch a)
tokenWithMatch :: forall a. (TextToken -> Maybe a) -> REText (WithMatch a)
tokenWithMatch TextToken -> Maybe a
t = (TextToken -> Maybe (WithMatch a)) -> RE TextToken (WithMatch a)
forall c a. (c -> Maybe a) -> RE c a
R.token (\ !TextToken
tok -> Text -> a -> WithMatch a
forall a. Text -> a -> WithMatch a
WM (TextToken -> Text
tokenToSlice TextToken
tok) (a -> WithMatch a) -> Maybe a -> Maybe (WithMatch a)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> TextToken -> Maybe a
t TextToken
tok)

anyTokenMatch :: REText Text
anyTokenMatch :: REText Text
anyTokenMatch = (TextToken -> Maybe Text) -> REText Text
forall c a. (c -> Maybe a) -> RE c a
R.token (\TextToken
tok -> Text -> Maybe Text
forall a. a -> Maybe a
Just (Text -> Maybe Text) -> Text -> Maybe Text
forall a b. (a -> b) -> a -> b
$! TextToken -> Text
tokenToSlice TextToken
tok)

ignoreCaseTokenMatch :: Char -> REText Text
ignoreCaseTokenMatch :: Char -> REText Text
ignoreCaseTokenMatch Char
c = (TextToken -> Maybe Text) -> REText Text
forall c a. (c -> Maybe a) -> RE c a
R.token ((TextToken -> Maybe Text) -> REText Text)
-> (TextToken -> Maybe Text) -> REText Text
forall a b. (a -> b) -> a -> b
$ \TextToken
tok ->
  if Char -> Char
CF.caseFoldSimple (TextToken -> Char
tChar TextToken
tok) Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
c'
  then Text -> Maybe Text
forall a. a -> Maybe a
Just (Text -> Maybe Text) -> Text -> Maybe Text
forall a b. (a -> b) -> a -> b
$! TextToken -> Text
tokenToSlice TextToken
tok
  else Maybe Text
forall a. Maybe a
Nothing
  where
    !c' :: Char
c' = Char -> Char
CF.caseFoldSimple Char
c

oneOfTokenMatch :: CharSet -> REText Text
oneOfTokenMatch :: CharSet -> REText Text
oneOfTokenMatch !CharSet
cs = (TextToken -> Maybe Text) -> REText Text
forall c a. (c -> Maybe a) -> RE c a
R.token ((TextToken -> Maybe Text) -> REText Text)
-> (TextToken -> Maybe Text) -> REText Text
forall a b. (a -> b) -> a -> b
$ \TextToken
tok ->
  if Char -> CharSet -> Bool
CS.member (TextToken -> Char
tChar TextToken
tok) CharSet
cs
  then Text -> Maybe Text
forall a. a -> Maybe a
Just (Text -> Maybe Text) -> Text -> Maybe Text
forall a b. (a -> b) -> a -> b
$! TextToken -> Text
tokenToSlice TextToken
tok
  else Maybe Text
forall a. Maybe a
Nothing

-- | Rebuild the @RE@ such that the result is the matched @Text@ instead.
toMatch :: REText a -> REText Text
toMatch :: forall a. REText a -> REText Text
toMatch = REText a -> REText Text
forall a. REText a -> REText Text
go
  where
    go :: REText b -> REText Text
    go :: forall a. REText a -> REText Text
go REText b
re = case REText b
re of
      RToken TextToken -> Maybe b
t -> (TextToken -> Maybe b) -> REText Text
forall a. (TextToken -> Maybe a) -> REText Text
tokenMatch TextToken -> Maybe b
t
      RFmap Strictness
_ a1 -> b
_ RE TextToken a1
re1 -> RE TextToken a1 -> REText Text
forall a. REText a -> REText Text
go RE TextToken a1
re1
      RFmap_ b
_ RE TextToken a1
re1 -> RE TextToken a1 -> REText Text
forall a. REText a -> REText Text
go RE TextToken a1
re1
      RPure b
_ -> Text -> REText Text
forall a c. a -> RE c a
RPure Text
T.empty
      RLiftA2 Strictness
_ a1 -> a2 -> b
_ RE TextToken a1
re1 RE TextToken a2
re2 -> Strictness
-> (Text -> Text -> Text)
-> REText Text
-> REText Text
-> REText Text
forall a1 a2 a c.
Strictness -> (a1 -> a2 -> a) -> RE c a1 -> RE c a2 -> RE c a
RLiftA2 Strictness
Strict Text -> Text -> Text
adjacentAppend (RE TextToken a1 -> REText Text
forall a. REText a -> REText Text
go RE TextToken a1
re1) (RE TextToken a2 -> REText Text
forall a. REText a -> REText Text
go RE TextToken a2
re2)
      REText b
REmpty -> REText Text
forall c a. RE c a
REmpty
      RAlt REText b
re1 REText b
re2 -> REText Text -> REText Text -> REText Text
forall c a. RE c a -> RE c a -> RE c a
RAlt (REText b -> REText Text
forall a. REText a -> REText Text
go REText b
re1) (REText b -> REText Text
forall a. REText a -> REText Text
go REText b
re2)
      RMany a1 -> b
_ a2 -> b
_ a2 -> a1 -> a2
_ a2
_ RE TextToken a1
re1 -> Strictness
-> Greediness
-> (Text -> Text -> Text)
-> Text
-> REText Text
-> REText Text
forall a a1 c.
Strictness
-> Greediness -> (a -> a1 -> a) -> a -> RE c a1 -> RE c a
RFold Strictness
Strict Greediness
Greedy Text -> Text -> Text
adjacentAppend Text
T.empty (RE TextToken a1 -> REText Text
forall a. REText a -> REText Text
go RE TextToken a1
re1)
      RFold Strictness
_ Greediness
gr b -> a1 -> b
_ b
_ RE TextToken a1
re1 -> Strictness
-> Greediness
-> (Text -> Text -> Text)
-> Text
-> REText Text
-> REText Text
forall a a1 c.
Strictness
-> Greediness -> (a -> a1 -> a) -> a -> RE c a1 -> RE c a
RFold Strictness
Strict Greediness
gr Text -> Text -> Text
adjacentAppend Text
T.empty (RE TextToken a1 -> REText Text
forall a. REText a -> REText Text
go RE TextToken a1
re1)

data WithMatch a = WM {-# UNPACK #-} !Text a

instance Functor WithMatch where
  fmap :: forall a b. (a -> b) -> WithMatch a -> WithMatch b
fmap a -> b
f (WM Text
t a
x) = Text -> b -> WithMatch b
forall a. Text -> a -> WithMatch a
WM Text
t (a -> b
f a
x)

fmapWM' :: (a -> b) -> WithMatch a -> WithMatch b
fmapWM' :: forall a b. (a -> b) -> WithMatch a -> WithMatch b
fmapWM' a -> b
f (WM Text
t a
x) = Text -> b -> WithMatch b
forall a. Text -> a -> WithMatch a
WM Text
t (b -> WithMatch b) -> b -> WithMatch b
forall a b. (a -> b) -> a -> b
$! a -> b
f a
x

instance Applicative WithMatch where
  pure :: forall a. a -> WithMatch a
pure = Text -> a -> WithMatch a
forall a. Text -> a -> WithMatch a
WM Text
T.empty
  liftA2 :: forall a b c.
(a -> b -> c) -> WithMatch a -> WithMatch b -> WithMatch c
liftA2 a -> b -> c
f (WM Text
t1 a
x) (WM Text
t2 b
y) = Text -> c -> WithMatch c
forall a. Text -> a -> WithMatch a
WM (Text -> Text -> Text
adjacentAppend Text
t1 Text
t2) (a -> b -> c
f a
x b
y)

liftA2WM' :: (a1 -> a2 -> b) -> WithMatch a1 -> WithMatch a2 -> WithMatch b
liftA2WM' :: forall a b c.
(a -> b -> c) -> WithMatch a -> WithMatch b -> WithMatch c
liftA2WM' a1 -> a2 -> b
f (WM Text
t1 a1
x) (WM Text
t2 a2
y) = Text -> b -> WithMatch b
forall a. Text -> a -> WithMatch a
WM (Text -> Text -> Text
adjacentAppend Text
t1 Text
t2) (b -> WithMatch b) -> b -> WithMatch b
forall a b. (a -> b) -> a -> b
$! a1 -> a2 -> b
f a1
x a2
y

-- | Rebuild the @RE@ to include the matched @Text@ alongside the result.
withMatch :: REText a -> REText (Text, a)
withMatch :: forall a. REText a -> REText (Text, a)
withMatch = (WithMatch a -> (Text, a))
-> RE TextToken (WithMatch a) -> RE TextToken (Text, a)
forall a b c. (a -> b) -> RE c a -> RE c b
R.fmap' (\(WM Text
t a
x) -> (Text
t,a
x)) (RE TextToken (WithMatch a) -> RE TextToken (Text, a))
-> (REText a -> RE TextToken (WithMatch a))
-> REText a
-> RE TextToken (Text, a)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. REText a -> RE TextToken (WithMatch a)
forall b. REText b -> REText (WithMatch b)
go
  where
    go :: REText b -> REText (WithMatch b)
    go :: forall b. REText b -> REText (WithMatch b)
go REText b
re = case REText b
re of
      RToken TextToken -> Maybe b
t -> (TextToken -> Maybe b) -> REText (WithMatch b)
forall a. (TextToken -> Maybe a) -> REText (WithMatch a)
tokenWithMatch TextToken -> Maybe b
t
      RFmap Strictness
st a1 -> b
f RE TextToken a1
re1 ->
        let g :: WithMatch a1 -> WithMatch b
g = case Strictness
st of
              Strictness
Strict -> (a1 -> b) -> WithMatch a1 -> WithMatch b
forall a b. (a -> b) -> WithMatch a -> WithMatch b
fmapWM' a1 -> b
f
              Strictness
NonStrict -> (a1 -> b) -> WithMatch a1 -> WithMatch b
forall a b. (a -> b) -> WithMatch a -> WithMatch b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap a1 -> b
f
        in Strictness
-> (WithMatch a1 -> WithMatch b)
-> RE TextToken (WithMatch a1)
-> REText (WithMatch b)
forall a1 a c. Strictness -> (a1 -> a) -> RE c a1 -> RE c a
RFmap Strictness
Strict WithMatch a1 -> WithMatch b
g (RE TextToken a1 -> RE TextToken (WithMatch a1)
forall b. REText b -> REText (WithMatch b)
go RE TextToken a1
re1)
      RFmap_ b
b RE TextToken a1
re1 -> Strictness
-> (Text -> WithMatch b) -> REText Text -> REText (WithMatch b)
forall a1 a c. Strictness -> (a1 -> a) -> RE c a1 -> RE c a
RFmap Strictness
Strict ((Text -> b -> WithMatch b) -> b -> Text -> WithMatch b
forall a b c. (a -> b -> c) -> b -> a -> c
flip Text -> b -> WithMatch b
forall a. Text -> a -> WithMatch a
WM b
b) (RE TextToken a1 -> REText Text
forall a. REText a -> REText Text
toMatch RE TextToken a1
re1)
      RPure b
b -> WithMatch b -> REText (WithMatch b)
forall a c. a -> RE c a
RPure (b -> WithMatch b
forall a. a -> WithMatch a
forall (f :: * -> *) a. Applicative f => a -> f a
pure b
b)
      RLiftA2 Strictness
st a1 -> a2 -> b
f RE TextToken a1
re1 RE TextToken a2
re2 ->
        let g :: WithMatch a1 -> WithMatch a2 -> WithMatch b
g = case Strictness
st of
              Strictness
Strict -> (a1 -> a2 -> b) -> WithMatch a1 -> WithMatch a2 -> WithMatch b
forall a b c.
(a -> b -> c) -> WithMatch a -> WithMatch b -> WithMatch c
liftA2WM' a1 -> a2 -> b
f
              Strictness
NonStrict -> (a1 -> a2 -> b) -> WithMatch a1 -> WithMatch a2 -> WithMatch b
forall a b c.
(a -> b -> c) -> WithMatch a -> WithMatch b -> WithMatch c
forall (f :: * -> *) a b c.
Applicative f =>
(a -> b -> c) -> f a -> f b -> f c
liftA2 a1 -> a2 -> b
f
        in Strictness
-> (WithMatch a1 -> WithMatch a2 -> WithMatch b)
-> RE TextToken (WithMatch a1)
-> RE TextToken (WithMatch a2)
-> REText (WithMatch b)
forall a1 a2 a c.
Strictness -> (a1 -> a2 -> a) -> RE c a1 -> RE c a2 -> RE c a
RLiftA2 Strictness
Strict WithMatch a1 -> WithMatch a2 -> WithMatch b
g (RE TextToken a1 -> RE TextToken (WithMatch a1)
forall b. REText b -> REText (WithMatch b)
go RE TextToken a1
re1) (RE TextToken a2 -> RE TextToken (WithMatch a2)
forall b. REText b -> REText (WithMatch b)
go RE TextToken a2
re2)
      REText b
REmpty -> REText (WithMatch b)
forall c a. RE c a
REmpty
      RAlt REText b
re1 REText b
re2 -> REText (WithMatch b)
-> REText (WithMatch b) -> REText (WithMatch b)
forall c a. RE c a -> RE c a -> RE c a
RAlt (REText b -> REText (WithMatch b)
forall b. REText b -> REText (WithMatch b)
go REText b
re1) (REText b -> REText (WithMatch b)
forall b. REText b -> REText (WithMatch b)
go REText b
re2)
      RMany a1 -> b
f1 a2 -> b
f2 a2 -> a1 -> a2
f a2
z RE TextToken a1
re1 ->
        (WithMatch a1 -> WithMatch b)
-> (WithMatch a2 -> WithMatch b)
-> (WithMatch a2 -> WithMatch a1 -> WithMatch a2)
-> WithMatch a2
-> RE TextToken (WithMatch a1)
-> REText (WithMatch b)
forall a1 a a2 c.
(a1 -> a)
-> (a2 -> a) -> (a2 -> a1 -> a2) -> a2 -> RE c a1 -> RE c a
RMany ((a1 -> b) -> WithMatch a1 -> WithMatch b
forall a b. (a -> b) -> WithMatch a -> WithMatch b
fmapWM' a1 -> b
f1) ((a2 -> b) -> WithMatch a2 -> WithMatch b
forall a b. (a -> b) -> WithMatch a -> WithMatch b
fmapWM' a2 -> b
f2) ((a2 -> a1 -> a2) -> WithMatch a2 -> WithMatch a1 -> WithMatch a2
forall a b c.
(a -> b -> c) -> WithMatch a -> WithMatch b -> WithMatch c
liftA2WM' a2 -> a1 -> a2
f) (a2 -> WithMatch a2
forall a. a -> WithMatch a
forall (f :: * -> *) a. Applicative f => a -> f a
pure a2
z) (RE TextToken a1 -> RE TextToken (WithMatch a1)
forall b. REText b -> REText (WithMatch b)
go RE TextToken a1
re1)
      RFold Strictness
st Greediness
gr b -> a1 -> b
f b
z RE TextToken a1
re1 ->
        let g :: WithMatch b -> WithMatch a1 -> WithMatch b
g = case Strictness
st of
              Strictness
Strict -> (b -> a1 -> b) -> WithMatch b -> WithMatch a1 -> WithMatch b
forall a b c.
(a -> b -> c) -> WithMatch a -> WithMatch b -> WithMatch c
liftA2WM' b -> a1 -> b
f
              Strictness
NonStrict -> (b -> a1 -> b) -> WithMatch b -> WithMatch a1 -> WithMatch b
forall a b c.
(a -> b -> c) -> WithMatch a -> WithMatch b -> WithMatch c
forall (f :: * -> *) a b c.
Applicative f =>
(a -> b -> c) -> f a -> f b -> f c
liftA2 b -> a1 -> b
f
        in Strictness
-> Greediness
-> (WithMatch b -> WithMatch a1 -> WithMatch b)
-> WithMatch b
-> RE TextToken (WithMatch a1)
-> REText (WithMatch b)
forall a a1 c.
Strictness
-> Greediness -> (a -> a1 -> a) -> a -> RE c a1 -> RE c a
RFold Strictness
Strict Greediness
gr WithMatch b -> WithMatch a1 -> WithMatch b
g (b -> WithMatch b
forall a. a -> WithMatch a
forall (f :: * -> *) a. Applicative f => a -> f a
pure b
z) (RE TextToken a1 -> RE TextToken (WithMatch a1)
forall b. REText b -> REText (WithMatch b)
go RE TextToken a1
re1)

----------
-- Parse
----------

tokenFoldr :: (TextToken -> b -> b) -> b -> Text -> b
tokenFoldr :: forall b. (TextToken -> b -> b) -> b -> Text -> b
tokenFoldr TextToken -> b -> b
f b
z (TInternal.Text Array
a Int
o0 Int
l) = Int -> b
loop Int
o0
  where
    loop :: Int -> b
loop Int
o | Int
o Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
o0 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
l = b
z
    loop Int
o = case Array -> Int -> Iter
TUnsafe.iterArray Array
a Int
o of
      TUnsafe.Iter Char
c Int
clen -> TextToken -> b -> b
f (Array -> Int -> Char -> TextToken
TextToken Array
a Int
o Char
c) (Int -> b
loop (Int
o Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
clen))
{-# INLINE tokenFoldr #-}

-- | \(O(mn \log m)\). Parse a @Text@ with a @REText@.
--
-- Uses 'Regex.Text.compile', see the note there.
--
-- If parsing multiple @Text@s using the same @RE@, it is wasteful to compile
-- the @RE@ every time. So, prefer to
--
-- * Compile once with 'Regex.Text.compile' or 'Regex.Text.compileBounded' and
--   use the compiled 'ParserText'  with 'parse' as many times as required.
-- * Alternately, partially apply this function to a @RE@ and use the function
--   as many times as required.
reParse :: REText a -> Text -> Maybe a
reParse :: forall a. REText a -> Text -> Maybe a
reParse REText a
re = let !p :: Parser TextToken a
p = REText a -> Parser TextToken a
forall c a. RE c a -> Parser c a
P.compile REText a
re in Parser TextToken a -> Text -> Maybe a
forall a. ParserText a -> Text -> Maybe a
parse Parser TextToken a
p
{-# INLINE reParse #-}

-- | \(O(mn \log m)\). Parse a @Text@ with a @ParserText@.
parse :: ParserText a -> Text -> Maybe a
parse :: forall a. ParserText a -> Text -> Maybe a
parse = (forall b. (TextToken -> b -> b) -> b -> Text -> b)
-> Parser TextToken a -> Text -> Maybe a
forall f c a. Foldr f c -> Parser c a -> f -> Maybe a
P.parseFoldr (TextToken -> b -> b) -> b -> Text -> b
forall b. (TextToken -> b -> b) -> b -> Text -> b
tokenFoldr

-- | \(O(mn \log m)\). Parse a @Text@ with a @ParserText@. Calls 'error' on
-- parse failure.
--
-- For use with parsers that are known to never fail.
parseSure :: ParserText a -> Text -> a
parseSure :: forall a. ParserText a -> Text -> a
parseSure ParserText a
p = a -> Maybe a -> a
forall a. a -> Maybe a -> a
fromMaybe a
forall a. a
parseSureError (Maybe a -> a) -> (Text -> Maybe a) -> Text -> a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ParserText a -> Text -> Maybe a
forall a. ParserText a -> Text -> Maybe a
parse ParserText a
p

parseSureError :: a
parseSureError :: forall a. a
parseSureError = [Char] -> a
forall a. [Char] -> a
errorWithoutStackTrace
  [Char]
"Regex.Text.parseSure: parse failed; if parsing can fail use 'parse' instead"

reParseSure :: REText a -> Text -> a
reParseSure :: forall a. REText a -> Text -> a
reParseSure REText a
re = a -> Maybe a -> a
forall a. a -> Maybe a -> a
fromMaybe a
forall a. a
parseSureError (Maybe a -> a) -> (Text -> Maybe a) -> Text -> a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. REText a -> Text -> Maybe a
forall a. REText a -> Text -> Maybe a
reParse REText a
re
{-# INLINE reParseSure #-}

-- | \(O(mn \log m)\). Find the first occurence of the given @RE@ in a @Text@.
--
-- ==== __Examples__
--
-- >>> find (text "meow") "homeowner"
-- Just "meow"
--
-- To test whether a @Text@ is present in another @Text@, like above, prefer
-- @Data.Text.'T.isInfixOf'@.
--
-- >>> find (textIgnoreCase "haskell") "Look I'm Haskelling!"
-- Just "Haskell"
-- >>> find (text "backtracking") "parser-regex"
-- Nothing
--
find :: REText a -> Text -> Maybe a
find :: forall a. REText a -> Text -> Maybe a
find = REText a -> Text -> Maybe a
forall a. REText a -> Text -> Maybe a
reParse (REText a -> Text -> Maybe a)
-> (REText a -> REText a) -> REText a -> Text -> Maybe a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. REText a -> REText a
forall c a. RE c a -> RE c a
R.toFind
{-# INLINE find #-}

-- | \(O(mn \log m)\). Find all non-overlapping occurences of the given @RE@ in
-- the @Text@.
--
-- ==== __Examples__
--
-- >>> findAll (text "ana") "banananana"
-- ["ana","ana"]
--
-- @
-- data Roll = Roll
--   Natural -- ^ Rolls
--   Natural -- ^ Faces on the die
--   deriving Show
--
-- roll :: REText Roll
-- roll = Roll \<$> ('naturalDec' \<|> pure 1) \<* 'char' \'d\' \<*> naturalDec
-- @
--
-- >>> findAll roll "3d6, d10, 2d10"
-- [Roll 3 6,Roll 1 10,Roll 2 10]
--
findAll :: REText a -> Text -> [a]
findAll :: forall a. REText a -> Text -> [a]
findAll = REText [a] -> Text -> [a]
forall a. REText a -> Text -> a
reParseSure (REText [a] -> Text -> [a])
-> (REText a -> REText [a]) -> REText a -> Text -> [a]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. REText a -> REText [a]
forall c a. RE c a -> RE c [a]
R.toFindMany
{-# INLINE findAll #-}

-- | \(O(mn \log m)\). Split a @Text@ at occurences of the given @RE@.
--
-- ==== __Examples__
--
-- >>> splitOn (char ' ') "Glasses are really versatile"
-- ["Glasses","are","really","versatile"]
--
-- For simple splitting, like above, prefer @Data.Text.'Data.Text.words'@,
-- @Data.Text.'Data.Text.lines'@, @Data.Text.'Data.Text.split'@ or
-- @Data.Text.'Data.Text.splitOn'@, whichever is applicable.
--
-- >>> splitOn (char ' ' *> oneOf "+-=" *> char ' ') "3 - 1 + 1/2 - 2 = 0"
-- ["3","1","1/2","2","0"]
--
-- If the @Text@ starts or ends with a delimiter, the result will contain
-- empty @Text@s at those positions.
--
-- >>> splitOn (char 'a') "ayaya"
-- ["","y","y",""]
--
splitOn :: REText a -> Text -> [Text]
splitOn :: forall a. REText a -> Text -> [Text]
splitOn = REText [Text] -> Text -> [Text]
forall a. REText a -> Text -> a
reParseSure (REText [Text] -> Text -> [Text])
-> (REText a -> REText [Text]) -> REText a -> Text -> [Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. REText a -> REText [Text]
forall a. REText a -> REText [Text]
toSplitOn
{-# INLINE splitOn #-}

toSplitOn :: REText a -> REText [Text]
toSplitOn :: forall a. REText a -> REText [Text]
toSplitOn REText a
re = REText Text
manyTextMin REText Text -> REText a -> REText [Text]
forall c a sep. RE c a -> RE c sep -> RE c [a]
`R.sepBy` REText a
re

-- | \(O(mn \log m)\). Replace the first match of the given @RE@ with its
-- result. If there is no match, the result is @Nothing@.
--
-- ==== __Examples__
--
-- >>> replace ("world" <$ text "Haskell") "Hello, Haskell!"
-- Just "Hello, world!"
--
-- >>> replace ("," <$ some (char '.')) "one...two...ten"
-- Just "one,two...ten"
--
replace :: REText Text -> Text -> Maybe Text
replace :: REText Text -> Text -> Maybe Text
replace = REText Text -> Text -> Maybe Text
forall a. REText a -> Text -> Maybe a
reParse (REText Text -> Text -> Maybe Text)
-> (REText Text -> REText Text)
-> REText Text
-> Text
-> Maybe Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. REText Text -> REText Text
toReplace
{-# INLINE replace #-}

toReplace :: REText Text -> REText Text
toReplace :: REText Text -> REText Text
toReplace REText Text
re = (Text -> Text -> Text -> Text)
-> REText Text -> REText Text -> RE TextToken (Text -> Text)
forall a b c.
(a -> b -> c) -> RE TextToken a -> RE TextToken b -> RE TextToken c
forall (f :: * -> *) a b c.
Applicative f =>
(a -> b -> c) -> f a -> f b -> f c
liftA2 Text -> Text -> Text -> Text
f REText Text
manyTextMin REText Text
re RE TextToken (Text -> Text) -> REText Text -> REText Text
forall a b.
RE TextToken (a -> b) -> RE TextToken a -> RE TextToken b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> REText Text
manyText
  where
    f :: Text -> Text -> Text -> Text
f Text
a Text
b Text
c = [Text] -> Text
reverseConcat [Text
c,Text
b,Text
a]

-- | \(O(mn \log m)\). Replace non-overlapping matches of the given @RE@ with
-- their results.
--
-- ==== __Examples__
--
-- >>> replaceAll (" and " <$ text ", ") "red, blue, green"
-- "red and blue and green"
--
-- For simple replacements like above, prefer @Data.Text.'Data.Text.replace'@.
--
-- >>> replaceAll ("Fruit" <$ text "Time" <|> "banana" <$ text "arrow") "Time flies like an arrow"
-- "Fruit flies like a banana"
--
-- @
-- sep = 'oneOf' "-./"
-- digits n = 'toMatch' ('replicateM_' n (oneOf 'Data.CharSet.digit'))
-- toYmd d m y = mconcat [y, \"-\", m, \"-\", d]
-- date = toYmd \<$> digits 2 \<* sep
--              \<*> digits 2 \<* sep
--              \<*> digits 4
-- @
-- >>> replaceAll date "01/01/1970, 01-04-1990, 03.07.2011"
-- "1970-01-01, 1990-04-01, 2011-07-03"
--
replaceAll :: REText Text -> Text -> Text
replaceAll :: REText Text -> Text -> Text
replaceAll = REText Text -> Text -> Text
forall a. REText a -> Text -> a
reParseSure (REText Text -> Text -> Text)
-> (REText Text -> REText Text) -> REText Text -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. REText Text -> REText Text
toReplaceMany
{-# INLINE replaceAll #-}

toReplaceMany :: REText Text -> REText Text
toReplaceMany :: REText Text -> REText Text
toReplaceMany REText Text
re =
  [Text] -> Text
reverseConcat ([Text] -> Text) -> REText [Text] -> REText Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ([Text] -> Text -> [Text])
-> [Text] -> REText Text -> REText [Text]
forall b a c. (b -> a -> b) -> b -> RE c a -> RE c b
R.foldlMany' ((Text -> [Text] -> [Text]) -> [Text] -> Text -> [Text]
forall a b c. (a -> b -> c) -> b -> a -> c
flip (:)) [] (REText Text
re REText Text -> REText Text -> REText Text
forall a. RE TextToken a -> RE TextToken a -> RE TextToken a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> REText Text
anyTokenMatch)

-------------------------
-- Low level Text stuff
-------------------------

-- WARNING: If t1 and t2 are not empty, they must be adjacent slices of the
-- same Text. In other words, sameByteArray# a1 _a2 && o1 + l1 == _o2.
adjacentAppend :: Text -> Text -> Text
adjacentAppend :: Text -> Text -> Text
adjacentAppend t1 :: Text
t1@(TInternal.Text Array
a1 Int
o1 Int
l1) t2 :: Text
t2@(TInternal.Text Array
_a2 Int
_o2 Int
l2)
  | Text -> Bool
T.null Text
t1 = Text
t2
  | Text -> Bool
T.null Text
t2 = Text
t1
  | Bool
otherwise = Array -> Int -> Int -> Text
TInternal.Text Array
a1 Int
o1 (Int
l1Int -> Int -> Int
forall a. Num a => a -> a -> a
+Int
l2)

-- reverseConcat = T.concat . reverse
reverseConcat :: [Text] -> Text
reverseConcat :: [Text] -> Text
reverseConcat [Text]
ts = case [Text]
ts of
  [] -> Text
T.empty
  [Text
t] -> Text
t
  [Text]
_ | Int
len Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 -> Text
T.empty
    | Bool
otherwise -> Array -> Int -> Int -> Text
TInternal.Text Array
arr Int
0 Int
len
  where
    flen :: Int -> Text -> Int
flen Int
acc (TInternal.Text Array
_ Int
_ Int
l)
      | Int
acc' Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0 = Int
acc'
      | Bool
otherwise = Int
forall a. a
reverseConcatOverflowError
      where
        acc' :: Int
acc' = Int
acc Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
l
    len :: Int
len = (Int -> Text -> Int) -> Int -> [Text] -> Int
forall b a. (b -> a -> b) -> b -> [a] -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' Int -> Text -> Int
flen Int
0 [Text]
ts
    arr :: Array
arr = (forall s. ST s (MArray s)) -> Array
TArray.run ((forall s. ST s (MArray s)) -> Array)
-> (forall s. ST s (MArray s)) -> Array
forall a b. (a -> b) -> a -> b
$ do
      MArray s
marr <- Int -> ST s (MArray s)
forall s. Int -> ST s (MArray s)
TArray.new Int
len
      let loop :: Int -> [Text] -> ST s (MArray s)
loop !Int
_ [] = MArray s -> ST s (MArray s)
forall a. a -> ST s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure MArray s
marr
          loop Int
i (TInternal.Text Array
a Int
o Int
l : [Text]
ts') =
            Int -> MArray s -> Int -> Array -> Int -> ST s ()
forall s. Int -> MArray s -> Int -> Array -> Int -> ST s ()
TArray.copyI Int
l MArray s
marr (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
l) Array
a Int
o ST s () -> ST s (MArray s) -> ST s (MArray s)
forall a b. ST s a -> ST s b -> ST s b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Int -> [Text] -> ST s (MArray s)
loop (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
l) [Text]
ts'
      Int -> [Text] -> ST s (MArray s)
loop Int
len [Text]
ts

reverseConcatOverflowError :: a
reverseConcatOverflowError :: forall a. a
reverseConcatOverflowError =
  [Char] -> a
forall a. [Char] -> a
errorWithoutStackTrace [Char]
"Regex.Text.reverseConcat: size overflow"

----------
-- Notes
----------

-- Note [Why simple case fold]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
--
-- Unicode defines two different ways to case fold, "simple" and "full". Full is
-- superior to simple, and capable of folding more pairs of texts to the same
-- text. This is what is used by Data.Text.toCaseFold.
--
-- However, full maps a Char to one or more Chars, for instance "ß" maps to
-- "ss". Since we operate on one Char at a time without backtracking, we must
-- have branching in our regex corresponding to possible texts that case fold to
-- a target text. For instance, to match "sssss" with full case fold given the
-- above mapping, possible inputs are
--
-- sssss, sssß, ssßs, sßss, ßsss, sßß, ßsß, ßßs
--
-- Fun fact: the number of strings that match "s"*n is Fibonacci(n+1).
-- Of course, we can't have textIgnoreCase take a text and explode into a regex
-- of exponential size.
--
-- So, we restrict ourselves to simple case folding. Simple case folding
-- maps a single Char to a single Char. And it's easy to test that the required
-- Char and a Char in the input case fold to the same Char.
--
-- Note that charIgnoreCase could possibly use full case folding. Only a small
-- number of texts would case fold to the case fold of a single Char. But we
-- stick with simple case fold for consistency.