-- | This is a POSIX version of parseRegex that allows NUL characters.
-- Lazy\/Possessive\/Backrefs are not recognized.  Anchors \^ and \$ are
-- recognized.
--
-- A 'PGroup' returned always has @(Maybe 'GroupIndex')@ set to @(Just _)@
-- and never to @Nothing@.

module Text.Regex.TDFA.ReadRegex (parseRegex) where

{- By Chris Kuklewicz, 2007. BSD License, see the LICENSE file. -}

import Text.Regex.TDFA.Pattern {- all -}
import Text.ParserCombinators.Parsec((<|>), (<?>),
  try, runParser, many, getState, setState, CharParser, ParseError,
  sepBy1, option, notFollowedBy, many1, lookAhead, eof, between,
  string, noneOf, digit, char, anyChar)

import Control.Monad (liftM, guard)

import Data.Foldable (asum)
import qualified Data.Set as Set(fromList)

-- | An element inside @[...]@, denoting a character class.
data BracketElement
  = BEChar  Char       -- ^ A single character.
  | BERange Char Char  -- ^ A character range (e.g. @a-z@).
  | BEColl  String     -- ^ @foo@ in @[.foo.]@.
  | BEEquiv String     -- ^ @bar@ in @[=bar=]@.
  | BEClass String     -- ^ A POSIX character class (candidate), e.g. @alpha@ parsed from @[:alpha:]@.

-- | Return either an error message or a tuple of the Pattern and the
-- largest group index and the largest DoPa index (both have smallest
-- index of 1).  Since the regular expression is supplied as [Char] it
-- automatically supports unicode and @\\NUL@ characters.
parseRegex :: String -> Either ParseError (Pattern,(GroupIndex,DoPa))
parseRegex :: String -> Either ParseError (Pattern, (GroupIndex, DoPa))
parseRegex String
x = GenParser
  Char (GroupIndex, GroupIndex) (Pattern, (GroupIndex, DoPa))
-> (GroupIndex, GroupIndex)
-> String
-> String
-> Either ParseError (Pattern, (GroupIndex, DoPa))
forall tok st a.
GenParser tok st a -> st -> String -> [tok] -> Either ParseError a
runParser (do Pattern
pat <- P Pattern
p_regex
                             ParsecT String (GroupIndex, GroupIndex) Identity ()
forall s (m :: * -> *) t u.
(Stream s m t, Show t) =>
ParsecT s u m ()
eof
                             (GroupIndex
lastGroupIndex,GroupIndex
lastDopa) <- ParsecT
  String (GroupIndex, GroupIndex) Identity (GroupIndex, GroupIndex)
forall (m :: * -> *) s u. Monad m => ParsecT s u m u
getState
                             (Pattern, (GroupIndex, DoPa))
-> GenParser
     Char (GroupIndex, GroupIndex) (Pattern, (GroupIndex, DoPa))
forall (m :: * -> *) a. Monad m => a -> m a
return (Pattern
pat,(GroupIndex
lastGroupIndex,GroupIndex -> DoPa
DoPa GroupIndex
lastDopa))) (GroupIndex
0,GroupIndex
0) String
x String
x

type P = CharParser (GroupIndex, Int)

p_regex :: P Pattern
p_regex :: P Pattern
p_regex = ([Pattern] -> Pattern)
-> ParsecT String (GroupIndex, GroupIndex) Identity [Pattern]
-> P Pattern
forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
liftM [Pattern] -> Pattern
POr (ParsecT String (GroupIndex, GroupIndex) Identity [Pattern]
 -> P Pattern)
-> ParsecT String (GroupIndex, GroupIndex) Identity [Pattern]
-> P Pattern
forall a b. (a -> b) -> a -> b
$ P Pattern
-> ParsecT String (GroupIndex, GroupIndex) Identity Char
-> ParsecT String (GroupIndex, GroupIndex) Identity [Pattern]
forall s (m :: * -> *) t u a sep.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m sep -> ParsecT s u m [a]
sepBy1 P Pattern
p_branch (Char -> ParsecT String (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'|')

-- man re_format helps a lot, it says one-or-more pieces so this is
-- many1 not many.  Use "()" to indicate an empty piece.
p_branch :: P Pattern
p_branch :: P Pattern
p_branch = ([Pattern] -> Pattern)
-> ParsecT String (GroupIndex, GroupIndex) Identity [Pattern]
-> P Pattern
forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
liftM [Pattern] -> Pattern
PConcat (ParsecT String (GroupIndex, GroupIndex) Identity [Pattern]
 -> P Pattern)
-> ParsecT String (GroupIndex, GroupIndex) Identity [Pattern]
-> P Pattern
forall a b. (a -> b) -> a -> b
$ P Pattern
-> ParsecT String (GroupIndex, GroupIndex) Identity [Pattern]
forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m [a]
many1 P Pattern
p_piece

p_piece :: P Pattern
p_piece :: P Pattern
p_piece = (P Pattern
p_anchor P Pattern -> P Pattern -> P Pattern
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> P Pattern
p_atom) P Pattern -> (Pattern -> P Pattern) -> P Pattern
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Pattern -> P Pattern
p_post_atom -- correct specification

p_atom :: P Pattern
p_atom :: P Pattern
p_atom =  P Pattern
p_group P Pattern -> P Pattern -> P Pattern
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> P Pattern
p_bracket P Pattern -> P Pattern -> P Pattern
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> P Pattern
p_char P Pattern -> String -> P Pattern
forall s u (m :: * -> *) a.
ParsecT s u m a -> String -> ParsecT s u m a
<?> String
"an atom"

group_index :: P (Maybe GroupIndex)
group_index :: P (Maybe GroupIndex)
group_index = do
  (GroupIndex
gi,GroupIndex
ci) <- ParsecT
  String (GroupIndex, GroupIndex) Identity (GroupIndex, GroupIndex)
forall (m :: * -> *) s u. Monad m => ParsecT s u m u
getState
  let index :: GroupIndex
index = GroupIndex -> GroupIndex
forall a. Enum a => a -> a
succ GroupIndex
gi
  (GroupIndex, GroupIndex)
-> ParsecT String (GroupIndex, GroupIndex) Identity ()
forall (m :: * -> *) u s. Monad m => u -> ParsecT s u m ()
setState (GroupIndex
index,GroupIndex
ci)
  Maybe GroupIndex -> P (Maybe GroupIndex)
forall (m :: * -> *) a. Monad m => a -> m a
return (GroupIndex -> Maybe GroupIndex
forall a. a -> Maybe a
Just GroupIndex
index)

p_group :: P Pattern
p_group :: P Pattern
p_group = ParsecT String (GroupIndex, GroupIndex) Identity Char
-> ParsecT String (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m a
lookAhead (Char -> ParsecT String (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'(') ParsecT String (GroupIndex, GroupIndex) Identity Char
-> P Pattern -> P Pattern
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> do
  Maybe GroupIndex
index <- P (Maybe GroupIndex)
group_index
  (Pattern -> Pattern) -> P Pattern -> P Pattern
forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
liftM (Maybe GroupIndex -> Pattern -> Pattern
PGroup Maybe GroupIndex
index) (P Pattern -> P Pattern) -> P Pattern -> P Pattern
forall a b. (a -> b) -> a -> b
$ ParsecT String (GroupIndex, GroupIndex) Identity Char
-> ParsecT String (GroupIndex, GroupIndex) Identity Char
-> P Pattern
-> P Pattern
forall s (m :: * -> *) t u open close a.
Stream s m t =>
ParsecT s u m open
-> ParsecT s u m close -> ParsecT s u m a -> ParsecT s u m a
between (Char -> ParsecT String (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'(') (Char -> ParsecT String (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
')') P Pattern
p_regex

-- p_post_atom takes the previous atom as a parameter
p_post_atom :: Pattern -> P Pattern
p_post_atom :: Pattern -> P Pattern
p_post_atom Pattern
atom = (Char -> ParsecT String (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'?' ParsecT String (GroupIndex, GroupIndex) Identity Char
-> P Pattern -> P Pattern
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Pattern -> P Pattern
forall (m :: * -> *) a. Monad m => a -> m a
return (Pattern -> Pattern
PQuest Pattern
atom))
               P Pattern -> P Pattern -> P Pattern
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> (Char -> ParsecT String (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'+' ParsecT String (GroupIndex, GroupIndex) Identity Char
-> P Pattern -> P Pattern
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Pattern -> P Pattern
forall (m :: * -> *) a. Monad m => a -> m a
return (Pattern -> Pattern
PPlus Pattern
atom))
               P Pattern -> P Pattern -> P Pattern
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> (Char -> ParsecT String (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'*' ParsecT String (GroupIndex, GroupIndex) Identity Char
-> P Pattern -> P Pattern
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Pattern -> P Pattern
forall (m :: * -> *) a. Monad m => a -> m a
return (Bool -> Pattern -> Pattern
PStar Bool
True Pattern
atom))
               P Pattern -> P Pattern -> P Pattern
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> Pattern -> P Pattern
p_bound Pattern
atom
               P Pattern -> P Pattern -> P Pattern
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> Pattern -> P Pattern
forall (m :: * -> *) a. Monad m => a -> m a
return Pattern
atom

p_bound :: Pattern -> P Pattern
p_bound :: Pattern -> P Pattern
p_bound Pattern
atom = P Pattern -> P Pattern
forall tok st a. GenParser tok st a -> GenParser tok st a
try (P Pattern -> P Pattern) -> P Pattern -> P Pattern
forall a b. (a -> b) -> a -> b
$ ParsecT String (GroupIndex, GroupIndex) Identity Char
-> ParsecT String (GroupIndex, GroupIndex) Identity Char
-> P Pattern
-> P Pattern
forall s (m :: * -> *) t u open close a.
Stream s m t =>
ParsecT s u m open
-> ParsecT s u m close -> ParsecT s u m a -> ParsecT s u m a
between (Char -> ParsecT String (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'{') (Char -> ParsecT String (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'}') (Pattern -> P Pattern
p_bound_spec Pattern
atom)

p_bound_spec :: Pattern -> P Pattern
p_bound_spec :: Pattern -> P Pattern
p_bound_spec Pattern
atom = do String
lowS <- ParsecT String (GroupIndex, GroupIndex) Identity Char
-> ParsecT String (GroupIndex, GroupIndex) Identity String
forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m [a]
many1 ParsecT String (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m Char
digit
                       let lowI :: GroupIndex
lowI = String -> GroupIndex
forall a. Read a => String -> a
read String
lowS
                       Maybe GroupIndex
highMI <- Maybe GroupIndex -> P (Maybe GroupIndex) -> P (Maybe GroupIndex)
forall s (m :: * -> *) t a u.
Stream s m t =>
a -> ParsecT s u m a -> ParsecT s u m a
option (GroupIndex -> Maybe GroupIndex
forall a. a -> Maybe a
Just GroupIndex
lowI) (P (Maybe GroupIndex) -> P (Maybe GroupIndex))
-> P (Maybe GroupIndex) -> P (Maybe GroupIndex)
forall a b. (a -> b) -> a -> b
$ P (Maybe GroupIndex) -> P (Maybe GroupIndex)
forall tok st a. GenParser tok st a -> GenParser tok st a
try (P (Maybe GroupIndex) -> P (Maybe GroupIndex))
-> P (Maybe GroupIndex) -> P (Maybe GroupIndex)
forall a b. (a -> b) -> a -> b
$ do
                                   Char
_ <- Char -> ParsecT String (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
','
  -- parsec note: if 'many digits' fails below then the 'try' ensures
  -- that the ',' will not match the closing '}' in p_bound, same goes
  -- for any non '}' garbage after the 'many digits'.
                                   String
highS <- ParsecT String (GroupIndex, GroupIndex) Identity Char
-> ParsecT String (GroupIndex, GroupIndex) Identity String
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many ParsecT String (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m Char
digit
                                   if String -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
highS then Maybe GroupIndex -> P (Maybe GroupIndex)
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe GroupIndex
forall a. Maybe a
Nothing -- no upper bound
                                     else do let highI :: GroupIndex
highI = String -> GroupIndex
forall a. Read a => String -> a
read String
highS
                                             Bool -> ParsecT String (GroupIndex, GroupIndex) Identity ()
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (GroupIndex
lowI GroupIndex -> GroupIndex -> Bool
forall a. Ord a => a -> a -> Bool
<= GroupIndex
highI)
                                             Maybe GroupIndex -> P (Maybe GroupIndex)
forall (m :: * -> *) a. Monad m => a -> m a
return (GroupIndex -> Maybe GroupIndex
forall a. a -> Maybe a
Just (String -> GroupIndex
forall a. Read a => String -> a
read String
highS))
                       Pattern -> P Pattern
forall (m :: * -> *) a. Monad m => a -> m a
return (GroupIndex -> Maybe GroupIndex -> Pattern -> Pattern
PBound GroupIndex
lowI Maybe GroupIndex
highMI Pattern
atom)

-- An anchor cannot be modified by a repetition specifier
p_anchor :: P Pattern
p_anchor :: P Pattern
p_anchor = (Char -> ParsecT String (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'^' ParsecT String (GroupIndex, GroupIndex) Identity Char
-> P Pattern -> P Pattern
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> (DoPa -> Pattern)
-> ParsecT String (GroupIndex, GroupIndex) Identity DoPa
-> P Pattern
forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
liftM DoPa -> Pattern
PCarat ParsecT String (GroupIndex, GroupIndex) Identity DoPa
char_index)
       P Pattern -> P Pattern -> P Pattern
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> (Char -> ParsecT String (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'$' ParsecT String (GroupIndex, GroupIndex) Identity Char
-> P Pattern -> P Pattern
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> (DoPa -> Pattern)
-> ParsecT String (GroupIndex, GroupIndex) Identity DoPa
-> P Pattern
forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
liftM DoPa -> Pattern
PDollar ParsecT String (GroupIndex, GroupIndex) Identity DoPa
char_index)
       P Pattern -> P Pattern -> P Pattern
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> P Pattern -> P Pattern
forall tok st a. GenParser tok st a -> GenParser tok st a
try (do String
_ <- String -> ParsecT String (GroupIndex, GroupIndex) Identity String
forall s (m :: * -> *) u.
Stream s m Char =>
String -> ParsecT s u m String
string String
"()"
                   Maybe GroupIndex
index <- P (Maybe GroupIndex)
group_index
                   Pattern -> P Pattern
forall (m :: * -> *) a. Monad m => a -> m a
return (Pattern -> P Pattern) -> Pattern -> P Pattern
forall a b. (a -> b) -> a -> b
$ Maybe GroupIndex -> Pattern -> Pattern
PGroup Maybe GroupIndex
index Pattern
PEmpty)
       P Pattern -> String -> P Pattern
forall s u (m :: * -> *) a.
ParsecT s u m a -> String -> ParsecT s u m a
<?> String
"empty () or anchor ^ or $"

char_index :: P DoPa
char_index :: ParsecT String (GroupIndex, GroupIndex) Identity DoPa
char_index = do (GroupIndex
gi,GroupIndex
ci) <- ParsecT
  String (GroupIndex, GroupIndex) Identity (GroupIndex, GroupIndex)
forall (m :: * -> *) s u. Monad m => ParsecT s u m u
getState
                let ci' :: GroupIndex
ci' = GroupIndex -> GroupIndex
forall a. Enum a => a -> a
succ GroupIndex
ci
                (GroupIndex, GroupIndex)
-> ParsecT String (GroupIndex, GroupIndex) Identity ()
forall (m :: * -> *) u s. Monad m => u -> ParsecT s u m ()
setState (GroupIndex
gi,GroupIndex
ci')
                DoPa -> ParsecT String (GroupIndex, GroupIndex) Identity DoPa
forall (m :: * -> *) a. Monad m => a -> m a
return (GroupIndex -> DoPa
DoPa GroupIndex
ci')

p_char :: P Pattern
p_char :: P Pattern
p_char = P Pattern
p_dot P Pattern -> P Pattern -> P Pattern
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> P Pattern
p_left_brace P Pattern -> P Pattern -> P Pattern
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> P Pattern
p_escaped P Pattern -> P Pattern -> P Pattern
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> P Pattern
p_other_char where
  p_dot :: P Pattern
p_dot = Char -> ParsecT String (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'.' ParsecT String (GroupIndex, GroupIndex) Identity Char
-> ParsecT String (GroupIndex, GroupIndex) Identity DoPa
-> ParsecT String (GroupIndex, GroupIndex) Identity DoPa
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ParsecT String (GroupIndex, GroupIndex) Identity DoPa
char_index ParsecT String (GroupIndex, GroupIndex) Identity DoPa
-> (DoPa -> P Pattern) -> P Pattern
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Pattern -> P Pattern
forall (m :: * -> *) a. Monad m => a -> m a
return (Pattern -> P Pattern) -> (DoPa -> Pattern) -> DoPa -> P Pattern
forall b c a. (b -> c) -> (a -> b) -> a -> c
. DoPa -> Pattern
PDot
  p_left_brace :: P Pattern
p_left_brace = P Pattern -> P Pattern
forall tok st a. GenParser tok st a -> GenParser tok st a
try (P Pattern -> P Pattern) -> P Pattern -> P Pattern
forall a b. (a -> b) -> a -> b
$ (Char -> ParsecT String (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'{' ParsecT String (GroupIndex, GroupIndex) Identity Char
-> ParsecT String (GroupIndex, GroupIndex) Identity ()
-> ParsecT String (GroupIndex, GroupIndex) Identity ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ParsecT String (GroupIndex, GroupIndex) Identity Char
-> ParsecT String (GroupIndex, GroupIndex) Identity ()
forall s (m :: * -> *) t a u.
(Stream s m t, Show a) =>
ParsecT s u m a -> ParsecT s u m ()
notFollowedBy ParsecT String (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m Char
digit ParsecT String (GroupIndex, GroupIndex) Identity ()
-> ParsecT String (GroupIndex, GroupIndex) Identity DoPa
-> ParsecT String (GroupIndex, GroupIndex) Identity DoPa
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ParsecT String (GroupIndex, GroupIndex) Identity DoPa
char_index ParsecT String (GroupIndex, GroupIndex) Identity DoPa
-> (DoPa -> P Pattern) -> P Pattern
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Pattern -> P Pattern
forall (m :: * -> *) a. Monad m => a -> m a
return (Pattern -> P Pattern) -> (DoPa -> Pattern) -> DoPa -> P Pattern
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (DoPa -> Char -> Pattern
`PChar` Char
'{'))
  p_escaped :: P Pattern
p_escaped = Char -> ParsecT String (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'\\' ParsecT String (GroupIndex, GroupIndex) Identity Char
-> ParsecT String (GroupIndex, GroupIndex) Identity Char
-> ParsecT String (GroupIndex, GroupIndex) Identity Char
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ParsecT String (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m Char
anyChar ParsecT String (GroupIndex, GroupIndex) Identity Char
-> (Char -> P Pattern) -> P Pattern
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \Char
c -> ParsecT String (GroupIndex, GroupIndex) Identity DoPa
char_index ParsecT String (GroupIndex, GroupIndex) Identity DoPa
-> (DoPa -> P Pattern) -> P Pattern
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Pattern -> P Pattern
forall (m :: * -> *) a. Monad m => a -> m a
return (Pattern -> P Pattern) -> (DoPa -> Pattern) -> DoPa -> P Pattern
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (DoPa -> Char -> Pattern
`PEscape` Char
c)
  p_other_char :: P Pattern
p_other_char = String -> ParsecT String (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
String -> ParsecT s u m Char
noneOf String
specials ParsecT String (GroupIndex, GroupIndex) Identity Char
-> (Char -> P Pattern) -> P Pattern
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \Char
c -> ParsecT String (GroupIndex, GroupIndex) Identity DoPa
char_index ParsecT String (GroupIndex, GroupIndex) Identity DoPa
-> (DoPa -> P Pattern) -> P Pattern
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Pattern -> P Pattern
forall (m :: * -> *) a. Monad m => a -> m a
return (Pattern -> P Pattern) -> (DoPa -> Pattern) -> DoPa -> P Pattern
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (DoPa -> Char -> Pattern
`PChar` Char
c)
    where specials :: String
specials  = String
"^.[$()|*+?{\\"

-- parse [bar] and [^bar] sets of characters
p_bracket :: P Pattern
p_bracket :: P Pattern
p_bracket = (Char -> ParsecT String (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'[') ParsecT String (GroupIndex, GroupIndex) Identity Char
-> P Pattern -> P Pattern
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ( (Char -> ParsecT String (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'^' ParsecT String (GroupIndex, GroupIndex) Identity Char
-> P Pattern -> P Pattern
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Bool -> P Pattern
p_set Bool
True) P Pattern -> P Pattern -> P Pattern
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> (Bool -> P Pattern
p_set Bool
False) )

p_set :: Bool -> P Pattern
p_set :: Bool -> P Pattern
p_set Bool
invert = do String
initial <- String
-> ParsecT String (GroupIndex, GroupIndex) Identity String
-> ParsecT String (GroupIndex, GroupIndex) Identity String
forall s (m :: * -> *) t a u.
Stream s m t =>
a -> ParsecT s u m a -> ParsecT s u m a
option String
"" (Char -> ParsecT String (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
']' ParsecT String (GroupIndex, GroupIndex) Identity Char
-> ParsecT String (GroupIndex, GroupIndex) Identity String
-> ParsecT String (GroupIndex, GroupIndex) Identity String
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> String -> ParsecT String (GroupIndex, GroupIndex) Identity String
forall (m :: * -> *) a. Monad m => a -> m a
return String
"]")
                  [BracketElement]
values <- if String -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
initial then ParsecT String (GroupIndex, GroupIndex) Identity BracketElement
-> ParsecT
     String (GroupIndex, GroupIndex) Identity [BracketElement]
forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m [a]
many1 ParsecT String (GroupIndex, GroupIndex) Identity BracketElement
p_set_elem else ParsecT String (GroupIndex, GroupIndex) Identity BracketElement
-> ParsecT
     String (GroupIndex, GroupIndex) Identity [BracketElement]
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many ParsecT String (GroupIndex, GroupIndex) Identity BracketElement
p_set_elem
                  Char
_ <- Char -> ParsecT String (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
']'
                  DoPa
ci <- ParsecT String (GroupIndex, GroupIndex) Identity DoPa
char_index
                  let chars :: Maybe (Set Char)
chars = String -> Maybe (Set Char)
forall a. Ord a => [a] -> Maybe (Set a)
maybe'set (String -> Maybe (Set Char)) -> String -> Maybe (Set Char)
forall a b. (a -> b) -> a -> b
$ [String] -> String
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat ([String] -> String) -> [String] -> String
forall a b. (a -> b) -> a -> b
$
                        String
initial String -> [String] -> [String]
forall a. a -> [a] -> [a]
:
                        [ Char
c | BEChar Char
c <- [BracketElement]
values ] String -> [String] -> [String]
forall a. a -> [a] -> [a]
:
                        [ [Char
start..Char
end] | BERange Char
start Char
end <- [BracketElement]
values ]
                      colls :: Maybe (Set PatternSetCollatingElement)
colls = [PatternSetCollatingElement]
-> Maybe (Set PatternSetCollatingElement)
forall a. Ord a => [a] -> Maybe (Set a)
maybe'set [String -> PatternSetCollatingElement
PatternSetCollatingElement String
coll | BEColl String
coll <- [BracketElement]
values ]
                      equivs :: Maybe (Set PatternSetEquivalenceClass)
equivs = [PatternSetEquivalenceClass]
-> Maybe (Set PatternSetEquivalenceClass)
forall a. Ord a => [a] -> Maybe (Set a)
maybe'set [String -> PatternSetEquivalenceClass
PatternSetEquivalenceClass String
equiv | BEEquiv String
equiv <- [BracketElement]
values]
                      class's :: Maybe (Set PatternSetCharacterClass)
class's = [PatternSetCharacterClass] -> Maybe (Set PatternSetCharacterClass)
forall a. Ord a => [a] -> Maybe (Set a)
maybe'set [String -> PatternSetCharacterClass
PatternSetCharacterClass String
a'class | BEClass String
a'class <- [BracketElement]
values]
                      maybe'set :: [a] -> Maybe (Set a)
maybe'set [a]
x = if [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [a]
x then Maybe (Set a)
forall a. Maybe a
Nothing else Set a -> Maybe (Set a)
forall a. a -> Maybe a
Just ([a] -> Set a
forall a. Ord a => [a] -> Set a
Set.fromList [a]
x)
                      sets :: PatternSet
sets = Maybe (Set Char)
-> Maybe (Set PatternSetCharacterClass)
-> Maybe (Set PatternSetCollatingElement)
-> Maybe (Set PatternSetEquivalenceClass)
-> PatternSet
PatternSet Maybe (Set Char)
chars Maybe (Set PatternSetCharacterClass)
class's Maybe (Set PatternSetCollatingElement)
colls Maybe (Set PatternSetEquivalenceClass)
equivs
                  PatternSet
sets PatternSet -> P Pattern -> P Pattern
`seq` Pattern -> P Pattern
forall (m :: * -> *) a. Monad m => a -> m a
return (Pattern -> P Pattern) -> Pattern -> P Pattern
forall a b. (a -> b) -> a -> b
$ if Bool
invert then DoPa -> PatternSet -> Pattern
PAnyNot DoPa
ci PatternSet
sets else DoPa -> PatternSet -> Pattern
PAny DoPa
ci PatternSet
sets

-- From here down the code is the parser and functions for pattern [ ] set things

p_set_elem :: P BracketElement
p_set_elem :: ParsecT String (GroupIndex, GroupIndex) Identity BracketElement
p_set_elem = BracketElement
-> ParsecT String (GroupIndex, GroupIndex) Identity BracketElement
checkBracketElement (BracketElement
 -> ParsecT String (GroupIndex, GroupIndex) Identity BracketElement)
-> ParsecT String (GroupIndex, GroupIndex) Identity BracketElement
-> ParsecT String (GroupIndex, GroupIndex) Identity BracketElement
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< [ParsecT String (GroupIndex, GroupIndex) Identity BracketElement]
-> ParsecT String (GroupIndex, GroupIndex) Identity BracketElement
forall (t :: * -> *) (f :: * -> *) a.
(Foldable t, Alternative f) =>
t (f a) -> f a
asum
  [ ParsecT String (GroupIndex, GroupIndex) Identity BracketElement
p_set_elem_class
  , ParsecT String (GroupIndex, GroupIndex) Identity BracketElement
p_set_elem_equiv
  , ParsecT String (GroupIndex, GroupIndex) Identity BracketElement
p_set_elem_coll
  , ParsecT String (GroupIndex, GroupIndex) Identity BracketElement
p_set_elem_range
  , ParsecT String (GroupIndex, GroupIndex) Identity BracketElement
p_set_elem_char
  , String
-> ParsecT String (GroupIndex, GroupIndex) Identity BracketElement
forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
"Failed to parse bracketed string"
  ]

p_set_elem_class :: P BracketElement
p_set_elem_class :: ParsecT String (GroupIndex, GroupIndex) Identity BracketElement
p_set_elem_class = (String -> BracketElement)
-> ParsecT String (GroupIndex, GroupIndex) Identity String
-> ParsecT String (GroupIndex, GroupIndex) Identity BracketElement
forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
liftM String -> BracketElement
BEClass (ParsecT String (GroupIndex, GroupIndex) Identity String
 -> ParsecT String (GroupIndex, GroupIndex) Identity BracketElement)
-> ParsecT String (GroupIndex, GroupIndex) Identity String
-> ParsecT String (GroupIndex, GroupIndex) Identity BracketElement
forall a b. (a -> b) -> a -> b
$
  ParsecT String (GroupIndex, GroupIndex) Identity String
-> ParsecT String (GroupIndex, GroupIndex) Identity String
forall tok st a. GenParser tok st a -> GenParser tok st a
try (ParsecT String (GroupIndex, GroupIndex) Identity String
-> ParsecT String (GroupIndex, GroupIndex) Identity String
-> ParsecT String (GroupIndex, GroupIndex) Identity String
-> ParsecT String (GroupIndex, GroupIndex) Identity String
forall s (m :: * -> *) t u open close a.
Stream s m t =>
ParsecT s u m open
-> ParsecT s u m close -> ParsecT s u m a -> ParsecT s u m a
between (String -> ParsecT String (GroupIndex, GroupIndex) Identity String
forall s (m :: * -> *) u.
Stream s m Char =>
String -> ParsecT s u m String
string String
"[:") (String -> ParsecT String (GroupIndex, GroupIndex) Identity String
forall s (m :: * -> *) u.
Stream s m Char =>
String -> ParsecT s u m String
string String
":]") (ParsecT String (GroupIndex, GroupIndex) Identity Char
-> ParsecT String (GroupIndex, GroupIndex) Identity String
forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m [a]
many1 (ParsecT String (GroupIndex, GroupIndex) Identity Char
 -> ParsecT String (GroupIndex, GroupIndex) Identity String)
-> ParsecT String (GroupIndex, GroupIndex) Identity Char
-> ParsecT String (GroupIndex, GroupIndex) Identity String
forall a b. (a -> b) -> a -> b
$ String -> ParsecT String (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
String -> ParsecT s u m Char
noneOf String
":]"))

p_set_elem_equiv :: P BracketElement
p_set_elem_equiv :: ParsecT String (GroupIndex, GroupIndex) Identity BracketElement
p_set_elem_equiv = (String -> BracketElement)
-> ParsecT String (GroupIndex, GroupIndex) Identity String
-> ParsecT String (GroupIndex, GroupIndex) Identity BracketElement
forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
liftM String -> BracketElement
BEEquiv (ParsecT String (GroupIndex, GroupIndex) Identity String
 -> ParsecT String (GroupIndex, GroupIndex) Identity BracketElement)
-> ParsecT String (GroupIndex, GroupIndex) Identity String
-> ParsecT String (GroupIndex, GroupIndex) Identity BracketElement
forall a b. (a -> b) -> a -> b
$
  ParsecT String (GroupIndex, GroupIndex) Identity String
-> ParsecT String (GroupIndex, GroupIndex) Identity String
forall tok st a. GenParser tok st a -> GenParser tok st a
try (ParsecT String (GroupIndex, GroupIndex) Identity String
-> ParsecT String (GroupIndex, GroupIndex) Identity String
-> ParsecT String (GroupIndex, GroupIndex) Identity String
-> ParsecT String (GroupIndex, GroupIndex) Identity String
forall s (m :: * -> *) t u open close a.
Stream s m t =>
ParsecT s u m open
-> ParsecT s u m close -> ParsecT s u m a -> ParsecT s u m a
between (String -> ParsecT String (GroupIndex, GroupIndex) Identity String
forall s (m :: * -> *) u.
Stream s m Char =>
String -> ParsecT s u m String
string String
"[=") (String -> ParsecT String (GroupIndex, GroupIndex) Identity String
forall s (m :: * -> *) u.
Stream s m Char =>
String -> ParsecT s u m String
string String
"=]") (ParsecT String (GroupIndex, GroupIndex) Identity Char
-> ParsecT String (GroupIndex, GroupIndex) Identity String
forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m [a]
many1 (ParsecT String (GroupIndex, GroupIndex) Identity Char
 -> ParsecT String (GroupIndex, GroupIndex) Identity String)
-> ParsecT String (GroupIndex, GroupIndex) Identity Char
-> ParsecT String (GroupIndex, GroupIndex) Identity String
forall a b. (a -> b) -> a -> b
$ String -> ParsecT String (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
String -> ParsecT s u m Char
noneOf String
"=]"))

p_set_elem_coll :: P BracketElement
p_set_elem_coll :: ParsecT String (GroupIndex, GroupIndex) Identity BracketElement
p_set_elem_coll =  (String -> BracketElement)
-> ParsecT String (GroupIndex, GroupIndex) Identity String
-> ParsecT String (GroupIndex, GroupIndex) Identity BracketElement
forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
liftM String -> BracketElement
BEColl (ParsecT String (GroupIndex, GroupIndex) Identity String
 -> ParsecT String (GroupIndex, GroupIndex) Identity BracketElement)
-> ParsecT String (GroupIndex, GroupIndex) Identity String
-> ParsecT String (GroupIndex, GroupIndex) Identity BracketElement
forall a b. (a -> b) -> a -> b
$
  ParsecT String (GroupIndex, GroupIndex) Identity String
-> ParsecT String (GroupIndex, GroupIndex) Identity String
forall tok st a. GenParser tok st a -> GenParser tok st a
try (ParsecT String (GroupIndex, GroupIndex) Identity String
-> ParsecT String (GroupIndex, GroupIndex) Identity String
-> ParsecT String (GroupIndex, GroupIndex) Identity String
-> ParsecT String (GroupIndex, GroupIndex) Identity String
forall s (m :: * -> *) t u open close a.
Stream s m t =>
ParsecT s u m open
-> ParsecT s u m close -> ParsecT s u m a -> ParsecT s u m a
between (String -> ParsecT String (GroupIndex, GroupIndex) Identity String
forall s (m :: * -> *) u.
Stream s m Char =>
String -> ParsecT s u m String
string String
"[.") (String -> ParsecT String (GroupIndex, GroupIndex) Identity String
forall s (m :: * -> *) u.
Stream s m Char =>
String -> ParsecT s u m String
string String
".]") (ParsecT String (GroupIndex, GroupIndex) Identity Char
-> ParsecT String (GroupIndex, GroupIndex) Identity String
forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m [a]
many1 (ParsecT String (GroupIndex, GroupIndex) Identity Char
 -> ParsecT String (GroupIndex, GroupIndex) Identity String)
-> ParsecT String (GroupIndex, GroupIndex) Identity Char
-> ParsecT String (GroupIndex, GroupIndex) Identity String
forall a b. (a -> b) -> a -> b
$ String -> ParsecT String (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
String -> ParsecT s u m Char
noneOf String
".]"))

p_set_elem_range :: P BracketElement
p_set_elem_range :: ParsecT String (GroupIndex, GroupIndex) Identity BracketElement
p_set_elem_range = ParsecT String (GroupIndex, GroupIndex) Identity BracketElement
-> ParsecT String (GroupIndex, GroupIndex) Identity BracketElement
forall tok st a. GenParser tok st a -> GenParser tok st a
try (ParsecT String (GroupIndex, GroupIndex) Identity BracketElement
 -> ParsecT String (GroupIndex, GroupIndex) Identity BracketElement)
-> ParsecT String (GroupIndex, GroupIndex) Identity BracketElement
-> ParsecT String (GroupIndex, GroupIndex) Identity BracketElement
forall a b. (a -> b) -> a -> b
$ do
  Char
start <- String -> ParsecT String (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
String -> ParsecT s u m Char
noneOf String
"]"
  Char
_  <- Char -> ParsecT String (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'-'
  Char
end <- String -> ParsecT String (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
String -> ParsecT s u m Char
noneOf String
"]"
  BracketElement
-> ParsecT String (GroupIndex, GroupIndex) Identity BracketElement
forall (m :: * -> *) a. Monad m => a -> m a
return (BracketElement
 -> ParsecT String (GroupIndex, GroupIndex) Identity BracketElement)
-> BracketElement
-> ParsecT String (GroupIndex, GroupIndex) Identity BracketElement
forall a b. (a -> b) -> a -> b
$ Char -> Char -> BracketElement
BERange Char
start Char
end

p_set_elem_char :: P BracketElement
p_set_elem_char :: ParsecT String (GroupIndex, GroupIndex) Identity BracketElement
p_set_elem_char = do
  Char
c <- String -> ParsecT String (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
String -> ParsecT s u m Char
noneOf String
"]"
  BracketElement
-> ParsecT String (GroupIndex, GroupIndex) Identity BracketElement
forall (m :: * -> *) a. Monad m => a -> m a
return (Char -> BracketElement
BEChar Char
c)

-- | Fail when 'BracketElement' is invalid, e.g. empty range @1-0@.
-- This failure should not be caught.
--
checkBracketElement :: BracketElement -> P BracketElement
checkBracketElement :: BracketElement
-> ParsecT String (GroupIndex, GroupIndex) Identity BracketElement
checkBracketElement BracketElement
e =
  case BracketElement
e of
    BERange Char
start Char
end
      | Char
start Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
> Char
end -> String
-> ParsecT String (GroupIndex, GroupIndex) Identity BracketElement
forall (m :: * -> *) a. MonadFail m => String -> m a
fail (String
 -> ParsecT String (GroupIndex, GroupIndex) Identity BracketElement)
-> String
-> ParsecT String (GroupIndex, GroupIndex) Identity BracketElement
forall a b. (a -> b) -> a -> b
$ [String] -> String
unwords
          [ String
"End point"
          , Char -> String
forall a. Show a => a -> String
show Char
end
          , String
"of dashed character range is less than starting point"
          , Char -> String
forall a. Show a => a -> String
show Char
start
          ]
      | Bool
otherwise -> ParsecT String (GroupIndex, GroupIndex) Identity BracketElement
ok
    BEChar  Char
_ -> ParsecT String (GroupIndex, GroupIndex) Identity BracketElement
ok
    BEClass String
_ -> ParsecT String (GroupIndex, GroupIndex) Identity BracketElement
ok
    BEColl  String
_ -> ParsecT String (GroupIndex, GroupIndex) Identity BracketElement
ok
    BEEquiv String
_ -> ParsecT String (GroupIndex, GroupIndex) Identity BracketElement
ok
  where
    ok :: ParsecT String (GroupIndex, GroupIndex) Identity BracketElement
ok = BracketElement
-> ParsecT String (GroupIndex, GroupIndex) Identity BracketElement
forall (m :: * -> *) a. Monad m => a -> m a
return BracketElement
e