{-# LANGUAGE CPP #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE OverloadedStrings #-}
module Regex.KDE.Compile
(compileRegex)
where
import Data.Word (Word8)
import qualified Data.ByteString as B
import Data.ByteString (ByteString)
import qualified Data.ByteString.UTF8 as U
import Safe
import Data.Attoparsec.ByteString as A hiding (match)
import Data.Char
import Control.Applicative
import Regex.KDE.Regex
import Control.Monad
import Control.Monad.State.Strict
#if !MIN_VERSION_base(4,11,0)
import Data.Semigroup ((<>))
#endif
compileRegex :: Bool -> ByteString -> Either String Regex
compileRegex :: Bool -> ByteString -> Either [Char] Regex
compileRegex Bool
caseSensitive ByteString
bs =
let !res :: Either [Char] Regex
res = forall a. Parser a -> ByteString -> Either [Char] a
parseOnly (forall (m :: * -> *) s a. Monad m => StateT s m a -> s -> m a
evalStateT StateT Int (Parser ByteString) Regex
parser Int
0) ByteString
bs
in Either [Char] Regex
res
where
parser :: StateT Int (Parser ByteString) Regex
parser = do
!Regex
re <- Bool -> StateT Int (Parser ByteString) Regex
pRegex Bool
caseSensitive
(Regex
re forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift forall t. Chunk t => Parser t ()
A.endOfInput) forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|>
do ByteString
rest <- forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift Parser ByteString
A.takeByteString
forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail forall a b. (a -> b) -> a -> b
$ [Char]
"parse error at byte position " forall a. [a] -> [a] -> [a]
++
forall a. Show a => a -> [Char]
show (ByteString -> Int
B.length ByteString
bs forall a. Num a => a -> a -> a
- ByteString -> Int
B.length ByteString
rest)
type RParser = StateT Int Parser
pRegex :: Bool -> RParser Regex
pRegex :: Bool -> StateT Int (Parser ByteString) Regex
pRegex Bool
caseSensitive =
forall (f :: * -> *) a. Alternative f => a -> f a -> f a
option Regex
MatchNull forall a b. (a -> b) -> a -> b
$
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr Regex -> Regex -> Regex
MatchAlt
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Bool -> StateT Int (Parser ByteString) Regex
pAltPart Bool
caseSensitive)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (forall (f :: * -> *) a. Alternative f => f a -> f [a]
many forall a b. (a -> b) -> a -> b
$ forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (Char -> Parser Char
char Char
'|') forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> (Bool -> StateT Int (Parser ByteString) Regex
pAltPart Bool
caseSensitive forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a. Monoid a => a
mempty))
pAltPart :: Bool -> RParser Regex
pAltPart :: Bool -> StateT Int (Parser ByteString) Regex
pAltPart Bool
caseSensitive = forall a. Monoid a => [a] -> a
mconcat forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (f :: * -> *) a. Alternative f => f a -> f [a]
many1 (Bool -> StateT Int (Parser ByteString) Regex
pRegexPart Bool
caseSensitive)
char :: Char -> Parser Char
char :: Char -> Parser Char
char Char
c =
Char
c forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ (Word8 -> Bool) -> Parser Word8
satisfy (forall a. Eq a => a -> a -> Bool
== forall a b. (Integral a, Num b) => a -> b
fromIntegral (Char -> Int
ord Char
c))
pRegexPart :: Bool -> RParser Regex
pRegexPart :: Bool -> StateT Int (Parser ByteString) Regex
pRegexPart Bool
caseSensitive =
(forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (Bool -> Parser Regex
pRegexChar Bool
caseSensitive) forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Bool -> StateT Int (Parser ByteString) Regex
pParenthesized Bool
caseSensitive) forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>=
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift forall b c a. (b -> c) -> (a -> b) -> a -> c
. Regex -> Parser Regex
pSuffix
pParenthesized :: Bool -> RParser Regex
pParenthesized :: Bool -> StateT Int (Parser ByteString) Regex
pParenthesized Bool
caseSensitive = do
Word8
_ <- forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift ((Word8 -> Bool) -> Parser Word8
satisfy (forall a. Eq a => a -> a -> Bool
== Word8
40))
Bool
resetCaptureNumbers <- forall (f :: * -> *) a. Alternative f => a -> f a -> f a
option Bool
False (Bool
True forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (ByteString -> Parser ByteString
string ByteString
"?|"))
Regex -> Regex
modifier <- if Bool
resetCaptureNumbers
then forall (m :: * -> *) a. Monad m => a -> m a
return forall a. a -> a
id
else forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift ((Word8 -> Bool) -> Parser Word8
satisfy (forall a. Eq a => a -> a -> Bool
== Word8
63) forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser (Regex -> Regex)
pGroupModifiers)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Int -> Regex -> Regex
MatchCapture forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify (forall a. Num a => a -> a -> a
+ Int
1) forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall s (m :: * -> *). MonadState s m => m s
get))
Int
currentCaptureNumber <- forall s (m :: * -> *). MonadState s m => m s
get
Regex
contents <- forall (f :: * -> *) a. Alternative f => a -> f a -> f a
option Regex
MatchNull forall a b. (a -> b) -> a -> b
$
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr Regex -> Regex -> Regex
MatchAlt
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Bool -> StateT Int (Parser ByteString) Regex
pAltPart Bool
caseSensitive)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (forall (f :: * -> *) a. Alternative f => f a -> f [a]
many forall a b. (a -> b) -> a -> b
$ forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (Char -> Parser Char
char Char
'|') forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*>
(((if Bool
resetCaptureNumbers
then forall s (m :: * -> *). MonadState s m => s -> m ()
put Int
currentCaptureNumber
else forall (m :: * -> *) a. Monad m => a -> m a
return ()) forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Bool -> StateT Int (Parser ByteString) Regex
pAltPart Bool
caseSensitive) forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a. Monoid a => a
mempty))
Word8
_ <- forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift ((Word8 -> Bool) -> Parser Word8
satisfy (forall a. Eq a => a -> a -> Bool
== Word8
41))
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Regex -> Regex
modifier Regex
contents
pGroupModifiers :: Parser (Regex -> Regex)
pGroupModifiers :: Parser (Regex -> Regex)
pGroupModifiers =
(forall a. a -> a
id forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Char -> Parser Char
char Char
':')
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|>
do Direction
dir <- forall (f :: * -> *) a. Alternative f => a -> f a -> f a
option Direction
Forward forall a b. (a -> b) -> a -> b
$ Direction
Backward forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Char -> Parser Char
char Char
'<'
(Direction -> Regex -> Regex
AssertPositive Direction
dir forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Char -> Parser Char
char Char
'=') forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Direction -> Regex -> Regex
AssertNegative Direction
dir forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Char -> Parser Char
char Char
'!')
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|>
do Word8
n <- (Word8 -> Bool) -> Parser Word8
satisfy (\Word8
d -> Word8
d forall a. Ord a => a -> a -> Bool
>= Word8
48 Bool -> Bool -> Bool
&& Word8
d forall a. Ord a => a -> a -> Bool
<= Word8
57)
forall (m :: * -> *) a. Monad m => a -> m a
return (\Regex
_ -> Int -> Regex
Subroutine (forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
n forall a. Num a => a -> a -> a
- Int
48))
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|>
do Word8
_ <- (Word8 -> Bool) -> Parser Word8
satisfy (forall a. Eq a => a -> a -> Bool
== Word8
82)
forall (m :: * -> *) a. Monad m => a -> m a
return (\Regex
_ -> Int -> Regex
Subroutine Int
0)
pSuffix :: Regex -> Parser Regex
pSuffix :: Regex -> Parser Regex
pSuffix Regex
re = forall (f :: * -> *) a. Alternative f => a -> f a -> f a
option Regex
re forall a b. (a -> b) -> a -> b
$ do
Word8
w <- (Word8 -> Bool) -> Parser Word8
satisfy (\Word8
x -> Word8
x forall a. Eq a => a -> a -> Bool
== Word8
42 Bool -> Bool -> Bool
|| Word8
x forall a. Eq a => a -> a -> Bool
== Word8
43 Bool -> Bool -> Bool
|| Word8
x forall a. Eq a => a -> a -> Bool
== Word8
63 Bool -> Bool -> Bool
|| Word8
x forall a. Eq a => a -> a -> Bool
== Word8
123)
(case Word8
w of
Word8
42 -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Regex -> Regex -> Regex
MatchAlt (Regex -> Regex
MatchSome Regex
re) Regex
MatchNull
Word8
43 -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Regex -> Regex
MatchSome Regex
re
Word8
63 -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Regex -> Regex -> Regex
MatchAlt Regex
re Regex
MatchNull
Word8
123 -> do
let isDig :: a -> Bool
isDig a
x = a
x forall a. Ord a => a -> a -> Bool
>= a
48 Bool -> Bool -> Bool
&& a
x forall a. Ord a => a -> a -> Bool
< a
58
Maybe Int
minn <- forall (f :: * -> *) a. Alternative f => a -> f a -> f a
option forall a. Maybe a
Nothing forall a b. (a -> b) -> a -> b
$ forall a. Read a => [Char] -> Maybe a
readMay forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> [Char]
U.toString forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Word8 -> Bool) -> Parser ByteString
A.takeWhile forall {a}. (Ord a, Num a) => a -> Bool
isDig
Maybe Int
maxn <- forall (f :: * -> *) a. Alternative f => a -> f a -> f a
option Maybe Int
minn forall a b. (a -> b) -> a -> b
$ Char -> Parser Char
char Char
',' forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*>
(forall a. Read a => [Char] -> Maybe a
readMay forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> [Char]
U.toString forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Word8 -> Bool) -> Parser ByteString
A.takeWhile forall {a}. (Ord a, Num a) => a -> Bool
isDig)
Char
_ <- Char -> Parser Char
char Char
'}'
case (Maybe Int
minn, Maybe Int
maxn) of
(Maybe Int
Nothing, Maybe Int
Nothing) -> forall (m :: * -> *) a. MonadPlus m => m a
mzero
(Just Int
n, Maybe Int
Nothing) -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$! Int -> Regex -> Regex
atleast Int
n Regex
re
(Maybe Int
Nothing, Just Int
n) -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$! Int -> Regex -> Regex
atmost Int
n Regex
re
(Just Int
m, Just Int
n) -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$! Int -> Int -> Regex -> Regex
between Int
m Int
n Regex
re
Word8
_ -> forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail [Char]
"pSuffix encountered impossible byte") forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Regex -> Parser Regex
pQuantifierModifier
where
atmost :: Int -> Regex -> Regex
atmost Int
0 Regex
_ = Regex
MatchNull
atmost Int
n Regex
r = Regex -> Regex -> Regex
MatchAlt (forall a. Monoid a => [a] -> a
mconcat (forall a. Int -> a -> [a]
replicate Int
n Regex
r)) (Int -> Regex -> Regex
atmost (Int
nforall a. Num a => a -> a -> a
-Int
1) Regex
r)
between :: Int -> Int -> Regex -> Regex
between Int
0 Int
n Regex
r = Int -> Regex -> Regex
atmost Int
n Regex
r
between Int
m Int
n Regex
r = forall a. Monoid a => [a] -> a
mconcat (forall a. Int -> a -> [a]
replicate Int
m Regex
r) forall a. Semigroup a => a -> a -> a
<> Int -> Regex -> Regex
atmost (Int
n forall a. Num a => a -> a -> a
- Int
m) Regex
r
atleast :: Int -> Regex -> Regex
atleast Int
n Regex
r = forall a. Monoid a => [a] -> a
mconcat (forall a. Int -> a -> [a]
replicate Int
n Regex
r) forall a. Semigroup a => a -> a -> a
<> Regex -> Regex -> Regex
MatchAlt (Regex -> Regex
MatchSome Regex
r) Regex
MatchNull
pQuantifierModifier :: Regex -> Parser Regex
pQuantifierModifier :: Regex -> Parser Regex
pQuantifierModifier Regex
re = forall (f :: * -> *) a. Alternative f => a -> f a -> f a
option Regex
re forall a b. (a -> b) -> a -> b
$
(Regex -> Regex
Possessive Regex
re forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ (Word8 -> Bool) -> Parser Word8
satisfy (forall a. Eq a => a -> a -> Bool
== Word8
43)) forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|>
(Regex -> Regex
Lazy Regex
re forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ (Word8 -> Bool) -> Parser Word8
satisfy (forall a. Eq a => a -> a -> Bool
==Word8
63))
pRegexChar :: Bool -> Parser Regex
pRegexChar :: Bool -> Parser Regex
pRegexChar Bool
caseSensitive = do
Word8
w <- (Word8 -> Bool) -> Parser Word8
satisfy forall a b. (a -> b) -> a -> b
$ forall a b. a -> b -> a
const Bool
True
case Word8
w of
Word8
46 -> forall (m :: * -> *) a. Monad m => a -> m a
return Regex
MatchAnyChar
Word8
37 -> (do
ByteString
ds <- (Word8 -> Bool) -> Parser ByteString
A.takeWhile1 (\Word8
x -> Word8
x forall a. Ord a => a -> a -> Bool
>= Word8
48 Bool -> Bool -> Bool
&& Word8
x forall a. Ord a => a -> a -> Bool
<= Word8
57)
case forall a. Read a => [Char] -> Maybe a
readMay (ByteString -> [Char]
U.toString ByteString
ds) of
Just !Int
n -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Int -> Regex
MatchDynamic Int
n
Maybe Int
Nothing -> forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail [Char]
"not a number")
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall (m :: * -> *) a. Monad m => a -> m a
return ((Char -> Bool) -> Regex
MatchChar (forall a. Eq a => a -> a -> Bool
== Char
'%'))
Word8
92 -> Parser Regex
pRegexEscapedChar
Word8
36 -> forall (m :: * -> *) a. Monad m => a -> m a
return Regex
AssertEnd
Word8
94 -> forall (m :: * -> *) a. Monad m => a -> m a
return Regex
AssertBeginning
Word8
91 -> Parser Regex
pRegexCharClass
Word8
_ | Word8
w forall a. Ord a => a -> a -> Bool
< Word8
128
, Bool -> Bool
not (Word8 -> Bool
isSpecial Word8
w)
-> do let c :: Char
c = Int -> Char
chr forall a b. (a -> b) -> a -> b
$ forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
w
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$! (Char -> Bool) -> Regex
MatchChar forall a b. (a -> b) -> a -> b
$
if Bool
caseSensitive
then (forall a. Eq a => a -> a -> Bool
== Char
c)
else (\Char
d -> Char -> Char
toLower Char
d forall a. Eq a => a -> a -> Bool
== Char -> Char
toLower Char
c)
| Word8
w forall a. Ord a => a -> a -> Bool
>= Word8
0xc0 -> do
ByteString
rest <- case Word8
w of
Word8
_ | Word8
w forall a. Ord a => a -> a -> Bool
>= Word8
0xf0 -> Int -> Parser ByteString
A.take Int
3
| Word8
w forall a. Ord a => a -> a -> Bool
>= Word8
0xe0 -> Int -> Parser ByteString
A.take Int
2
| Bool
otherwise -> Int -> Parser ByteString
A.take Int
1
case ByteString -> Maybe (Char, ByteString)
U.uncons (Word8 -> ByteString -> ByteString
B.cons Word8
w ByteString
rest) of
Just (Char
d, ByteString
_) -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$! (Char -> Bool) -> Regex
MatchChar forall a b. (a -> b) -> a -> b
$
if Bool
caseSensitive
then (forall a. Eq a => a -> a -> Bool
== Char
d)
else (\Char
e -> Char -> Char
toLower Char
e forall a. Eq a => a -> a -> Bool
== Char -> Char
toLower Char
d)
Maybe (Char, ByteString)
Nothing -> forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail [Char]
"could not decode as UTF8"
| Bool
otherwise -> forall (m :: * -> *) a. MonadPlus m => m a
mzero
pRegexEscapedChar :: Parser Regex
pRegexEscapedChar :: Parser Regex
pRegexEscapedChar = do
Char
c <- Parser Char
anyChar
(case Char
c of
Char
'b' -> forall (m :: * -> *) a. Monad m => a -> m a
return Regex
AssertWordBoundary
Char
'B' -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Direction -> Regex -> Regex
AssertNegative Direction
Forward Regex
AssertWordBoundary
Char
'{' -> do
ByteString
ds <- (Word8 -> Bool) -> Parser ByteString
A.takeWhile1 (\Word8
x -> Word8
x forall a. Ord a => a -> a -> Bool
>= Word8
48 Bool -> Bool -> Bool
&& Word8
x forall a. Ord a => a -> a -> Bool
<= Word8
57)
Char
_ <- Char -> Parser Char
char Char
'}'
case forall a. Read a => [Char] -> Maybe a
readMay (ByteString -> [Char]
U.toString ByteString
ds) of
Just !Int
n -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Int -> Regex
MatchCaptured forall a b. (a -> b) -> a -> b
$ Int
n
Maybe Int
Nothing -> forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail [Char]
"not a number"
Char
'd' -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ (Char -> Bool) -> Regex
MatchChar Char -> Bool
isDigit
Char
'D' -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ (Char -> Bool) -> Regex
MatchChar (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
isDigit)
Char
's' -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ (Char -> Bool) -> Regex
MatchChar Char -> Bool
isSpace
Char
'S' -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ (Char -> Bool) -> Regex
MatchChar (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
isSpace)
Char
'w' -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ (Char -> Bool) -> Regex
MatchChar Char -> Bool
isWordChar
Char
'W' -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ (Char -> Bool) -> Regex
MatchChar (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
isWordChar)
Char
_ | Char
c forall a. Ord a => a -> a -> Bool
>= Char
'0' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'9' ->
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$! Int -> Regex
MatchCaptured (Char -> Int
ord Char
c forall a. Num a => a -> a -> a
- Char -> Int
ord Char
'0')
| Bool
otherwise -> forall (m :: * -> *) a. MonadPlus m => m a
mzero) forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ((Char -> Bool) -> Regex
MatchChar forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Eq a => a -> a -> Bool
(==) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Char -> Parser Char
pEscaped Char
c)
pEscaped :: Char -> Parser Char
pEscaped :: Char -> Parser Char
pEscaped Char
c =
case Char
c of
Char
'\\' -> forall (m :: * -> *) a. Monad m => a -> m a
return Char
c
Char
'a' -> forall (m :: * -> *) a. Monad m => a -> m a
return Char
'\a'
Char
'f' -> forall (m :: * -> *) a. Monad m => a -> m a
return Char
'\f'
Char
'n' -> forall (m :: * -> *) a. Monad m => a -> m a
return Char
'\n'
Char
'r' -> forall (m :: * -> *) a. Monad m => a -> m a
return Char
'\r'
Char
't' -> forall (m :: * -> *) a. Monad m => a -> m a
return Char
'\t'
Char
'v' -> forall (m :: * -> *) a. Monad m => a -> m a
return Char
'\v'
Char
'0' -> do
ByteString
ds <- Int -> Parser ByteString
A.take Int
3
case forall a. Read a => [Char] -> Maybe a
readMay ([Char]
"'\\o" forall a. [a] -> [a] -> [a]
++ ByteString -> [Char]
U.toString ByteString
ds forall a. [a] -> [a] -> [a]
++ [Char]
"'") of
Just Char
x -> forall (m :: * -> *) a. Monad m => a -> m a
return Char
x
Maybe Char
Nothing -> forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail [Char]
"invalid octal character escape"
Char
_ | Char
c forall a. Ord a => a -> a -> Bool
>= Char
'1' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'7' -> do
let octalDigitScanner :: a -> a -> Maybe a
octalDigitScanner a
s a
w
| a
s forall a. Ord a => a -> a -> Bool
< a
3, a
w forall a. Ord a => a -> a -> Bool
>= a
48 Bool -> Bool -> Bool
&& a
w forall a. Ord a => a -> a -> Bool
<= a
55
= forall a. a -> Maybe a
Just (a
s forall a. Num a => a -> a -> a
+ a
1)
| Bool
otherwise = forall a. Maybe a
Nothing
ByteString
ds <- forall s. s -> (s -> Word8 -> Maybe s) -> Parser ByteString
A.scan (Int
1 :: Int) forall {a} {a}. (Ord a, Ord a, Num a, Num a) => a -> a -> Maybe a
octalDigitScanner
case forall a. Read a => [Char] -> Maybe a
readMay ([Char]
"'\\o" forall a. [a] -> [a] -> [a]
++ [Char
c] forall a. [a] -> [a] -> [a]
++ ByteString -> [Char]
U.toString ByteString
ds forall a. [a] -> [a] -> [a]
++ [Char]
"'") of
Just Char
x -> forall (m :: * -> *) a. Monad m => a -> m a
return Char
x
Maybe Char
Nothing -> forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail [Char]
"invalid octal character escape"
Char
'z' -> do
ByteString
ds <- Int -> Parser ByteString
A.take Int
4
case forall a. Read a => [Char] -> Maybe a
readMay ([Char]
"'\\x" forall a. [a] -> [a] -> [a]
++ ByteString -> [Char]
U.toString ByteString
ds forall a. [a] -> [a] -> [a]
++ [Char]
"'") of
Just Char
x -> forall (m :: * -> *) a. Monad m => a -> m a
return Char
x
Maybe Char
Nothing -> forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail [Char]
"invalid hex character escape"
Char
'x' -> do
ByteString
ds <- ((Word8 -> Bool) -> Parser Word8
satisfy (forall a. Eq a => a -> a -> Bool
== Word8
123) forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> (Word8 -> Bool) -> Parser ByteString
A.takeWhile (forall a. Eq a => a -> a -> Bool
/= Word8
125) forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* (Word8 -> Bool) -> Parser Word8
satisfy (forall a. Eq a => a -> a -> Bool
== Word8
125))
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Int -> Parser ByteString
A.take Int
2
case forall a. Read a => [Char] -> Maybe a
readMay ([Char]
"'\\x" forall a. [a] -> [a] -> [a]
++ ByteString -> [Char]
U.toString ByteString
ds forall a. [a] -> [a] -> [a]
++ [Char]
"'") of
Just Char
x -> forall (m :: * -> *) a. Monad m => a -> m a
return Char
x
Maybe Char
Nothing -> forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail [Char]
"invalid hex character escape"
Char
_ | Char -> Bool
isPunctuation Char
c Bool -> Bool -> Bool
|| Char -> Bool
isSymbol Char
c Bool -> Bool -> Bool
|| Char -> Bool
isSpace Char
c -> forall (m :: * -> *) a. Monad m => a -> m a
return Char
c
| Bool
otherwise -> forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail forall a b. (a -> b) -> a -> b
$ [Char]
"invalid escape \\" forall a. [a] -> [a] -> [a]
++ [Char
c]
pRegexCharClass :: Parser Regex
pRegexCharClass :: Parser Regex
pRegexCharClass = do
Bool
negated <- forall (f :: * -> *) a. Alternative f => a -> f a -> f a
option Bool
False forall a b. (a -> b) -> a -> b
$ Bool
True forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ (Word8 -> Bool) -> Parser Word8
satisfy (forall a. Eq a => a -> a -> Bool
== Word8
94)
let getEscapedClass :: Parser ByteString (Char -> Bool)
getEscapedClass = do
Word8
_ <- (Word8 -> Bool) -> Parser Word8
satisfy (forall a. Eq a => a -> a -> Bool
== Word8
92)
(Char -> Bool
isDigit forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Char -> Parser Char
char Char
'd')
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
isDigit forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Char -> Parser Char
char Char
'D')
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Bool
isSpace forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Char -> Parser Char
char Char
's')
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
isSpace forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Char -> Parser Char
char Char
'S')
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Bool
isWordChar forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Char -> Parser Char
char Char
'w')
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
isWordChar forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Char -> Parser Char
char Char
'W')
let getPosixClass :: Parser ByteString (Char -> Bool)
getPosixClass = do
ByteString
_ <- ByteString -> Parser ByteString
string ByteString
"[:"
Bool
localNegated <- forall (f :: * -> *) a. Alternative f => a -> f a -> f a
option Bool
False forall a b. (a -> b) -> a -> b
$ Bool
True forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ (Word8 -> Bool) -> Parser Word8
satisfy (forall a. Eq a => a -> a -> Bool
== Word8
94)
Char -> Bool
res <- (Char -> Bool
isAlphaNum forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ ByteString -> Parser ByteString
string ByteString
"alnum")
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Bool
isAlpha forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ ByteString -> Parser ByteString
string ByteString
"alpha")
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Bool
isAscii forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ ByteString -> Parser ByteString
string ByteString
"ascii")
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ((\Char
c -> Char -> Bool
isSpace Char
c Bool -> Bool -> Bool
&& Char
c forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` [Char
'\n',Char
'\r',Char
'\f',Char
'\v']) forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$
ByteString -> Parser ByteString
string ByteString
"blank")
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Bool
isControl forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ ByteString -> Parser ByteString
string ByteString
"cntrl")
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ((\Char
c -> Char -> Bool
isPrint Char
c Bool -> Bool -> Bool
|| Char -> Bool
isSpace Char
c) forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ ByteString -> Parser ByteString
string ByteString
"graph:")
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Bool
isLower forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ ByteString -> Parser ByteString
string ByteString
"lower")
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Bool
isUpper forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ ByteString -> Parser ByteString
string ByteString
"upper")
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Bool
isPrint forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ ByteString -> Parser ByteString
string ByteString
"print")
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Bool
isPunctuation forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ ByteString -> Parser ByteString
string ByteString
"punct")
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Bool
isSpace forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ ByteString -> Parser ByteString
string ByteString
"space")
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ((\Char
c -> Char -> Bool
isAlphaNum Char
c Bool -> Bool -> Bool
||
Char -> GeneralCategory
generalCategory Char
c forall a. Eq a => a -> a -> Bool
== GeneralCategory
ConnectorPunctuation)
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ ByteString -> Parser ByteString
string ByteString
"word:")
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Bool
isHexDigit forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ ByteString -> Parser ByteString
string ByteString
"xdigit")
ByteString
_ <- ByteString -> Parser ByteString
string ByteString
":]"
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$! if Bool
localNegated then Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
res else Char -> Bool
res
let getC :: Parser Char
getC = ((Word8 -> Bool) -> Parser Word8
satisfy (forall a. Eq a => a -> a -> Bool
== Word8
92) forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser Char
anyChar forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Char -> Parser Char
pEscaped) forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|>
(Int -> Char
chr forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (Integral a, Num b) => a -> b
fromIntegral forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Word8 -> Bool) -> Parser Word8
satisfy (\Word8
x -> Word8
x forall a. Eq a => a -> a -> Bool
/= Word8
92 Bool -> Bool -> Bool
&& Word8
x forall a. Eq a => a -> a -> Bool
/= Word8
93))
let getCRange :: Parser ByteString (Char -> Bool)
getCRange = do
Char
c <- Parser Char
getC
(\Char
d -> (\Char
x -> Char
x forall a. Ord a => a -> a -> Bool
>= Char
c Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
<= Char
d)) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Parser Char
char Char
'-' forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser Char
getC) forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|>
forall (m :: * -> *) a. Monad m => a -> m a
return (forall a. Eq a => a -> a -> Bool
== Char
c)
let getCClass :: Parser ByteString (Char -> Bool)
getCClass = do
ByteString
_ <- ByteString -> Parser ByteString
A.string ByteString
"\\p"
ByteString
ds <- (Word8 -> Bool) -> Parser Word8
satisfy (forall a. Eq a => a -> a -> Bool
== Word8
123) forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> (Word8 -> Bool) -> Parser ByteString
A.takeWhile (forall a. Eq a => a -> a -> Bool
/= Word8
125) forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* (Word8 -> Bool) -> Parser Word8
satisfy (forall a. Eq a => a -> a -> Bool
== Word8
125)
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$
(case ByteString
ds of
ByteString
"Lu" -> (forall a. Eq a => a -> a -> Bool
== GeneralCategory
UppercaseLetter)
ByteString
"Ll" -> (forall a. Eq a => a -> a -> Bool
== GeneralCategory
LowercaseLetter)
ByteString
"Lt" -> (forall a. Eq a => a -> a -> Bool
== GeneralCategory
TitlecaseLetter)
ByteString
"Lm" -> (forall a. Eq a => a -> a -> Bool
== GeneralCategory
ModifierLetter)
ByteString
"Lo" -> (forall a. Eq a => a -> a -> Bool
== GeneralCategory
OtherLetter)
ByteString
"Mn" -> (forall a. Eq a => a -> a -> Bool
== GeneralCategory
NonSpacingMark)
ByteString
"Mc" -> (forall a. Eq a => a -> a -> Bool
== GeneralCategory
SpacingCombiningMark)
ByteString
"Me" -> (forall a. Eq a => a -> a -> Bool
== GeneralCategory
EnclosingMark)
ByteString
"Nd" -> (forall a. Eq a => a -> a -> Bool
== GeneralCategory
DecimalNumber)
ByteString
"Nl" -> (forall a. Eq a => a -> a -> Bool
== GeneralCategory
LetterNumber)
ByteString
"No" -> (forall a. Eq a => a -> a -> Bool
== GeneralCategory
OtherNumber)
ByteString
"Pc" -> (forall a. Eq a => a -> a -> Bool
== GeneralCategory
ConnectorPunctuation)
ByteString
"Pd" -> (forall a. Eq a => a -> a -> Bool
== GeneralCategory
DashPunctuation)
ByteString
"Ps" -> (forall a. Eq a => a -> a -> Bool
== GeneralCategory
OpenPunctuation)
ByteString
"Pe" -> (forall a. Eq a => a -> a -> Bool
== GeneralCategory
ClosePunctuation)
ByteString
"Pi" -> (forall a. Eq a => a -> a -> Bool
== GeneralCategory
InitialQuote)
ByteString
"Pf" -> (forall a. Eq a => a -> a -> Bool
== GeneralCategory
FinalQuote)
ByteString
"Po" -> (forall a. Eq a => a -> a -> Bool
== GeneralCategory
OtherPunctuation)
ByteString
"Sm" -> (forall a. Eq a => a -> a -> Bool
== GeneralCategory
MathSymbol)
ByteString
"Sc" -> (forall a. Eq a => a -> a -> Bool
== GeneralCategory
CurrencySymbol)
ByteString
"Sk" -> (forall a. Eq a => a -> a -> Bool
== GeneralCategory
ModifierSymbol)
ByteString
"So" -> (forall a. Eq a => a -> a -> Bool
== GeneralCategory
OtherSymbol)
ByteString
"Zs" -> (forall a. Eq a => a -> a -> Bool
== GeneralCategory
Space)
ByteString
"Zl" -> (forall a. Eq a => a -> a -> Bool
== GeneralCategory
LineSeparator)
ByteString
"Zp" -> (forall a. Eq a => a -> a -> Bool
== GeneralCategory
ParagraphSeparator)
ByteString
"Cc" -> (forall a. Eq a => a -> a -> Bool
== GeneralCategory
Control)
ByteString
"Cf" -> (forall a. Eq a => a -> a -> Bool
== GeneralCategory
Format)
ByteString
"Cs" -> (forall a. Eq a => a -> a -> Bool
== GeneralCategory
Surrogate)
ByteString
"Co" -> (forall a. Eq a => a -> a -> Bool
== GeneralCategory
PrivateUse)
ByteString
"Cn" -> (forall a. Eq a => a -> a -> Bool
== GeneralCategory
NotAssigned)
ByteString
_ -> (forall a b. a -> b -> a
const Bool
False)) forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> GeneralCategory
generalCategory
[Char -> Bool]
brack <- forall (f :: * -> *) a. Alternative f => a -> f a -> f a
option [] forall a b. (a -> b) -> a -> b
$ [(forall a. Eq a => a -> a -> Bool
==Char
']')] forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Char -> Parser Char
char Char
']'
[Char -> Bool]
fs <- forall (f :: * -> *) a. Alternative f => f a -> f [a]
many (Parser ByteString (Char -> Bool)
getEscapedClass forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser ByteString (Char -> Bool)
getPosixClass forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser ByteString (Char -> Bool)
getCRange forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser ByteString (Char -> Bool)
getCClass)
Word8
_ <- (Word8 -> Bool) -> Parser Word8
satisfy (forall a. Eq a => a -> a -> Bool
== Word8
93)
let f :: Char -> Bool
f Char
c = forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any (forall a b. (a -> b) -> a -> b
$ Char
c) forall a b. (a -> b) -> a -> b
$ [Char -> Bool]
brack forall a. [a] -> [a] -> [a]
++ [Char -> Bool]
fs
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$! (Char -> Bool) -> Regex
MatchChar (if Bool
negated then (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
f) else Char -> Bool
f)
anyChar :: Parser Char
anyChar :: Parser Char
anyChar = do
Word8
w <- (Word8 -> Bool) -> Parser Word8
satisfy (forall a b. a -> b -> a
const Bool
True)
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$! Int -> Char
chr forall a b. (a -> b) -> a -> b
$ forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
w
isSpecial :: Word8 -> Bool
isSpecial :: Word8 -> Bool
isSpecial Word8
92 = Bool
True
isSpecial Word8
63 = Bool
True
isSpecial Word8
42 = Bool
True
isSpecial Word8
43 = Bool
True
isSpecial Word8
91 = Bool
True
isSpecial Word8
93 = Bool
True
isSpecial Word8
37 = Bool
True
isSpecial Word8
40 = Bool
True
isSpecial Word8
41 = Bool
True
isSpecial Word8
124 = Bool
True
isSpecial Word8
46 = Bool
True
isSpecial Word8
36 = Bool
True
isSpecial Word8
94 = Bool
True
isSpecial Word8
_ = Bool
False