{-# LANGUAGE BangPatterns           #-}
{-# LANGUAGE FlexibleInstances      #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE GADTs                  #-}
{-# LANGUAGE PatternGuards          #-}
{-# LANGUAGE ScopedTypeVariables    #-}
module Sasha.Internal.ERE (
    ERE (..),
    -- * Construction
    --
    -- | Binary operators are
    --
    -- * '<>' for append
    -- * '\/' for union
    -- * '/\' for intersection
    --
    empty,
    eps,
    char,
    charRange,
    utf8Char,
    anyChar,
    anyUtf8Char,
    appends,
    unions,
    intersections,
    star,
    plus,
    string,
    utf8String,
    complement,
    everything,
    satisfy,
    digit,
    -- * Equivalence
    equivalent,
    -- * Derivative
    nullable,
    derivative,
    match,
    -- * Other
    isEmpty,
    isEverything,
    ) where

import Algebra.Lattice
       (BoundedJoinSemiLattice (..), BoundedMeetSemiLattice (..), Lattice (..))
import Data.Bits       (shiftR, (.&.), (.|.))
import Data.Char       (ord)
import Data.Foldable   (toList)
import Data.Set        (Set)
import Data.String     (IsString (..))
import Data.Word       (Word8)
import Data.Word8Set   (Word8Set)
import Test.QuickCheck (Arbitrary (..))

import qualified Data.Set        as Set
import qualified Data.Word8Set   as W8S
import qualified Test.QuickCheck as QC

-------------------------------------------------------------------------------
-- Doctest
-------------------------------------------------------------------------------

-- $setup
-- >>> :set -XOverloadedStrings
-- >>> import Control.Monad (void)
-- >>> import Data.Foldable (traverse_)
-- >>> import Data.List (sort)
-- >>> import Algebra.Lattice ((/\), (\/))
--
-- >>> import Test.QuickCheck ((===))
-- >>> import qualified Test.QuickCheck as QC
--
-------------------------------------------------------------------------------
-- ERE
-------------------------------------------------------------------------------

-- | Extended regular expression
--
data ERE
    = EREAppend [ERE]              -- ^ Concatenation
    | EREUnion Word8Set (Set ERE)  -- ^ Union
    | EREStar ERE                  -- ^ Kleene star
    | ERENot ERE                   -- ^ Complement
  deriving (ERE -> ERE -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: ERE -> ERE -> Bool
$c/= :: ERE -> ERE -> Bool
== :: ERE -> ERE -> Bool
$c== :: ERE -> ERE -> Bool
Eq, Eq ERE
ERE -> ERE -> Bool
ERE -> ERE -> Ordering
ERE -> ERE -> ERE
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: ERE -> ERE -> ERE
$cmin :: ERE -> ERE -> ERE
max :: ERE -> ERE -> ERE
$cmax :: ERE -> ERE -> ERE
>= :: ERE -> ERE -> Bool
$c>= :: ERE -> ERE -> Bool
> :: ERE -> ERE -> Bool
$c> :: ERE -> ERE -> Bool
<= :: ERE -> ERE -> Bool
$c<= :: ERE -> ERE -> Bool
< :: ERE -> ERE -> Bool
$c< :: ERE -> ERE -> Bool
compare :: ERE -> ERE -> Ordering
$ccompare :: ERE -> ERE -> Ordering
Ord, Int -> ERE -> ShowS
[ERE] -> ShowS
ERE -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [ERE] -> ShowS
$cshowList :: [ERE] -> ShowS
show :: ERE -> String
$cshow :: ERE -> String
showsPrec :: Int -> ERE -> ShowS
$cshowsPrec :: Int -> ERE -> ShowS
Show)

-------------------------------------------------------------------------------
-- Smart constructor
-------------------------------------------------------------------------------

-- | Empty regex. Doesn't accept anything.
--
-- prop> match empty s === False
--
empty :: ERE
empty :: ERE
empty = Word8Set -> Set ERE -> ERE
EREUnion Word8Set
W8S.empty forall a. Set a
Set.empty

-- | Everything.
--
-- prop> match everything s === True
--
everything :: ERE
everything :: ERE
everything = ERE -> ERE
complement ERE
empty

-- | Empty string. /Note:/ different than 'empty'.
--
-- prop> match eps s === null s
--
eps :: ERE
eps :: ERE
eps = [ERE] -> ERE
EREAppend []

-- | Character.
--
char :: Word8 -> ERE
char :: Word8 -> ERE
char Word8
c = Word8Set -> Set ERE -> ERE
EREUnion (Word8 -> Word8Set
W8S.singleton Word8
c) forall a. Set a
Set.empty

-- | Character range.
--
charRange :: Word8 -> Word8 -> ERE
charRange :: Word8 -> Word8 -> ERE
charRange Word8
l Word8
u = Word8Set -> Set ERE -> ERE
EREUnion (Word8 -> Word8 -> Word8Set
W8S.range Word8
l Word8
u) forall a. Set a
Set.empty

-- | Any character.
--
anyChar :: ERE
anyChar :: ERE
anyChar = Word8Set -> Set ERE -> ERE
EREUnion Word8Set
W8S.full forall a. Set a
Set.empty

anyUtf8Char :: ERE
anyUtf8Char :: ERE
anyUtf8Char = [ERE] -> ERE
unions
    [ Word8 -> Word8 -> ERE
charRange Word8
0x00 Word8
0x7F
    , Word8 -> Word8 -> ERE
charRange Word8
0xC2 Word8
0xDF forall a. Semigroup a => a -> a -> a
<> Word8 -> Word8 -> ERE
charRange Word8
0x80 Word8
0xBF
    , Word8 -> Word8 -> ERE
charRange Word8
0xE0 Word8
0xE0 forall a. Semigroup a => a -> a -> a
<> Word8 -> Word8 -> ERE
charRange Word8
0xa0 Word8
0xBF forall a. Semigroup a => a -> a -> a
<> Word8 -> Word8 -> ERE
charRange Word8
0x80 Word8
0xBF
    , Word8 -> Word8 -> ERE
charRange Word8
0xE1 Word8
0xEC forall a. Semigroup a => a -> a -> a
<> Word8 -> Word8 -> ERE
charRange Word8
0x80 Word8
0xBF forall a. Semigroup a => a -> a -> a
<> Word8 -> Word8 -> ERE
charRange Word8
0x80 Word8
0xBF
    , Word8 -> Word8 -> ERE
charRange Word8
0xED Word8
0xED forall a. Semigroup a => a -> a -> a
<> Word8 -> Word8 -> ERE
charRange Word8
0x80 Word8
0x9F forall a. Semigroup a => a -> a -> a
<> Word8 -> Word8 -> ERE
charRange Word8
0x80 Word8
0xBF
    , Word8 -> Word8 -> ERE
charRange Word8
0xEE Word8
0xEF forall a. Semigroup a => a -> a -> a
<> Word8 -> Word8 -> ERE
charRange Word8
0x80 Word8
0xBF forall a. Semigroup a => a -> a -> a
<> Word8 -> Word8 -> ERE
charRange Word8
0x80 Word8
0xBF
    , Word8 -> Word8 -> ERE
charRange Word8
0xF0 Word8
0xF0 forall a. Semigroup a => a -> a -> a
<> Word8 -> Word8 -> ERE
charRange Word8
0x90 Word8
0xBF forall a. Semigroup a => a -> a -> a
<> Word8 -> Word8 -> ERE
charRange Word8
0x80 Word8
0xBF forall a. Semigroup a => a -> a -> a
<> Word8 -> Word8 -> ERE
charRange Word8
0x80 Word8
0xBF
    , Word8 -> Word8 -> ERE
charRange Word8
0xF1 Word8
0xF3 forall a. Semigroup a => a -> a -> a
<> Word8 -> Word8 -> ERE
charRange Word8
0x80 Word8
0xBF forall a. Semigroup a => a -> a -> a
<> Word8 -> Word8 -> ERE
charRange Word8
0x80 Word8
0xBF forall a. Semigroup a => a -> a -> a
<> Word8 -> Word8 -> ERE
charRange Word8
0x80 Word8
0xBF
    , Word8 -> Word8 -> ERE
charRange Word8
0xF4 Word8
0xF4 forall a. Semigroup a => a -> a -> a
<> Word8 -> Word8 -> ERE
charRange Word8
0x80 Word8
0x8f forall a. Semigroup a => a -> a -> a
<> Word8 -> Word8 -> ERE
charRange Word8
0x80 Word8
0xBF forall a. Semigroup a => a -> a -> a
<> Word8 -> Word8 -> ERE
charRange Word8
0x80 Word8
0xBF
    ]

-- | Concatenate regular expressions.
--
-- prop> r <> empty === empty
-- prop> empty <>  r === empty
-- prop> ( r <> s) <> t === r <> (s <> t)
--
-- prop>  r <> eps === r
-- prop> eps <>  r === r
--
appends :: [ERE] -> ERE
appends :: [ERE] -> ERE
appends [ERE]
rs0
    | forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
elem ERE
empty [ERE]
rs1 = ERE
empty
    | Bool
otherwise = case [ERE]
rs1 of
        [ERE
r] -> ERE
r
        [ERE]
rs  -> [ERE] -> ERE
EREAppend [ERE]
rs
  where
    -- flatten one level of EREAppend
    rs1 :: [ERE]
rs1 = forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap ERE -> [ERE]
f [ERE]
rs0

    f :: ERE -> [ERE]
f (EREAppend [ERE]
rs) = [ERE]
rs
    f ERE
r             = [ERE
r]

-- | Union of regular expressions.
--
-- prop>  r \/ r === r
-- prop>  r \/ s === s \/ r
-- prop> ( r \/ s) \/ t === r \/ (s \/ t)
--
-- prop> empty \/  r === r
-- prop>  r \/ empty === r
--
-- prop> everything \/  r === everything
-- prop>  r \/ everything === everything
--
unions :: [ERE] -> ERE
unions :: [ERE] -> ERE
unions = forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry Word8Set -> Set ERE -> ERE
mk forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (t :: * -> *) m a.
(Foldable t, Monoid m) =>
(a -> m) -> t a -> m
foldMap ERE -> (Word8Set, Set ERE)
f where
    mk :: Word8Set -> Set ERE -> ERE
mk Word8Set
cs Set ERE
rss
        | forall a. Set a -> Bool
Set.null Set ERE
rss = Word8Set -> Set ERE -> ERE
EREUnion Word8Set
cs forall a. Set a
Set.empty
        | forall a. Ord a => a -> Set a -> Bool
Set.member ERE
everything Set ERE
rss = ERE
everything
        | Word8Set -> Bool
W8S.null Word8Set
cs = case forall a. Set a -> [a]
Set.toList Set ERE
rss of
            []  -> ERE
empty
            [ERE
r] -> ERE
r
            [ERE]
_   -> Word8Set -> Set ERE -> ERE
EREUnion Word8Set
cs Set ERE
rss
        | Bool
otherwise    = Word8Set -> Set ERE -> ERE
EREUnion Word8Set
cs Set ERE
rss

    f :: ERE -> (Word8Set, Set ERE)
f (EREUnion Word8Set
cs Set ERE
rs) = (Word8Set
cs, Set ERE
rs)
    f ERE
r                = (Word8Set
W8S.empty, forall a. a -> Set a
Set.singleton ERE
r)

-- | Intersection of regular expressions.
--
-- prop>  r /\ r === r
-- prop>  r /\ s === s /\ r
-- prop> ( r /\ s) /\ t === r /\ (s /\ t)
--
-- prop> empty /\  r === empty
-- prop>  r /\ empty === empty
--
-- prop> everything /\  r === r
-- prop>  r /\ everything === r
--
intersections :: [ERE] -> ERE
intersections :: [ERE] -> ERE
intersections = ERE -> ERE
complement forall b c a. (b -> c) -> (a -> b) -> a -> c
. [ERE] -> ERE
unions forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a -> b) -> [a] -> [b]
map ERE -> ERE
complement

-- | Complement.
--
-- prop> complement (complement r) ===  r
--
complement :: ERE -> ERE
complement :: ERE -> ERE
complement ERE
r = case ERE
r of
    ERENot ERE
r'                    -> ERE
r'
    ERE
_                            -> ERE -> ERE
ERENot ERE
r

-- | Kleene star.
--
-- prop> star (star r) === star ( r)
--
-- prop> star eps     ===  eps
-- prop> star empty   ===  eps
-- prop> star anyChar ===  everything
--
-- prop> star (r \/ eps) === star r
-- prop> star (char c \/ eps) === star (char c)
-- prop> star (empty \/ eps) === eps
--
star :: ERE -> ERE
star :: ERE -> ERE
star ERE
r = case ERE
r of
    EREStar ERE
_                          -> ERE
r
    EREAppend []                       -> ERE
eps
    EREUnion Word8Set
cs Set ERE
rs
        | Word8Set -> Bool
W8S.null Word8Set
cs, forall a. Set a -> Bool
Set.null Set ERE
rs     -> ERE
eps
        | Word8Set -> Bool
W8S.isFull Word8Set
cs, forall a. Set a -> Bool
Set.null Set ERE
rs   -> ERE
everything
        | forall a. Ord a => a -> Set a -> Bool
Set.member ERE
eps Set ERE
rs -> case forall a. Set a -> [a]
Set.toList Set ERE
rs' of
            []                  -> ERE -> ERE
star (Word8Set -> Set ERE -> ERE
EREUnion Word8Set
cs forall a. Set a
Set.empty)
            [ERE
r'] | Word8Set -> Bool
W8S.null Word8Set
cs  -> ERE -> ERE
star ERE
r'
            [ERE]
_                   -> ERE -> ERE
EREStar (Word8Set -> Set ERE -> ERE
EREUnion Word8Set
cs Set ERE
rs')
          where
            rs' :: Set ERE
rs' = forall a. Ord a => a -> Set a -> Set a
Set.delete ERE
eps Set ERE
rs
    ERE
_                                  -> ERE -> ERE
EREStar ERE
r

-- | Kleene plus
--
-- @
-- 'plus' r = r <> 'star' r
-- @
plus :: ERE -> ERE
plus :: ERE -> ERE
plus ERE
r = ERE
r forall a. Semigroup a => a -> a -> a
<> ERE -> ERE
star ERE
r

-- | Literal string.
--
string :: [Word8] -> ERE
string :: [Word8] -> ERE
string []  = ERE
eps
string [Word8
c] = Word8Set -> Set ERE -> ERE
EREUnion (Word8 -> Word8Set
W8S.singleton Word8
c) forall a. Set a
Set.empty
string [Word8]
cs  = [ERE] -> ERE
EREAppend forall a b. (a -> b) -> a -> b
$ forall a b. (a -> b) -> [a] -> [b]
map Word8 -> ERE
char [Word8]
cs

-- | UTF8 string
utf8String :: String -> ERE
utf8String :: String -> ERE
utf8String = [Word8] -> ERE
string forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap Char -> [Word8]
encodeCharUtf8

-- | UTF8 character, i.e. may match multiple bytes.
utf8Char :: Char -> ERE
utf8Char :: Char -> ERE
utf8Char = [Word8] -> ERE
string forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> [Word8]
encodeCharUtf8

satisfy :: (Word8 -> Bool) -> ERE
satisfy :: (Word8 -> Bool) -> ERE
satisfy Word8 -> Bool
p = Word8Set -> Set ERE -> ERE
EREUnion ([Word8] -> Word8Set
W8S.fromList [ Word8
x | Word8
x <- [ forall a. Bounded a => a
minBound .. forall a. Bounded a => a
maxBound], Word8 -> Bool
p Word8
x ]) forall a. Set a
Set.empty

digit :: ERE
digit :: ERE
digit = Word8 -> Word8 -> ERE
charRange (forall a b. (Integral a, Num b) => a -> b
fromIntegral (Char -> Int
ord Char
'0')) (forall a b. (Integral a, Num b) => a -> b
fromIntegral (Char -> Int
ord Char
'9'))

-------------------------------------------------------------------------------
-- derivative
-------------------------------------------------------------------------------

instance Semigroup ERE where
    ERE
r <> :: ERE -> ERE -> ERE
<> ERE
r' = [ERE] -> ERE
appends [ERE
r, ERE
r']

instance Monoid ERE where
    mempty :: ERE
mempty  = ERE
eps
    mappend :: ERE -> ERE -> ERE
mappend = forall a. Semigroup a => a -> a -> a
(<>)
    mconcat :: [ERE] -> ERE
mconcat = [ERE] -> ERE
appends

instance Lattice ERE where
    ERE
r \/ :: ERE -> ERE -> ERE
\/ ERE
r' = [ERE] -> ERE
unions [ERE
r, ERE
r']
    ERE
r /\ :: ERE -> ERE -> ERE
/\ ERE
r' = [ERE] -> ERE
intersections [ERE
r, ERE
r']

instance BoundedJoinSemiLattice ERE where
    bottom :: ERE
bottom = ERE
empty

instance BoundedMeetSemiLattice ERE where
    top :: ERE
top = ERE
everything

-- | Uses 'utf8string'.
instance IsString ERE where
    fromString :: String -> ERE
fromString = String -> ERE
utf8String

-- | Uses smart constructors.
instance Arbitrary ERE where
    arbitrary :: Gen ERE
arbitrary = forall a. (Int -> Gen a) -> Gen a
QC.sized Int -> Gen ERE
arb
      where
        arb :: Int -> Gen ERE
arb Int
n | Int
n forall a. Ord a => a -> a -> Bool
<= Int
1    = forall a. [(Int, Gen a)] -> Gen a
QC.frequency
            [ (Int
20, Word8Set -> Set ERE -> ERE
EREUnion forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall a. Arbitrary a => Gen a
arbitrary forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a. Set a
Set.empty)
            , (Int
1, forall (f :: * -> *) a. Applicative f => a -> f a
pure ERE
eps)
            , (Int
1, forall (f :: * -> *) a. Applicative f => a -> f a
pure ERE
empty)
            ]
              | Bool
otherwise = forall a. [Gen a] -> Gen a
QC.oneof
            [ do
                [Int]
p <- Int -> Gen [Int]
arbPartition (Int
n forall a. Num a => a -> a -> a
- Int
1)
                [ERE] -> ERE
unions forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (t :: * -> *) (f :: * -> *) a b.
(Traversable t, Applicative f) =>
(a -> f b) -> t a -> f (t b)
traverse Int -> Gen ERE
arb [Int]
p
            , do
                [Int]
p <- Int -> Gen [Int]
arbPartition (Int
n forall a. Num a => a -> a -> a
- Int
1)
                [ERE] -> ERE
appends forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (t :: * -> *) (f :: * -> *) a b.
(Traversable t, Applicative f) =>
(a -> f b) -> t a -> f (t b)
traverse Int -> Gen ERE
arb [Int]
p

            , ERE -> ERE
star forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Int -> Gen ERE
arb (Int
n forall a. Num a => a -> a -> a
- Int
1)
            , ERE -> ERE
complement forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Int -> Gen ERE
arb (Int
n forall a. Num a => a -> a -> a
- Int
1)
            ]

    shrink :: ERE -> [ERE]
shrink (EREAppend [ERE]
rs) = [ERE]
rs
    shrink (EREStar ERE
r)    = ERE
r forall a. a -> [a] -> [a]
: forall a b. (a -> b) -> [a] -> [b]
map ERE -> ERE
EREStar (forall a. Arbitrary a => a -> [a]
shrink ERE
r)
    shrink (ERENot ERE
r)     = ERE
r forall a. a -> [a] -> [a]
: forall a b. (a -> b) -> [a] -> [b]
map ERE -> ERE
ERENot (forall a. Arbitrary a => a -> [a]
shrink ERE
r)
    shrink ERE
_              = []

arbPartition :: Int -> QC.Gen [Int]
arbPartition :: Int -> Gen [Int]
arbPartition Int
k = case forall a. Ord a => a -> a -> Ordering
compare Int
k Int
1 of
    Ordering
LT -> forall (f :: * -> *) a. Applicative f => a -> f a
pure []
    Ordering
EQ -> forall (f :: * -> *) a. Applicative f => a -> f a
pure [Int
1]
    Ordering
GT -> do
        Int
first <- (Int, Int) -> Gen Int
QC.chooseInt (Int
1, Int
k)
        [Int]
rest <- Int -> Gen [Int]
arbPartition forall a b. (a -> b) -> a -> b
$ Int
k forall a. Num a => a -> a -> a
- Int
first
        forall a. [a] -> Gen [a]
QC.shuffle (Int
first forall a. a -> [a] -> [a]
: [Int]
rest)



-------------------------------------------------------------------------------
-- derivative
-------------------------------------------------------------------------------

-- | We say that a regular expression r is nullable if the language it defines
-- contains the empty string.
--
-- >>> nullable eps
-- True
--
-- >>> nullable (star "x")
-- True
--
-- >>> nullable "foo"
-- False
--
-- >>> nullable (complement eps)
-- False
--
nullable :: ERE -> Bool
nullable :: ERE -> Bool
nullable (EREAppend [ERE]
rs)    = forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all ERE -> Bool
nullable [ERE]
rs
nullable (EREUnion Word8Set
_cs Set ERE
rs) = forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any ERE -> Bool
nullable Set ERE
rs
nullable (EREStar ERE
_)       = Bool
True
nullable (ERENot ERE
r)        = Bool -> Bool
not (ERE -> Bool
nullable ERE
r)

-- | Intuitively, the derivative of a language \(\mathcal{L} \subset \Sigma^\star\)
-- with respect to a symbol \(a \in \Sigma\) is the language that includes only
-- those suffixes of strings with a leading symbol \(a\) in \(\mathcal{L}\).
--
derivative :: Word8 -> ERE -> ERE
derivative :: Word8 -> ERE -> ERE
derivative Word8
c (EREUnion Word8Set
cs Set ERE
rs)  = [ERE] -> ERE
unions forall a b. (a -> b) -> a -> b
$ Word8 -> Word8Set -> ERE
derivativeChars Word8
c Word8Set
cs forall a. a -> [a] -> [a]
: [ Word8 -> ERE -> ERE
derivative Word8
c ERE
r | ERE
r <- forall (t :: * -> *) a. Foldable t => t a -> [a]
toList Set ERE
rs]
derivative Word8
c (EREAppend [ERE]
rs)    = Word8 -> [ERE] -> ERE
derivativeAppend Word8
c [ERE]
rs
derivative Word8
c rs :: ERE
rs@(EREStar ERE
r)    = Word8 -> ERE -> ERE
derivative Word8
c ERE
r forall a. Semigroup a => a -> a -> a
<> ERE
rs
derivative Word8
c (ERENot ERE
r)        = ERE -> ERE
complement (Word8 -> ERE -> ERE
derivative Word8
c ERE
r)

derivativeAppend :: Word8 -> [ERE] -> ERE
derivativeAppend :: Word8 -> [ERE] -> ERE
derivativeAppend Word8
_ []      = ERE
empty
derivativeAppend Word8
c [ERE
r]     = Word8 -> ERE -> ERE
derivative Word8
c ERE
r
derivativeAppend Word8
c (ERE
r:[ERE]
rs)
    | ERE -> Bool
nullable ERE
r         = [ERE] -> ERE
unions [ERE
r' forall a. Semigroup a => a -> a -> a
<> [ERE] -> ERE
appends [ERE]
rs, ERE
rs']
    | Bool
otherwise          = ERE
r' forall a. Semigroup a => a -> a -> a
<> [ERE] -> ERE
appends [ERE]
rs
  where
    r' :: ERE
r'  = Word8 -> ERE -> ERE
derivative Word8
c ERE
r
    rs' :: ERE
rs' = Word8 -> [ERE] -> ERE
derivativeAppend Word8
c [ERE]
rs

derivativeChars :: Word8 -> Word8Set -> ERE
derivativeChars :: Word8 -> Word8Set -> ERE
derivativeChars Word8
c Word8Set
cs
    | Word8
c Word8 -> Word8Set -> Bool
`W8S.member` Word8Set
cs      = ERE
eps
    | Bool
otherwise              = ERE
empty

match :: ERE -> [Word8] -> Bool
match :: ERE -> [Word8] -> Bool
match ERE
ere []     = ERE -> Bool
nullable ERE
ere
match ERE
ere (Word8
w:[Word8]
ws) = ERE -> [Word8] -> Bool
match (Word8 -> ERE -> ERE
derivative Word8
w ERE
ere) [Word8]
ws

-------------------------------------------------------------------------------
-- isEmpty
-------------------------------------------------------------------------------

-- | Whether 'ERE' is (structurally) equal to 'empty'.
isEmpty :: ERE -> Bool
isEmpty :: ERE -> Bool
isEmpty (EREUnion Word8Set
cs Set ERE
rs) = Word8Set -> Bool
W8S.null Word8Set
cs Bool -> Bool -> Bool
&& forall a. Set a -> Bool
Set.null Set ERE
rs
isEmpty ERE
_                = Bool
False

-- | Whether 'ERE' is (structurally) equal to 'everything'.
isEverything :: ERE -> Bool
isEverything :: ERE -> Bool
isEverything (ERENot (EREUnion Word8Set
cs Set ERE
rs)) = Word8Set -> Bool
W8S.null Word8Set
cs Bool -> Bool -> Bool
&& forall a. Set a -> Bool
Set.null Set ERE
rs
isEverything ERE
_                         = Bool
False

-------------------------------------------------------------------------------
-- Utf8
-------------------------------------------------------------------------------

encodeCharUtf8 :: Char -> [Word8]
encodeCharUtf8 :: Char -> [Word8]
encodeCharUtf8 Char
c
  | Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\x07F'  = Word8
w8
                  forall a. a -> [a] -> [a]
: []
  | Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\x7FF'  = (Word8
0xC0 forall a. Bits a => a -> a -> a
.|.  Int -> Word8
w8ShiftR  Int
6          )
                  forall a. a -> [a] -> [a]
: (Word8
0x80 forall a. Bits a => a -> a -> a
.|. (Word8
w8          forall a. Bits a => a -> a -> a
.&. Word8
0x3F))
                  forall a. a -> [a] -> [a]
: []
  | Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\xD7FF' = (Word8
0xE0 forall a. Bits a => a -> a -> a
.|.  Int -> Word8
w8ShiftR Int
12          )
                  forall a. a -> [a] -> [a]
: (Word8
0x80 forall a. Bits a => a -> a -> a
.|. (Int -> Word8
w8ShiftR  Int
6 forall a. Bits a => a -> a -> a
.&. Word8
0x3F))
                  forall a. a -> [a] -> [a]
: (Word8
0x80 forall a. Bits a => a -> a -> a
.|. (Word8
w8          forall a. Bits a => a -> a -> a
.&. Word8
0x3F))
                  forall a. a -> [a] -> [a]
: []
  | Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\xDFFF' = Word8
0xEF forall a. a -> [a] -> [a]
: Word8
0xBF forall a. a -> [a] -> [a]
: Word8
0xBD -- U+FFFD
                  forall a. a -> [a] -> [a]
: []
  | Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\xFFFF' = (Word8
0xE0 forall a. Bits a => a -> a -> a
.|.  Int -> Word8
w8ShiftR Int
12          )
                  forall a. a -> [a] -> [a]
: (Word8
0x80 forall a. Bits a => a -> a -> a
.|. (Int -> Word8
w8ShiftR  Int
6 forall a. Bits a => a -> a -> a
.&. Word8
0x3F))
                  forall a. a -> [a] -> [a]
: (Word8
0x80 forall a. Bits a => a -> a -> a
.|. (Word8
w8          forall a. Bits a => a -> a -> a
.&. Word8
0x3F))
                  forall a. a -> [a] -> [a]
: []
  | Bool
otherwise     = (Word8
0xf0 forall a. Bits a => a -> a -> a
.|.  Int -> Word8
w8ShiftR Int
18          )
                  forall a. a -> [a] -> [a]
: (Word8
0x80 forall a. Bits a => a -> a -> a
.|. (Int -> Word8
w8ShiftR Int
12 forall a. Bits a => a -> a -> a
.&. Word8
0x3F))
                  forall a. a -> [a] -> [a]
: (Word8
0x80 forall a. Bits a => a -> a -> a
.|. (Int -> Word8
w8ShiftR  Int
6 forall a. Bits a => a -> a -> a
.&. Word8
0x3F))
                  forall a. a -> [a] -> [a]
: (Word8
0x80 forall a. Bits a => a -> a -> a
.|. (Word8
w8          forall a. Bits a => a -> a -> a
.&. Word8
0x3F))
                  forall a. a -> [a] -> [a]
: []
  where
    w8 :: Word8
    w8 :: Word8
w8 = forall a b. (Integral a, Num b) => a -> b
fromIntegral (Char -> Int
ord Char
c)

    w8ShiftR :: Int -> Word8
    w8ShiftR :: Int -> Word8
w8ShiftR Int
b = forall a b. (Integral a, Num b) => a -> b
fromIntegral (forall a. Bits a => a -> Int -> a
shiftR (Char -> Int
ord Char
c) Int
b)

-------------------------------------------------------------------------------
-- Equivalance
-------------------------------------------------------------------------------

equivalent :: ERE -> ERE -> Bool
equivalent :: ERE -> ERE -> Bool
equivalent ERE
x0 ERE
y0 = (ERE, ERE) -> Bool
agree (ERE
x0, ERE
y0) Bool -> Bool -> Bool
&& Set (ERE, ERE) -> [(ERE, ERE)] -> [[(ERE, ERE)]] -> Bool
go forall a. Monoid a => a
mempty [(ERE
x0, ERE
y0)] []
  where
    -- we use two queues, so we can append chunks cheaply.
    go :: Set (ERE, ERE) -> [(ERE, ERE)] -> [[(ERE,ERE)]] -> Bool
    go :: Set (ERE, ERE) -> [(ERE, ERE)] -> [[(ERE, ERE)]] -> Bool
go !Set (ERE, ERE)
_  []              []       = Bool
True
    go Set (ERE, ERE)
acc []              ([(ERE, ERE)]
zs:[[(ERE, ERE)]]
zss) = Set (ERE, ERE) -> [(ERE, ERE)] -> [[(ERE, ERE)]] -> Bool
go Set (ERE, ERE)
acc [(ERE, ERE)]
zs [[(ERE, ERE)]]
zss
    go Set (ERE, ERE)
acc (p :: (ERE, ERE)
p@(ERE
x, ERE
y) : [(ERE, ERE)]
zs) [[(ERE, ERE)]]
zss
        | (ERE, ERE)
p forall a. Ord a => a -> Set a -> Bool
`Set.member` Set (ERE, ERE)
acc = Set (ERE, ERE) -> [(ERE, ERE)] -> [[(ERE, ERE)]] -> Bool
go Set (ERE, ERE)
acc [(ERE, ERE)]
zs [[(ERE, ERE)]]
zss
        -- if two regexps are structurally the same, we don't need to recurse.
        | ERE
x forall a. Eq a => a -> a -> Bool
== ERE
y             = Set (ERE, ERE) -> [(ERE, ERE)] -> [[(ERE, ERE)]] -> Bool
go (forall a. Ord a => a -> Set a -> Set a
Set.insert (ERE, ERE)
p Set (ERE, ERE)
acc) [(ERE, ERE)]
zs [[(ERE, ERE)]]
zss
        | forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all (ERE, ERE) -> Bool
agree [(ERE, ERE)]
ps       = Set (ERE, ERE) -> [(ERE, ERE)] -> [[(ERE, ERE)]] -> Bool
go (forall a. Ord a => a -> Set a -> Set a
Set.insert (ERE, ERE)
p Set (ERE, ERE)
acc) [(ERE, ERE)]
zs ([(ERE, ERE)]
ps forall a. a -> [a] -> [a]
: [[(ERE, ERE)]]
zss)
        | Bool
otherwise          = Bool
False
      where
        cs :: [Word8]
cs = [forall a. Bounded a => a
minBound .. forall a. Bounded a => a
maxBound] :: [Word8]
        ps :: [(ERE, ERE)]
ps = forall a b. (a -> b) -> [a] -> [b]
map (\Word8
c -> (Word8 -> ERE -> ERE
derivative Word8
c ERE
x, Word8 -> ERE -> ERE
derivative Word8
c ERE
y)) [Word8]
cs

    agree :: (ERE, ERE) -> Bool
    agree :: (ERE, ERE) -> Bool
agree (ERE
x, ERE
y) = ERE -> Bool
nullable ERE
x forall a. Eq a => a -> a -> Bool
== ERE -> Bool
nullable ERE
y