-- BNFC 3
-- (C) 2021 Andreas Abel

{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE PatternGuards #-}

-- | Tools to manipulate regular expressions.

module BNFC.Types.Regex where

import BNFC.Prelude
import qualified Data.List        as List
import qualified BNFC.Utils.List2 as List2
import qualified Data.Set         as Set

-- * Regular expressions
---------------------------------------------------------------------------

-- | Regular expressions are constructed over character classes.
--
--   Use smart constructors to ensure invariants.
data Regex
  = RChar  CharClass
      -- ^ Atomic regular expression.
  | RAlts  (List2 Regex)
      -- ^ Alternative/sum: List free of duplicates and @RAlt@.
      --   We use list instead of set to preserve the order given by the user.
      --   Empty list would mean empty language,
      --   but this is instead represented by the empty character class.
  | RMinus Regex Regex
      -- ^ Difference.
      --   Most lexer generators do not support difference in general,
      --   only at the level of character classes.
      --   LBNF has general difference, so it is represented here.
  | REps
      -- ^ Language of the empty word (empty sequence).
  | RSeqs  (List2 Regex)
      -- ^ Sequence/product.  List free of @RSeq@.
      --   Empty list is @eps@ (language of the empty word).
  | RStar  Regex
      -- ^ 0 or more repetitions.
      --   'Regex' isn't 'RStar', 'RPlus', 'ROpt', @'RAlts' []@ nor 'REps'.
  | RPlus  Regex
      -- ^ 1 or more repetitions.
      --   'Regex' isn't 'RStar', 'RPlus', 'ROpt', @'RAlts' []@ nor 'REps'.
  | ROpt   Regex
      -- ^ 0 or 1 repetitions.
      --   'Regex' isn't 'RStar', 'RPlus', 'ROpt', @'RAlts' []@ nor 'REps'.
  deriving (Regex -> Regex -> Bool
(Regex -> Regex -> Bool) -> (Regex -> Regex -> Bool) -> Eq Regex
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Regex -> Regex -> Bool
$c/= :: Regex -> Regex -> Bool
== :: Regex -> Regex -> Bool
$c== :: Regex -> Regex -> Bool
Eq, Eq Regex
Eq Regex
-> (Regex -> Regex -> Ordering)
-> (Regex -> Regex -> Bool)
-> (Regex -> Regex -> Bool)
-> (Regex -> Regex -> Bool)
-> (Regex -> Regex -> Bool)
-> (Regex -> Regex -> Regex)
-> (Regex -> Regex -> Regex)
-> Ord Regex
Regex -> Regex -> Bool
Regex -> Regex -> Ordering
Regex -> Regex -> Regex
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Regex -> Regex -> Regex
$cmin :: Regex -> Regex -> Regex
max :: Regex -> Regex -> Regex
$cmax :: Regex -> Regex -> Regex
>= :: Regex -> Regex -> Bool
$c>= :: Regex -> Regex -> Bool
> :: Regex -> Regex -> Bool
$c> :: Regex -> Regex -> Bool
<= :: Regex -> Regex -> Bool
$c<= :: Regex -> Regex -> Bool
< :: Regex -> Regex -> Bool
$c< :: Regex -> Regex -> Bool
compare :: Regex -> Regex -> Ordering
$ccompare :: Regex -> Regex -> Ordering
$cp1Ord :: Eq Regex
Ord, Int -> Regex -> ShowS
[Regex] -> ShowS
Regex -> String
(Int -> Regex -> ShowS)
-> (Regex -> String) -> ([Regex] -> ShowS) -> Show Regex
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Regex] -> ShowS
$cshowList :: [Regex] -> ShowS
show :: Regex -> String
$cshow :: Regex -> String
showsPrec :: Int -> Regex -> ShowS
$cshowsPrec :: Int -> Regex -> ShowS
Show)

pattern REmpty :: Regex
pattern $bREmpty :: Regex
$mREmpty :: forall r. Regex -> (Void# -> r) -> (Void# -> r) -> r
REmpty = RChar CEmpty

pattern RAlt :: Regex -> Regex -> Regex
pattern $bRAlt :: Regex -> Regex -> Regex
$mRAlt :: forall r. Regex -> (Regex -> Regex -> r) -> (Void# -> r) -> r
RAlt r1 r2 = RAlts (List2 r1 r2 [])

pattern RSeq :: Regex -> Regex -> Regex
pattern $bRSeq :: Regex -> Regex -> Regex
$mRSeq :: forall r. Regex -> (Regex -> Regex -> r) -> (Void# -> r) -> r
RSeq r1 r2 = RSeqs (List2 r1 r2 [])

-- | Check if a regular expression is nullable (accepts the empty string).
nullable :: Regex -> Bool
nullable :: Regex -> Bool
nullable = \case
  RChar CharClass
_      -> Bool
False
  RMinus Regex
r1 Regex
r2 -> Regex -> Bool
nullable Regex
r1 Bool -> Bool -> Bool
&& Bool -> Bool
not (Regex -> Bool
nullable Regex
r2)
  RAlts List2 Regex
rs     -> (Regex -> Bool) -> List2 Regex -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any Regex -> Bool
nullable List2 Regex
rs
  RSeqs List2 Regex
rs     -> (Regex -> Bool) -> List2 Regex -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Regex -> Bool
nullable List2 Regex
rs
  Regex
REps         -> Bool
True
  RStar Regex
_      -> Bool
True
  RPlus Regex
r      -> Regex -> Bool
nullable Regex
r
  ROpt Regex
_       -> Bool
True

-- | Check if a regular expression matches at least one word.
--
--   For differences, this check may err on the positive side.
class Satisfiable a where
  satisfiable :: a -> Bool

instance Satisfiable Regex where
  satisfiable :: Regex -> Bool
satisfiable = \case
    RChar CharClass
c      -> CharClass -> Bool
forall a. Satisfiable a => a -> Bool
satisfiable CharClass
c
    RMinus Regex
r Regex
_   -> Regex -> Bool
forall a. Satisfiable a => a -> Bool
satisfiable Regex
r            -- approximatively!
    RAlts List2 Regex
rs     -> (Regex -> Bool) -> List2 Regex -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any Regex -> Bool
forall a. Satisfiable a => a -> Bool
satisfiable List2 Regex
rs
    RSeqs List2 Regex
rs     -> (Regex -> Bool) -> List2 Regex -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Regex -> Bool
forall a. Satisfiable a => a -> Bool
satisfiable List2 Regex
rs
    Regex
REps         -> Bool
True
    RStar Regex
_      -> Bool
True
    RPlus Regex
r      -> Regex -> Bool
forall a. Satisfiable a => a -> Bool
satisfiable Regex
r
    ROpt Regex
_       -> Bool
True

instance Satisfiable CharClass where
  satisfiable :: CharClass -> Bool
satisfiable (CMinus CharClassUnion
c CharClassUnion
_) = CharClassUnion -> Bool
forall a. Satisfiable a => a -> Bool
satisfiable CharClassUnion
c   -- approximatively!

instance Satisfiable CharClassUnion where
  satisfiable :: CharClassUnion -> Bool
satisfiable = \case
    CharClassUnion
CAny    -> Bool
True
    CAlt [CharClassAtom]
cs -> Bool -> Bool
not (Bool -> Bool) -> Bool -> Bool
forall a b. (a -> b) -> a -> b
$ [CharClassAtom] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [CharClassAtom]
cs

-- * Character classes
---------------------------------------------------------------------------

-- | Character classes are regular expressions that recognize
-- character sequences of length exactly one.  These are often
-- distinguished from arbitrary regular expressions in lexer
-- generators, e.g. in @alex@.
--
-- We represent character classes as a difference of unions of atomic
-- character classes.
--
-- Semantics: @⟦ CMinus ccYes ccNo ⟧ = ⟦ ccYes ⟧ \ ⟦ ccNo ⟧@
data CharClass = CMinus
  { CharClass -> CharClassUnion
ccYes :: CharClassUnion
      -- ^ Character in question must be in one of these character classes.
  , CharClass -> CharClassUnion
ccNo  :: CharClassUnion
      -- ^ Character in question must not be in one of these character classes.
      --   Must be empty if 'ccYes' is empty.
  }
  deriving (CharClass -> CharClass -> Bool
(CharClass -> CharClass -> Bool)
-> (CharClass -> CharClass -> Bool) -> Eq CharClass
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: CharClass -> CharClass -> Bool
$c/= :: CharClass -> CharClass -> Bool
== :: CharClass -> CharClass -> Bool
$c== :: CharClass -> CharClass -> Bool
Eq, Eq CharClass
Eq CharClass
-> (CharClass -> CharClass -> Ordering)
-> (CharClass -> CharClass -> Bool)
-> (CharClass -> CharClass -> Bool)
-> (CharClass -> CharClass -> Bool)
-> (CharClass -> CharClass -> Bool)
-> (CharClass -> CharClass -> CharClass)
-> (CharClass -> CharClass -> CharClass)
-> Ord CharClass
CharClass -> CharClass -> Bool
CharClass -> CharClass -> Ordering
CharClass -> CharClass -> CharClass
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: CharClass -> CharClass -> CharClass
$cmin :: CharClass -> CharClass -> CharClass
max :: CharClass -> CharClass -> CharClass
$cmax :: CharClass -> CharClass -> CharClass
>= :: CharClass -> CharClass -> Bool
$c>= :: CharClass -> CharClass -> Bool
> :: CharClass -> CharClass -> Bool
$c> :: CharClass -> CharClass -> Bool
<= :: CharClass -> CharClass -> Bool
$c<= :: CharClass -> CharClass -> Bool
< :: CharClass -> CharClass -> Bool
$c< :: CharClass -> CharClass -> Bool
compare :: CharClass -> CharClass -> Ordering
$ccompare :: CharClass -> CharClass -> Ordering
$cp1Ord :: Eq CharClass
Ord, Int -> CharClass -> ShowS
[CharClass] -> ShowS
CharClass -> String
(Int -> CharClass -> ShowS)
-> (CharClass -> String)
-> ([CharClass] -> ShowS)
-> Show CharClass
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [CharClass] -> ShowS
$cshowList :: [CharClass] -> ShowS
show :: CharClass -> String
$cshow :: CharClass -> String
showsPrec :: Int -> CharClass -> ShowS
$cshowsPrec :: Int -> CharClass -> ShowS
Show)

pattern CEmpty :: CharClass
pattern $bCEmpty :: CharClass
$mCEmpty :: forall r. CharClass -> (Void# -> r) -> (Void# -> r) -> r
CEmpty = CC CCEmpty

pattern CC :: CharClassUnion -> CharClass
pattern $bCC :: CharClassUnion -> CharClass
$mCC :: forall r. CharClass -> (CharClassUnion -> r) -> (Void# -> r) -> r
CC c = c `CMinus` CCEmpty

-- | Possibly overlapping union of character classes.

data CharClassUnion
  = CAny
      -- ^ Any character, LBNF @char@.
  | CAlt [CharClassAtom]
      -- ^ Any of the given (≥0) alternatives.  List is free of duplicates.
  deriving (Eq CharClassUnion
Eq CharClassUnion
-> (CharClassUnion -> CharClassUnion -> Ordering)
-> (CharClassUnion -> CharClassUnion -> Bool)
-> (CharClassUnion -> CharClassUnion -> Bool)
-> (CharClassUnion -> CharClassUnion -> Bool)
-> (CharClassUnion -> CharClassUnion -> Bool)
-> (CharClassUnion -> CharClassUnion -> CharClassUnion)
-> (CharClassUnion -> CharClassUnion -> CharClassUnion)
-> Ord CharClassUnion
CharClassUnion -> CharClassUnion -> Bool
CharClassUnion -> CharClassUnion -> Ordering
CharClassUnion -> CharClassUnion -> CharClassUnion
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: CharClassUnion -> CharClassUnion -> CharClassUnion
$cmin :: CharClassUnion -> CharClassUnion -> CharClassUnion
max :: CharClassUnion -> CharClassUnion -> CharClassUnion
$cmax :: CharClassUnion -> CharClassUnion -> CharClassUnion
>= :: CharClassUnion -> CharClassUnion -> Bool
$c>= :: CharClassUnion -> CharClassUnion -> Bool
> :: CharClassUnion -> CharClassUnion -> Bool
$c> :: CharClassUnion -> CharClassUnion -> Bool
<= :: CharClassUnion -> CharClassUnion -> Bool
$c<= :: CharClassUnion -> CharClassUnion -> Bool
< :: CharClassUnion -> CharClassUnion -> Bool
$c< :: CharClassUnion -> CharClassUnion -> Bool
compare :: CharClassUnion -> CharClassUnion -> Ordering
$ccompare :: CharClassUnion -> CharClassUnion -> Ordering
$cp1Ord :: Eq CharClassUnion
Ord, Int -> CharClassUnion -> ShowS
[CharClassUnion] -> ShowS
CharClassUnion -> String
(Int -> CharClassUnion -> ShowS)
-> (CharClassUnion -> String)
-> ([CharClassUnion] -> ShowS)
-> Show CharClassUnion
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [CharClassUnion] -> ShowS
$cshowList :: [CharClassUnion] -> ShowS
show :: CharClassUnion -> String
$cshow :: CharClassUnion -> String
showsPrec :: Int -> CharClassUnion -> ShowS
$cshowsPrec :: Int -> CharClassUnion -> ShowS
Show)

instance Eq CharClassUnion where
  CharClassUnion
CAny     == :: CharClassUnion -> CharClassUnion -> Bool
== CharClassUnion
CAny     = Bool
True
  CAlt [CharClassAtom]
cc1 == CAlt [CharClassAtom]
cc2 = [CharClassAtom] -> Set CharClassAtom
forall a. Ord a => [a] -> Set a
Set.fromList [CharClassAtom]
cc1 Set CharClassAtom -> Set CharClassAtom -> Bool
forall a. Eq a => a -> a -> Bool
== [CharClassAtom] -> Set CharClassAtom
forall a. Ord a => [a] -> Set a
Set.fromList [CharClassAtom]
cc2
  CharClassUnion
CAny     == CAlt{}   = Bool
False
  CAlt{}   == CharClassUnion
CAny     = Bool
False

pattern CCEmpty :: CharClassUnion
pattern $bCCEmpty :: CharClassUnion
$mCCEmpty :: forall r. CharClassUnion -> (Void# -> r) -> (Void# -> r) -> r
CCEmpty = CAlt []

-- | Atomic character class.

data CharClassAtom
  = CChar Char  -- ^ A single character.
  | CDigit      -- ^ @0-9@, LBNF @digit@.
  | CLower      -- ^ Lower case character, LBNF @lower@.
  | CUpper      -- ^ Upper case character, LBNF @upper@.
  deriving (CharClassAtom -> CharClassAtom -> Bool
(CharClassAtom -> CharClassAtom -> Bool)
-> (CharClassAtom -> CharClassAtom -> Bool) -> Eq CharClassAtom
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: CharClassAtom -> CharClassAtom -> Bool
$c/= :: CharClassAtom -> CharClassAtom -> Bool
== :: CharClassAtom -> CharClassAtom -> Bool
$c== :: CharClassAtom -> CharClassAtom -> Bool
Eq, Eq CharClassAtom
Eq CharClassAtom
-> (CharClassAtom -> CharClassAtom -> Ordering)
-> (CharClassAtom -> CharClassAtom -> Bool)
-> (CharClassAtom -> CharClassAtom -> Bool)
-> (CharClassAtom -> CharClassAtom -> Bool)
-> (CharClassAtom -> CharClassAtom -> Bool)
-> (CharClassAtom -> CharClassAtom -> CharClassAtom)
-> (CharClassAtom -> CharClassAtom -> CharClassAtom)
-> Ord CharClassAtom
CharClassAtom -> CharClassAtom -> Bool
CharClassAtom -> CharClassAtom -> Ordering
CharClassAtom -> CharClassAtom -> CharClassAtom
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: CharClassAtom -> CharClassAtom -> CharClassAtom
$cmin :: CharClassAtom -> CharClassAtom -> CharClassAtom
max :: CharClassAtom -> CharClassAtom -> CharClassAtom
$cmax :: CharClassAtom -> CharClassAtom -> CharClassAtom
>= :: CharClassAtom -> CharClassAtom -> Bool
$c>= :: CharClassAtom -> CharClassAtom -> Bool
> :: CharClassAtom -> CharClassAtom -> Bool
$c> :: CharClassAtom -> CharClassAtom -> Bool
<= :: CharClassAtom -> CharClassAtom -> Bool
$c<= :: CharClassAtom -> CharClassAtom -> Bool
< :: CharClassAtom -> CharClassAtom -> Bool
$c< :: CharClassAtom -> CharClassAtom -> Bool
compare :: CharClassAtom -> CharClassAtom -> Ordering
$ccompare :: CharClassAtom -> CharClassAtom -> Ordering
$cp1Ord :: Eq CharClassAtom
Ord, Int -> CharClassAtom -> ShowS
[CharClassAtom] -> ShowS
CharClassAtom -> String
(Int -> CharClassAtom -> ShowS)
-> (CharClassAtom -> String)
-> ([CharClassAtom] -> ShowS)
-> Show CharClassAtom
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [CharClassAtom] -> ShowS
$cshowList :: [CharClassAtom] -> ShowS
show :: CharClassAtom -> String
$cshow :: CharClassAtom -> String
showsPrec :: Int -> CharClassAtom -> ShowS
$cshowsPrec :: Int -> CharClassAtom -> ShowS
Show)

-- | Union of character class unions.
instance Semigroup CharClassUnion where
  CharClassUnion
CAny    <> :: CharClassUnion -> CharClassUnion -> CharClassUnion
<> CharClassUnion
_        = CharClassUnion
CAny
  CharClassUnion
_       <> CharClassUnion
CAny     = CharClassUnion
CAny
  CAlt [CharClassAtom]
cs <> CAlt [CharClassAtom]
cs' = [CharClassAtom] -> CharClassUnion
CAlt ([CharClassAtom]
cs [CharClassAtom] -> [CharClassAtom] -> [CharClassAtom]
forall a. Semigroup a => a -> a -> a
<> [CharClassAtom]
cs')

instance Monoid CharClassUnion where
  mempty :: CharClassUnion
mempty  = [CharClassAtom] -> CharClassUnion
CAlt []
  mappend :: CharClassUnion -> CharClassUnion -> CharClassUnion
mappend = CharClassUnion -> CharClassUnion -> CharClassUnion
forall a. Semigroup a => a -> a -> a
(<>)

-- * Smart constructor for regular expressions.
---------------------------------------------------------------------------

rChar :: Char -> Regex
rChar :: Char -> Regex
rChar = CharClass -> Regex
RChar (CharClass -> Regex) -> (Char -> CharClass) -> Char -> Regex
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> CharClass
cChar

-- | Simplifications included, but no distributivity.
rSeq :: Regex -> Regex -> Regex
rSeq :: Regex -> Regex -> Regex
rSeq = ((Regex, Regex) -> Regex) -> Regex -> Regex -> Regex
forall a b c. ((a, b) -> c) -> a -> b -> c
curry (((Regex, Regex) -> Regex) -> Regex -> Regex -> Regex)
-> ((Regex, Regex) -> Regex) -> Regex -> Regex -> Regex
forall a b. (a -> b) -> a -> b
$ \case
  -- 0r = 0
  (Regex
REmpty , Regex
_       ) -> Regex
REmpty
  (Regex
_       , Regex
REmpty ) -> Regex
REmpty
  -- 1r = r
  (Regex
REps    , Regex
r      ) -> Regex
r
  (Regex
r       , Regex
REps   ) -> Regex
r
  -- r*r* = r*
  (RStar Regex
r1, RStar Regex
r2) | Regex
r1 Regex -> Regex -> Bool
forall a. Eq a => a -> a -> Bool
== Regex
r2 -> Regex -> Regex
RStar Regex
r1
  -- r?r* = r*r? = r*
  (ROpt  Regex
r1, RStar Regex
r2) | Regex
r1 Regex -> Regex -> Bool
forall a. Eq a => a -> a -> Bool
== Regex
r2 -> Regex -> Regex
RStar Regex
r2
  (RStar Regex
r1, ROpt  Regex
r2) | Regex
r1 Regex -> Regex -> Bool
forall a. Eq a => a -> a -> Bool
== Regex
r2 -> Regex -> Regex
RStar Regex
r1
  -- r+r* = r*r+ = r+
  (RPlus Regex
r1, RStar Regex
r2) | Regex
r1 Regex -> Regex -> Bool
forall a. Eq a => a -> a -> Bool
== Regex
r2 -> Regex -> Regex
RPlus Regex
r1
  (RStar Regex
r1, RPlus Regex
r2) | Regex
r1 Regex -> Regex -> Bool
forall a. Eq a => a -> a -> Bool
== Regex
r2 -> Regex -> Regex
RPlus Regex
r2
  -- r+r? = r?r+ = r+
  (RPlus Regex
r1, ROpt  Regex
r2) | Regex
r1 Regex -> Regex -> Bool
forall a. Eq a => a -> a -> Bool
== Regex
r2 -> Regex -> Regex
RPlus Regex
r1
  (ROpt  Regex
r1, RPlus Regex
r2) | Regex
r1 Regex -> Regex -> Bool
forall a. Eq a => a -> a -> Bool
== Regex
r2 -> Regex -> Regex
RPlus Regex
r2
  -- rr* = r*r = r+
  (Regex
r1      , RStar Regex
r2) | Regex
r1 Regex -> Regex -> Bool
forall a. Eq a => a -> a -> Bool
== Regex
r2 -> Regex -> Regex
RPlus Regex
r2
  (RStar Regex
r1, Regex
r2      ) | Regex
r1 Regex -> Regex -> Bool
forall a. Eq a => a -> a -> Bool
== Regex
r2 -> Regex -> Regex
RPlus Regex
r1
  -- Associate
  (RSeqs List2 Regex
r1, RSeqs List2 Regex
r2) -> List2 Regex -> Regex
RSeqs (List2 Regex -> Regex) -> List2 Regex -> Regex
forall a b. (a -> b) -> a -> b
$ List2 Regex
r1 List2 Regex -> List2 Regex -> List2 Regex
forall a. Semigroup a => a -> a -> a
<> List2 Regex
r2
  (Regex
r       , RSeqs List2 Regex
rs) -> List2 Regex -> Regex
RSeqs (List2 Regex -> Regex) -> List2 Regex -> Regex
forall a b. (a -> b) -> a -> b
$ Regex -> List2 Regex -> List2 Regex
forall a. a -> List2 a -> List2 a
List2.cons Regex
r List2 Regex
rs
  (RSeqs List2 Regex
rs, Regex
r       ) -> List2 Regex -> Regex
RSeqs (List2 Regex -> Regex) -> List2 Regex -> Regex
forall a b. (a -> b) -> a -> b
$ List2 Regex -> Regex -> List2 Regex
forall a. List2 a -> a -> List2 a
List2.snoc List2 Regex
rs Regex
r
  -- general sequences
  (Regex
r1      , Regex
r2      ) -> Regex
r1 Regex -> Regex -> Regex
`RSeq` Regex
r2

rSeqs :: [Regex] -> Regex
rSeqs :: [Regex] -> Regex
rSeqs = (Regex -> Regex -> Regex) -> Regex -> [Regex] -> Regex
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr Regex -> Regex -> Regex
rSeq Regex
REps

rAlt :: Regex -> Regex -> Regex
rAlt :: Regex -> Regex -> Regex
rAlt = ((Regex, Regex) -> Regex) -> Regex -> Regex -> Regex
forall a b c. ((a, b) -> c) -> a -> b -> c
curry (((Regex, Regex) -> Regex) -> Regex -> Regex -> Regex)
-> ((Regex, Regex) -> Regex) -> Regex -> Regex -> Regex
forall a b. (a -> b) -> a -> b
$ \case
  -- 0 + r = r
  (Regex
REmpty  , Regex
r     ) -> Regex
r
  (Regex
r       , Regex
REmpty) -> Regex
r
  -- join character alternatives
  (RChar CharClass
c1, RChar CharClass
c2) -> CharClass -> CharClass -> Regex
cAlt CharClass
c1 CharClass
c2
  -- Associate to the left
  (RAlts List2 Regex
r1, RAlts List2 Regex
r2) -> List2 Regex -> Regex
RAlts (List2 Regex -> Regex) -> List2 Regex -> Regex
forall a b. (a -> b) -> a -> b
$ [Regex] -> List2 Regex
forall a. [a] -> List2 a
List2.fromList ([Regex] -> List2 Regex) -> [Regex] -> List2 Regex
forall a b. (a -> b) -> a -> b
$ [Regex] -> [Regex]
forall a. Ord a => [a] -> [a]
nubOrd ([Regex] -> [Regex]) -> [Regex] -> [Regex]
forall a b. (a -> b) -> a -> b
$ List2 Regex -> [Item (List2 Regex)]
forall l. IsList l => l -> [Item l]
List2.toList List2 Regex
r1 [Regex] -> [Regex] -> [Regex]
forall a. Semigroup a => a -> a -> a
<> List2 Regex -> [Item (List2 Regex)]
forall l. IsList l => l -> [Item l]
List2.toList List2 Regex
r2
  (Regex
r       , RAlts List2 Regex
rs)
     | Regex
r Regex -> List2 Regex -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` List2 Regex
rs -> List2 Regex -> Regex
RAlts List2 Regex
rs
     | Bool
otherwise   -> List2 Regex -> Regex
RAlts (List2 Regex -> Regex) -> List2 Regex -> Regex
forall a b. (a -> b) -> a -> b
$ Regex -> List2 Regex -> List2 Regex
forall a. a -> List2 a -> List2 a
List2.cons Regex
r List2 Regex
rs
  (RAlts List2 Regex
rs, Regex
r       )
     | Regex
r Regex -> List2 Regex -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` List2 Regex
rs -> List2 Regex -> Regex
RAlts List2 Regex
rs
     | Bool
otherwise   -> List2 Regex -> Regex
RAlts (List2 Regex -> Regex) -> List2 Regex -> Regex
forall a b. (a -> b) -> a -> b
$ List2 Regex -> Regex -> List2 Regex
forall a. List2 a -> a -> List2 a
List2.snoc List2 Regex
rs Regex
r
  -- general alternatives
  (Regex
r1, Regex
r2)
     | Regex
r1 Regex -> Regex -> Bool
forall a. Eq a => a -> a -> Bool
== Regex
r2  -> Regex
r1  -- idempotency, but not the general case
     | Bool
otherwise -> Regex
r1 Regex -> Regex -> Regex
`RAlt` Regex
r2

rAlts :: [Regex] -> Regex
rAlts :: [Regex] -> Regex
rAlts = (Regex -> Regex -> Regex) -> Regex -> [Regex] -> Regex
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr Regex -> Regex -> Regex
rAlt Regex
REmpty

rMinus :: Regex -> Regex -> Regex
rMinus :: Regex -> Regex -> Regex
rMinus = ((Regex, Regex) -> Regex) -> Regex -> Regex -> Regex
forall a b c. ((a, b) -> c) -> a -> b -> c
curry (((Regex, Regex) -> Regex) -> Regex -> Regex -> Regex)
-> ((Regex, Regex) -> Regex) -> Regex -> Regex -> Regex
forall a b. (a -> b) -> a -> b
$ \case
  -- 0 - r = 0
  (Regex
REmpty  , Regex
_     ) -> Regex
REmpty
  -- r - 0 = r
  (Regex
r       , Regex
REmpty) -> Regex
r
  -- join character alternatives
  (RChar CharClass
c1, RChar CharClass
c2) -> CharClass -> CharClass -> Regex
cMinus CharClass
c1 CharClass
c2
  -- remove subtracted elements
  (RAlts List2 Regex
rs, RAlts List2 Regex
ss) ->
    case List2 Regex -> [Item (List2 Regex)]
forall l. IsList l => l -> [Item l]
List2.toList List2 Regex
rs [Regex] -> [Regex] -> [Regex]
forall a. Eq a => [a] -> [a] -> [a]
List.\\ List2 Regex -> [Item (List2 Regex)]
forall l. IsList l => l -> [Item l]
List2.toList List2 Regex
ss of
      []  -> Regex
REmpty
      [Regex]
rs' -> [Regex] -> Regex
rAlts [Regex]
rs' Regex -> Regex -> Regex
`RMinus` List2 Regex -> Regex
RAlts List2 Regex
ss
  (Regex
r       , RAlts List2 Regex
rs)
    | Regex
r Regex -> List2 Regex -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` List2 Regex
rs -> Regex
REmpty
    | Bool
otherwise   -> Regex
r Regex -> Regex -> Regex
`RMinus` List2 Regex -> Regex
RAlts List2 Regex
rs
  -- r - r = 0
  (Regex
r1, Regex
r2)
     | Regex
r1 Regex -> Regex -> Bool
forall a. Eq a => a -> a -> Bool
== Regex
r2  -> Regex
REmpty
     | Bool
otherwise -> Regex
r1 Regex -> Regex -> Regex
`RMinus` Regex
r2

rStar :: Regex -> Regex
rStar :: Regex -> Regex
rStar = \case
  Regex
REmpty   -> Regex
REps
  Regex
REps     -> Regex
REps
  ROpt  Regex
r  -> Regex -> Regex
RStar Regex
r
  RStar Regex
r  -> Regex -> Regex
RStar Regex
r
  RPlus Regex
r  -> Regex -> Regex
RStar Regex
r
  Regex
r        -> Regex -> Regex
RStar Regex
r

rPlus :: Regex -> Regex
rPlus :: Regex -> Regex
rPlus = \case
  Regex
REmpty   -> Regex
REmpty
  Regex
REps     -> Regex
REps
  ROpt  Regex
r  -> Regex -> Regex
RStar Regex
r
  RStar Regex
r  -> Regex -> Regex
RStar Regex
r
  RPlus Regex
r  -> Regex -> Regex
RPlus Regex
r
  Regex
r        -> Regex -> Regex
RPlus Regex
r

rOpt :: Regex -> Regex
rOpt :: Regex -> Regex
rOpt = \case
  Regex
REmpty   -> Regex
REps
  Regex
REps     -> Regex
REps
  RStar Regex
r  -> Regex -> Regex
RStar Regex
r
  RPlus Regex
r  -> Regex -> Regex
RStar Regex
r
  ROpt  Regex
r  -> Regex -> Regex
ROpt  Regex
r
  Regex
r        -> Regex -> Regex
ROpt  Regex
r


-- | Disjunction of two character classes is either a character class again ('RChar')
-- or simply the disjunction ('RAlt').
--
-- @(p1 \ m1) ∪ (p2 \ m2) = (p1 ∪ p2) \ (m1 ∪ m2)@ if @p1 ⊥ m2@ and @p2 ⊥ m1@
cAlt :: CharClass -> CharClass -> Regex
cAlt :: CharClass -> CharClass -> Regex
cAlt c1 :: CharClass
c1@(CMinus CharClassUnion
p1 CharClassUnion
m1) c2 :: CharClass
c2@(CMinus CharClassUnion
p2 CharClassUnion
m2)
  | CharClass
c1 CharClass -> CharClass -> Bool
forall a. Eq a => a -> a -> Bool
== CharClass
cAny Bool -> Bool -> Bool
|| CharClass
c2 CharClass -> CharClass -> Bool
forall a. Eq a => a -> a -> Bool
== CharClass
cAny     = CharClass -> Regex
RChar CharClass
cAny
  | CharClassUnion
p1 CharClassUnion -> CharClassUnion -> Either CharClass CharClassUnion
`ccuMinus` CharClassUnion
m2 Either CharClass CharClassUnion
-> Either CharClass CharClassUnion -> Bool
forall a. Eq a => a -> a -> Bool
== CharClassUnion -> Either CharClass CharClassUnion
forall a b. b -> Either a b
Right CharClassUnion
p1,
    CharClassUnion
p2 CharClassUnion -> CharClassUnion -> Either CharClass CharClassUnion
`ccuMinus` CharClassUnion
m1 Either CharClass CharClassUnion
-> Either CharClass CharClassUnion -> Bool
forall a. Eq a => a -> a -> Bool
== CharClassUnion -> Either CharClass CharClassUnion
forall a b. b -> Either a b
Right CharClassUnion
p2 = CharClass -> Regex
RChar (CharClass -> Regex) -> CharClass -> Regex
forall a b. (a -> b) -> a -> b
$ (CharClass -> CharClass)
-> (CharClassUnion -> CharClass)
-> Either CharClass CharClassUnion
-> CharClass
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either CharClass -> CharClass
forall a. a -> a
id CharClassUnion -> CharClass
CC (Either CharClass CharClassUnion -> CharClass)
-> Either CharClass CharClassUnion -> CharClass
forall a b. (a -> b) -> a -> b
$ (CharClassUnion
p1 CharClassUnion -> CharClassUnion -> CharClassUnion
forall a. Semigroup a => a -> a -> a
<> CharClassUnion
p2) CharClassUnion -> CharClassUnion -> Either CharClass CharClassUnion
`ccuMinus` (CharClassUnion
m1 CharClassUnion -> CharClassUnion -> CharClassUnion
forall a. Semigroup a => a -> a -> a
<> CharClassUnion
m2)
  | Bool
otherwise                    = CharClass -> Regex
RChar CharClass
c1 Regex -> Regex -> Regex
`RAlt` CharClass -> Regex
RChar CharClass
c2


-- | Disjunction of two character classes is either a character class again ('RChar')
-- or simply the disjunction ('RMinus').
--
-- @(p1 \ m1) \ (0 \ m2)  = p1 \ m1@
-- @(p1 \ m1) \ (p2 \ m2) = p1 \ (m1 ∪ p2)@  if @p1 \ m2 = p1@
cMinus :: CharClass -> CharClass -> Regex
cMinus :: CharClass -> CharClass -> Regex
cMinus c1 :: CharClass
c1@(CMinus CharClassUnion
p1 CharClassUnion
m1) c2 :: CharClass
c2@(CMinus CharClassUnion
p2 CharClassUnion
m2)
  | CharClassUnion
p2 CharClassUnion -> CharClassUnion -> Bool
forall a. Eq a => a -> a -> Bool
== CharClassUnion
forall a. Monoid a => a
mempty                 = CharClass -> Regex
RChar CharClass
c1
  | CharClassUnion
p1 CharClassUnion -> CharClassUnion -> Either CharClass CharClassUnion
`ccuMinus` CharClassUnion
m2 Either CharClass CharClassUnion
-> Either CharClass CharClassUnion -> Bool
forall a. Eq a => a -> a -> Bool
== CharClassUnion -> Either CharClass CharClassUnion
forall a b. b -> Either a b
Right CharClassUnion
p1 = CharClass -> Regex
RChar (CharClass -> Regex) -> CharClass -> Regex
forall a b. (a -> b) -> a -> b
$ (CharClass -> CharClass)
-> (CharClassUnion -> CharClass)
-> Either CharClass CharClassUnion
-> CharClass
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either CharClass -> CharClass
forall a. a -> a
id CharClassUnion -> CharClass
CC (Either CharClass CharClassUnion -> CharClass)
-> Either CharClass CharClassUnion -> CharClass
forall a b. (a -> b) -> a -> b
$ CharClassUnion
p1 CharClassUnion -> CharClassUnion -> Either CharClass CharClassUnion
`ccuMinus` (CharClassUnion
m1 CharClassUnion -> CharClassUnion -> CharClassUnion
forall a. Semigroup a => a -> a -> a
<> CharClassUnion
p2)
  | Bool
otherwise                    = CharClass -> Regex
RChar CharClass
c1 Regex -> Regex -> Regex
`RMinus` CharClass -> Regex
RChar CharClass
c2

-- * Smart constructors for character classes.
---------------------------------------------------------------------------

-- | Match given characters.
cChar :: Char -> CharClass
cChar :: Char -> CharClass
cChar Char
c = CharClassUnion -> CharClass
CC (CharClassUnion -> CharClass) -> CharClassUnion -> CharClass
forall a b. (a -> b) -> a -> b
$ [CharClassAtom] -> CharClassUnion
CAlt [Char -> CharClassAtom
CChar Char
c]

-- | Match any of the given characters.
cAlts :: [Char] -> CharClass
cAlts :: String -> CharClass
cAlts String
cs = CharClassUnion -> CharClass
CC (CharClassUnion -> CharClass) -> CharClassUnion -> CharClass
forall a b. (a -> b) -> a -> b
$ [CharClassAtom] -> CharClassUnion
CAlt ([CharClassAtom] -> CharClassUnion)
-> [CharClassAtom] -> CharClassUnion
forall a b. (a -> b) -> a -> b
$ [CharClassAtom] -> [CharClassAtom]
forall a. Ord a => [a] -> [a]
nubOrd ([CharClassAtom] -> [CharClassAtom])
-> [CharClassAtom] -> [CharClassAtom]
forall a b. (a -> b) -> a -> b
$ (Char -> CharClassAtom) -> String -> [CharClassAtom]
forall a b. (a -> b) -> [a] -> [b]
map Char -> CharClassAtom
CChar String
cs

-- BNFC builtin character classes.

cDigit, cLower, cUpper, cLetter, cAny :: CharClass
cDigit :: CharClass
cDigit  = CharClassAtom -> CharClass
cAtom CharClassAtom
CDigit
cLower :: CharClass
cLower  = CharClassAtom -> CharClass
cAtom CharClassAtom
CLower
cUpper :: CharClass
cUpper  = CharClassAtom -> CharClass
cAtom CharClassAtom
CUpper
cLetter :: CharClass
cLetter = CharClassUnion -> CharClass
CC (CharClassUnion -> CharClass) -> CharClassUnion -> CharClass
forall a b. (a -> b) -> a -> b
$ [CharClassAtom] -> CharClassUnion
CAlt [ CharClassAtom
CLower, CharClassAtom
CUpper ]
cAny :: CharClass
cAny    = CharClassUnion -> CharClass
CC CharClassUnion
CAny

-- Embeddings.

cAtom :: CharClassAtom -> CharClass
cAtom :: CharClassAtom -> CharClass
cAtom = CharClassUnion -> CharClass
CC (CharClassUnion -> CharClass)
-> (CharClassAtom -> CharClassUnion) -> CharClassAtom -> CharClass
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [CharClassAtom] -> CharClassUnion
CAlt ([CharClassAtom] -> CharClassUnion)
-> (CharClassAtom -> [CharClassAtom])
-> CharClassAtom
-> CharClassUnion
forall b c a. (b -> c) -> (a -> b) -> a -> c
. CharClassAtom -> [CharClassAtom]
forall el coll. Singleton el coll => el -> coll
singleton

-- | Smart constructor for @CharClass@ from difference..
--
-- Mutually reduce:  @(A - B) = (A \ B) - (B \ A)@
ccuMinus :: CharClassUnion -> CharClassUnion -> Either CharClass CharClassUnion
ccuMinus :: CharClassUnion -> CharClassUnion -> Either CharClass CharClassUnion
ccuMinus = ((CharClassUnion, CharClassUnion)
 -> Either CharClass CharClassUnion)
-> CharClassUnion
-> CharClassUnion
-> Either CharClass CharClassUnion
forall a b c. ((a, b) -> c) -> a -> b -> c
curry (((CharClassUnion, CharClassUnion)
  -> Either CharClass CharClassUnion)
 -> CharClassUnion
 -> CharClassUnion
 -> Either CharClass CharClassUnion)
-> ((CharClassUnion, CharClassUnion)
    -> Either CharClass CharClassUnion)
-> CharClassUnion
-> CharClassUnion
-> Either CharClass CharClassUnion
forall a b. (a -> b) -> a -> b
$ \case
  (CharClassUnion
_      , CharClassUnion
CAny)   -> CharClassUnion -> Either CharClass CharClassUnion
forall a b. b -> Either a b
Right CharClassUnion
forall a. Monoid a => a
mempty
  (c1 :: CharClassUnion
c1@CharClassUnion
CAny, CharClassUnion
c2  )
    | CharClassUnion
c2 CharClassUnion -> CharClassUnion -> Bool
forall a. Eq a => a -> a -> Bool
== CharClassUnion
forall a. Monoid a => a
mempty  -> CharClassUnion -> Either CharClass CharClassUnion
forall a b. b -> Either a b
Right CharClassUnion
c1
    | Bool
otherwise     -> CharClass -> Either CharClass CharClassUnion
forall a b. a -> Either a b
Left  (CharClass -> Either CharClass CharClassUnion)
-> CharClass -> Either CharClass CharClassUnion
forall a b. (a -> b) -> a -> b
$ CharClassUnion
c1 CharClassUnion -> CharClassUnion -> CharClass
`CMinus` CharClassUnion
c2
  (CAlt [CharClassAtom]
cs1, CAlt [CharClassAtom]
cs2)
    | [CharClassAtom] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [CharClassAtom]
cs1' Bool -> Bool -> Bool
||
      [CharClassAtom] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [CharClassAtom]
cs2' -> CharClassUnion -> Either CharClass CharClassUnion
forall a b. b -> Either a b
Right (CharClassUnion -> Either CharClass CharClassUnion)
-> CharClassUnion -> Either CharClass CharClassUnion
forall a b. (a -> b) -> a -> b
$ [CharClassAtom] -> CharClassUnion
CAlt [CharClassAtom]
cs1'
    | Bool
otherwise -> CharClass -> Either CharClass CharClassUnion
forall a b. a -> Either a b
Left  (CharClass -> Either CharClass CharClassUnion)
-> CharClass -> Either CharClass CharClassUnion
forall a b. (a -> b) -> a -> b
$ [CharClassAtom] -> CharClassUnion
CAlt [CharClassAtom]
cs1' CharClassUnion -> CharClassUnion -> CharClass
`CMinus` [CharClassAtom] -> CharClassUnion
CAlt [CharClassAtom]
cs2'
    where
    cs1' :: [CharClassAtom]
cs1' = [CharClassAtom]
cs1 [CharClassAtom] -> [CharClassAtom] -> [CharClassAtom]
forall a. Eq a => [a] -> [a] -> [a]
List.\\ [CharClassAtom]
cs2
    cs2' :: [CharClassAtom]
cs2' = [CharClassAtom]
cs2 [CharClassAtom] -> [CharClassAtom] -> [CharClassAtom]
forall a. Eq a => [a] -> [a] -> [a]
List.\\ [CharClassAtom]
cs1


--------------------------------------------------------------------

onlyOneChar :: CharClassUnion -> Bool
onlyOneChar :: CharClassUnion -> Bool
onlyOneChar CharClassUnion
CAny         = Bool
True
onlyOneChar (CAlt [CharClassAtom]
atoms) = [CharClassAtom] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [CharClassAtom]
atoms Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1

isEmpty :: CharClassUnion -> Bool
isEmpty :: CharClassUnion -> Bool
isEmpty CharClassUnion
CAny         = Bool
False
isEmpty (CAlt [CharClassAtom]
atoms) = [CharClassAtom] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [CharClassAtom]
atoms