module Language.HaLex.RegExp (
RegExp (..)
, cataRegExp
, matchesRE
, matches'
, sizeRegExp
, showRE
, simplifyRegExp
, extREtoRE
) where
data RegExp sy = Empty
| Epsilon
| Literal sy
| Or (RegExp sy) (RegExp sy)
| Then (RegExp sy) (RegExp sy)
| Star (RegExp sy)
| OneOrMore (RegExp sy)
| Optional (RegExp sy)
| RESet [sy]
deriving (Read, Eq)
cataRegExp :: ( re
, re
, re -> re -> re
, re -> re
, sy -> re
, re -> re -> re
, re -> re
, re -> re
, [sy] -> re
) -> RegExp sy -> re
cataRegExp (empty,epsilon,or,star,lit,th,one,opt,set) = cata
where cata Empty = empty
cata Epsilon = epsilon
cata (Or er1 er2) = or (cata er1) (cata er2)
cata (Star er) = star (cata er)
cata (Literal a) = lit a
cata (Then er1 er2) = th (cata er1) (cata er2)
cata (OneOrMore er) = one (cata er)
cata (Optional er) = opt (cata er)
cata (RESet st) = set st
matchesRE :: Eq sy
=> RegExp sy
-> [sy]
-> Bool
matchesRE Empty inp = False
matchesRE Epsilon inp = inp == []
matchesRE (Literal l) inp = ([l] == inp)
matchesRE (Or re1 re2) inp = matchesRE re1 inp || matchesRE re2 inp
matchesRE (Then re1 re2) inp = or [ matchesRE re1 s1 && matchesRE re2 s2
| (s1,s2) <- splits inp]
matchesRE (Star re) inp = matchesRE Epsilon inp ||
or [ matchesRE re s1 && matchesRE (Star re) s2
| (s1,s2) <- frontSplits inp ]
matches' :: Eq sy
=> RegExp sy
-> [sy]
-> Bool
matches' = matchesRE . extREtoRE
splits :: [a]
-> [ ([a],[a]) ]
splits s = [ splitAt n s | n <- [ 0 .. length s ] ]
frontSplits :: [a] -> [ ([a],[a]) ]
frontSplits s = [ splitAt n s | n <- [ 1 .. length s ] ]
sizeRegExp :: RegExp sy
-> Int
sizeRegExp = cataRegExp (0,0,(+),id,\x -> 1,(+),id,id,length)
showRE :: Show sy
=> RegExp sy
-> [Char]
showRE = cataRegExp ("{}"
, "@"
, \ l r -> "(" ++ l ++ "|" ++ r ++ ")"
, \ er -> "(" ++ er ++ ")*"
, show
, \ l r -> "(" ++ l ++ r ++ ")"
, \ er -> "(" ++ er ++ ")+"
, \ er -> "(" ++ er ++ ")?"
, \ set -> show set
)
instance Show sy => Show (RegExp sy) where
showsPrec _ Empty = showString "{}"
showsPrec _ Epsilon = showChar '@'
showsPrec _ (Literal c) = showsPrec 0 c
showsPrec n (Star e) = showsPrec 10 e . showChar '*'
showsPrec n (OneOrMore e) = showParen (n == 4)
$ showsPrec 10 e
. showChar '+'
showsPrec _ (Optional e) = showsPrec 10 e
. showChar '?'
showsPrec n (e1 `Or` e2) = showParen (n /= 0 && n /= 4)
$ showsPrec 4 e1
. showChar '|'
. showsPrec 4 e2
showsPrec n (e1 `Then` e2) = showParen (n /= 0 && n /= 6)
$ showsPrec 6 e1
. showChar ' '
. showsPrec 6 e2
showsPrec _ (RESet set) = showList set
isSymbol x = x `elem` "|? "
simplifyRegExp :: Eq sy => RegExp sy -> RegExp sy
simplifyRegExp Empty = Empty
simplifyRegExp Epsilon = Epsilon
simplifyRegExp (Literal x) = Literal x
simplifyRegExp (Star x) = case x' of
Epsilon -> Epsilon
Empty -> Epsilon
Or Epsilon a -> Star (simplifyRegExp a)
Or a Epsilon -> Star (simplifyRegExp a)
_ -> Star x'
where x' = simplifyRegExp x
simplifyRegExp (Then x y) | x' == Empty = Empty
| y' == Empty = Empty
| x' == Epsilon = y'
| y' == Epsilon = x'
| y' == Star x' = OneOrMore x'
| x' == Star y' = OneOrMore y'
| otherwise = Then x' y'
where x' = simplifyRegExp x
y' = simplifyRegExp y
simplifyRegExp a@(Or x y)
| x' == y' = x'
| x' == Empty = y'
| y' == Empty = x'
| otherwise = f x' y'
where x' = simplifyRegExp x
y' = simplifyRegExp y
f Epsilon (OneOrMore p) = Star p
f Epsilon re = Optional re
f (OneOrMore p) Epsilon = Star p
f re Epsilon = Optional re
f re1 re2 = Or re1 re2
simplifyRegExp (OneOrMore x) = case x' of
Empty -> Empty
Epsilon -> Epsilon
Or p Epsilon -> Star p
Or Epsilon p -> Star p
_ -> OneOrMore x'
where x' = simplifyRegExp x
simplifyRegExp (Optional x) = Optional (simplifyRegExp x)
simplifyRegExp (RESet set) = RESet set
extREtoRE :: RegExp sy -> RegExp sy
extREtoRE = cataRegExp ( Empty
, Epsilon
, \ l r -> Or l r
, \ er -> Star er
, \ a -> Literal a
, \ l r -> Then l r
, \ er -> Then er (Star er)
, \ er -> Or Epsilon er
, \ set -> foldr1 Or (map Literal set)
)