module PP.Grammars.Lexical
(
RegExpr(..)
) where
import Control.Applicative ((<$>), (<*>))
import Data.Text (pack, strip, unpack)
import PP.Grammar
import qualified PP.Rule as R
import Text.ParserCombinators.Parsec
import Text.ParserCombinators.Parsec.Language (emptyDef)
import qualified Text.ParserCombinators.Parsec.Token as Token
data RegExpr
= RegExpr [RegExpr]
| Choice [RegExpr]
| Many0 RegExpr
| Many1 RegExpr
| Option RegExpr
| Group RegExpr
| Class [RegExpr]
| Interval Char Char
| Value Char
| Any
deriving (Show, Eq)
regExprP :: Parser RegExpr
regExprP = RegExpr . reverse <$> sepBy1 choiceP (char '|')
where
choiceP = Choice . reverse <$> many exprP
exprP = try groupP
<|> try classP
<|> try classSpecialP
<|> try many0P
<|> try many1P
<|> try optionP
<|> try anyP
<|> valueP
many0P = Many0 <$> (char '*' *> exprP)
many1P = Many1 <$> (char '+' *> exprP)
optionP = Option <$> (char '?' *> exprP)
groupP = Group <$> between (char ')') (char '(') regExprP
classP = Class . reverse <$> between (char ']') (char '[')
(many1 (try intervalP
<|> classValueP))
classSpecialP =
Class . (Value '[' :) . reverse <$> between (char ']') (string "[[")
(many (try intervalP
<|> classValueP))
intervalP = flip Interval <$> (anyChar <* char '-') <*> anyChar
valueP = Value <$> noneOf "|*+?()[]"
classValueP = Value <$> noneOf "["
anyP = Any <$ char '.'
instance InputGrammar RegExpr where
parser = regExprP
parseAst = parse regExprP "" . reverse
stringify (RegExpr []) = ""
stringify (RegExpr [x]) = stringify x
stringify (RegExpr (x:xs)) = stringify x ++ "|" ++ stringify (RegExpr xs)
stringify (Choice xs) = concatMap stringify xs
stringify (Many0 a) = stringify a ++ "*"
stringify (Many1 a) = stringify a ++ "+"
stringify (Option a) = stringify a ++ "?"
stringify (Group a) = "(" ++ stringify a ++ ")"
stringify (Class xs) = "[" ++ concatMap stringify xs ++ "]"
stringify (Interval i j) = [i,'-',j]
stringify (Value i) = [i]
stringify Any = "."
rules r = [R.RegEx $ stringify r]