{-| Module : PP.Grammars.Ebnf Description : Defines a AST and parser for the EBNF language Copyright : (c) 2017 Patrick Champion License : see LICENSE file Maintainer : chlablak@gmail.com Stability : provisional Portability : portable AST for the EBNF language. Based on the grammar given in the ISO/IEC 14977:1996, page 10, part 8.2. Comments are valid in EBNF, but are not present in this AST. -} module PP.Grammars.Ebnf ( -- * AST Syntax(..) -- ** Inner ASTs , SyntaxRule(..) , DefinitionsList(..) , SingleDefinition(..) , Term(..) , Exception(..) , Factor(..) , Primary(..) , MetaIdentifier(..) ) where import Control.Applicative ((<$>), (<*>)) import qualified Data.List as L import Data.Maybe import Data.Text (pack, strip, unpack) import PP.Grammar import PP.Grammars.LexicalHelper (LexicalRule, lexicalString) import qualified PP.Rule as R import Text.ParserCombinators.Parsec import Text.ParserCombinators.Parsec.Language (emptyDef) import qualified Text.ParserCombinators.Parsec.Token as Token -- |Start rule newtype Syntax = Syntax [SyntaxRule] deriving (Show, Eq) -- |Syntax rule data SyntaxRule -- |Defines the sequence of symbols represented by a MetaIdentifier = SyntaxRule MetaIdentifier DefinitionsList -- |Defines a lexical definition inside the EBNF grammar | LexicalInner LexicalRule deriving (Show, Eq) -- |Separates alternative SingleDefinition newtype DefinitionsList = DefinitionsList [SingleDefinition] deriving (Show, Eq) -- |Separates successive Term newtype SingleDefinition = SingleDefinition [Term] deriving (Show, Eq) -- |Represents any sequence of symbols that is defined by the Factor -- but not defined by the Exception data Term = Term Factor (Maybe Exception) deriving (Show, Eq) -- |A Factor may be used as an Exception if it could be replaced by a -- Factor containing no MetaIdentifier newtype Exception = Exception Factor deriving (Show, Eq) -- |The Integer specifies the number of repetitions of the Primay data Factor = Factor (Maybe Integer) Primary deriving (Show, Eq) -- |Primary data Primary -- |Encloses symbols which are optional = OptionalSequence DefinitionsList -- |Encloses symbols which may be repeated any number of times | RepeatedSequence DefinitionsList -- |Allows any DefinitionsList to be a Primary | GroupedSequence DefinitionsList -- |A Primary can be a MetaIdentifier | PrimaryMetaIdentifier MetaIdentifier -- |Represents the characters between the quote symbols '...' or "..." | TerminalString String -- |Empty Primary | Empty deriving (Show, Eq) -- |A MetaIdentifier is the name of a syntactic element of the langage being defined newtype MetaIdentifier = MetaIdentifier String deriving (Show, Eq) -- |Lexer definitions for EBNF lexer = Token.makeTokenParser def where def = emptyDef { Token.commentStart = "(*" , Token.commentEnd = "*)" , Token.commentLine = "" , Token.nestedComments = False , Token.identStart = letter , Token.identLetter = alphaNum <|> oneOf "_- " , Token.reservedNames = [] , Token.reservedOpNames = ["=", ";", "|", ",", "-", "*"] , Token.caseSensitive = True } identifier = Token.identifier lexer reservedOp = Token.reservedOp lexer stringLiteral = Token.stringLiteral lexer natural = Token.natural lexer whiteSpace = Token.whiteSpace lexer parens = Token.parens lexer braces = Token.braces lexer angles = Token.angles lexer brackets = Token.brackets lexer -- |Syntax parser syntax :: Parser Syntax syntax = whiteSpace *> (Syntax <$> many1 syntaxRule) "syntax" -- |SyntaxRule parser syntaxRule :: Parser SyntaxRule syntaxRule = try (SyntaxRule <$> (metaIdentifier <* reservedOp "=") <*> (definitionsList <* reservedOp ";")) <|> LexicalInner <$> parser "syntax rule" -- |DefinitionsList parser definitionsList :: Parser DefinitionsList definitionsList = DefinitionsList <$> sepBy1 singleDefinition (reservedOp "|") "definitions list" -- |SingleDefinition parser singleDefinition :: Parser SingleDefinition singleDefinition = SingleDefinition <$> sepBy1 term (reservedOp ",") "single definition" -- |Term parser term :: Parser Term term = Term <$> factor <*> optionMaybe (reservedOp "-" *> exception) "term" -- |Exception parser exception :: Parser Exception exception = Exception <$> factor "exception" -- |Factor parser factor :: Parser Factor factor = Factor <$> optionMaybe (natural <* reservedOp "*") <*> primary "factor" -- |Primary parser primary :: Parser Primary primary = option Empty ( OptionalSequence <$> brackets definitionsList <|> RepeatedSequence <$> braces definitionsList <|> GroupedSequence <$> parens definitionsList <|> PrimaryMetaIdentifier <$> metaIdentifier <|> TerminalString <$> stringLiteral ) -- end of option "primary" -- |MetaIdentifier parser metaIdentifier :: Parser MetaIdentifier metaIdentifier = trimMetaIdentifier <$> (angles identifier <|> identifier) "meta identifier" where trimMetaIdentifier = MetaIdentifier . unpack . strip . pack -- |Lexify an EBNF syntax tree lexifySyntax :: Syntax -> Syntax lexifySyntax s = replaceTerm tok $ addLexicalInner tok s where tok = generateTokens $ findTerm s findTerm (Syntax srs) = L.concatMap findTerm' srs findTerm' (SyntaxRule _ dl) = findTerm'' dl findTerm' (LexicalInner _) = [] findTerm'' (DefinitionsList sds) = L.concatMap findTerm''' sds findTerm''' (SingleDefinition ts) = L.concatMap findTerm'''' ts findTerm'''' (Term f _) = findTerm''''' f findTerm''''' (Factor _ p) = findTerm'''''' p findTerm'''''' (OptionalSequence dl) = findTerm'' dl findTerm'''''' (RepeatedSequence dl) = findTerm'' dl findTerm'''''' (GroupedSequence dl) = findTerm'' dl findTerm'''''' (PrimaryMetaIdentifier mi) = [] findTerm'''''' (TerminalString term) = [term] findTerm'''''' Empty = [] generateTokens = map (\t -> (t, "__token_" ++ t)) . L.nub addLexicalInner [] s = s addLexicalInner ((n, t):ts) (Syntax srs) = addLexicalInner ts $ Syntax $ LexicalInner (lexicalString t n) : srs replaceTerm [] s = s replaceTerm (t:ts) (Syntax srs) = replaceTerm ts $ Syntax $ L.map (replaceTerm' t) srs replaceTerm' t (SyntaxRule r dl) = SyntaxRule r $ replaceTerm'' t dl replaceTerm' _ li@(LexicalInner _) = li replaceTerm'' t (DefinitionsList sds) = DefinitionsList $ L.map (replaceTerm''' t) sds replaceTerm''' t (SingleDefinition ts) = SingleDefinition $ L.map (replaceTerm'''' t) ts replaceTerm'''' t (Term f e) = Term (replaceTerm''''' t f) e replaceTerm''''' t (Factor f p) = Factor f $ replaceTerm'''''' t p replaceTerm'''''' t (OptionalSequence dl) = OptionalSequence $ replaceTerm'' t dl replaceTerm'''''' t (RepeatedSequence dl) = RepeatedSequence $ replaceTerm'' t dl replaceTerm'''''' t (GroupedSequence dl) = GroupedSequence $ replaceTerm'' t dl replaceTerm'''''' (n, t) ts@(TerminalString s) = if n == s then PrimaryMetaIdentifier (MetaIdentifier t) else ts replaceTerm'''''' _ p = p -- * InputGrammar instances for EBNF AST instance InputGrammar Syntax where parser = syntax stringify (Syntax []) = "" stringify (Syntax [sr]) = stringify sr stringify (Syntax (sr:r)) = stringify sr ++ "\n" ++ stringify (Syntax r) rules (Syntax srs) = R.uniformize $ L.concatMap rules srs lexify = lexifySyntax instance InputGrammar SyntaxRule where parser = syntaxRule stringify (SyntaxRule mi dl) = stringify mi ++ "=" ++ stringify dl ++ ";" stringify (LexicalInner lr) = stringify lr rules (SyntaxRule (MetaIdentifier mi) dl) = [R.Rule mi [r, R.Empty] | r <- rules dl] rules (LexicalInner lr) = rules lr instance InputGrammar DefinitionsList where parser = definitionsList stringify (DefinitionsList []) = "" stringify (DefinitionsList [sd]) = stringify sd stringify (DefinitionsList (sd:r)) = stringify sd ++ "|" ++ stringify (DefinitionsList r) rules (DefinitionsList sds) = L.concatMap rules sds instance InputGrammar SingleDefinition where parser = singleDefinition stringify (SingleDefinition []) = "" stringify (SingleDefinition [t]) = stringify t stringify (SingleDefinition (t:r)) = stringify t ++ "," ++ stringify (SingleDefinition r) rules (SingleDefinition [t]) = rules t rules (SingleDefinition (t:ts)) = [R.Concat [r,n] | r <- rules t, n <- rules (SingleDefinition ts)] instance InputGrammar Term where parser = term stringify (Term f Nothing) = stringify f stringify (Term f (Just e)) = stringify f ++ "-" ++ stringify e rules (Term f Nothing) = rules f rules _ = error "no translation for exception" -- ... yet instance InputGrammar Exception where parser = exception stringify (Exception f) = stringify f rules _ = undefined -- should not be called, look at the Term instance instance InputGrammar Factor where parser = factor stringify (Factor Nothing p) = stringify p stringify (Factor (Just i) p) = show i ++ "*" ++ stringify p rules (Factor Nothing p) = rules p rules (Factor (Just i) p) = [R.Concat . concat $ replicate (fromIntegral i) (rules p)] instance InputGrammar Primary where parser = primary stringify (OptionalSequence dl) = "[" ++ stringify dl ++ "]" stringify (RepeatedSequence dl) = "{" ++ stringify dl ++ "}" stringify (GroupedSequence dl) = "(" ++ stringify dl ++ ")" stringify (PrimaryMetaIdentifier mi) = stringify mi stringify (TerminalString s) = show s stringify Empty = "" rules a@(OptionalSequence dl) = let x = stringify a in R.NonTerm x : R.Rule x [R.Empty] : [R.Rule x [r, R.Empty] | r <- rules dl] rules a@(RepeatedSequence dl) = let x = stringify a in R.NonTerm x : R.Rule x [R.Empty] : [R.Rule x [r, R.NonTerm x, R.Empty] | r <- rules dl] rules a@(GroupedSequence dl) = let x = stringify a in R.NonTerm x : [R.Rule x [r, R.Empty] | r <- rules dl] rules (PrimaryMetaIdentifier mi) = rules mi rules (TerminalString s) = [R.Concat $ L.map R.Term s] rules Empty = [R.Empty] instance InputGrammar MetaIdentifier where parser = metaIdentifier stringify (MetaIdentifier s) = "<" ++ s ++ ">" rules (MetaIdentifier s) = [R.NonTerm s]