{ -------------------------------------------------------------------- -- | -- Module : Language.Dot.Parser -- License : GPL-3 -- -- Maintainer : Marcelo Garlet Millani -- Stability : experimental -- Portability : portable -- -- Parsing function and abstract syntax tree for the [DOT language] -- (https://www.graphviz.org/doc/info/lang.html). -- -- The parser is implemented using `happy`, and so is fairly fast. -------------------------------------------------------------------- module Language.Dot.Parser (parse , GraphType(..) , Name(..) , Statement(..) , Subgraph(..) , Port(..) , Compass(..) ) where import HappyDot.Parser import Language.Dot.Graph import Data.Char import Control.Monad.Trans.State import qualified Debug.Trace as D } %name dot %monad { P } {thenE} {returnE} %lexer { lexer } { TokenEOF } %tokentype { Token } %error { parseError } %token graph { TokenGraph } digraph { TokenDigraph} node { TokenNode } edge { TokenEdge } subgraph { TokenSubgraph} label { TokenLabel $$ } qlabel { TokenQuotedLabel $$ } strict { TokenStrict } arrow { TokenArrow $$ } '=' { TokenEQ } '{' { TokenOCB } '}' { TokenCCB } '[' { TokenOSB } ']' { TokenCSB } ':' { TokenCo } ';' { TokenSemiCo } ',' { TokenComma } '+' { TokenPlus } xmlClose { TokenCT $$ } xmlOpen { TokenOT $$ } xmlProc { TokenProc $$ } %% Graph : Strictness GraphType Name_empty Stmt_list { ($1, $2, $3, $4) } Strictness : strict { True } | {- empty -} { False } GraphType: graph { Graph } | digraph { Digraph } Name_empty : Name { Just $1 } | {- empty -} { Nothing } Name : label { StringID $1} | Quoted_string { StringID $1} | Xml { XMLID $1 } Quoted_string: Quoted_sum { concat $ reverse $1 } Quoted_sum: qlabel { [$1] } | Quoted_sum '+' qlabel {$3 : $1} Stmt_list : '{' Stmt_list_aux '}' { reverse $2 } Stmt_list_aux : Stmt_list_aux Stmt Stmt_sep { $2 : $1 } | {- empty -} { [] } Stmt_sep: ';' { Just $1 } | ',' { Just $1 } | {- empty -} { Nothing } Stmt : Attr_stmt { $1 } | Node_stmt { $1 } | Edge_stmt { $1 } | Name '=' Name { AttributeStatement ( $1, $3) } | Subgraph { SubgraphStatement $1 } Attr_stmt: node Attr_list { NodeAttribute $2 } | edge Attr_list { EdgeAttribute $2 } | graph Attr_list { GraphAttribute $2 } Attr_list: Attr_list_aux { reverse $1 } Attr_list_aux : Attr_list_aux '[' A_list ']' { $3 ++ $1 } | {- empty -} { [] } A_list : A_list Stmt_sep Name '=' Name { ( $3, $5) : $1 } | {- empty -} { [] } Edge_stmt : Node EdgeRHS Attr_list { EdgeStatement ((uncurry NodeRef $1):(reverse $2)) $3 } | Subgraph EdgeRHS Attr_list { EdgeStatement ($1:(reverse $2)) $3 } EdgeRHS : EdgeRHS arrow Node { (uncurry NodeRef $3) : $1 } | EdgeRHS arrow Subgraph { $3 : $1 } | arrow Node { [uncurry NodeRef $2] } | arrow Subgraph { [$2] } | arrow ';' {% getLineNumber `thenE` \line -> failE $ "Unfinished edge statement." } | arrow ',' {% getLineNumber `thenE` \line -> failE $ "Unfinished edge statement." } Node_stmt : Node Attr_list { (uncurry NodeStatement) $1 $2 } Node : Name { ($1, Nothing) } | Name Port { ($1, (Just $2)) } Port : ':' Name { case toCompass $ show $2 of Just c -> Port Nothing (Just c) Nothing -> Port (if show $2 == "_" then Nothing else (Just $2)) Nothing } | ':' Name ':' label { Port (Just $2) (toCompass $4) } Subgraph : subgraph Name_empty Stmt_list { Subgraph $2 $3 } | Stmt_list { Subgraph Nothing $1 } Xml : xmlOpen XmlList xmlClose {show $1 ++ (concat $ reverse $2) ++ show $3} XmlList : XmlList XmlElement {$2 : $1} | {- empty -} { [] } XmlElement : xmlOpen XmlList xmlClose { show $1 ++ (concat $ reverse $2) ++ show $3 } | xmlProc { show $1 } { data Except a = Success a | Failure String type P a = String -> Int -> Except a thenE :: P a -> (a -> P b) -> P b thenE m k s l = case m s l of Success a -> k a s l Failure e -> Failure e returnE :: a -> P a returnE a s l = Success a failE :: String -> P a failE e s l = Failure $ "Error on line " ++ show l ++ ": " ++ e catchE :: P a -> (String -> P a) -> P a catchE m k s l = case m s l of Success a -> Success a Failure e -> k e s l getLineNumber s l = Success l parseError :: Token -> P a parseError t = getLineNumber `thenE` \line -> failE $ "Parse error on line " ++ show line ++ "." data Token = TokenGraph | TokenDigraph | TokenNode | TokenEdge | TokenSubgraph | TokenStrict | TokenLabel String | TokenQuotedLabel String | TokenXML String | TokenCompass Compass | TokenArrow Char | TokenEQ | TokenOCB | TokenCCB | TokenOSB | TokenCSB | TokenCo | TokenSemiCo | TokenComma | TokenCT String | TokenOT String | TokenProc String | TokenPlus | TokenEOF deriving (Eq) instance Show Token where show tk = case tk of TokenGraph -> "graph" TokenDigraph -> "digraph" TokenNode -> "node" TokenEdge -> "edge" TokenSubgraph -> "subgraph" TokenStrict -> "strict" TokenLabel label -> label TokenQuotedLabel label -> label TokenXML xml -> xml TokenCompass compass -> show compass TokenArrow c -> [c] TokenEQ -> "=" TokenOCB -> "{" TokenCCB -> "]" TokenOSB -> "[" TokenCSB -> "]" TokenCo -> ":" TokenSemiCo -> ";" TokenComma -> "," TokenCT tag -> "" TokenOT tag -> "<" ++ tag ++ ">" TokenProc xml -> "" TokenPlus -> "+" TokenEOF -> "" -- | Parse a graph written in the DOT language. parse s = case dot ('\n':s) 0 of -- A newline is added to treat '#' in the beginning of a line properly. Success ast -> Right ast Failure msg -> Left msg toCompass l = case map toLower l of "n" -> Just $ North "ne" -> Just $ NorthEast "e" -> Just $ East "se" -> Just $ SouthEast "s" -> Just $ South "sw" -> Just $ SouthWest "w" -> Just $ West "nw" -> Just $ NorthWest "c" -> Just $ Center "_" -> Nothing _ -> Nothing lexer :: (Token -> (P a)) -> (P a) lexer cont str line = case dropWhile (\c -> c /= '\n' && isSpace c) str of '\n':'#':s' -> let s'' = dropWhile (/= '\n') s' in lexer cont s'' (line + 1) '\n':s' -> lexer cont s' (line +1) ',':s' -> cont TokenComma s' line ';':s' -> cont TokenSemiCo s' line 's':'u':'b':'g':'r':'a':'p':'h':s' -> cont TokenSubgraph s' line 'g':'r':'a':'p':'h':s' -> cont TokenGraph s' line 'd':'i':'g':'r':'a':'p':'h':s' -> cont TokenDigraph s' line 'e':'d':'g':'e':s' -> cont TokenEdge s' line 'n':'o':'d':'e':s' -> cont TokenNode s' line 's':'t':'r':'i':'c':'t':s' -> cont TokenStrict s' line '{':s' -> cont TokenOCB s' line '}':s' -> cont TokenCCB s' line '[':s' -> cont TokenOSB s' line ']':s' -> cont TokenCSB s' line '+':s' -> cont TokenPlus s' line '=':s' -> cont TokenEQ s' line ':':s' -> cont TokenCo s' line '-':'-':s' -> cont (TokenArrow '-') s' line '-':'>':s' -> cont (TokenArrow '>') s' line '/':'/':s' -> let s'' = dropWhile (/= '\n') s' in lexer cont s'' line '/':'*':s' -> let (s'', line') = execState consumeComment (s', line) in lexer cont s'' (line') '"':s' -> let (qs, (s'', line')) = runState consumeQuotedString (s', line) in case qs of Right string -> cont (TokenQuotedLabel string) s'' line' Left msg -> failE msg s' line' '<':'/':s' -> let (tag, (s'', line')) = runState consumeTag (s', line) in cont (TokenCT tag) (tail s'') line' '<':'!':'-':'-':s' -> let (s'', line') = execState consumeXMLComment (s', line) in lexer cont s'' line' '<':'?':s' -> let (proc, (s'', line')) = runState consumeProcessing (s', line) in cont (TokenProc proc) s'' line' [] -> cont TokenEOF [] line s' -> let (label, s'') = span (\x -> not $ isSpace x || x `elem` ";,{}[]=\"") s' in if null label then failE ("Unexpected character: " ++ [head s'']) s' line else cont (TokenLabel label) s'' line }