{-# LANGUAGE GADTs #-}
module CommonParserUtil where
import Terminal
import TokenInterface
import Text.Regex.TDFA
import System.Exit
import System.Process
import Control.Monad
import Data.Typeable
import Control.Exception
import SaveProdRules
import AutomatonType
import LoadAutomaton
type RegExpStr = String
type LexFun token = String -> Maybe token
type LexerSpecList token = [(RegExpStr, LexFun token)]
data LexerSpec token =
LexerSpec { endOfToken :: token,
lexerSpecList :: LexerSpecList token
}
type ProdRuleStr = String
type ParseFun token ast = Stack token ast -> ast
type ParserSpecList token ast = [(ProdRuleStr, ParseFun token ast)]
data ParserSpec token ast =
ParserSpec { startSymbol :: String,
parserSpecList :: ParserSpecList token ast,
baseDir :: String,
actionTblFile :: String,
gotoTblFile :: String,
grammarFile :: String,
parserSpecFile :: String,
genparserexe :: String
}
data Spec token ast =
Spec (LexerSpec token) (ParserSpec token ast)
type Line = Int
type Column = Int
data LexError = LexError Int Int String
deriving (Typeable, Show)
instance Exception LexError
prLexError (LexError line col text) = do
putStr $ "No matching lexer spec at "
putStr $ "Line " ++ show line
putStr $ "Column " ++ show col
putStr $ " : "
putStr $ take 10 text
lexing :: TokenInterface token =>
LexerSpec token -> String -> IO [Terminal token]
lexing lexerspec text = lexing_ lexerspec 1 1 text
lexing_ :: TokenInterface token =>
LexerSpec token -> Line -> Column -> String -> IO [Terminal token]
lexing_ lexerspec line col [] = do
let eot = endOfToken lexerspec
return [Terminal (fromToken eot) line col eot]
lexing_ lexerspec line col text = do
(matchedText, theRestText, maybeTok) <-
matchLexSpec line col (lexerSpecList lexerspec) text
let (line_, col_) = moveLineCol line col matchedText
terminalList <- lexing_ lexerspec line_ col_ theRestText
case maybeTok of
Nothing -> return terminalList
Just tok -> do
let terminal = Terminal matchedText line col tok
return (terminal:terminalList)
matchLexSpec :: TokenInterface token =>
Line -> Column -> LexerSpecList token -> String
-> IO (String, String, Maybe token)
matchLexSpec line col [] text = do
throw (LexError line col text)
matchLexSpec line col ((aSpec,tokenBuilder):lexerspec) text = do
let (pre, matched, post) = text =~ aSpec :: (String,String,String)
case pre of
"" -> return (matched, post, tokenBuilder matched)
_ -> matchLexSpec line col lexerspec text
moveLineCol :: Line -> Column -> String -> (Line, Column)
moveLineCol line col "" = (line, col)
moveLineCol line col ('\n':text) = moveLineCol (line+1) 1 text
moveLineCol line col (ch:text) = moveLineCol line (col+1) text
data ParseError token ast where
NotFoundAction :: (TokenInterface token, Typeable token, Typeable ast, Show token, Show ast) =>
(Terminal token) -> Int -> (Stack token ast) -> ActionTable -> GotoTable -> ParseError token ast
NotFoundGoto :: (TokenInterface token, Typeable token, Typeable ast, Show token, Show ast) =>
Int -> String -> (Stack token ast) -> ActionTable -> GotoTable -> ParseError token ast
deriving (Typeable)
instance (Show token, Show ast) => Show (ParseError token ast) where
showsPrec p (NotFoundAction terminal state stack _ _) =
(++) "NotFoundAction" . (++) (terminalToString terminal) . (++) (show state)
showsPrec p (NotFoundGoto topstate lhs stack _ _) =
(++) "NotFoundGoto" . (++) (show topstate) . (++) lhs
instance (TokenInterface token, Typeable token, Show token, Typeable ast, Show ast)
=> Exception (ParseError token ast)
prParseError (NotFoundAction terminal state stack actiontbl gototbl) = do
putStrLn $
("Not found in the action table: "
++ terminalToString terminal)
++ " : "
++ show (state, tokenTextFromTerminal terminal)
++ "\n" ++ prStack stack ++ "\n"
prParseError (NotFoundGoto topState lhs stack actiontbl gototbl) = do
putStrLn $
("Not found in the goto table: ")
++ " : "
++ show (topState,lhs) ++ "\n"
++ prStack stack ++ "\n"
parsing :: (TokenInterface token, Typeable token, Typeable ast, Show token, Show ast) =>
ParserSpec token ast -> [Terminal token] -> IO ast
parsing parserSpec terminalList = do
writtenBool <- saveProdRules specFileName sSym pSpecList
when writtenBool generateAutomaton
(actionTbl, gotoTbl, prodRules) <-
loadAutomaton grammarFileName actionTblFileName gotoTblFileName
ast <- runAutomaton actionTbl gotoTbl prodRules pFunList terminalList
return ast
where
specFileName = parserSpecFile parserSpec
grammarFileName = grammarFile parserSpec
actionTblFileName = actionTblFile parserSpec
gotoTblFileName = gotoTblFile parserSpec
executable = genparserexe parserSpec
sSym = startSymbol parserSpec
pSpecList = map fst (parserSpecList parserSpec)
pFunList = map snd (parserSpecList parserSpec)
generateAutomaton = do
exitCode <- rawSystem "stack"
[ "exec", "--",
executable, specFileName, "-output",
grammarFileName, actionTblFileName, gotoTblFileName
]
case exitCode of
ExitFailure code -> exitWith exitCode
ExitSuccess -> putStrLn ("Successfully generated: " ++
actionTblFileName ++ ", " ++
gotoTblFileName ++ ", " ++
grammarFileName);
data StkElem token ast =
StkState Int
| StkTerminal (Terminal token)
| StkNonterminal ast String
type Stack token ast = [StkElem token ast]
emptyStack = []
get :: Stack token ast -> Int -> ast
get stack i =
case stack !! (i-1) of
StkNonterminal ast _ -> ast
_ -> error $ "get: out of bound: " ++ show i
getText :: Stack token ast -> Int -> String
getText stack i =
case stack !! (i-1) of
StkTerminal (Terminal text _ _ _) -> text
_ -> error $ "getText: out of bound: " ++ show i
push :: a -> [a] -> [a]
push elem stack = elem:stack
pop :: [a] -> (a, [a])
pop (elem:stack) = (elem, stack)
pop [] = error "Attempt to pop from the empty stack"
prStack :: TokenInterface token => Stack token ast -> String
prStack [] = "end"
prStack (StkState i : stack) = "S" ++ show i ++ " : " ++ prStack stack
prStack (StkTerminal (Terminal text _ _ token) : stack) =
fromToken token ++ "(" ++ text ++ ")" ++ " : " ++ prStack stack
prStack (StkNonterminal ast str : stack) = str ++ " : " ++ prStack stack
currentState :: Stack token ast -> Int
currentState (StkState i : stack) = i
currentState _ = error "No state found in the stack top"
tokenTextFromTerminal :: TokenInterface token => Terminal token -> String
tokenTextFromTerminal (Terminal _ _ _ token) = fromToken token
lookupActionTable :: TokenInterface token => ActionTable -> Int -> (Terminal token) -> Maybe Action
lookupActionTable actionTbl state terminal =
lookupTable actionTbl (state,tokenTextFromTerminal terminal)
("Not found in the action table: " ++ terminalToString terminal)
lookupGotoTable :: GotoTable -> Int -> String -> Maybe Int
lookupGotoTable gotoTbl state nonterminalStr =
lookupTable gotoTbl (state,nonterminalStr)
("Not found in the goto table: ")
lookupTable :: (Eq a, Show a) => [(a,b)] -> a -> String -> Maybe b
lookupTable tbl key msg =
case [ val | (key', val) <- tbl, key==key' ] of
[] -> Nothing
(h:_) -> Just h
revTakeRhs :: Int -> [a] -> [a]
revTakeRhs 0 stack = []
revTakeRhs n (_:nt:stack) = revTakeRhs (n-1) stack ++ [nt]
runAutomaton :: (TokenInterface token, Typeable token, Typeable ast, Show token, Show ast) =>
ActionTable -> GotoTable -> ProdRules -> [ParseFun token ast] ->
[Terminal token] ->
IO ast
runAutomaton actionTbl gotoTbl prodRules pFunList terminalList = do
let initStack = push (StkState 0) emptyStack
run terminalList initStack
where
run terminalList stack = do
let state = currentState stack
let terminal = head terminalList
let text = tokenTextFromTerminal terminal
let action =
case lookupActionTable actionTbl state terminal of
Just action -> action
Nothing -> throw (NotFoundAction terminal state stack actionTbl gotoTbl)
debug ("\nState " ++ show state)
debug ("Token " ++ text)
debug ("Stack " ++ prStack stack)
case action of
Accept -> do
debug "Accept"
case stack !! 1 of
StkNonterminal ast _ -> return ast
_ -> fail "Not Stknontermianl on Accept"
Shift toState -> do
debug ("Shift " ++ show toState)
let stack1 = push (StkTerminal (head terminalList)) stack
let stack2 = push (StkState toState) stack1
run (tail terminalList) stack2
Reduce n -> do
debug ("Reduce " ++ show n)
let prodrule = prodRules !! n
debug ("\t" ++ show prodrule)
let builderFun = pFunList !! n
let lhs = fst prodrule
let rhsLength = length (snd prodrule)
let rhsAst = revTakeRhs rhsLength stack
let ast = builderFun rhsAst
let stack1 = drop (rhsLength*2) stack
let topState = currentState stack1
let toState =
case lookupGotoTable gotoTbl topState lhs of
Just state -> state
Nothing -> throw (NotFoundGoto topState lhs stack actionTbl gotoTbl)
let stack2 = push (StkNonterminal ast lhs) stack1
let stack3 = push (StkState toState) stack2
run terminalList stack3
flag = False
debug :: String -> IO ()
debug msg = if flag then putStrLn msg else return ()
data Candidate =
TerminalSymbol String
| NonterminalSymbol String
deriving Show
compCandidates :: [Candidate] -> Int -> ActionTable -> GotoTable -> IO [[Candidate]]
compCandidates symbols state actTbl gotoTbl = do
putStrLn (show symbols)
case [(lookahead,prnum) | ((s,lookahead),Reduce prnum) <- actTbl, state==s] of
[] -> do let cand1 = [(nonterminal,snext) | ((s,nonterminal),snext) <- gotoTbl, state==s]
let cand2 = [(terminal,snext) | ((s,terminal),Shift snext) <- actTbl, state==s]
if null cand1
then
do listOfList <-
mapM (\(terminal,snext)-> do
putStrLn $ "state " ++ show state ++
": shift to " ++ show snext ++
" on " ++ terminal
compCandidates
(symbols++[TerminalSymbol terminal]) snext actTbl gotoTbl) cand2
return $ concat listOfList
else
do listOfList <-
mapM (\(nonterminal,snext)-> do
putStrLn $ "state " ++ show state ++
": go to " ++ show snext ++
" on " ++ nonterminal
compCandidates
(symbols++[NonterminalSymbol nonterminal]) snext actTbl gotoTbl) cand1
return $ concat listOfList
l -> do putStrLn $ "state " ++ show state ++
": found reduce prodrule #" ++ show (snd (head l)) ++
" on " ++ fst (head l)
putStrLn $ "CANDIDATE: " ++ show [symbols]
return [symbols]