{-# LANGUAGE TypeFamilies, FlexibleContexts #-}
module Text.ANTLR.Allstar.ParserGenerator
( GrammarSymbol(..), Token(..), ATNEnv(..)
, AST(..), ATNState(..), ATNEdge(..)
, ATNEdgeLabel(..)
, parse
) where
import Data.List
import qualified Data.Set as DS
import Debug.Trace
data GrammarSymbol nt t = NT nt | T t | EPS deriving (Eq, Ord, Show)
data ATNState nt =
Init nt
| Middle nt Int Int
| Final nt
deriving (Eq, Ord, Show)
type ATNEdge nt t = (ATNState nt, ATNEdgeLabel nt t, ATNState nt)
data ATNEdgeLabel nt t =
GS (GrammarSymbol nt t)
| PRED Bool
deriving (Eq, Ord, Show)
type ATNEnv nt t = DS.Set (ATNEdge nt t)
isInit :: ATNState nt -> Bool
isInit (Init nt) = True
isInit _ = False
outgoingEdge :: (Eq nt, Show nt) => ATNState nt -> ATNEnv nt t -> ATNEdge nt t
outgoingEdge p atnEnv = let edges = outgoingEdges p atnEnv
in case edges of
[edge] -> edge
_ -> error "Multiple edges found"
outgoingEdges :: (Eq nt, Show nt) => ATNState nt -> ATNEnv nt t -> [ATNEdge nt t]
outgoingEdges p atnEnv = DS.toList (DS.filter (\(p',_,_) -> p' == p) atnEnv)
type ATNStack nt = [ATNState nt]
type ATNConfig nt = (ATNState nt, Int, ATNStack nt)
type DFA nt t = [DFAEdge nt t]
type DFAEdge nt t = (DFAState nt, t, DFAState nt)
data DFAState nt = Dinit [ATNConfig nt] | D [ATNConfig nt] | F Int | Derror deriving (Eq, Ord, Show)
type DFAEnv nt t = [(GrammarSymbol nt t, DFA nt t)]
class Token t where
type Label t :: *
type Literal t :: *
getLabel :: t -> Label t
getLiteral :: t -> Literal t
data AST nt tok = Node nt [AST nt tok] | Leaf tok deriving (Eq, Show)
emptyEnv = []
emptyStack = []
emptyDerivation = []
getConflictSetsPerLoc :: (Eq nt, Ord nt) => DFAState nt -> [[ATNConfig nt]]
getConflictSetsPerLoc q =
case q of
F _ -> error "final state passed to getConflictSetsPerLoc"
Derror -> error "error state passed to getConflictSetsPerLoc"
D configs -> let sortedConfigs = sortOn (\(p, i, gamma) -> (p, gamma)) configs
in groupBy (\(p, i, gamma) (p', j, gamma') ->
p == p' && i /= j && gamma == gamma')
sortedConfigs
getProdSetsPerState :: (Eq nt, Ord nt) => DFAState nt -> [[ATNConfig nt]]
getProdSetsPerState q =
case q of
F _ -> error "final state passed to getProdSetsPerState"
Derror -> error "error state passed to getProdSetsPerState"
D configs -> let sortedConfigs = sortOn (\(p, i, gamma) -> (p, gamma)) configs
in groupBy (\(p, _, _) (p', _, _) -> p == p')
sortedConfigs
dfaTrans :: (Eq nt, Eq t) => DFAState nt -> t -> DFA nt t -> Maybe (DFAEdge nt t)
dfaTrans d t dfa = find (\(d1, label, _) -> d1 == d && label == t) dfa
findInitialState :: DFA nt t -> Maybe (DFAState nt)
findInitialState dfa =
let isInit d = case d of
Dinit _ -> True
_ -> False
in case find (\(d1, _, _) -> isInit d1) dfa of
Just (d1, _, _) -> Just d1
Nothing -> Nothing
allEqual :: Eq a => [a] -> Bool
allEqual [] = True
allEqual (x : xs) = all (== x) xs
bind :: Eq a => a -> b -> [(a, b)] -> [(a, b)]
bind k v [] = [(k, v)]
bind k v ((k', v') : al') = if k == k' then (k, v) : al' else (k', v') : bind k v al'
parse :: (Eq nt, Show nt, Ord nt, Eq (Label tok), Show (Label tok), Ord (Label tok), Token tok, Show tok) =>
[tok] -> GrammarSymbol nt (Label tok) -> ATNEnv nt (Label tok) -> Bool -> Either String (AST nt tok)
parse input startSym atnEnv useCache =
let parseLoop input currState stack dfaEnv subtrees astStack =
case (currState, startSym) of
(Final c, NT c') ->
if c == c' then
Right (Node c subtrees)
else
case (stack, astStack) of
(q : stack', leftSiblings : astStack') ->
parseLoop input q stack' dfaEnv (leftSiblings ++ [Node c subtrees]) astStack'
_ -> error ("Reached a final ATN state, but parse is incomplete " ++
"and there's no ATN state to return to")
(_, _) ->
case (outgoingEdge currState atnEnv) of
(p, t, q) ->
case (t, input) of
(GS (T b), []) -> error "Input has been exhausted"
(GS (T b), c : cs) -> if b == getLabel c then
parseLoop cs q stack dfaEnv (subtrees ++ [Leaf c]) astStack
else
Left ("remaining input: " ++ show input)
(GS (NT b), _) -> let stack' = q : stack
in case adaptivePredict (NT b) input stack' dfaEnv of
Nothing -> Left ("Couldn't find a path through ATN " ++ show b ++
" with input " ++ show input)
Just (i, dfaEnv') -> parseLoop input (Middle b i 0) stack' dfaEnv' [] (subtrees : astStack)
(GS EPS, _) -> parseLoop input q stack dfaEnv subtrees astStack
(PRED _, _) -> error "not implemented"
initialDfaEnv = DS.toList (DS.foldr (\(p,_,_) ntNames ->
case p of Init ntName -> DS.insert (NT ntName, []) ntNames
_ -> ntNames)
DS.empty
atnEnv)
in case startSym of (NT c) ->
case adaptivePredict startSym input emptyStack initialDfaEnv of
Nothing -> Left ("Couldn't find a path through ATN " ++ show c ++
" with input " ++ show input)
Just (iStart, initialDfaEnv') -> parseLoop input (Middle c iStart 0) emptyStack initialDfaEnv' [] emptyStack
_ -> error "Start symbol must be a nonterminal"
where
adaptivePredict sym input stack dfaEnv =
case lookup sym dfaEnv of
Nothing -> error ("No DFA found for " ++ show sym)
Just dfa -> let d0 = case findInitialState dfa of
Just d0 -> d0
Nothing -> startState sym emptyStack
in sllPredict sym input d0 stack dfaEnv
startState sym stack =
case sym of
NT ntName ->
let initEdges = outgoingEdges (Init ntName) atnEnv
loopOverAtnPaths initEdges =
case initEdges of
[] -> []
(Init _, GS EPS, q@(Middle _ i _)) : es ->
(closure [] (q, i, stack)) ++ loopOverAtnPaths es
_ -> error "ATN path must begin with an epsilon edge from Init to Choice"
in D (loopOverAtnPaths initEdges)
_ -> error "Symbol passed to startState must be a nonterminal"
closure busy currConfig =
if elem currConfig busy then
[]
else
let busy' = currConfig : busy
(p, i, gamma) = currConfig
pEdges = outgoingEdges p atnEnv
loopOverEdges es =
case es of
[] -> []
(_, GS (NT ntName), q) : es' ->
closure busy' (Init ntName, i, q : gamma) ++
loopOverEdges es'
(_, GS EPS, q) : es' ->
closure busy' (q, i, gamma) ++
loopOverEdges es'
(_, GS (T _), _) : es' ->
loopOverEdges es'
in case (p, gamma) of
(Final _, []) -> [currConfig]
(Final _, q : gamma') -> currConfig : closure busy' (q, i, gamma')
_ -> currConfig : loopOverEdges pEdges
sllPredict sym input d0 stack initialDfaEnv =
let predictionLoop d tokens dfaEnv =
case tokens of
[] -> Nothing
t : ts ->
let (d', dfaEnv') =
if useCache then
case lookup sym dfaEnv of
Nothing -> error ("No DFA found for nonterminal " ++ show sym ++ show dfaEnv)
Just dfa ->
case dfaTrans d (getLabel t) dfa of
Just (_, _, d2) -> (d2, dfaEnv)
Nothing -> let d' = target d t
in (d', bind sym ((d, getLabel t, d') : dfa) dfaEnv)
else
(target d t, dfaEnv)
in case d' of
Derror -> Nothing
F i -> Just (i, dfaEnv')
D atnConfigs ->
let conflictSets = getConflictSetsPerLoc d'
prodSets = getProdSetsPerState d'
stackSensitive =
any (\cSet -> length cSet > 1) conflictSets &&
not (any (\pSet -> length pSet == 1) prodSets)
in if stackSensitive then
Just (llPredict sym input stack, initialDfaEnv)
else
predictionLoop d' ts dfaEnv'
in predictionLoop d0 input initialDfaEnv
llPredict sym input stack =
let d0 = startState sym stack
predictionLoop d tokens =
case tokens of
[] -> error ("Empty input in llPredict")
t : ts ->
let mv = move d (getLabel t)
d' = D (concat (map (closure []) mv))
in case d' of
D [] -> error ("empty DFA state in llPredict")
D atnConfigs ->
case nub (map (\(_, j, _) -> j) atnConfigs) of
[i] -> i
_ ->
let altSets = getConflictSetsPerLoc d'
in case altSets of
[] -> error ("No alt sets found")
a : as ->
if allEqual altSets && length a > 1 then
minimum (map (\(_, i, _) -> i) a)
else
predictionLoop d' ts
in predictionLoop d0 input
target d a =
let mv = move d (getLabel a)
d' = D (concat (map (closure []) mv))
in case d' of
D [] -> Derror
D atnConfigs ->
case nub (map (\(_, j, _) -> j) atnConfigs) of
[i] -> F i
_ -> d'
move q t =
case q of
D atnConfigs ->
let qsForP (p, i, gamma) =
let pOutgoingEdges = outgoingEdges p atnEnv
in foldr (\(p', label, q) acc ->
case label of
GS (T a) -> if t == a then
(q, i, gamma) : acc
else
acc
_ -> acc)
[]
pOutgoingEdges
in concat (map qsForP atnConfigs)