%if false \begin{code} module GLL.Machines.RGLL ( Slot(..) , Alt(..) , Symbol(..) , PrL , NtL , parse , gllSPPF , charS , charT , nT , epsilon , pNodeLookup ) where import Data.Foldable hiding (forM_, toList) import Prelude hiding (lookup, foldr, fmap, foldl, elem, sum) import Control.Monad import Control.Applicative hiding (empty) import Data.Map (Map(..), empty, insertWith, (!), toList, lookup) import Data.Set (member, Set(..)) import qualified Data.IntMap as IM import qualified Data.Map as M import qualified Data.Array as Array import qualified Data.Set as S import qualified Data.IntSet as IS import GLL.Common import GLL.Types.Abstract import GLL.Types.Grammar \end{code} %endif \begin{code} type LhsState = (Nt, Int) type RhsState = (Slot, Int, Int) \end{code} %if false \begin{code} type Context = (SPPF, Rcal, Ucal, GSS, Pcal) \end{code} %endif \begin{spec} data Alt = Alt Nt [Symbol] data Slot = Slot Nt [Symbol] [Symbol] \end{spec} \begin{code} type Rcal = [(RhsState, SPPFNode)] type Rcal' = Set (Int,Int,Slot,SPPFNode) type Ucal = IM.IntMap (IM.IntMap (S.Set Slot)) type GSS = IM.IntMap (M.Map Nt [GSSEdge]) -- can be set? TODO type Pcal = IM.IntMap (M.Map Nt [Int]) -- can be set? TODO type GSSEdge = (SlotL, SPPFNode) type GSSNode = (Nt, Int) data GSlot = GSlot Slot | U0 deriving (Ord, Eq) data ASM a = ASM (Context -> (a, Context)) \end{code} \begin{code} addState :: SPPFNode -> RhsState -> ASM () getState :: ASM (Maybe (RhsState,SPPFNode)) addSPPFEdge :: SPPFNode -> SPPFNode -> ASM () popGSS :: GSSNode -> (Int) -> ASM [GSSEdge] addGSSEdge :: GSSNode -> GSSEdge -> ASM () getPops :: GSSNode -> ASM [Int] joinSPPFs :: Slot -> SPPFNode -> Int -> Int -> Int -> ASM SPPFNode \end{code} \begin{code} runASM :: ASM a -> Context -> Context runASM (ASM f) p = snd $ f p \end{code} %if false \begin{code} addSPPFEdge f t = ASM $ \((dv,pMap),r,u,gss,p) -> ((), (( -- dv insertWith (++) f [t] dv , pMapInsert f t pMap -- pMap ) ,r,u,gss,p)) hasState :: RhsState -> ASM Bool hasState alt = ASM $ \ctx@(_,_,u,_,_) -> (alt `inU` u,ctx) newState :: SPPFNode -> RhsState -> ASM () newState sppf alt = ASM $ \(dv,r,u,gss,p) -> ((), (dv, (alt,sppf):r, alt `toU` u, gss , p)) addState sppf alt@(slot,l,i) = ASM $ \(dv,r,u,gss,p) -> let new = not (alt `inU` u) in if new then ((), (dv, (alt,sppf):r, alt `toU` u, gss , p)) else ((), (dv, r, u, gss, p)) getState = ASM $ \(dv,r,u,gss,p) -> case r of [] -> (Nothing, (dv,r,u,gss,p)) (next:rest) -> (Just next, (dv,rest,u,gss,p)) {- case S.size r of 0 -> (Nothing, (dv,r,u,gss,p)) _ -> let ((l,i,slot,sppf),rest) = S.deleteFindMin r in (Just ((slot,l,i),sppf), (dv,rest,u,gss,p))-} popGSS gn i = ASM $ \(dv,r,u,gss,p) -> let res = gssLookup gn gss in (res, (dv,r,u,gss,pInsert gn i p)) where pInsert (x,l) i p = IM.alter inner l p where inner mm = case mm of Nothing -> Just $ M.singleton x [i] Just m -> Just $ M.insertWith (++) x [i] m gssLookup (x,l) gss = maybe [] inner $ IM.lookup l gss where inner = maybe [] id . M.lookup x addGSSEdge (x,l) t = ASM $ \(dv,r,u,gss,p) -> ((), (dv,r,u,gssInsert x l t gss,p)) where gssInsert x l t gss = IM.alter inner l gss where inner mm = case mm of Nothing -> Just $ M.singleton x [t] Just m -> Just $ M.insertWith (++) x [t] m getPops (x,i) = ASM $ \ctx@(dv,r,u,gss,p) -> (pLookup (x,i) p, ctx) where pLookup (x,i) p = maybe [] (maybe [] id . M.lookup x) $ IM.lookup i p logMisMatch tau token i= ASM $ \(dv,r,u,gss,p) -> ((), (dv,r,u,gss,p)) \end{code} %endif %if false \begin{code} instance Show GSlot where show (U0) = "u0" show (GSlot gn) = show gn instance Show SPPFNode where show (SNode (s, l, r)) = "(s: " ++ show s ++ ", " ++ show l ++ ", " ++ show r ++ ")" show (INode (s, l, r)) = "(i: " ++ show s ++ ", " ++ show l ++ ", " ++ show r ++ ")" show (PNode (p, l, k, r)) = "(p: " ++ show p ++ ", " ++ show l ++ ", " ++ show k ++ ", " ++ show r ++ ")" show Dummy = "$" instance Applicative ASM where (<*>) = ap pure = return instance Functor ASM where fmap = liftM instance Monad ASM where return a = ASM $ \p -> (a, p) (ASM m) >>= f = ASM $ \p -> let (a, p') = m p (ASM m') = f a in m' p' \end{code} %endif %if false \begin{code} parse ::Bool -> Grammar -> [Token] -> IO () parse debug grammar@(Grammar start _ _) input' =do let (resContext,prs,selects,follows) = gll debug grammar input' when (debug) $ do writeFile "/tmp/alts.txt" (unlines $ map show prs) writeFile "/tmp/sets.txt" (show selects ++ "\n\n" ++ show follows) proceed debug start (length input') resContext gllSPPF :: Grammar -> [Token] -> SPPF gllSPPF grammar input = let ((sppf,_,_,_,_),_,_,_) = gll False grammar input in sppf gll :: Bool -> Grammar -> [Token] -> (Context, [Alt], SelectMap, FollowMap) gll debug (Grammar start _ rules) input' = (runASM (pLhs (start, 0) >> pCont) context, prs, selects, follows) where prs = [ alt | Rule _ alts _ <- rules, alt <- (reverse alts) ] context = ((M.empty,IM.empty), [], IM.empty, IM.empty, IM.empty) input = Array.array (0,m) $ zip [0..] $ input' ++ [EOS] m = length input' \end{code} %endif \begin{code} pCont :: ASM () pLhs :: LhsState -> ASM () pRhs :: RhsState -> SPPFNode -> ASM () \end{code} Function |pCont| acts as the code-block starting with |L0| in a generated GLL parser. It takes care of the continuation of the algorithm. Function |pLhs| acts as the code-block starting with the label $L_{X}$, if |pLhs| is applied to |X|. Function |pRhs| executes the other instructions of a generated GLL parser (including labels of the form $L_{S_1}$ and $R_{X_1}$ and instructions that aren't labelled). Using pattern-matching the different cases for the different symbols in the right-hand side are given separate definitions. As such, each call to |pRhs| `carries the dot' of the slot in the current state `over' the next symbol. There is also a case for when there is no symbol for the dot to be carried over, at which the pop and return action needs to take place. Note that an |SPPFNode| is given as a separate argument to |pRhs| and no |SPPFNode| is stored in the descriptors (|RhsState|). \subsection{Main parse function} The whole procedure is started from within the function |parse| which receives a start-sybmol, a list of productions and an input string (of tokens) as arguments. \begin{spec} parse :: Nt -> [Pr] -> [Token] -> IO () -- i/o monad parse start prs input' = do proceed (runASM (pLhs (start, 0, (U0,0))) context) where context = (empty, [], S.empty, empty, empty) input = input' ++ [EOS] m = length input' \end{spec} In its |where|-clause are the input string appended with the end-of-string symbol |EOS| and the integer |m| which matches the number of tokens in the (original) input string. Because the functions |pCont|, |pRhs| and |pLhs| are defined in the same |where|-clause, this information is availaible to all these functions. Function |proceed| receives the context after running the entire algorithm (running the computation represented by the |ASM| monad with |runASM|), which is achieved by calling |pLhs| for the start symbol of the grammar with current index |0| and initial |GSSNode| |(U0,0)|. The function |runASM| also receives as argument the initial (empty) context. \subsection{Continuation} \begin{code} pCont = do mnext <- getState case mnext of Nothing -> return () -- no continuation Just (next,sppf) -> do f <- pRhs next sppf f `seq` pCont \end{code} The function |getSPPF| does the clerical work of finding the right |SPPFNode| corresponding to the slot of the next descriptor. \subsection{Left-hand side} Get the alternatives for which the select-test succeeds and add them to the descriptor set |Rcal| and |Ucal|. The implementation of |addState| ensures that no duplicates are added. \begin{code} pLhs (bigx, i) = do let alts = [ (Slot bigx [] beta, i, i) | (Alt bigx beta) <- altsOf bigx , select (input Array.! i) beta bigx ] forM_ alts (addState Dummy) \end{code} The code |forM_ alts addState| is equivalent to \\|forM_ alts (\r -> addState r)| and |forM_ alts (\r -> ...)| can be read as $(\forall r \in \mathit{alts}.\;\ldots)$. Double dash are the characters to start a single line comment (|-- comment|). \subsection{Right-hand side} \subsubsection{$\epsilon$-rule} \begin{code} pRhs (Slot bigx [] [Term Epsilon], l, i) _ = do root <- joinSPPFs slot Dummy l i i pRhs (slot, l, i) root where slot = Slot bigx [Term Epsilon] [] \end{code} \subsubsection{Terminal-case} \begin{code} pRhs (Slot bigx alpha ((Term tau):beta), l, i) sppf = when (input Array.! i == tau) $ do -- token test root <- joinSPPFs slot sppf l i (i+1) pRhs (slot, l, i+1) root where slot = Slot bigx (alpha++[Term tau]) beta \end{code} \begin{code} pRhs (Slot bigx alpha ((Nt bigy):beta), l, i) sppf = do when (select (input Array.! i) ((Nt bigy):beta) bigx) $ do addGSSEdge (bigy,i) ((slot,l), sppf) rs <- getPops (bigy, i) -- has ret been popped? forM_ rs $ \r -> do -- yes, use given extents root <- joinSPPFs slot sppf l i r addState root (slot, l, r) pLhs (bigy, i) where slot = Slot bigx (alpha++[Nt bigy]) beta \end{code} \begin{code} -- pRhs (Slot bigy alpha [], 0, i) sppf _ = return () \end{code} \begin{code} pRhs (Slot bigy alpha [], l, i) ynode = do returns <- popGSS (bigy,l) i -- pop @&@ get child GSSNodes forM_ returns $ \((slot',l'),sppf) -> do root <- joinSPPFs slot' sppf l' l i -- create SPPF for lhs addState root (slot', l', i) -- add new descriptors \end{code} %if false \begin{code} (prodMap,_,_,follows,selects) = fixedMaps start prs follow x = follows ! x select t rhs x = t `member` (selects ! (x,rhs)) altsOf x = prodMap ! x toReturnContext (x,l,r) = IM.alter inner r where inner mm = case mm of Nothing -> Just $ singleLS Just m -> Just $ IM.insertWith (S.union) l singleS m singleLS = IM.fromList [(l,singleS)] singleS = S.singleton x merge m1 m2 = IM.unionWith inner m1 m2 where inner = IM.unionWith S.union \end{code} %endif \begin{code} joinSPPFs (Slot bigx alpha beta) sppf l k r = case (sppf, beta) of -- (Dummy, _:_) -> return snode (Dummy, []) -> do addSPPFEdge xnode pnode addSPPFEdge pnode snode return xnode (_, []) -> do addSPPFEdge xnode pnode addSPPFEdge pnode sppf addSPPFEdge pnode snode return xnode _ -> do addSPPFEdge inode pnode addSPPFEdge pnode sppf addSPPFEdge pnode snode return inode where x = last alpha -- symbol before the dot snode = SNode (x, k, r) xnode = SNode (Nt bigx, l, r) inode = INode ((Slot bigx alpha beta), l, r) pnode = PNode ((Slot bigx alpha beta), l, k, r) \end{code} %if false \begin{code} inReturnContext (SNode (Nt x,l,r)) = maybe False inner . IM.lookup r where inner = maybe False ((x `S.member`)) . IM.lookup l \end{code} %endif %if false \begin{code} proceed :: Bool -> Nt -> Int -> Context -> IO () proceed debug start m ((dv,pMap), r, u, gss, p) = do when debug $ do writeFile "/tmp/sppf.txt" (showD dv ++ "\n" ++ showP pMap) let success = maybe False (const True) $ lookup (SNode (Nt start,0,m)) dv unless success $ do putStrLn "no parse..." when (success) $ do putStrLn ("Descriptors: " ++ show (usize)) putStrLn ("SPPFNodes: " ++ show (length (M.keys dv) + m)) putStrLn ("GSSNodes: " ++ show gsssize) where usize = sum [ S.size s | (l, r2s) <- IM.assocs u, (r,s) <- IM.assocs r2s ] gsssize = 1 + sum [ length $ M.keys x2s | (l,x2s) <- IM.assocs gss ] \end{code} %endif