module GLL.Parser (
Grammar(..), Prods(..), Prod(..), Symbols(..), Symbol(..), Slot(..),
start, prod, nterm, term,
Parseable(..),
parse,
parseWithOptions,
ParseOptions, ParseOption,
strictBinarisation, fullSPPF, allNodes, packedNodesOnly, maximumErrors,
ParseResult(..), SPPF(..), SPPFNode(..), SymbMap, ImdMap, PackMap, EdgeMap, showSPPF,
) where
import Data.Foldable hiding (forM_, toList, sum)
import Prelude hiding (lookup, foldr, fmap, foldl, elem, any)
import Control.Applicative
import Control.Monad
import qualified Data.IntMap as IM
import qualified Data.Map as M
import qualified Data.Array as A
import qualified Data.Set as S
import qualified Data.IntSet as IS
import Data.Text (pack)
import Text.PrettyPrint.HughesPJ as PP
import GLL.Types.Abstract
import GLL.Types.Grammar
string2nt :: String -> Nt
string2nt = pack
start :: String -> Nt
start = string2nt
prod :: String -> Symbols t -> Prod t
prod x = Prod (string2nt x)
nterm :: String -> Symbol t
nterm = Nt . string2nt
term :: t -> Symbol t
term = Term
type Input t = A.Array Int t
type LhsParams t = (Nt , Int , GSSNode t)
type RhsParams t = (Slot t , Int , GSSNode t)
type Rcal t = [(RhsParams t, SPPFNode t)]
type Ucal t = IM.IntMap (IM.IntMap (S.Set (Slot t, GSlot t)))
type GSS t = IM.IntMap (M.Map (GSlot t) [GSSEdge t])
type GSSEdge t = (GSSNode t, SPPFNode t)
type GSSNode t = (GSlot t, Int)
data GSlot t = GSlot (Slot t)
| U0
deriving (Ord, Eq)
type MisMatches t = IM.IntMap (S.Set t)
type Pcal t = IM.IntMap (M.Map (GSlot t) [Int])
data Mutable t = Mutable { mut_success :: Bool
, mut_sppf :: SPPF t
, mut_worklist :: Rcal t
, mut_descriptors :: Ucal t
, mut_gss :: GSS t
, mut_popset :: Pcal t
, mut_mismatches :: MisMatches t
}
data GLL t a = GLL (Flags -> Mutable t -> (a, Mutable t))
runGLL :: GLL t a -> Flags -> Mutable t -> Mutable t
runGLL (GLL f) o p = snd $ f o p
addSPPFEdge f t = GLL $ \flags mut ->
let sppf' = (if symbol_nodes flags then sNodeInsert f t else id) $
(if intermediate_nodes flags then iNodeInsert f t else id) $
(if edges flags then eMapInsert f t else id) $
pMapInsert f t (mut_sppf mut)
in ((),mut{mut_sppf = sppf'})
addDescr sppf alt@(slot,i,(gs,l)) = GLL $ \_ mut ->
let new = maybe True inner $ IM.lookup i (mut_descriptors mut)
where inner m = maybe True (not . ((slot,gs) `S.member`)) $ IM.lookup l m
newU = IM.alter inner i (mut_descriptors mut)
where inner mm = case mm of
Nothing -> Just $ IM.singleton l single
Just m -> Just $ IM.insertWith (S.union) l single m
single = S.singleton (slot,gs)
in if new then ((), mut{mut_worklist = (alt,sppf):(mut_worklist mut)
,mut_descriptors = newU})
else ((), mut)
getDescr = GLL $ \_ mut ->
case mut_worklist mut of
[] -> (Nothing, mut)
(next@(alt,sppf):rest) -> (Just next, mut{mut_worklist = rest})
addPop (gs,l) i = GLL $ \_ mut ->
let newP = IM.alter inner l (mut_popset mut)
where inner mm = case mm of
Nothing -> Just $ M.singleton gs [i]
Just m -> Just $ M.insertWith (++) gs [i] m
in ((), mut{mut_popset = newP})
getChildren (gs,l) = GLL $ \_ mut ->
let res = maybe [] inner $ IM.lookup l (mut_gss mut)
where inner m = maybe [] id $ M.lookup gs m
in (res, mut)
addGSSEdge f@(gs,i) t = GLL $ \_ mut ->
let newGSS = IM.alter inner i (mut_gss mut)
where inner mm = case mm of
Nothing -> Just $ M.singleton gs [t]
Just m -> Just $ M.insertWith (++) gs [t] m
in ((), mut{mut_gss = newGSS})
getPops (gs,l) = GLL $ \_ mut ->
let res = maybe [] inner $ IM.lookup l (mut_popset mut)
where inner = maybe [] id . M.lookup gs
in (res, mut)
addSuccess = GLL $ \_ mut -> ((),mut{mut_success = True})
getFlags = GLL $ \fs ctx -> (fs, ctx)
addMisMatch :: (Ord t) => Int -> S.Set t -> GLL t ()
addMisMatch k ts = GLL $ \flags mut ->
let newM = IM.insertWith S.union k ts (mut_mismatches mut)
newM' | length (IM.keys newM) > max_errors flags = IM.deleteMin newM
| otherwise = newM
in ((), mut{mut_mismatches = newM'})
instance (Show t) => Show (GSlot t) where
show (U0) = "u0"
show (GSlot gn) = show gn
instance (Show t) => Show (SPPFNode t) where
show (SNode (s, l, r)) = "(s: " ++ show s ++ ", " ++ show l ++ ", " ++ show r ++ ")"
show (INode (s, l, r)) = "(i: " ++ show s ++ ", " ++ show l ++ ", " ++ show r ++ ")"
show (PNode (p, l, k, r)) = "(p: " ++ show p ++ ", " ++ show l ++ ", " ++ show k ++ ", " ++ show r ++ ")"
show Dummy = "$"
instance Applicative (GLL t) where
(<*>) = ap
pure = return
instance Functor (GLL t) where
fmap = liftM
instance Monad (GLL t) where
return a = GLL $ \_ p -> (a, p)
(GLL m) >>= f = GLL $ \o p -> let (a, p') = m o p
(GLL m') = f a
in m' o p'
data Flags = Flags { symbol_nodes :: Bool
, intermediate_nodes :: Bool
, edges :: Bool
, flexible_binarisation :: Bool
, max_errors :: Int
}
defaultFlags = Flags False False False True 3
runOptions :: ParseOptions -> Flags
runOptions = foldr ($) defaultFlags
type ParseOption = Flags -> Flags
type ParseOptions = [ParseOption]
fullSPPF :: ParseOption
fullSPPF flags = flags{symbol_nodes = True, intermediate_nodes = True, edges = True}
allNodes :: ParseOption
allNodes flags = flags{symbol_nodes = True, intermediate_nodes = True}
packedNodesOnly :: ParseOption
packedNodesOnly flags = flags{symbol_nodes = False, intermediate_nodes = False, edges = False}
strictBinarisation :: ParseOption
strictBinarisation flags = flags{flexible_binarisation = False}
maximumErrors :: Int -> ParseOption
maximumErrors n flags = flags {max_errors = n}
parse :: (Parseable t) => Grammar t -> [t] -> ParseResult t
parse = parseWithOptions []
parseWithOptions :: Parseable t => ParseOptions -> Grammar t -> [t] -> ParseResult t
parseWithOptions opts grammar@(start,_) str =
let flags = runOptions opts
(mutable,_,_,_) = gll flags m False grammar input
m = length str
input = A.array (0,m) $ zip [0..] $ str ++ [eos]
in resultFromMutable input flags mutable (Nt start, 0, m)
gll :: Parseable t => Flags -> Int -> Bool -> Grammar t -> Input t ->
(Mutable t, [Prod t], SelectMap t, FollowMap t)
gll flags m debug (start, prods) input =
(runGLL (pLhs (start, 0, (U0,0))) flags context, prs, selects, follows)
where
prs = [ alt | alt <- (reverse prods) ]
context = Mutable False emptySPPF [] IM.empty IM.empty IM.empty IM.empty
dispatch = do
mnext <- getDescr
case mnext of
Nothing -> return ()
Just (next,sppf) -> pRhs next sppf
pLhs (bigx, i, gn) = do
let alts = [ ((Slot bigx [] beta, i, gn), first_ts)
| Prod bigx beta <- altsOf bigx
, let first_ts = select beta bigx
]
first_ts = S.unions (map snd alts)
cands = [ descr | (descr, first_ts) <- alts
, any (matches (input A.! i)) first_ts ]
if null cands
then addMisMatch i first_ts
else forM_ cands (addDescr Dummy)
dispatch
pRhs (Slot bigx alpha ((Term tau):beta), i, (gs,l)) sppf =
if (input A.! i `matches` tau)
then do
root <- joinSPPFs slot sppf l i (i+1)
pRhs (slot, i+1, (gs,l)) root
else do
addMisMatch i (S.singleton tau)
dispatch
where slot = Slot bigx (alpha++[Term tau]) beta
pRhs (Slot bigx alpha ((Nt bigy):beta), i, (gs, l)) sppf =
if any (matches (input A.! i)) first_ts
then do
addGSSEdge ret ((gs,l), sppf)
rs <- getPops ret
forM_ rs $ \r -> do
root <- joinSPPFs slot sppf l i r
addDescr root (slot, r, (gs,l))
pLhs (bigy, i, ret)
else do
addMisMatch i first_ts
dispatch
where ret = (GSlot slot, i)
slot = Slot bigx (alpha++[Nt bigy]) beta
first_ts = select ((Nt bigy):beta) bigx
pRhs (Slot bigy alpha [], i, (U0,_)) sppf = do
when (bigy /= start) (error "assert: start symbol with U0")
if i == m
then addSuccess >> dispatch
else addMisMatch i (S.singleton eos) >> dispatch
pRhs (Slot bigx alpha [], i, (gs,l)) Dummy = do
root <- joinSPPFs slot Dummy l i i
pRhs (slot, i, (gs,l)) root
where slot = Slot bigx [] []
pRhs (Slot bigy alpha [], i, gn@(GSlot slot,l)) ynode = do
addPop gn i
returns <- getChildren gn
forM_ returns $ \((gs',l'),sppf) -> do
root <- joinSPPFs slot sppf l' l i
addDescr root (slot, i, (gs',l'))
dispatch
(prodMap,_,_,follows,selects) = fixedMaps start prs
follow x = follows M.! x
select rhs x = selects M.! (x,rhs)
altsOf x = prodMap M.! x
merge m1 m2 = IM.unionWith inner m1 m2
where inner = IM.unionWith S.union
joinSPPFs (Slot bigx alpha beta) sppf l k r = do
flags <- getFlags
case (flexible_binarisation flags, sppf, beta) of
(True,Dummy, _:_) -> return snode
(_,Dummy, []) -> do addSPPFEdge xnode pnode
addSPPFEdge pnode snode
return xnode
(_,_, []) -> do addSPPFEdge xnode pnode
addSPPFEdge pnode sppf
addSPPFEdge pnode snode
return xnode
_ -> do addSPPFEdge inode pnode
addSPPFEdge pnode sppf
addSPPFEdge pnode snode
return inode
where x = last alpha
snode = SNode (x, k, r)
xnode = SNode (Nt bigx, l, r)
inode = INode ((Slot bigx alpha beta), l, r)
pnode = PNode ((Slot bigx alpha beta), l, k, r)
data ParseResult t = ParseResult{ sppf_result :: SPPF t
, res_success :: Bool
, nr_descriptors :: Int
, nr_nterm_nodes :: Int
, nr_term_nodes :: Int
, nr_intermediate_nodes :: Int
, nr_packed_nodes :: Int
, nr_sppf_edges :: Int
, nr_gss_nodes :: Int
, nr_gss_edges :: Int
, error_message :: String
}
resultFromMutable :: (Show t, Ord t) => Input t -> Flags -> Mutable t -> SNode t -> ParseResult t
resultFromMutable inp flags mutable s_node@(s, l, m) =
let u = mut_descriptors mutable
gss = mut_gss mutable
usize = sum [ S.size s | (l, r2s) <- IM.assocs u
, (r,s) <- IM.assocs r2s ]
s_nodes = sum [ S.size s | (l, r2s) <- IM.assocs sMap
, (r, s) <- IM.assocs r2s ]
i_nodes = sum [ S.size s | (l, r2s) <- IM.assocs iMap
, (r, s) <- IM.assocs r2s ]
p_nodes = sum [ IS.size ks | (l, r2j) <- IM.assocs pMap
, (r, j2s) <- IM.assocs r2j
, (j, s2k) <- IM.assocs j2s
, (s, ks) <- M.assocs s2k ]
sppf_edges = sum [ S.size ts | (_, ts) <- M.assocs eMap ]
gss_nodes = 1 + sum [ length $ M.keys x2s| (l,x2s) <- IM.assocs gss]
gss_edges = 1 + sum [ length s | (l,x2s) <- IM.assocs gss
, (x,s) <- M.assocs x2s ]
sppf@(sMap, iMap, pMap, eMap) = mut_sppf mutable
in ParseResult sppf (mut_success mutable) usize s_nodes m i_nodes p_nodes sppf_edges gss_nodes gss_edges (renderErrors inp flags (mut_mismatches mutable))
renderErrors :: Show t => Input t -> Flags -> MisMatches t -> String
renderErrors inp flags mm = render doc
where n = max_errors flags
locs = reverse (IM.assocs mm)
doc = text ("Unsuccessful parse, showing "++ show n ++ " furthest matches") $+$
foldr (\loc -> (ppLoc loc $+$)) PP.empty locs
ppLoc (k, ts) = text ("did not match at position " ++ show k ++ ":") $+$
nest 4 (text (show token)) $+$
nest 4 (text "expected:") $+$
nest 8 (vcat (map ppExp (S.toList ts)))
where token = inp A.! k
ppExp = text . show
instance Show (ParseResult t) where
show res | res_success res = result_string
| otherwise = result_string ++ "\n" ++ error_message res
where result_string = unlines $
[ "Success " ++ show (res_success res)
, "Descriptors: " ++ show (nr_descriptors res)
, "Nonterminal nodes: " ++ show (nr_nterm_nodes res)
, "Terminal nodes: " ++ show (nr_term_nodes res)
, "Intermediate nodes: " ++ show (nr_intermediate_nodes res)
, "Packed nodes: " ++ show (nr_packed_nodes res)
, "SPPF edges: " ++ show (nr_sppf_edges res)
, "GSS nodes: " ++ show (nr_gss_nodes res)
, "GSS edges: " ++ show (nr_gss_edges res)
]