-- | Parsing. {-# LANGUAGE BangPatterns, DeriveFunctor, GADTs, Rank2Types #-} module Text.Earley.Parser ( Report(..) , Result(..) , parser , allParses , fullParses ) where import Control.Applicative import Control.Arrow import Control.Monad.Fix import Control.Monad.ST.Lazy import Data.Functor.Yoneda import Data.ListLike(ListLike) import qualified Data.ListLike as ListLike import Data.STRef.Lazy import Text.Earley.Grammar ------------------------------------------------------------------------------- -- * Concrete rules and productions ------------------------------------------------------------------------------- -- | The concrete rule type that the parser uses data Rule s r e t a = Rule { ruleProd :: ProdR s r e t a , ruleNullable :: {-# UNPACK #-} !(STRef s (Maybe [a])) , ruleConts :: {-# UNPACK #-} !(STRef s (Conts s r e t a r)) } type ProdR s r e t a = Prod (Rule s r) e t a nullable :: Rule s r e t a -> ST s [a] nullable r = do mn <- readSTRef $ ruleNullable r case mn of Just xs -> return xs Nothing -> do writeSTRef (ruleNullable r) $ Just mempty res <- nullableProd $ ruleProd r writeSTRef (ruleNullable r) $ Just res return res nullableProd :: ProdR s r e t a -> ST s [a] nullableProd (Terminal _ _) = return mempty nullableProd (NonTerminal r p) = do as <- nullable r concat <$> mapM (\a -> nullableProd $ fmap ($ a) p) as nullableProd (Pure a) = return [a] nullableProd (Plus a b) = mappend <$> nullableProd a <*> nullableProd b nullableProd (Many p q) = do as <- nullableProd $ (:[]) <$> p <|> pure [] concat <$> mapM (\a -> nullableProd $ fmap ($ a) q) as nullableProd Empty = return mempty nullableProd (Named p _) = nullableProd p ------------------------------------------------------------------------------- -- * States and continuations ------------------------------------------------------------------------------- type Pos = Int -- | An Earley state with result type @a@. data State s r e t a where State :: {-# UNPACK #-} !Pos -> !(ProdR s r e t b) -> {-# UNPACK #-} !(Conts s r e t b a) -> State s r e t a Final :: a -> State s r e t a -- | A continuation accepting an @a@ and producing a @b@. data Cont s r e t a b where Cont :: {-# UNPACK #-} !Pos -> !(ProdR s r e t (a -> b)) -> {-# UNPACK #-} !(Conts s r e t b c) -> Cont s r e t a c FinalCont :: (a -> c) -> Cont s r e t a c type Conts s r e t a c = STRef s [Cont s r e t a c] contraMapCont :: (b -> a) -> Cont s r e t a c -> Cont s r e t b c contraMapCont f (Cont pos p cs) = (Cont pos $! ((. f) <$> p)) cs contraMapCont f (FinalCont g) = FinalCont (g . f) contToState :: a -> Cont s r e t a c -> State s r e t c contToState a (Cont pos p cs) = State pos (($ a) <$> p) cs contToState a (FinalCont f) = Final (f a) -- | Strings of non-ambiguous continuations can be optimised by removing -- indirections. simplifyCont :: Conts s r e t b a -> ST s [Cont s r e t b a] simplifyCont cont = readSTRef cont >>= go False where go !_ [Cont _ (Pure f) cont'] = do ks' <- simplifyCont cont' go True $ map (contraMapCont f) ks' go True ks = do writeSTRef cont ks return ks go False ks = return ks ------------------------------------------------------------------------------- -- * Grammars ------------------------------------------------------------------------------- -- | Interpret an abstract 'Grammar'. grammar :: Grammar (Rule s r) e a -> ST s a grammar g = case g of RuleBind p k -> do c <- newSTRef =<< newSTRef mempty nr <- newSTRef Nothing grammar $ k $ NonTerminal (Rule p nr c) $ Pure id FixBind f k -> do a <- mfix $ fmap grammar f grammar $ k a Return x -> return x -- | Given a grammar, construct an initial state. initialState :: ProdR s a e t a -> ST s (State s a e t a) initialState r = do rs <- newSTRef [FinalCont id] return $ State (-1) r rs ------------------------------------------------------------------------------- -- * Parsing ------------------------------------------------------------------------------- -- | A parsing report, which contains fields that are useful for presenting -- errors to the user if a parse is deemed a failure. Note however that we get -- a report even when we successfully parse something. data Report e i = Report { position :: Int -- ^ The final position in the input (0-based) that the -- parser reached. , expected :: [e] -- ^ The named productions processed at the final -- position. , unconsumed :: i -- ^ The part of the input string that was not consumed, -- which may be empty. } deriving Show -- | The result of a parse. data Result s e i a = Ended (Report e i) -- ^ The parser ended. | Parsed a Int i (i -> ST s (Result s e i a)) -- ^ The parser parsed something, namely an 'a'. The 'Int' is the position -- in the input where it did so, the 'i' is the rest of the input, and the -- function is the parser continuation. This allows incrementally feeding -- the parser more input (e.g. when the 'i' is empty). deriving (Functor) {-# INLINE uncons #-} uncons :: ListLike i t => i -> Maybe (t, i) uncons i | ListLike.null i = Nothing | otherwise = Just (ListLike.head i, ListLike.tail i) {-# INLINE safeTail #-} safeTail :: ListLike i t => i -> i safeTail ts' | ListLike.null ts' = ts' | otherwise = ListLike.tail ts' {-# SPECIALISE parse :: [State s a e t a] -> [State s a e t a] -> ST s () -> [e] -> Pos -> [t] -> ST s (Result s e [t] a) #-} -- | The internal parsing routine parse :: ListLike i t => [State s a e t a] -- ^ States to process at this position -> [State s a e t a] -- ^ States to process at the next position -> ST s () -- ^ Computation that resets the continuation refs of productions -> [e] -- ^ Named productions encountered at this position -> Pos -- ^ The current position in the input string -> i -- ^ The input string -> ST s (Result s e i a) parse [] [] !reset names !pos !ts = do reset return $ Ended Report {position = pos, expected = names, unconsumed = ts} parse [] !next !reset _names !pos !ts = do reset parse next [] (return ()) [] (pos + 1) (safeTail ts) parse (st:ss) !next !reset names !pos !ts = case st of Final a -> return $ Parsed a pos ts $ parse ss next reset names pos State spos pr scont -> case pr of Terminal f p -> case uncons ts of Just (t, _) | f t -> parse ss (State spos (($ t) <$> p) scont : next) reset names pos ts _ -> parse ss next reset names pos ts NonTerminal r p -> do rkref <- readSTRef $ ruleConts r ks <- readSTRef rkref writeSTRef rkref (Cont spos p scont : ks) nulls' <- nullable r let notExpanded = null ks p' = liftYoneda p nulls = fmap (\a -> State spos (lowerYoneda $ ($ a) <$> p') scont) nulls' if notExpanded then do let st' = State pos (ruleProd r) rkref parse (st' : nulls ++ ss) next ((writeSTRef (ruleConts r) =<< newSTRef mempty) >> reset) names pos ts else parse (nulls ++ ss) next reset names pos ts Pure a | spos /= pos -> do conts <- simplifyCont scont parse (map (contToState a) conts ++ ss) next reset names pos ts | otherwise -> parse ss next reset names pos ts Plus p q -> parse (State spos p scont : State spos q scont : ss) next reset names pos ts Many p q -> do rkref <- newSTRef [Cont spos (Many p ((\f as a -> f (a : as)) <$> q)) scont] let st' = State pos p rkref nst = State spos (($ []) <$> q) scont parse (st' : nst : ss) next reset names pos ts Empty -> parse ss next reset names pos ts Named pr' n -> parse (State spos pr' scont : ss) next reset (n : names) pos ts {-# INLINE parser #-} -- | Create a parser from the given grammar. parser :: ListLike i t => (forall r. Grammar r e (Prod r e t a)) -> i -> ST s (Result s e i a) parser g xs = do s <- initialState =<< grammar g parse [s] [] (return ()) [] 0 xs -- | Return all parses from the result of a given parser. The result may -- contain partial parses. The 'Int's are the position at which a result was -- produced. allParses :: (forall s. ST s (Result s e i a)) -> ([(a, Int)], Report e i) allParses p = runST $ p >>= go where go :: Result s e i a -> ST s ([(a, Int)], Report e i) go r = case r of Ended report -> return ([], report) Parsed a pos i k -> fmap (first ((a, pos) :)) $ go =<< k i {-# INLINE fullParses #-} -- | Return all parses that reached the end of the input from the result of a -- given parser. fullParses :: ListLike i t => (forall s. ST s (Result s e i a)) -> ([a], Report e i) fullParses p = runST $ p >>= go where go :: ListLike i t => Result s e i a -> ST s ([a], Report e i) go r = case r of Ended report -> return ([], report) Parsed a _ i k | ListLike.null i -> fmap (first (a :)) $ go =<< k i | otherwise -> go =<< k i