{- | Module : $Header$ Description : Parser combinators Copyright : (c) 1999-2004, Wolfgang Lux 2016 , Jan Tikovsky License : BSD-3-clause Maintainer : bjp@informatik.uni-kiel.de Stability : experimental Portability : portable The parsing combinators implemented in this module are based on the LL(1) parsing combinators developed by Swierstra and Duponcheel. They have been adapted to using continuation passing style in order to work with the lexing combinators described in the previous section. In addition, the facilities for error correction are omitted in this implementation. The two functions 'applyParser' and 'prefixParser' use the specified parser for parsing a string. When 'applyParser' is used, an error is reported if the parser does not consume the whole string, whereas 'prefixParser' discards the rest of the input string in this case. -} {-# LANGUAGE CPP #-} module Curry.Base.LLParseComb ( -- * Data types Parser -- * Parser application , fullParser, prefixParser -- * Basic parsers , position, spanPosition, succeed, failure, symbol -- * parser combinators , (), (<|>), (<|?>), (<*>), (<\>), (<\\>) , (<$>), (<$->), (<*->), (<-*>), (<**>), (), (<.>) , opt, choice, flag, optional, option, many, many1, sepBy, sepBy1 , sepBySp, sepBy1Sp , chainr, chainr1, chainl, chainl1, between, ops -- * Layout combinators , layoutOn, layoutOff, layoutEnd ) where #if __GLASGOW_HASKELL__ < 710 import Control.Applicative (Applicative, (<*>), (<$>), pure) #endif import Control.Monad import qualified Data.Map as Map import Data.Maybe import qualified Data.Set as Set import Curry.Base.LexComb import Curry.Base.Position import Curry.Base.Span (span2Pos, Span, startCol, setDistance) infixl 5 <\>, <\\> infixl 4 <$->, <*->, <-*>, <**>, , <.> infixl 3 <|>, <|?> infixl 2 , `opt` -- --------------------------------------------------------------------------- -- Parser types -- --------------------------------------------------------------------------- -- |Parsing function type ParseFun a s b = (b -> SuccessP s a) -> FailP a -> SuccessP s a -- |CPS-Parser type data Parser a s b = Parser -- Parsing function for empty word (Maybe (ParseFun a s b)) -- Lookup table (continuations for 'Symbol's recognized by the parser) (Map.Map s (Lexer s a -> ParseFun a s b)) instance Symbol s => Functor (Parser a s) where fmap f p = succeed f <*> p instance Symbol s => Applicative (Parser a s) where pure = succeed -- |Apply the result function of the first parser to the result of the -- second parser. Parser Nothing ps1 <*> p2 = Parser Nothing (fmap (flip seqPP p2) ps1) Parser (Just p1) ps1 <*> ~p2@(Parser e2 ps2) = Parser (fmap (seqEE p1) e2) (Map.union (fmap (flip seqPP p2) ps1) (fmap (seqEP p1) ps2)) instance Show s => Show (Parser a s b) where showsPrec p (Parser e ps) = showParen (p >= 10) $ showString "Parser " . shows (isJust e) . showChar ' ' . shows (Map.keysSet ps) -- --------------------------------------------------------------------------- -- Parser application -- --------------------------------------------------------------------------- -- |Apply a parser and lexer to a 'String', whereas the 'FilePath' is used -- to identify the origin of the 'String' in case of parsing errors. fullParser :: Symbol s => Parser a s a -> Lexer s a -> FilePath -> String -> CYM a fullParser p lexer = parse (lexer (choose p lexer successP failP) failP) where successP x pos s | isEOF s = returnP x | otherwise = failP pos (unexpected s) -- |Apply a parser and lexer to parse the beginning of a 'String'. -- The 'FilePath' is used to identify the origin of the 'String' in case of -- parsing errors. prefixParser :: Symbol s => Parser a s a -> Lexer s a -> FilePath -> String -> CYM a prefixParser p lexer = parse (lexer (choose p lexer discardP failP) failP) where discardP x _ _ = returnP x -- |Choose the appropriate parsing function w.r.t. to the next 'Symbol'. choose :: Symbol s => Parser a s b -> Lexer s a -> ParseFun a s b choose (Parser e ps) lexer success failp pos s = case Map.lookup s ps of Just p -> p lexer success failp pos s Nothing -> case e of Just p -> p success failp pos s Nothing -> failp pos (unexpected s) -- |Fail on an unexpected 'Symbol' unexpected :: Symbol s => s -> String unexpected s | isEOF s = "Unexpected end-of-file" | otherwise = "Unexpected token " ++ show s -- --------------------------------------------------------------------------- -- Basic parsers -- --------------------------------------------------------------------------- -- |Return the current position without consuming the input position :: Parser a s Position position = Parser (Just p) Map.empty where p success _ sp = success (span2Pos sp) sp spanPosition :: Symbol s => Parser a s Span spanPosition = Parser (Just p) Map.empty where p success _ sp s = success (setDistance sp (dist (startCol sp) s)) sp s -- |Always succeeding parser succeed :: b -> Parser a s b succeed x = Parser (Just p) Map.empty where p success _ = success x -- |Always failing parser with a given message failure :: String -> Parser a s b failure msg = Parser (Just p) Map.empty where p _ failp pos _ = failp pos msg -- |Create a parser accepting the given 'Symbol' symbol :: s -> Parser a s s symbol s = Parser Nothing (Map.singleton s p) where p lexer success failp _ s' = lexer (success s') failp -- --------------------------------------------------------------------------- -- Parser combinators -- --------------------------------------------------------------------------- -- |Behave like the given parser, but use the given 'String' as the error -- message if the parser fails () :: Symbol s => Parser a s b -> String -> Parser a s b p msg = p <|> failure msg -- |Deterministic choice between two parsers. -- The appropriate parser is chosen based on the next 'Symbol' (<|>) :: Symbol s => Parser a s b -> Parser a s b -> Parser a s b Parser e1 ps1 <|> Parser e2 ps2 | isJust e1 && isJust e2 = failure "Ambiguous parser for empty word" | not (Set.null common) = failure $ "Ambiguous parser for " ++ show common | otherwise = Parser (e1 `mplus` e2) (Map.union ps1 ps2) where common = Map.keysSet ps1 `Set.intersection` Map.keysSet ps2 -- |Non-deterministic choice between two parsers. -- -- The other parsing combinators require that the grammar being parsed -- is LL(1). In some cases it may be difficult or even -- impossible to transform a grammar into LL(1) form. As a remedy, we -- include a non-deterministic version of the choice combinator in -- addition to the deterministic combinator adapted from the paper. For -- every symbol from the intersection of the parser's first sets, the -- combinator '(<|?>)' applies both parsing functions to the input -- stream and uses that one which processes the longer prefix of the -- input stream irrespective of whether it succeeds or fails. If both -- functions recognize the same prefix, we choose the one that succeeds -- and report an ambiguous parse error if both succeed. (<|?>) :: Symbol s => Parser a s b -> Parser a s b -> Parser a s b Parser e1 ps1 <|?> Parser e2 ps2 | isJust e1 && isJust e2 = failure "Ambiguous parser for empty word" | otherwise = Parser (e1 `mplus` e2) (Map.union ps1' ps2) where ps1' = Map.fromList [ (s, maybe p (try p) (Map.lookup s ps2)) | (s, p) <- Map.toList ps1 ] try p1 p2 lexer success failp pos s = closeP1 p2s `thenP` \p2s' -> closeP1 p2f `thenP` \p2f' -> parse' p1 (retry p2s') (retry p2f') where p2s r1 = parse' p2 (select True r1) (select False r1) p2f r1 = parse' p2 (flip (select False) r1) (select False r1) parse' p psucc pfail = p lexer (successK psucc) (failK pfail) pos s successK k x pos' s' = k (pos', success x pos' s') failK k pos' msg = k (pos', failp pos' msg) retry k (pos',p) = closeP0 p `thenP` curry k pos' select suc (pos1, p1) (pos2, p2) = case pos1 `compare` pos2 of GT -> p1 EQ | suc -> failP pos1 $ "Ambiguous parse before " ++ showPosition (span2Pos pos1) | otherwise -> p1 LT -> p2 seqEE :: ParseFun a s (b -> c) -> ParseFun a s b -> ParseFun a s c seqEE p1 p2 success failp = p1 (\f -> p2 (success . f) failp) failp seqEP :: ParseFun a s (b -> c) -> (Lexer s a -> ParseFun a s b) -> Lexer s a -> ParseFun a s c seqEP p1 p2 lexer success failp = p1 (\f -> p2 lexer (success . f) failp) failp seqPP :: Symbol s => (Lexer s a -> ParseFun a s (b -> c)) -> Parser a s b -> Lexer s a -> ParseFun a s c seqPP p1 p2 lexer success failp = p1 lexer (\f -> choose p2 lexer (success . f) failp) failp -- --------------------------------------------------------------------------- -- The combinators \verb|<\\>| and \verb|<\>| can be used to restrict -- the first set of a parser. This is useful for combining two parsers -- with an overlapping first set with the deterministic combinator <|>. -- --------------------------------------------------------------------------- -- |Restrict the first parser by the first 'Symbol's of the second (<\>) :: Symbol s => Parser a s b -> Parser a s c -> Parser a s b p <\> Parser _ ps = p <\\> Map.keys ps -- |Restrict a parser by a list of first 'Symbol's (<\\>) :: Symbol s => Parser a s b -> [s] -> Parser a s b Parser e ps <\\> xs = Parser e (foldr Map.delete ps xs) -- --------------------------------------------------------------------------- -- Other combinators -- Note that some of these combinators have not been published in the -- paper, but were taken from the implementation found on the web. -- --------------------------------------------------------------------------- -- |Replace the result of the parser with the first argument (<$->) :: Symbol s => a -> Parser b s c -> Parser b s a f <$-> p = const f <$> p -- |Apply two parsers in sequence, but return only the result of the first -- parser (<*->) :: Symbol s => Parser a s b -> Parser a s c -> Parser a s b p <*-> q = const <$> p <*> q -- |Apply two parsers in sequence, but return only the result of the second -- parser (<-*>) :: Symbol s => Parser a s b -> Parser a s c -> Parser a s c p <-*> q = const id <$> p <*> q -- |Apply the parsers in sequence and apply the result function of the second -- parse to the result of the first (<**>) :: Symbol s => Parser a s b -> Parser a s (b -> c) -> Parser a s c p <**> q = flip ($) <$> p <*> q -- |Same as (<**>), but only applies the function if the second parser -- succeeded. () :: Symbol s => Parser a s b -> Parser a s (b -> b) -> Parser a s b p q = p <**> (q `opt` id) -- |Flipped function composition on parsers (<.>) :: Symbol s => Parser a s (b -> c) -> Parser a s (c -> d) -> Parser a s (b -> d) p1 <.> p2 = p1 <**> ((.) <$> p2) -- |Try the first parser, but return the second argument if it didn't succeed opt :: Symbol s => Parser a s b -> b -> Parser a s b p `opt` x = p <|> succeed x -- |Choose the first succeeding parser from a non-empty list of parsers choice :: Symbol s => [Parser a s b] -> Parser a s b choice = foldr1 (<|>) -- |Try to apply a given parser and return a boolean value if the parser -- succeeded. flag :: Symbol s => Parser a s b -> Parser a s Bool flag p = True <$-> p `opt` False -- |Try to apply a parser but forget if it succeeded optional :: Symbol s => Parser a s b -> Parser a s () optional p = const () <$> p `opt` () -- |Try to apply a parser and return its result in a 'Maybe' type option :: Symbol s => Parser a s b -> Parser a s (Maybe b) option p = Just <$> p `opt` Nothing -- |Repeatedly apply a parser for 0 or more occurences many :: Symbol s => Parser a s b -> Parser a s [b] many p = many1 p `opt` [] -- |Repeatedly apply a parser for 1 or more occurences many1 :: Symbol s => Parser a s b -> Parser a s [b] many1 p = (:) <$> p <*> many p -- |Parse a list with is separated by a seperator sepBy :: Symbol s => Parser a s b -> Parser a s c -> Parser a s [b] p `sepBy` q = p `sepBy1` q `opt` [] -- |Parse a non-empty list with is separated by a seperator sepBy1 :: Symbol s => Parser a s b -> Parser a s c -> Parser a s [b] p `sepBy1` q = (:) <$> p <*> many (q <-*> p) -- |Parse a list with is separated by a seperator sepBySp :: Symbol s => Parser a s b -> Parser a s c -> Parser a s ([b], [Span]) p `sepBySp` q = p `sepBy1Sp` q `opt` ([], []) sepBy1Sp :: Symbol s => Parser a s b -> Parser a s c -> Parser a s ([b], [Span]) p `sepBy1Sp` q = comb <$> p <*> many ((,) <$> spanPosition <*-> q <*> p) where comb x xs = let (ss, ys) = unzip xs in (x:ys,ss) -- |@chainr p op x@ parses zero or more occurrences of @p@, separated by @op@. -- Returns a value produced by a *right* associative application of all -- functions returned by op. If there are no occurrences of @p@, @x@ is -- returned. chainr :: Symbol s => Parser a s b -> Parser a s (b -> b -> b) -> b -> Parser a s b chainr p op x = chainr1 p op `opt` x -- |Like 'chainr', but parses one or more occurrences of p. chainr1 :: Symbol s => Parser a s b -> Parser a s (b -> b -> b) -> Parser a s b chainr1 p op = r where r = p <**> (flip <$> op <*> r `opt` id) -- |@chainr p op x@ parses zero or more occurrences of @p@, separated by @op@. -- Returns a value produced by a *left* associative application of all -- functions returned by op. If there are no occurrences of @p@, @x@ is -- returned. chainl :: Symbol s => Parser a s b -> Parser a s (b -> b -> b) -> b -> Parser a s b chainl p op x = chainl1 p op `opt` x -- |Like 'chainl', but parses one or more occurrences of p. chainl1 :: Symbol s => Parser a s b -> Parser a s (b -> b -> b) -> Parser a s b chainl1 p op = foldF <$> p <*> many (flip <$> op <*> p) where foldF x [] = x foldF x (f:fs) = foldF (f x) fs -- |Parse an expression between an opening and a closing part. between :: Symbol s => Parser a s b -> Parser a s c -> Parser a s b -> Parser a s c between open p close = open <-*> p <*-> close -- |Parse one of the given operators ops :: Symbol s => [(s, b)] -> Parser a s b ops [] = failure "Curry.Base.LLParseComb.ops: empty list" ops [(s, x)] = x <$-> symbol s ops ((s, x) : rest) = x <$-> symbol s <|> ops rest -- --------------------------------------------------------------------------- -- Layout combinators -- Note that the layout functions grab the next token (and its position). -- After modifying the layout context, the continuation is called with -- the same token and an undefined result. -- --------------------------------------------------------------------------- -- |Disable layout-awareness for the following layoutOff :: Symbol s => Parser a s b layoutOff = Parser (Just off) Map.empty where off success _ pos = pushContext (-1) . success undefined pos -- |Add a new scope for layout layoutOn :: Symbol s => Parser a s b layoutOn = Parser (Just on) Map.empty where on success _ pos = pushContext (column (span2Pos pos)) . success undefined pos -- |End the current layout scope (or re-enable layout-awareness if it is -- currently disabled layoutEnd :: Symbol s => Parser a s b layoutEnd = Parser (Just end) Map.empty where end success _ pos = popContext . success undefined pos