{-# LANGUAGE RankNTypes #-} -- GADTs, -- MultiParamTypeClasses, -- FunctionalDependencies, -- FlexibleInstances, -- FlexibleContexts, -- UndecidableInstances, -- NoMonomorphismRestriction module Text.ParserCombinators.UU.Examples where import Char import Text.ParserCombinators.UU -- |We start out by defining the type of parser we want; by specifying the type of the state we resolve a lot of overloading type Parser a = P (Str Char) a -- | The fuction @`run`@ runs the parser and shows both the result, and the correcting steps which were taken during the parsing process. run :: Show t => P (Str Char) t -> String -> IO () run p inp = do let r@(a, errors) = parse ( (,) <$> p <*> pEnd) (listToStr inp) putStrLn "--" putStrLn "-- @" putStrLn ("-- Result: " ++ show a) if null errors then return () else do putStr ("-- Correcting steps: \n") show_errors errors putStrLn "-- @" -- | Our first two parsers are simple; one recognises a single 'a' character and the other one a single 'b'. Since we will use them later we -- convert the recognsied character into String so they can be easily combined. pa ::Parser String pa = lift <$> pSym 'a' pb :: Parser String pb = lift <$> pSym 'b' lift a = [a] -- | We can now run the parser @`pa`@ on input \"a\", which succeeds: -- -- @ -- Result: \"a\" -- @ test1 = run pa "a" -- | If we run the parser @`pa`@ on the empty input \"\", the expected symbol in inserted, that the position where it was inserted is reported, and -- we get information about what was expected at that position: @run pa \"\"@ -- -- @ -- Result: \"a\" -- Correcting steps: -- Inserted 'a' at position 0 expecting 'a' -- @ test2 = run pa "" -- | Now let's see what happens if we encounter an unexpected symbol, as in @run pa \"b\"@: -- -- @ -- Result: \"a\" -- Correcting steps: -- Deleted 'b' at position 0 expecting 'a' -- Inserted 'a' at position 1 expecting 'a' -- @ test3 = run pa "b" -- | The combinator @\<++>@ applies two parsers sequentially to the input and concatenates their results: @run (pa <++> pa) \"aa\"@: -- -- @ -- Result: \"aa\" -- @ (<++>) :: Parser String -> Parser String -> Parser String p <++> q = (++) <$> p <*> q pa2 = pa <++> pa pa3 = pa <++> pa2 test4 = run pa2 "aa" -- | The function @`pSym`@ is overloaded. The type of its argument determines how to interpret the argument. Thus far we have seen single characters, -- but we may pass ranges as well as argument: @\run (pList (pSym ('a','z'))) \"doaitse\"@ -- -- @ -- Result: "doaitse" -- @ test5 = run (pList (pSym ('a','z'))) "doaitse" paz = pList (pSym ('a', 'z')) -- | An even more general instance of @`pSym`@ takes a triple as argument: a predicate, a string indicating what is expected, -- and the value to insert if nothing can be recognised: @run (pSym (\t -> 'a' <= t && t <= 'z', \"'a'..'z'\", 'k')) \"v\"@ -- -- @ -- Result: 'k' -- Correcting steps: -- Deleted '1' at position 0 expecting 'a'..'z' -- Inserted 'k' at position 1 expecting 'a'..'z' -- @ test6 = run paz' "1" paz' = pSym (\t -> 'a' <= t && t <= 'z', "'a'..'z'", 'k') -- | The parser `pCount` recognises a sequence of elements, throws away the results of the recognition process (@ \<$ @), and just returns the number of returned elements. -- The choice combinator @\<\<|>@ indicates that prefernce is to be given to the left alternative if it can make progress. This enables us to specify greedy strategies: -- @ run (pCount pa) \"aaaaa\"@ -- -- @ -- Result: 5 -- @ pCount p = (+1) <$ p <*> pCount p <<|> pReturn 0 test7 = run (pCount pa) "aaaaa" -- | The parsers are instance of the class Monad and hence we can use the result of a previous parser to construct a following one: @run (do {l <- pCount pa; pExact l pb}) \"aaacabbb\"@ -- -- @ -- Result: [\"b\",\"b\",\"b\",\"b\"] -- Correcting steps: -- Deleted 'c' at position 3 expecting one of ['a', 'b'] -- Inserted 'b' at position 8 expecting 'b' -- @ test8 = run (do {l <- pCount pa; pExact l pb}) "aaacabbb" pExact 0 p = pReturn [] pExact n p = (:) <$> p <*> pExact (n-1) p -- | The function @`amb`@ converts an ambigous parser into one which returns all possible parses: @run (amb ( (++) <$> pa2 <*> pa3 <|> (++) <$> pa3 <*> pa2)) \"aaaaa\"@ -- -- @ -- Result: [\"aaaaa\",\"aaaaa\"] -- @ test9 = run (amb ( (++) <$> pa2 <*> pa3 <|> (++) <$> pa3 <*> pa2)) "aaaaa" -- | The applicative style makes it very easy to merge recognsition and computing a result. As an example we parse a sequence of nested well formed parentheses pairs a,d -- compute the maximum nesting depth: @run ( max <$> pParens ((+1) <$> wfp) <*> wfp `opt` 0) \"((()))()(())\" @ -- -- @ -- Result: 3 -- @ test10 = run wfp "((()))()(())" wfp = max <$> pParens ((+1) <$> wfp) <*> wfp `opt` 0 -- | It is very easy to recognise infix expressions with any number of priorities and operators: -- -- @ -- pOp (c, op) = op <$ pSym c -- sepBy p op = pChainl op p -- expr = foldr sepBy factor [(pOp ('+', (+)) <|> pOp ('-', (-))), pOp ('*' , (*))] -- factor = pNatural <|> pParens expr -- @ -- -- | which we can call: @run expr \"15-3*5\"@ -- -- @ -- Result: 0 -- @ -- -- | Note that also here correction takes place: @run expr \"2 + + 3 5\"@ -- -- @ -- Result: 37 -- Correcting steps: -- Deleted ' ' at position 1 expecting one of ['0'..'9', '*', '-', '+'] -- Inserted '0' at position 3 expecting one of ['(', '0'..'9'] -- Deleted ' ' at position 4 expecting one of ['(', '0'..'9'] -- Deleted ' ' at position 6 expecting one of ['0'..'9', '*', '-', '+'] -- @ test11 = run expr "15-3*5" -- parsing expressions pOp (c, op) = op <$ pSym c expr = foldr pChainl factor [(pOp ('+', (+)) <|> pOp ('-', (-))), pOp ('*' , (*))] factor = pNatural <|> pParens expr -- parsing numbers pDigit :: Parser Char pDigit = pSym ('0', '9') pDigitAsInt = digit2Int <$> pDigit pNatural = foldl (\a b -> a * 10 + b ) 0 <$> pList1 pDigitAsInt digit2Int a = ord a - ord '0' -- | A common case where ambiguity arises is when we e.g. want to recognise identifiers, but only those which are not keywords. -- The combinator `micro` inserts steps with a specfied cost in the result of the parser which can be used to disambiguate: -- -- @ -- ident :: Parser String -- ident = ((:) <$> pSym ('a','z') <*> pMunch (\x -> 'a' <= x && x <= 'z') `micro` 2) <* spaces -- idents = pList1 ident -- pKey keyw = pToken keyw `micro` 1 <* spaces -- spaces :: Parser String -- spaces = pMunch (==' ') -- takes_second_alt = pList ident -- \<|> (\ c t e -> [\"IfThenElse\"] ++ c ++ t ++ e) -- \<$ pKey \"if\" \<*> pList_ng ident -- \<* pKey \"then\" \<*> pList_ng ident -- \<* pKey \"else\" \<*> pList_ng ident -- @ -- -- | A keyword is followed by a small cost @1@, which makes sure that identifiers which have a keyword as a prefix win over the keyword. Identifiers are however -- followed by a cost @2@, with as result that in this case the keyword wins. -- Note that a limitation of this approach is that keywords are only recognised as such when expected! -- -- @ -- test13 = run takes_second_alt \"if a then if else c\" -- test14 = run takes_second_alt \"ifx a then if else c\" -- @ -- -- with results: -- -- @ -- Text.ParserCombinators.UU.Examples> test14 -- Result: [\"IfThenElse\",\"a\",\"if\",\"c\"] -- Text.ParserCombinators.UU.Examples> test14 -- Result: [\"ifx\",\"a\",\"then\",\"if\",\"else\",\"c\"] -- @ ident :: Parser String ident = ((:) <$> pSym ('a','z') <*> pMunch (\x -> 'a' <= x && x <= 'z') `micro` 2) <* spaces idents = pList1 ident pKey keyw = pToken keyw `micro` 1 <* spaces spaces :: Parser String spaces = pMunch (==' ') takes_second_alt = pList ident <|> (\ c t e -> ["IfThenElse"] ++ c ++ t ++ e) <$ pKey "if" <*> pList_ng ident <* pKey "then" <*> pList_ng ident <* pKey "else" <*> pList_ng ident test13 = run takes_second_alt "if a then if else c" test14 = run takes_second_alt "ifx a then if else c" munch = (,,) <$> pa <*> pMunch ( `elem` "^=*") <*> pb -- bracketing expressions pParens :: Parser a -> Parser a pParens p = pSym '(' *> p <* pSym ')' pBracks p = pSym '[' *> p <* pSym ']' pCurlys p = pSym '{' *> p <* pSym '}' -- parsing letters and identifiers pLower = pSym ('a','z') pUpper = pSym ('A','Z') pLetter = pUpper <|> pLower pVarId = (:) <$> pLower <*> pList pIdChar pConId = (:) <$> pUpper <*> pList pIdChar pIdChar = pLower <|> pUpper <|> pDigit <|> pAnySym "='" pAnyToken :: [String] -> Parser String pAnyToken = pAny pToken -- parsing two alternatives and returning both rsults pAscii = pSym ('\000', '\254') pIntList ::Parser [Int] pIntList = pParens ((pSym ';') `pListSep` (read <$> pList (pSym ('0', '9')))) parseIntString :: Parser String parseIntString = pList (pAscii) parseBoth = pPair pIntList parseIntString pPair p q = amb (Left <$> p <|> Right <$> q) main :: IO () main = do test1 run pa "b" run pa2 "bbab" run pa "ba" run pa "aa" run (do {l <- pCount pa; pExact l pb}) "aaacabbbb" run (amb ( (++) <$> pa2 <*> pa3 <|> (++) <$> pa3 <*> pa2)) "aaabaa" run paz "ab1z7" run paz' "m" run paz' "" run (pa <|> pb "just a message") "c" run parseBoth "(123;456;789)" run munch "a^=^**^^b"