{-# OPTIONS_HADDOCK ignore-exports #-} {-# LANGUAGE FlexibleInstances, TypeSynonymInstances, MultiParamTypeClasses, Rank2Types, FlexibleContexts, NoMonomorphismRestriction, CPP #-} -- | This module contains a lot of examples of the typical use of our parser combinator library. -- We strongly encourage you to take a look at the source code. -- At the end you find a @`main`@ function which demonstrates the main characteristics. -- Only the @`run`@ function is exported since it may come in handy elsewhere. module Text.ParserCombinators.UU.Demo.Examples where import Data.Char import Text.ParserCombinators.UU import Text.ParserCombinators.UU.Utils import Text.ParserCombinators.UU.BasicInstances hiding (Parser) import System.IO import GHC.IO.Handle.Types import qualified Data.ListLike as LL -- import Control.Monad #define DEMO(p,i) demo "p" i p type Parser a = P (Str Char String LineColPos) a justamessage = "justamessage" -- | Running the function `show_demos` should give the following output: -- -- >>> run pa "a" -- Result: "a" -- -- >>> run pa "" -- Result: "a" -- Correcting steps: -- Inserted 'a' at position LineColPos 0 0 0 expecting 'a' -- -- >>> run pa "b" -- Result: "a" -- Correcting steps: -- Deleted 'b' at position LineColPos 0 0 0 expecting 'a' -- Inserted 'a' at position LineColPos 0 1 1 expecting 'a' -- -- >>> run ((++) <$> pa <*> pa) "bbab" -- Result: "aa" -- Correcting steps: -- Deleted 'b' at position LineColPos 0 0 0 expecting 'a' -- Deleted 'b' at position LineColPos 0 1 1 expecting 'a' -- Deleted 'b' at position LineColPos 0 3 3 expecting 'a' -- Inserted 'a' at position LineColPos 0 4 4 expecting 'a' -- -- >>> run pa "ba" -- Result: "a" -- Correcting steps: -- Deleted 'b' at position LineColPos 0 0 0 expecting 'a' -- -- >>> run pa "aa" -- Result: "a" -- Correcting steps: -- The token 'a' was not consumed by the parsing process. -- -- >>> run (pCount pa :: Parser Int) "aaa" -- Result: 3 -- -- >>> run (do {l <- pCount pa; pExact l pb}) "aaacabbbbb" -- Result: ["b","b","b","b"] -- Correcting steps: -- Deleted 'c' at position LineColPos 0 3 3 expecting one of ['b', 'a'] -- The token 'b' was not consumed by the parsing process. -- -- >>> run (amb ( (++) <$> pa2 <*> pa3 <|> (++) <$> pa3 <*> pa2)) "aaaaa" -- Result: ["aaaaa","aaaaa"] -- -- >>> run (pList pLower) "doaitse" -- Result: "doaitse" -- -- >>> run paz "abc2ez" -- Result: "abcez" -- Correcting steps: -- Deleted '2' at position LineColPos 0 3 3 expecting 'a'..'z' -- -- >>> run (max <$> pParens ((+1) <$> wfp) <*> wfp `opt` 0) "((()))()(())" -- Result: 3 -- -- >>> run (pa <|> pb <?> justamessage) "c" -- Result: "b" -- Correcting steps: -- Deleted 'c' at position LineColPos 0 0 0 expecting justamessage -- Inserted 'b' at position LineColPos 0 1 1 expecting 'b' -- -- >>> run (amb (pEither parseIntString pIntList)) "(123;456;789)" -- Result: [Left ["123","456","789"],Right [123,456,789]] -- show_demos :: IO () show_demos = do DEMO (pa, "a") DEMO (pa, "" ) DEMO (pa, "b") DEMO (((++) <$> pa <*> pa), "bbab") DEMO (pa, "ba") DEMO (pa, "aa") DEMO ((pCount pa :: Parser Int), "aaa") DEMO ((do {l <- pCount pa; pExact l pb}), "aaacabbbbb") DEMO ((amb ( (++) <$> pa2 <*> pa3 <|> (++) <$> pa3 <*> pa2)), "aaaaa") DEMO ((pList pLower), "doaitse") DEMO (paz, "abc2ez") DEMO ((max <$> pParens ((+1) <$> wfp) <*> wfp `opt` 0), "((()))()(())") DEMO ((pa <|> pb <?> justamessage), "c") DEMO ((amb (pEither parseIntString pIntList)), "(123;456;789)") -- DEMO ((pa *> pMunch ( `elem` "^=*") <* pb), "a^=^**^^b") -- | The fuction @`run`@ runs the parser and shows both the result, and the correcting steps which were taken during the parsing process. run :: Show t => Parser t -> String -> IO () run p inp = do let r@(a, errors) = parse ( (,) <$> p <*> pEnd) (createStr (LineColPos 0 0 0) inp) putStrLn ("-- Result: " ++ show a) if null errors then return () else do putStr ("-- Correcting steps: \n") show_errors errors putStrLn "-- " where show_errors :: (Show a) => [a] -> IO () show_errors = sequence_ . (map (putStrLn . show)) run' p inp = do let r@(a, errors) = parse ( (,) <$> p <*> pEnd) (createStr (LineColPos 0 0 0) inp) if null errors then return () else do putStr ("-- Correcting steps: \n") show_errors errors putStrLn ("-- Result: " ++ show a) putStrLn "-- " where show_errors :: (Show a) => [a] -> IO () show_errors = sequence_ . (map (putStrLn . show)) -- | Our first two parsers are simple; one recognises a single 'a' character and the other one a single 'b'. Since we will use them later we -- convert the recognised character into `String` so they can be easily combined. pa ::Parser String pa = lift <$> pSym 'a' pb :: Parser String pb = lift <$> pSym 'b' pc :: Parser String pc = lift <$> pSym 'c' lift a = [a] (<++>) :: Parser String -> Parser String -> Parser String p <++> q = (++) <$> p <*> q pa2 = pa <++> pa pa3 = pa <++> pa2 paz :: Parser String paz = pList (pSatisfy (\t -> 'a' <= t && t <= 'z') (Insertion "'a'..'z'" 'k' 5)) -- | The applicative style makes it very easy to merge recogition and computing a result. -- As an example we parse a sequence of nested well formed parentheses pairs and -- compute the maximum nesting depth with @`wfp`@: wfp :: Parser Int wfp = max <$> pParens ((+1) <$> wfp) <*> wfp `opt` 0 -- | It is very easy to recognise infix expressions with any number of priorities and operators: -- -- > operators = [[('+', (+)), ('-', (-))], [('*' , (*))], [('^', (^))]] -- > same_prio ops = msum [ op <$ pSym c | (c, op) <- ops] -- > expr = foldr pChainl ( pNatural <|> pParens expr) (map same_prio operators) -- -- -- which we can call: -- -- > run expr "15-3*5+2^5" -- -- > Result: 32 -- -- Note that also here correction takes place: -- -- > run expr "2 + + 3 5" -- -- > Result: 37 -- > Correcting steps: -- > Deleted ' ' at position 1 expecting one of ['0'..'9', '^', '*', '-', '+'] -- > Deleted ' ' at position 3 expecting one of ['(', '0'..'9'] -- > Inserted '0' at position 4 expecting '0'..'9' -- > Deleted ' ' at position 5 expecting one of ['(', '0'..'9'] -- > Deleted ' ' at position 7 expecting one of ['0'..'9', '^', '*', '-', '+'] -- test11 = run expr "15-3*5" expr :: Parser Int operators = [[('+', (+)), ('-', (-))], [('*' , (*))], [('^', (^))]] same_prio ops = foldr (<|>) empty [ op <$ pSym c | (c, op) <- ops] expr = foldr pChainl ( pNatural <|> pParens expr) (map same_prio operators) -- | A common case where ambiguity arises is when we e.g. want to recognise identifiers, -- but only those which are not keywords. -- The combinator `micro` inserts steps with a specfied cost in the result -- of the parser which can be used to disambiguate: -- -- > -- > ident :: Parser String -- > ident = ((:) <$> pSym ('a','z') <*> pMunch (\x -> 'a' <= x && x <= 'z') `micro` 2) <* spaces -- > idents = pList1 ident -- > pKey keyw = pToken keyw `micro` 1 <* spaces -- > spaces :: Parser String -- > spaces = pMunch (==' ') -- > takes_second_alt = pList ident -- > \<|> (\ c t e -> ["IfThenElse"] ++ c ++ t ++ e) -- > \<$ pKey "if" <*> pList_ng ident -- > \<* pKey "then" <*> pList_ng ident -- > \<* pKey "else" <*> pList_ng ident -- -- A keyword is followed by a small cost @1@, which makes sure that -- identifiers which have a keyword as a prefix win over the keyword. Identifiers are however -- followed by a cost @2@, with as result that in this case the keyword wins. -- Note that a limitation of this approach is that keywords are only recognised as such when expected! -- -- > test13 = run takes_second_alt "if a then if else c" -- > test14 = run takes_second_alt "ifx a then if else c" -- -- with results for @test13@ and @test14@: -- -- > Result: ["IfThenElse","a","if","c"] -- > Result: ["ifx","a","then","if", "else","c"] -- -- | A mistake which is made quite often is to construct a parser which can recognise a sequence of elements using one of the -- derived combinators (say @`pList`@), but where the argument parser can recognise the empty string. -- The derived combinators check whether this is the case and terminate the parsing process with an error message: -- -- > run (pList spaces) "" -- > Result: *** Exception: The combinator pList -- > requires that it's argument cannot recognise the empty string -- -- > run (pMaybe spaces) " " -- > Result: *** Exception: The combinator pMaybe -- > requires that it's argument cannot recognise the empty string test16 :: IO () test16 = run (pList spaces) " " ident = ((:) <$> pRange ('a','z') <*> pMunch (\x -> 'a' <= x && x <= 'z') `micro` 2) <* spaces idents = pList1 ident pKey keyw = pToken keyw `micro` 1 <* spaces spaces :: Parser String spaces = pMunch (`elem` " \n") takes_second_alt = pList ident <|> (\ c t e -> ["IfThenElse"] ++ c ++ t ++ e) <$ pKey "if" <*> pList_ng ident <* pKey "then" <*> pList_ng ident <* pKey "else" <*> pList_ng ident test13 = run takes_second_alt "if a then if else c" test14 = run takes_second_alt "ifx a then if else c" pManyTill :: P st a -> P st b -> P st [a] pManyTill p end = [] <$ end <<|> (:) <$> p <*> pManyTill p end simpleComment = string "<!--" *> pManyTill pAscii (string "-->") string ::(IsLocationUpdatedBy loc Char, LL.ListLike state Char) => String -> P (Str Char state loc) String string = pToken pVarId = (:) <$> pLower <*> pList pIdChar pConId = (:) <$> pUpper <*> pList pIdChar pIdChar = pLower <|> pUpper <|> pDigit <|> pAnySym "='" pAnyToken :: (IsLocationUpdatedBy loc Char, LL.ListLike state Char) => [String] -> P (Str Char state loc) String pAnyToken = pAny pToken -- parsing two alternatives and returning both rsults pIntList :: Parser [Int] pIntList = pParens ((pSym ';') `pListSep` (read <$> pList1 (pRange ('0', '9')))) parseIntString :: Parser [String] parseIntString = pParens ((pSym ';') `pListSep` ( pList1 (pRange('0', '9')))) demo :: Show r => String -> String -> P (Str Char String LineColPos) r -> IO () demo str input p= do putStr ("-- >>> run " ++ str ++ " " ++ show input ++ "\n") run p input