{-# LANGUAGE FlexibleContexts #-} module Text.Parsec.Indent ( IndentParserT, withBlock, withLineFold, withBlock', withLineFold', block, lineFold, manyLine, manyIndent, foldLine, foldLine3, foldLine4, foldLine5, (<+/>), (<-/>), runIndent ) where import Text.Parsec import Text.Parsec.Pos import Control.Monad.State import Control.Concatenative {-| A module to construct indentation aware parsers. Many programming language have indentation based syntax rules e.g. python and Haskell. This module exports combinators to create such parsers. The input source can be thought of as a list of tokens. Abstractly each token occurs at a line and a column and has a width. The column number of a token measures is indentation. If t1 and t2 are two tokens then we say that indentation of t1 is more than t2 if the column number of occurrence of t1 is greater than that of t2. Currently this module supports two kind of indentation based syntactic structures which we now describe: [Block] A block of indentation /c/ is a sequence of tokens with indentation at least /c/. Examples for a block is a where clause of Haskell with no explicit braces. [Line fold] A line fold starting at line /l/ and indentation /c/ is a sequence of tokens that start at line /l/ and possibly continue to subsequent lines as long as the indentation is greater than /c/. Such a sequence of lines need to be /folded/ to a single line. An example is MIME headers. Line folding based binding separation is used in Haskell as well. -} type IndentParserT s u m a = ParsecT s u (StateT SourcePos m) a -- | @ 'withLineFold' f a p @ parses @ a @ followed by any number of @ p @ -- that can wrap onto subsequent indented lines, combined by @ f @ withLineFold :: (Stream s (StateT SourcePos m) Char, Monad m) => (a -> [b] -> c) -> IndentParserT s u m a -> IndentParserT s u m b -> IndentParserT s u m c withLineFold f a p = withPos $ do r1 <- a r2 <- many (sameOrIndented >> p) return (f r1 r2) -- | Like 'withLineFold', but throws away initial parse result withLineFold' :: (Stream s (StateT SourcePos m) Char, Monad m) => IndentParserT s u m a -> IndentParserT s u m b -> IndentParserT s u m [b] withLineFold' = withLineFold (flip const) -- | Like 'withBlock', but throws away initial parse result withBlock' :: (Stream s (StateT SourcePos m) Char, Monad m) => IndentParserT s u m a -> IndentParserT s u m b -> IndentParserT s u m [b] withBlock' = withBlock (flip const) -- | Parses one or more times, continuing in subsequent indented lines lineFold :: (Stream s (StateT SourcePos m) Char, Monad m) => IndentParserT s u m a -> IndentParserT s u m [a] lineFold = withLineFold' (return ()) -- | @ 'withBlock' f a p @ parses @ a @ followed by an indented block of @ p @ -- and combines them with @ f @ withBlock :: (Stream s (StateT SourcePos m) Char, Monad m) => (a -> [b] -> c) -> IndentParserT s u m a -> IndentParserT s u m b -> IndentParserT s u m c withBlock f a p = withPos $ do r1 <- a r2 <- option [] (indented >> block p) return (f r1 r2) indented :: (Stream s (StateT SourcePos m) Char, Monad m) => IndentParserT s u m () indented = do pos <- getPosition s <- get if biAp sourceColumn (<) pos s then mzero else do put $ setSourceLine s (sourceLine pos) return () sameOrIndented :: (Stream s (StateT SourcePos m) Char, Monad m) => IndentParserT s u m () sameOrIndented = same <|> indented same :: (Stream s (StateT SourcePos m) Char, Monad m) => IndentParserT s u m () same = do pos <- getPosition s <- get if biAp sourceLine (==) pos s then return () else mzero -- | Parses a block of lines at the same indentation level block :: (Stream s (StateT SourcePos m) Char, Monad m) => IndentParserT s u m a -> IndentParserT s u m [a] block p = withPos $ do r <- many1 (checkIndent >> p) return r withPos :: (Stream s (StateT SourcePos m) Char, Monad m) => IndentParserT s u m a -> IndentParserT s u m a withPos x = do a <- get p <- getPosition r <- put p >> x put a >> return r checkIndent :: (Stream s (StateT SourcePos m) Char, Monad m) => IndentParserT s u m () checkIndent = do s <- get p <- getPosition if biAp sourceColumn (==) p s then return () else mzero -- | Run the result of an indentation sensitive parse runIndent :: Monad m => SourceName -> StateT SourcePos m a -> m a runIndent s = flip evalStateT (initialPos s) -- | Parses many occurances of p without using more than one line manyLine :: (Stream s (StateT SourcePos m) Char, Monad m) => IndentParserT s u m a -> IndentParserT s u m [a] manyLine p = withPos (many (same >> p)) -- | Parses many occurances of p, indenting if over one line manyIndent :: (Stream s (StateT SourcePos m) Char, Monad m) => IndentParserT s u m a -> IndentParserT s u m [a] manyIndent p = withPos (many (sameOrIndented >> p)) -- | Parses two arguments for @ f @ possibly wrapping onto subsequent lines foldLine :: (Stream s (StateT SourcePos m) Char, Monad m) => (a -> b -> c) -> IndentParserT s u m a -> IndentParserT s u m b -> IndentParserT s u m c foldLine f a b = withPos $ do r1 <- a r2 <- sameOrIndented >> b return $ f r1 r2 -- | Parses three arguments for @ f @ possibly wrapping onto subsequent lines foldLine3 :: (Stream s (StateT SourcePos m) Char, Monad m) => (a -> b -> c -> d) -> IndentParserT s u m a -> IndentParserT s u m b -> IndentParserT s u m c -> IndentParserT s u m d foldLine3 f a b c = withPos $ do r1 <- a r2 <- sameOrIndented >> b r3 <- sameOrIndented >> c return $ f r1 r2 r3 -- | Parses four arguments for @ f @ possibly wrapping onto subsequent lines foldLine4 :: (Stream s (StateT SourcePos m) Char, Monad m) => (a -> b -> c -> d -> e) -> IndentParserT s u m a -> IndentParserT s u m b -> IndentParserT s u m c -> IndentParserT s u m d -> IndentParserT s u m e foldLine4 f a b c d = withPos $ do r1 <- a r2 <- sameOrIndented >> b r3 <- sameOrIndented >> c r4 <- sameOrIndented >> d return $ f r1 r2 r3 r4 -- | Parses four arguments for @ f @ possibly wrapping onto subsequent lines foldLine5 :: (Stream s (StateT SourcePos m) Char, Monad m) => (a -> b -> c -> d -> e -> f) -> IndentParserT s u m a -> IndentParserT s u m b -> IndentParserT s u m c -> IndentParserT s u m d -> IndentParserT s u m e -> IndentParserT s u m f foldLine5 f a b c d e = withPos $ do r1 <- a r2 <- sameOrIndented >> b r3 <- sameOrIndented >> c r4 <- sameOrIndented >> d r5 <- sameOrIndented >> e return $ f r1 r2 r3 r4 r5 -- | '<+/>' is to foldLine as 'ap' is to 'liftM2' (<+/>) :: (Stream s (StateT SourcePos m) Char, Monad m) => IndentParserT s u m (a -> b) -> IndentParserT s u m a -> IndentParserT s u m b (<+/>) = foldLine id -- | '<-/>' is like '<+/>', but doesn't apply the function to the parsed value (<-/>) :: (Stream s (StateT SourcePos m) Char, Monad m) => IndentParserT s u m a -> IndentParserT s u m b -> IndentParserT s u m a (<-/>) = foldLine const