{-# LANGUAGE Haskell2010 #-} -- | -- Module : Language.Preprocessor.Unlit -- Copyright : (c) 1992 authors of the Haskell 1.2 Report, -- (c) 2013 Stijn van Drongelen -- -- License : BSD-style -- Maintainer : rhymoid@gmail.com -- Stability : experimental -- Portability : portable -- module Language.Preprocessor.Unlit ( -- * @unlit@ unlit , UnlitError (..) , unlitLinesWith -- * Parsers , ghcParsers , birdParser , latexParser , cppParser , pandocMarkdownParser -- ** Defining custom parsers , Parser (..) , Classified (..) , NeedHug (..) , Active (..) -- * Splitting and joining lines , lines , unlines ) where import Prelude hiding (lines, unlines) import Control.Arrow (first, second, (&&&)) import Data.Char (isSpace) import Data.Function import Data.List hiding (lines, unlines) import Data.Maybe (fromMaybe) import qualified Data.Text as T -- | Runs 'unlitLinesWith' on a blob of text, using the default parsers of GHC. unlit :: T.Text -> ([UnlitError], T.Text) unlit = second unlines . unlitLinesWith ghcParsers . lines -- | Parse a list of lines, given a sequence of parsers. unlitLinesWith :: [Parser T.Text] -> [T.Text] -> ([UnlitError], [T.Text]) unlitLinesWith parsers = (checkErrors &&& map unclassify) . classify parsers where checkErrors :: [Classified x] -> [UnlitError] checkErrors [] = [] checkErrors (c:cs) = concat $ zipWith3 adjacentError [1..] (c:cs) (cs ++ [Blank]) adjacentError :: Integer -> Classified x -> Classified x -> [UnlitError] adjacentError _ (Program NeedHug _ _) Blank = [] adjacentError _ Blank (Program NeedHug _ _) = [] adjacentError ln (Program NeedHug _ _) _ = [NotFollowedByBlank ln] adjacentError ln _ (Program NeedHug _ _) = [NotPrecededByBlank (succ ln)] adjacentError _ _ _ = [] data UnlitError = NotPrecededByBlank Integer | NotFollowedByBlank Integer data Classified x = Program NeedHug Active x -- ^ Program lines. | Comment -- ^ Comment lines. | Blank -- ^ Blank lines. data NeedHug = NeedHug -- ^ A sequence of program lines needs blank lines around it. | Whatever -- ^ This line doesn't care about adjacent lines. data Active = Active -- ^ An active line of code. | Inactive -- ^ An inactive line of code. newtype Parser x = Parser (x -> [x] -> Maybe ([Classified x], Maybe [x])) classify :: [Parser T.Text] -> [T.Text] -> [Classified T.Text] classify parsers = go where go [] = [] go (x:xs) | Just (cs, more) <- checkParsers x xs -- TODO Communicate error when 'more' is Nothing. = cs ++ go (fromMaybe [] more) | T.all isSpace x = Blank : go xs | otherwise = Comment : go xs checkParsers p1 p2 = foldr orElse Nothing . map (\(Parser f) -> f p1 p2) $ parsers unclassify :: Classified T.Text -> T.Text unclassify (Program _ Active x) = x unclassify _ = T.empty ----- -- | The default parsers used by GHC. ghcParsers :: [Parser T.Text] ghcParsers = [latexParser, birdParser, cppParser] -- | Parser for blocks of code delimited by @\\begin{code}@ and @\\end{code}@ -- or @\\begin{pseudocode}@ and @\\end{pseudocode}@. latexParser :: Parser T.Text latexParser = Parser p where p ln lns | Just sbz <- T.pack "\\begin{code}" `T.stripPrefix` sln = block sbz Active (T.pack "\\end{code}") lns | Just sbz <- T.pack "\\begin{pseudocode}" `T.stripPrefix` sln = block sbz Active (T.pack "\\end{pseudocode}") lns | otherwise = Nothing where sln = T.dropWhile isSpace ln block sbz act delim | not (T.all isSpace sbz) -- TODO Communicate error. = block T.empty act delim block _ act delim = Just . first (Blank:) . go where go [] = ([], Nothing) go (ln:lns) | Just _sbz <- delim `T.stripPrefix` sln -- TODO Communicate error if not (T.all isSpace sbz) = ([Blank], Just lns) | otherwise = first (Program Whatever act ln:) (go lns) where sln = T.dropWhile isSpace ln -- | Parser for blocks of code prefixed by @>@ or @<@. birdParser :: Parser T.Text birdParser = Parser p where p ln lns | T.pack ">" `T.isPrefixOf` sln = result Active | T.pack "<" `T.isPrefixOf` sln = result Inactive | otherwise = Nothing where (prefix, sln) = T.span isSpace ln rln = T.concat [prefix, T.pack " ", T.drop 1 ln] result act = Just ([Program NeedHug act rln], Just lns) -- | Parser for lines prefixed by @#@. cppParser :: Parser T.Text cppParser = Parser p where p ln lns | T.pack "#" `T.isPrefixOf` ln = Just ([Program Whatever Active ln], Just lns) | otherwise = Nothing -- | Parser for lines fenced by backticks or tildes. pandocMarkdownParser :: (T.Text -> Active) -> Parser T.Text pandocMarkdownParser testAttributes = Parser p where p ln lns | T.pack "```" `T.isPrefixOf` ln = block (testAttributes btTail) btFence lns | T.pack "~~~" `T.isPrefixOf` ln = block (testAttributes tiTail) tiFence lns | otherwise = Nothing where (btFence, btTail) = T.span (== '`') ln (tiFence, tiTail) = T.span (== '~') ln block act delim = Just . first (Blank:) . go where go [] = ([], Nothing) go (ln:lns) | delim `T.isPrefixOf` ln = ([Blank], Just lns) | otherwise = first (Program NeedHug act ln:) (go lns) --------------- -- Utilities -- --------------- -- | Breaks a 'Text' up into a list of 'Text's at newline sequences. -- The resulting strings do not contain newline sequences. lines :: T.Text -> [T.Text] lines = linesWith [T.pack "\r\n", T.pack "\r", T.pack "\n"] -- U+2028, U+2029? -- | Joins lines of 'Text'. In the resulting string, lines are terminated -- with a single newline character (@'\\n'@). unlines :: [T.Text] -> T.Text unlines = unlinesWith (T.pack "\n") linesWith :: [T.Text] -> T.Text -> [T.Text] linesWith delimiters tx = if T.null tx then [] else go tx where ds = sortBy (compare `on` (negate . T.length)) delimiters stripAnyPrefix t = foldr orElse Nothing . map (flip T.stripPrefix t) $ ds dh = nub . map (T.head) . filter (not . T.null) $ delimiters go t | T.null t = [] | T.null tt = [th] | Just tn <- stripAnyPrefix tt = th : go tn | True = case go tt' of [] -> [th'] (y:ys) -> T.append th' y:ys where (th, tt) = T.span (not . (`elem` dh)) t (th', tt') = (th' `T.snoc` T.head tt', T.tail tt') unlinesWith :: T.Text -> [T.Text] -> T.Text unlinesWith nl = T.concat . map (`T.append` nl) orElse :: Maybe a -> Maybe a -> Maybe a orElse Nothing y = y orElse x _ = x