------------------------------------------------------------------------------ -- | -- Module : LiterateX.Parser -- Description : source parser -- Copyright : Copyright (c) 2021-2023 Travis Cardwell -- License : MIT -- -- This module implements the source parser. ------------------------------------------------------------------------------ {-# LANGUAGE LambdaCase #-} {-# LANGUAGE RecordWildCards #-} module LiterateX.Parser ( -- * API parse ) where -- https://hackage.haskell.org/package/base import Control.Monad (guard) import Data.Maybe (fromMaybe) -- https://hackage.haskell.org/package/conduit import qualified Data.Conduit as C import Data.Conduit (ConduitT) -- https://hackage.haskell.org/package/text import qualified Data.Text as T import Data.Text (Text) -- (literatex) import LiterateX.Types (SourceFormat, SourceLine) import qualified LiterateX.Types.SourceFormat as SourceFormat import qualified LiterateX.Types.SourceLine as SourceLine ------------------------------------------------------------------------------ -- $API -- | Create a "Conduit" transformer that parses the specified source format -- -- The transformer consumes lines of the input and produces a 'SourceLine' for -- each line of input. -- -- @since 0.0.1.0 parse :: Monad m => SourceFormat -> ConduitT Text SourceLine m () parse = parseSourceLines . parserFunctionsFor ------------------------------------------------------------------------------ -- $Internal -- | Parser functions that determine how input is parsed data ParserFunctions = ParserFunctions { isCodeBlank :: !(Text -> Bool) , isDocBlank :: !(Text -> Bool) , isRule :: !(Text -> Bool) , getDoc :: !(Text -> Maybe Text) , getCode :: !(Text -> Text) } ------------------------------------------------------------------------------ -- | Get the parser functions for the specified source format parserFunctionsFor :: SourceFormat -> ParserFunctions parserFunctionsFor = \case SourceFormat.DoubleDash -> lineCommentParserFunctions '-' 2 SourceFormat.DoubleSlash -> lineCommentParserFunctions '/' 2 SourceFormat.Hash -> lineCommentParserFunctions '#' 1 SourceFormat.LiterateHaskell -> literateHaskellParserFunctions SourceFormat.Percent -> lineCommentParserFunctions '%' 1 SourceFormat.LispSemicolons -> lispCommentParserFunctions ------------------------------------------------------------------------------ -- | Get parser functions for source with line-based comments lineCommentParserFunctions :: Char -- ^ comment character -> Int -- ^ number of comment characters to create line comment -> ParserFunctions lineCommentParserFunctions char count = ParserFunctions{..} where docBlank :: Text docBlank = T.pack $ replicate count char prefixLen :: Int prefixLen = count + 1 prefix :: Text prefix = T.pack $ replicate count char ++ " " isCodeBlank :: Text -> Bool isCodeBlank = T.null isDocBlank :: Text -> Bool isDocBlank = (== docBlank) isRule :: Text -> Bool isRule line = T.length line > count && T.all (== char) line getDoc :: Text -> Maybe Text getDoc line = do let (linePrefix, lineSuffix) = T.splitAt prefixLen line guard $ linePrefix == prefix pure lineSuffix getCode :: Text -> Text getCode = id ------------------------------------------------------------------------------ -- | Get parser functions for source with Lisp-style comments -- -- Lisp-style comments begin with one or more semicolons. lispCommentParserFunctions :: ParserFunctions lispCommentParserFunctions = ParserFunctions{..} where isCodeBlank :: Text -> Bool isCodeBlank = T.null isDocBlank :: Text -> Bool isDocBlank line = let len = T.length line in len >= 1 && len <= 4 && T.all (== ';') line isRule :: Text -> Bool isRule line = T.length line > 4 && T.all (== ';') line getDoc :: Text -> Maybe Text getDoc line = do let (linePrefix, (sep, lineSuffix)) = T.splitAt 1 <$> T.breakOn " " line guard $ not (T.null linePrefix) && T.all (== ';') linePrefix guard $ sep == " " pure lineSuffix getCode :: Text -> Text getCode = id ------------------------------------------------------------------------------ -- | Get parser functions for parsing literate Haskell literateHaskellParserFunctions :: ParserFunctions literateHaskellParserFunctions = ParserFunctions{..} where isCodeBlank :: Text -> Bool isCodeBlank = (== ">") isDocBlank :: Text -> Bool isDocBlank = T.null isRule :: Text -> Bool isRule = const False getDoc :: Text -> Maybe Text getDoc line | "> " `T.isPrefixOf` line = Nothing | otherwise = Just line getCode :: Text -> Text getCode line = fromMaybe line $ T.stripPrefix "> " line ------------------------------------------------------------------------------ -- | Create a "Conduit" transformer for the specified parser functions -- -- This function produces a 'SourceLine' for each line of input. A -- 'SourceLine.Shebang' can only be produced on the first line. Note that the -- order that the parser functions are used is significant; the parser -- functions are written for this order. parseSourceLines :: Monad m => ParserFunctions -> ConduitT Text SourceLine m () parseSourceLines ParserFunctions{..} = do mLine <- C.await case mLine of Just line -> do C.yield $ if "#!" `T.isPrefixOf` line then SourceLine.Shebang line else parse' line C.awaitForever $ C.yield . parse' Nothing -> return () where parse' :: Text -> SourceLine parse' line | isCodeBlank line = SourceLine.CodeBlank | isDocBlank line = SourceLine.DocBlank | isRule line = SourceLine.Rule | otherwise = case (getDoc line, getCode line) of (Just doc, _code) -> SourceLine.Doc doc (Nothing, code) -> SourceLine.Code code