{- | Module : Prosidy.Parse Description : Parse raw text into Prosidy documents Copyright : (c) James Alexander Feldman-Crough, 2019 License : MPL-2.0 Maintainer : alex@fldcr.com -} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE DerivingStrategies #-} {-# LANGUAGE ApplicativeDo #-} {-# LANGUAGE TypeApplications #-} {-# LANGUAGE OverloadedStrings #-} module Prosidy.Parse ( -- * Parsing Prosidy types from 'Data.Text.Text' parseDocument , parseDocumentMetadata -- * Reading & parsing Prosidy files , readDocument , readDocumentMetadata -- * Errors , Failure(..) , prettyFailure ) where import Prosidy.Compat import Prelude hiding ( fail ) import Prosidy.Types import Prosidy.Source import Prosidy.Types.Key ( isValidKeyHead , isValidKeyTail , unsafeMakeKey ) import Prosidy.Types.Series ( fromSeqNE , toSeqNE , fromSeq ) import Text.Megaparsec hiding ( token , sourceName ) import Text.Megaparsec.Char ( char , string ) import qualified Data.Char as Char import qualified Data.Text.Lazy as Text.Lazy import qualified Data.HashMap.Strict as HashMap import qualified Data.Sequence as Seq import qualified Data.List as List import qualified Data.HashSet as HashSet import qualified Data.Text.Encoding as Text.Encoding import qualified Text.Megaparsec.Char as Megaparsec import qualified Data.ByteString as ByteString import Control.Applicative ( Alternative ) import Data.Bifunctor ( first ) import Text.Megaparsec.Char.Lexer ( hexadecimal ) import Data.Functor ( ($>) ) import Data.Foldable ( fold , traverse_ ) import Control.Monad ( MonadPlus , void ) import Data.Text ( Text ) import Data.Void ( Void ) import Control.Exception ( Exception , throwIO ) import Control.Monad.Trans.Reader ( ReaderT(..) ) {- If you run into errors, use the following combinator to get Megaparsec to print out its state. > import qualified Text.Megaparsec.Debug > > dbg :: Show a => String -> P a -> P a > dbg txt (P (ReaderT r)) = P . ReaderT $ \src -> > Text.Megaparsec.Debug.dbg txt $ r src -} ------------------------------------------------------------------------------- -- | Parses a Prosidy 'Document' from its source. -- -- The 'FilePath' parameter is only used for error reporting. parseDocument :: FilePath -> Text -> Either Failure Document parseDocument path = runP doc . makeSource path -- | Reads a Prosidy 'Document' from the given 'FilePath'. -- -- Errors will be thrown as exceptions. Use 'parseDocument' for a pure -- implementation. readDocument :: FilePath -> IO Document readDocument filepath = do bytes <- ByteString.readFile filepath either throwIO pure . parseDocument filepath $ Text.Encoding.decodeUtf8With (\_ _ -> Just '\65533') bytes ------------------------------------------------------------------------------- -- | Parses a Prosidy document's header 'Metadata' from source, stopping when the -- header ends. -- -- The 'FilePath' parameter is only used for error reporting. parseDocumentMetadata :: FilePath -> Text -> Either Failure Metadata parseDocumentMetadata path = runP docMetadata . makeSource path -- | Reads a Prosidy document's 'Metadata' header from the given 'FilePath'. -- -- Errors will be thrown as exceptions. Use 'parseDocumentMetadata' for a pure -- implementation. readDocumentMetadata :: FilePath -> IO Metadata readDocumentMetadata filepath = do bytes <- ByteString.readFile filepath either throwIO pure . parseDocumentMetadata filepath $ Text.Encoding.decodeUtf8 bytes ------------------------------------------------------------------------------- -- | A parsing error. -- newtype Failure = Failure (ParseErrorBundle Text Void) deriving newtype (Exception, Show) -- | Pretty-print a 'Failure' into a message acceptable for displaying to -- users. prettyFailure :: Failure -> String prettyFailure (Failure e) = errorBundlePretty e ------------------------------------------------------------------------------- newtype P a = P (ReaderT Source (Parsec Void Text) a) deriving newtype (Functor, Applicative, Alternative, Monad, MonadFail, MonadPlus, MonadParsec Void Text) type MetadataItem = (Key, Maybe Text) runP :: P a -> Source -> Either Failure a runP (P (ReaderT r)) src = first Failure $ parse (r src) (sourceName src) (sourceText src) ------------------------------------------------------------------------------- doc :: P Document doc = do header <- docMetadata body <- Series . Seq.fromList <$> many block eof pure $ Document header body ------------------------------------------------------------------------------- docMetadata :: P Metadata docMetadata = do -- try to read a shebang as the _very_ first line of a document. optional_ $ string "#!" *> skipManyTill anySingle (void newlineOrEOF) -- skip past any blank lines at the start of the document skipSpaces *> skipMany endOfLine -- read each metadata item as a line-wide token items <- many docMetadataItem -- stop when we hit three dashes, alone, on a line docMetadataEnd pure $ foldMap itemToMetadata items docMetadataEnd :: P () docMetadataEnd = do void $ string "---" try $ do skipSpaces newlineOrEOF skipSpaces skipMany endOfLine docMetadataItem :: P MetadataItem docMetadataItem = do itemKey <- key itemVal <- optional $ do metaItemSep option "" text <* skipSpaces endOfLines pure (itemKey, itemVal) ------------------------------------------------------------------------------- block :: P Block block = choice [ BlockTag <$> blockTag , BlockLiteral <$> literalTag , BlockParagraph <$> paragraph ] blockTag :: P BlockTag blockTag = do t <- genericTag (void $ string "#-") blockTagContents emptyLines pure t blockTagContents :: P (Series Block) blockTagContents = choice [ifBraces, ifBlock, ifNothing] where ifBraces = annotateSource $ fmap (foldMap $ \x src -> Series . Seq.singleton . BlockParagraph $ Paragraph x src ) (token tagParagraph) ifBlock = Series . Seq.fromList <$> withBlockDelimiters (emptyLines *> many block) ifNothing = skipSpaces *> endOfLine $> mempty literalTag :: P LiteralTag literalTag = genericTag (void $ string "#=") $ do close <- blockTagDelim (void $ optional_ comment *> newlineOrEOF) literalBody close literalBody :: P () -> P Text literalBody end = do literalLines <- manyTill literalLine (try $ skipSpaces *> end) emptyLines pure $ Text.Lazy.toStrict . Text.Lazy.intercalate "\n" $ literalLines literalLine :: P Text.Lazy.Text literalLine = do line <- takeWhileP (Just "literal text") $ \ch -> ch /= '\r' && ch /= '\n' newlineOrEOF pure $ Text.Lazy.fromStrict line blockTagDelim :: P () -> P (P ()) blockTagDelim slurp = do char ':' maybeLabel <- optional keyLike skipSpaces <* slurp pure $ do string "#:" traverse_ string maybeLabel skipSpaces withBlockDelimiters :: P a -> P a withBlockDelimiters parser = do close <- blockTagDelim endOfLine parser <* close ------------------------------------------------------------------------------- inline :: P Inline inline = choice [InlineTag <$> inlineTag, InlineText <$> fragment] inlineTag :: P InlineTag inlineTag = genericTag sigil . option mempty $ orEmpty tagParagraph where orEmpty = fmap $ maybe mempty (fromSeq . toSeqNE) sigil = try $ do void $ char '#' void . lookAhead $ satisfy isValidKeyHead ------------------------------------------------------------------------------- paragraph :: P Paragraph paragraph = annotateSource $ paragraphLike >>= maybe (fail "empty paragraph encountered") (pure . Paragraph) paragraphLike :: P (Maybe (SeriesNE Inline)) paragraphLike = do ppLines <- paragraphLine `sepEndBy1` endOfLine emptyLines pure . fromSeqNE . Seq.fromList $ List.intercalate [Break] ppLines paragraphLine :: P [Inline] paragraphLine = do headItem <- inline tailItem <- many paragraphInline skipSpaces pure $ headItem : tailItem paragraphInline :: P Inline paragraphInline = (paragraphSpacer $> Break) <|> inline paragraphSpacer :: P () paragraphSpacer = try $ do skipSpaces1 notFollowedBy $ void (string "##") <|> newlineOrEOF tagParagraph :: P (Maybe (SeriesNE Inline)) tagParagraph = between start end $ option Nothing paragraphLike where start = char '{' *> skipSpaces *> emptyLines end = skipSpaces *> emptyLines *> char '}' ------------------------------------------------------------------------------- genericTag :: P () -> P a -> P (Tag a) genericTag sigilParser bodyParser = annotateSource $ do sigilParser thisName <- unsafeMakeKey <$> keyLike thisMetadata <- meta thisContent <- bodyParser pure $ Tag thisName thisMetadata thisContent meta :: P Metadata meta = option mempty $ between start end $ foldMap itemToMetadata <$> metaItem `sepEndBy` metaSep where start = do char '[' skipSpaces skipMany endOfLine end = char ']' metaItem :: P MetadataItem metaItem = do itemKey <- key <* emptyLines itemVal <- optional $ do metaItemSep <* emptyLines option "" quotedText skipSpaces <* emptyLines pure (itemKey, itemVal) metaSep :: P () metaSep = do void $ char ',' skipSpaces emptyLines ------------------------------------------------------------------------------- escape :: P Char escape = label "escape sequence" $ do void $ char '\\' choice [ oneOf @[] "#{}[]:='\"\\" , char 'n' $> '\n' , char 't' $> '\t' , char 'r' $> '\r' , char 'u' *> unicodeEscape ] unicodeEscape :: P Char unicodeEscape = Char.chr <$> hexadecimal keyLike :: P Text keyLike = do void . lookAhead $ satisfy isValidKeyHead takeWhile1P (Just "key") isValidKeyTail key :: P Key key = token $ unsafeMakeKey <$> keyLike metaItemSep :: P () metaItemSep = token . void $ char ':' <|> char '=' quotedText :: P Text quotedText = do delim <- char '\'' <|> char '"' parts <- many $ choice [ Text.Lazy.singleton <$> escape , Text.Lazy.fromStrict <$> takeWhile1P (Just "quoted text") (\ch -> ch /= delim && ch /= '\\') ] void $ char delim skipSpaces pure . Text.Lazy.toStrict . fold $ parts fragment :: P Text fragment = text text :: P Text text = do parts <- word `sepBy1` textSpace pure . Text.Lazy.toStrict . Text.Lazy.intercalate " " $ parts textSpace :: P () textSpace = try $ do skipSpaces1 notFollowedBy $ void (char '#') <|> newlineOrEOF word :: P Text.Lazy.Text word = fmap fold . some $ choice [ Text.Lazy.singleton <$> escape , Text.Lazy.fromStrict <$> takeWhile1P (Just "plain text") (\ch -> not $ HashSet.member ch reserved || Char.isSpace ch) ] where reserved = HashSet.fromList "#{}\\" ------------------------------------------------------------------------------- comment :: P () comment = label "comment" $ do void $ string "##" void $ skipManyTill anySingle (lookAhead newlineOrEOF) endOfLine :: P () endOfLine = -- This rule is a bit hairy! Specifically, there was a bug at the end of -- a file that ended with a comment and no trailing newline. -- -- Because we use `endOfLine` in repeat productions (many, some), -- endOfLine _has_ to consume input to prevent looping forever. -- In order to satisfy this: -- -- 1. If its the end of a file, then we _must_ consume a comment. -- 2. If it's not the end of a file, then we _must_ consume at least -- one newline. commentThenNewline <|> commentThenEOF where commentThenEOF = comment <* eof commentThenNewline = try $ do optional_ comment void Megaparsec.newline skipSpaces endOfLines :: P () endOfLines = skipSome endOfLine emptyLines :: P () emptyLines = skipMany endOfLine spaceChar :: P () spaceChar = do notFollowedBy newlineOrEOF void Megaparsec.spaceChar skipSpaces :: P () skipSpaces = skipMany spaceChar skipSpaces1 :: P () skipSpaces1 = skipSome spaceChar token :: P a -> P a token = (<* skipSpaces) newlineOrEOF :: P () newlineOrEOF = void Megaparsec.newline <|> eof ------------------------------------------------------------------------------- optional_ :: P a -> P () optional_ = option () . void ------------------------------------------------------------------------------- itemToMetadata :: MetadataItem -> Metadata itemToMetadata (k, Just v ) = Metadata mempty (Assoc $ HashMap.singleton k v) itemToMetadata (k, Nothing) = Metadata (Set $ HashSet.singleton k) mempty annotateSource :: P (Maybe Location -> a) -> P a annotateSource (P (ReaderT r)) = P . ReaderT $ \src -> do offset <- Offset . fromIntegral <$> getOffset result <- r src sourceLoc <- maybe (fail sourceLocationError) pure $ getLocation offset src pure . result $ Just sourceLoc sourceLocationError :: String sourceLocationError = "UNEXPECTED: Failed to create a source location."