{-#LANGUAGE NoImplicitPrelude #-} {-#LANGUAGE OverloadedStrings #-} {-#LANGUAGE TypeFamilies #-} {-#LANGUAGE MultiParamTypeClasses #-} {-#LANGUAGE FlexibleInstances #-} {-#LANGUAGE FlexibleContexts #-} {-#LANGUAGE LambdaCase #-} {-#LANGUAGE TupleSections #-} {-#LANGUAGE TypeApplications #-} -- | Parse raw backend data into useful data structures. module Web.Sprinkles.Backends.Parsers ( parseBackendData ) where import Web.Sprinkles.Prelude import Web.Sprinkles.Backends.Data ( BackendData (..) , BackendMeta (..) , BackendSource (..) , Verification (..) , toBackendData , addBackendDataChildren , rawToLBS, rawFromLBS ) import Web.Sprinkles.Backends.Spec ( parserTypes , ParserType (..) ) import Web.Sprinkles.Pandoc (pandocReaderOptions) import Text.Pandoc (Pandoc, PandocPure) import qualified Text.Pandoc as Pandoc import qualified Text.Pandoc.MediaBag as Pandoc import qualified Text.Pandoc.Readers.Creole as Pandoc import Text.Pandoc.Error (PandocError) import Network.Mime (MimeType) import Text.Ginger (ToGVal (..), GVal, Run (..), dict, (~>)) import qualified Data.Aeson as JSON import qualified Data.Aeson.TH as JSON import qualified Data.Yaml as YAML import Data.Char (ord) import Web.Sprinkles.PandocGVal import Network.HTTP.Types (parseQuery, queryToQueryText) -- | Parse raw backend data source into a structured backend data record. parseBackendData :: (MonadIO m, Monad m, Monad n) => BackendSource -> m (BackendData p n h) parseBackendData item@(BackendSource meta body _) = do let t = takeWhile (/= fromIntegral (ord ';')) (bmMimeType meta) parse = fromMaybe parseRawData $ lookup t parsersTable parse item -- | Lookup table of mime types to parsers. parsersTable :: (MonadIO m, Monad m, Monad n) => HashMap MimeType (BackendSource -> m (BackendData p n h)) parsersTable = mapFromList . mconcat $ [ zip mimeTypes (repeat parser) | (mimeTypes, parser) <- parsers ] -- | The parsers we know, by mime types. parsers :: (MonadIO m, Monad m, Monad n) => [([MimeType], BackendSource -> m (BackendData p n h))] parsers = [ (types, getParser p) | (types, p) <- parserTypes ] getParser :: (MonadIO m, Monad m, Monad n) => ParserType -> (BackendSource -> m (BackendData p n h)) getParser ParserJSON = json getParser ParserYAML = yaml getParser ParserFormUrlencoded = urlencodedForm getParser ParserMarkdown = pandoc (Pandoc.readMarkdown pandocReaderOptions) getParser ParserCreole = pandoc (Pandoc.readCreole pandocReaderOptions) getParser ParserTextile = pandoc (Pandoc.readTextile pandocReaderOptions) getParser ParserRST = pandoc (Pandoc.readRST pandocReaderOptions) getParser ParserLaTeX = pandoc (Pandoc.readLaTeX pandocReaderOptions) getParser ParserDocX = pandocWithMedia (Pandoc.readDocx pandocReaderOptions) getParser ParserText = plainText getParser ParserHtml = pandoc (Pandoc.readHtml pandocReaderOptions) -- | All the content types we know how to parse knownContentTypes :: [MimeType] knownContentTypes = concatMap fst parserTypes -- | Parser for raw data (used for static files); this is also the default -- fallback for otherwise unsupported file types. parseRawData :: Monad m => BackendSource -> m (BackendData p n h) parseRawData (BackendSource meta body veri) = return BackendData { bdJSON = JSON.Null , bdGVal = toGVal JSON.Null , bdMeta = meta , bdRaw = body , bdChildren = mapFromList [] , bdVerification = veri } -- | Parser for (utf-8) plaintext documents. plainText :: (MonadIO m, Monad m) => BackendSource -> m (BackendData p n h) plainText item@(BackendSource meta body _) = do textBody <- liftIO $ toStrict . decodeUtf8 @LText <$> rawToLBS body return $ toBackendData item textBody -- | Parser for JSON source data. json :: (MonadIO m, Monad m) => BackendSource -> m (BackendData p n h) json item@(BackendSource meta body _) = do bodyBytes <- liftIO $ rawToLBS body case JSON.eitherDecode bodyBytes of Left err -> fail $ err ++ "\n" ++ show bodyBytes Right json -> return . toBackendData item $ (json :: JSON.Value) -- | Parser for YAML source data. yaml :: (MonadIO m, Monad m) => BackendSource -> m (BackendData p n h) yaml item@(BackendSource meta body _) = do bodyBytes <- liftIO $ rawToLBS body case YAML.decodeEither' (toStrict bodyBytes) of Left err -> fail $ YAML.prettyPrintParseException err ++ "\n" ++ show bodyBytes Right json -> return . toBackendData item $ (json :: JSON.Value) urlencodedForm :: (MonadIO m, Monad m) => BackendSource -> m (BackendData p n h) urlencodedForm item@(BackendSource meta body _) = do bodyBytes <- liftIO $ rawToLBS body return . toBackendData item . asTextHashMap . mapFromList . queryToQueryText . parseQuery . toStrict $ bodyBytes where asTextHashMap :: HashMap Text (Maybe Text) -> HashMap Text (Maybe Text) asTextHashMap = id -- | Parser for Pandoc-supported formats that are read from 'LByteString's. pandocBS :: (MonadIO m, Monad m, Monad n) => (LByteString -> PandocPure Pandoc) -> BackendSource -> m (BackendData p n h) pandocBS reader input@(BackendSource meta body _) = do bodyBytes <- liftIO $ rawToLBS body case Pandoc.runPure $ reader bodyBytes of Left err -> fail . show $ err Right pandoc -> return $ toBackendData input pandoc -- | Parser for Pandoc-supported formats that are read from 'LByteString's, and -- return a 'Pandoc' document plus a 'MediaBag'. pandocWithMedia :: (MonadIO m, Monad m, Monad n) => (LByteString -> PandocPure Pandoc) -> BackendSource -> m (BackendData p n h) pandocWithMedia reader input@(BackendSource meta body _) = do bodyBytes <- liftIO $ rawToLBS body let reader' = do pandoc <- reader bodyBytes mediaBag <- Pandoc.getMediaBag return (pandoc, mediaBag) case Pandoc.runPure reader' of Left err -> fail . show $ err Right (pandoc, mediaBag) -> do -- TODO: marshal mediaBag to backend data item children let base = toBackendData input pandoc children <- mapFromList <$> mediaBagToBackendData mediaBag return $ addBackendDataChildren children base mediaBagToBackendData :: (MonadIO m, Monad m, Monad n) => Pandoc.MediaBag -> m [(Text, BackendData p n h)] mediaBagToBackendData bag = do let metas = Pandoc.mediaDirectory bag forM metas $ \(path, mimeType, contentLength) -> do (_, body) <- maybe (fail $ "Media not found: " <> path) return (Pandoc.lookupMedia path bag) let meta = BackendMeta { bmMimeType = encodeUtf8 . pack @Text $ mimeType , bmMTime = Nothing , bmName = pack path , bmPath = pack path , bmSize = Just $ fromIntegral contentLength } (pack path,) <$> parseBackendData (BackendSource meta (rawFromLBS body) Trusted) -- | Parser for Pandoc-supported formats that are read from 'String's. pandoc :: (MonadIO m, Monad m, Monad n) => (Text -> PandocPure Pandoc) -> BackendSource -> m (BackendData p n h) pandoc reader = pandocBS (reader . toStrict . decodeUtf8)