{-#LANGUAGE NoImplicitPrelude #-}
{-#LANGUAGE OverloadedStrings #-}
{-#LANGUAGE TypeFamilies #-}
{-#LANGUAGE MultiParamTypeClasses #-}
{-#LANGUAGE FlexibleInstances #-}
{-#LANGUAGE FlexibleContexts #-}
{-#LANGUAGE LambdaCase #-}
{-#LANGUAGE TupleSections #-}
{-#LANGUAGE TypeApplications #-}
module Web.Sprinkles.Backends.Parsers
( parseBackendData
)
where
import Web.Sprinkles.Prelude
import Web.Sprinkles.Backends.Data
( BackendData (..)
, BackendMeta (..)
, BackendSource (..)
, Verification (..)
, toBackendData
, addBackendDataChildren
, rawToLBS, rawFromLBS
)
import Web.Sprinkles.Backends.Spec
( parserTypes
, ParserType (..)
)
import Web.Sprinkles.Pandoc (pandocReaderOptions)
import Text.Pandoc (Pandoc, PandocPure)
import qualified Text.Pandoc as Pandoc
import qualified Text.Pandoc.MediaBag as Pandoc
import qualified Text.Pandoc.Readers.Creole as Pandoc
import Text.Pandoc.Error (PandocError)
import Network.Mime (MimeType)
import Text.Ginger (ToGVal (..), GVal, Run (..), dict, (~>))
import qualified Data.Aeson as JSON
import qualified Data.Aeson.TH as JSON
import qualified Data.Yaml as YAML
import Data.Char (ord)
import Web.Sprinkles.PandocGVal
import Network.HTTP.Types (parseQuery, queryToQueryText)
parseBackendData :: (MonadIO m, Monad m, Monad n)
=> BackendSource
-> m (BackendData p n h)
parseBackendData item@(BackendSource meta body _) = do
let t = takeWhile (/= fromIntegral (ord ';')) (bmMimeType meta)
parse = fromMaybe parseRawData $ lookup t parsersTable
parse item
parsersTable :: (MonadIO m, Monad m, Monad n)
=> HashMap MimeType (BackendSource -> m (BackendData p n h))
parsersTable = mapFromList . mconcat $
[ zip mimeTypes (repeat parser) | (mimeTypes, parser) <- parsers ]
parsers :: (MonadIO m, Monad m, Monad n)
=> [([MimeType], BackendSource -> m (BackendData p n h))]
parsers =
[ (types, getParser p) | (types, p) <- parserTypes ]
getParser :: (MonadIO m, Monad m, Monad n)
=> ParserType
-> (BackendSource -> m (BackendData p n h))
getParser ParserJSON = json
getParser ParserYAML = yaml
getParser ParserFormUrlencoded = urlencodedForm
getParser ParserMarkdown = pandoc (Pandoc.readMarkdown pandocReaderOptions)
getParser ParserCreole = pandoc (Pandoc.readCreole pandocReaderOptions)
getParser ParserTextile = pandoc (Pandoc.readTextile pandocReaderOptions)
getParser ParserRST = pandoc (Pandoc.readRST pandocReaderOptions)
getParser ParserLaTeX = pandoc (Pandoc.readLaTeX pandocReaderOptions)
getParser ParserDocX = pandocWithMedia (Pandoc.readDocx pandocReaderOptions)
getParser ParserText = plainText
getParser ParserHtml = pandoc (Pandoc.readHtml pandocReaderOptions)
knownContentTypes :: [MimeType]
knownContentTypes = concatMap fst parserTypes
parseRawData :: Monad m => BackendSource -> m (BackendData p n h)
parseRawData (BackendSource meta body veri) =
return BackendData
{ bdJSON = JSON.Null
, bdGVal = toGVal JSON.Null
, bdMeta = meta
, bdRaw = body
, bdChildren = mapFromList []
, bdVerification = veri
}
plainText :: (MonadIO m, Monad m) => BackendSource -> m (BackendData p n h)
plainText item@(BackendSource meta body _) = do
textBody <- liftIO $ toStrict . decodeUtf8 @LText <$> rawToLBS body
return $ toBackendData item textBody
json :: (MonadIO m, Monad m) => BackendSource -> m (BackendData p n h)
json item@(BackendSource meta body _) = do
bodyBytes <- liftIO $ rawToLBS body
case JSON.eitherDecode bodyBytes of
Left err -> fail $ err ++ "\n" ++ show bodyBytes
Right json -> return . toBackendData item $ (json :: JSON.Value)
yaml :: (MonadIO m, Monad m) => BackendSource -> m (BackendData p n h)
yaml item@(BackendSource meta body _) = do
bodyBytes <- liftIO $ rawToLBS body
case YAML.decodeEither' (toStrict bodyBytes) of
Left err -> fail $ YAML.prettyPrintParseException err ++ "\n" ++ show bodyBytes
Right json -> return . toBackendData item $ (json :: JSON.Value)
urlencodedForm :: (MonadIO m, Monad m) => BackendSource -> m (BackendData p n h)
urlencodedForm item@(BackendSource meta body _) = do
bodyBytes <- liftIO $ rawToLBS body
return .
toBackendData item .
asTextHashMap .
mapFromList .
queryToQueryText .
parseQuery .
toStrict $ bodyBytes
where
asTextHashMap :: HashMap Text (Maybe Text) -> HashMap Text (Maybe Text)
asTextHashMap = id
pandocBS :: (MonadIO m, Monad m, Monad n)
=> (LByteString -> PandocPure Pandoc)
-> BackendSource
-> m (BackendData p n h)
pandocBS reader input@(BackendSource meta body _) = do
bodyBytes <- liftIO $ rawToLBS body
case Pandoc.runPure $ reader bodyBytes of
Left err -> fail . show $ err
Right pandoc -> return $ toBackendData input pandoc
pandocWithMedia :: (MonadIO m, Monad m, Monad n)
=> (LByteString -> PandocPure Pandoc)
-> BackendSource
-> m (BackendData p n h)
pandocWithMedia reader input@(BackendSource meta body _) = do
bodyBytes <- liftIO $ rawToLBS body
let reader' = do
pandoc <- reader bodyBytes
mediaBag <- Pandoc.getMediaBag
return (pandoc, mediaBag)
case Pandoc.runPure reader' of
Left err -> fail . show $ err
Right (pandoc, mediaBag) -> do
let base = toBackendData input pandoc
children <- mapFromList <$> mediaBagToBackendData mediaBag
return $ addBackendDataChildren children base
mediaBagToBackendData :: (MonadIO m, Monad m, Monad n)
=> Pandoc.MediaBag
-> m [(Text, BackendData p n h)]
mediaBagToBackendData bag = do
let metas = Pandoc.mediaDirectory bag
forM metas $ \(path, mimeType, contentLength) -> do
(_, body) <- maybe
(fail $ "Media not found: " <> path)
return
(Pandoc.lookupMedia path bag)
let meta =
BackendMeta
{ bmMimeType = encodeUtf8 . pack @Text $ mimeType
, bmMTime = Nothing
, bmName = pack path
, bmPath = pack path
, bmSize = Just $ fromIntegral contentLength
}
(pack path,) <$> parseBackendData (BackendSource meta (rawFromLBS body) Trusted)
pandoc :: (MonadIO m, Monad m, Monad n)
=> (Text -> PandocPure Pandoc)
-> BackendSource
-> m (BackendData p n h)
pandoc reader =
pandocBS (reader . toStrict . decodeUtf8)