{-# LANGUAGE CPP #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE UndecidableInstances #-}
{-# LANGUAGE ViewPatterns #-}
module Text.Pandoc.Class.PandocMonad
( PandocMonad(..)
, getPOSIXTime
, getZonedTime
, readFileFromDirs
, report
, setTrace
, setRequestHeader
, getLog
, setVerbosity
, getVerbosity
, getMediaBag
, setMediaBag
, insertMedia
, setUserDataDir
, getUserDataDir
, fetchItem
, fetchMediaResource
, getInputFiles
, setInputFiles
, getOutputFile
, setOutputFile
, setResourcePath
, getResourcePath
, readDefaultDataFile
, readDataFile
, fillMediaBag
, toLang
, setTranslations
, translateTerm
, makeCanonical
) where
import Codec.Archive.Zip
import Control.Monad.Except (MonadError (catchError, throwError),
MonadTrans, lift, when)
import Data.Digest.Pure.SHA (sha1, showDigest)
import Data.Maybe (fromMaybe)
import Data.Time (UTCTime)
import Data.Time.Clock.POSIX (POSIXTime, utcTimeToPOSIXSeconds)
import Data.Time.LocalTime (TimeZone, ZonedTime, utcToZonedTime)
import Network.URI ( escapeURIString, nonStrictRelativeTo,
unEscapeString, parseURIReference, isAllowedInURI,
parseURI, URI(..) )
import System.FilePath ((</>), (<.>), takeExtension, dropExtension,
isRelative, splitDirectories)
import System.Random (StdGen)
import Text.Pandoc.BCP47 (Lang(..), parseBCP47, renderLang)
import Text.Pandoc.Class.CommonState (CommonState (..))
import Text.Pandoc.Definition
import Text.Pandoc.Error
import Text.Pandoc.Logging
import Text.Pandoc.MIME (MimeType, getMimeType, extensionFromMimeType)
import Text.Pandoc.MediaBag (MediaBag, lookupMedia)
import Text.Pandoc.Shared (uriPathToPath)
import Text.Pandoc.Translations (Term(..), Translations, lookupTerm,
readTranslations)
import Text.Pandoc.Walk (walkM)
import Text.Parsec (ParsecT, getPosition, sourceLine, sourceName)
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as BL
import qualified Data.Text as T
import qualified Debug.Trace
import qualified System.FilePath.Posix as Posix
import qualified Text.Pandoc.MediaBag as MB
import qualified Text.Pandoc.UTF8 as UTF8
#ifdef EMBED_DATA_FILES
import Text.Pandoc.Data (dataFiles)
#endif
-- | The class of monads in which pandoc's effectful operations can be run.
-- Most methods are primitives whose behaviour is supplied entirely by the
-- instance; 'getsCommonState', 'modifyCommonState' and 'trace' have
-- defaults written in terms of the other methods.
class (Functor m, Applicative m, Monad m, MonadError PandocError m)
      => PandocMonad m where
  -- Primitive operations: no default is provided for any of these.
  lookupEnv :: T.Text -> m (Maybe T.Text)
  getCurrentTime :: m UTCTime
  getCurrentTimeZone :: m TimeZone
  newStdGen :: m StdGen
  newUniqueHash :: m Int
  openURL :: T.Text -> m (B.ByteString, Maybe MimeType)
  readFileLazy :: FilePath -> m BL.ByteString
  readFileStrict :: FilePath -> m B.ByteString
  glob :: String -> m [FilePath]
  fileExists :: FilePath -> m Bool
  getDataFileName :: FilePath -> m FilePath
  getModificationTime :: FilePath -> m UTCTime

  -- | Retrieve the current 'CommonState'.
  getCommonState :: m CommonState

  -- | Replace the current 'CommonState'.
  putCommonState :: CommonState -> m ()

  -- | Apply a projection to the current 'CommonState'.  The default maps
  -- the projection over the result of 'getCommonState'.
  getsCommonState :: (CommonState -> a) -> m a
  getsCommonState f = fmap f getCommonState

  -- | Transform the 'CommonState'.  The default reads the state with
  -- 'getCommonState' and writes the transformed value back with
  -- 'putCommonState'.
  modifyCommonState :: (CommonState -> CommonState) -> m ()
  modifyCommonState f = do
    st <- getCommonState
    putCommonState (f st)

  -- | Emit a single log message; no default is provided.
  logOutput :: LogMessage -> m ()

  -- | Emit a debugging message.  The default does nothing unless
  -- 'stTrace' is set, in which case the message is passed to
  -- 'Debug.Trace.trace' with a @[trace] @ prefix.
  trace :: T.Text -> m ()
  trace msg = do
    enabled <- getsCommonState stTrace
    when enabled $
      Debug.Trace.trace ("[trace] " ++ T.unpack msg) (return ())
-- | Store the given verbosity level in the common state.
setVerbosity :: PandocMonad m => Verbosity -> m ()
setVerbosity level = modifyCommonState applyLevel
  where
    applyLevel st = st{ stVerbosity = level }
-- | Read the verbosity level currently held in the common state.
getVerbosity :: PandocMonad m => m Verbosity
getVerbosity =
  getsCommonState (\st -> stVerbosity st)
-- | Return the accumulated log messages.  The stored list ('stLog') is
-- reversed before being returned.
getLog :: PandocMonad m => m [LogMessage]
getLog = fmap reverse (getsCommonState stLog)
-- | Record a log message.  The message is passed to 'logOutput' only when
-- its own verbosity level does not exceed the configured one, but it is
-- always prepended to 'stLog'.
report :: PandocMonad m => LogMessage -> m ()
report msg = do
  threshold <- getsCommonState stVerbosity
  when (messageVerbosity msg <= threshold) (logOutput msg)
  modifyCommonState (\st -> st{ stLog = msg : stLog st })
-- | Set the 'stTrace' flag, which 'trace' consults before emitting
-- anything.
setTrace :: PandocMonad m => Bool -> m ()
setTrace useTracing = modifyCommonState toggle
  where
    toggle st = st{ stTrace = useTracing }
-- | Set a request header in the common state.  Any existing entries with
-- the same name are removed, and the new pair is placed at the front of
-- 'stRequestHeaders'.
setRequestHeader :: PandocMonad m
                 => T.Text  -- ^ Header name
                 -> T.Text  -- ^ Value
                 -> m ()
setRequestHeader name val = modifyCommonState $ \st ->
  let others = [ hdr | hdr@(n, _) <- stRequestHeaders st, n /= name ]
  in  st{ stRequestHeaders = (name, val) : others }
-- | Replace the media bag held in the common state.
setMediaBag :: PandocMonad m => MediaBag -> m ()
setMediaBag mb = modifyCommonState replaceBag
  where
    replaceBag st = st{ stMediaBag = mb }
-- | Read the media bag held in the common state.
getMediaBag :: PandocMonad m => m MediaBag
getMediaBag =
  getsCommonState (\st -> stMediaBag st)
-- | Insert an item into the media bag: the current bag is read, the item
-- is added with 'MB.insertMedia', and the result is stored back.
insertMedia :: PandocMonad m => FilePath -> Maybe MimeType -> BL.ByteString -> m ()
insertMedia fp mime bs =
  getMediaBag >>= \bag -> setMediaBag (MB.insertMedia fp mime bs bag)
-- | Read the list of input files held in the common state.
getInputFiles :: PandocMonad m => m [FilePath]
getInputFiles =
  getsCommonState (\st -> stInputFiles st)
-- | Store the list of input files and derive 'stSourceURL' from it.
-- The source URL is set only when the first entry parses as an absolute
-- URI whose scheme is @http:@ or @https:@; the stored value is that URI
-- with its query and fragment removed.  Otherwise it is 'Nothing'.
setInputFiles :: PandocMonad m => [FilePath] -> m ()
setInputFiles fs =
  modifyCommonState $ \st ->
    st{ stInputFiles = fs
      , stSourceURL = fmap T.pack sourceURL }
  where
    -- Only the head of the list is ever inspected.
    sourceURL = case fs of
      (firstFile : _)
        | Just uri <- parseURI firstFile
        , uriScheme uri `elem` ["http:","https:"]
        -> Just (show uri{ uriQuery = "", uriFragment = "" })
      _ -> Nothing
-- | Read the output file (if any) held in the common state.
getOutputFile :: PandocMonad m => m (Maybe FilePath)
getOutputFile =
  getsCommonState (\st -> stOutputFile st)
-- | Store the output file (or 'Nothing') in the common state.
setOutputFile :: PandocMonad m => Maybe FilePath -> m ()
setOutputFile mbf = modifyCommonState update
  where
    update st = st{ stOutputFile = mbf }
-- | Read the resource search path held in the common state.
getResourcePath :: PandocMonad m => m [FilePath]
getResourcePath =
  getsCommonState (\st -> stResourcePath st)
-- | Store the resource search path in the common state.
setResourcePath :: PandocMonad m => [FilePath] -> m ()
setResourcePath ps = modifyCommonState update
  where
    update st = st{ stResourcePath = ps }
-- | The result of 'getCurrentTime', converted to POSIX seconds.
getPOSIXTime :: PandocMonad m => m POSIXTime
getPOSIXTime = fmap utcTimeToPOSIXSeconds getCurrentTime
-- | The result of 'getCurrentTime' expressed in the zone returned by
-- 'getCurrentTimeZone'.  The time is queried before the zone.
getZonedTime :: PandocMonad m => m ZonedTime
getZonedTime =
  flip utcToZonedTime <$> getCurrentTime <*> getCurrentTimeZone
-- | Try to read a file, relative to each of the given directories in
-- turn, decoding it as UTF-8.  The first directory in which the read
-- succeeds wins; any error raised by a read simply moves on to the next
-- directory.  Returns 'Nothing' when every directory fails (or the list
-- is empty).
readFileFromDirs :: PandocMonad m => [FilePath] -> FilePath -> m (Maybe T.Text)
readFileFromDirs dirs f = foldr tryDir (return Nothing) dirs
  where
    tryDir dir next = attempt dir `catchError` const next
    attempt dir = do
      contents <- readFileLazy (dir </> f)
      return (Just (T.pack (UTF8.toStringLazy contents)))
-- | Convert an optional BCP 47 language tag to a 'Lang'.  'Nothing' is
-- passed through unchanged.  A tag that 'parseBCP47' rejects is reported
-- as 'InvalidLang' and also yields 'Nothing'.
toLang :: PandocMonad m => Maybe T.Text -> m (Maybe Lang)
toLang = maybe (return Nothing) parseTag
  where
    parseTag tag =
      either (const (rejected tag)) (return . Just) (parseBCP47 tag)
    rejected tag = report (InvalidLang tag) >> return Nothing
-- | Select the language for which translations are wanted.  Only the
-- language is stored here; the translation table itself is left as
-- 'Nothing' in the state.
setTranslations :: PandocMonad m => Lang -> m ()
setTranslations lang = modifyCommonState selectLang
  where
    selectLang st = st{ stTranslations = Just (lang, Nothing) }
-- | Return the translation table for the language chosen with
-- 'setTranslations'.
--
-- * If no language has been set, the result is 'mempty'.
-- * If a table is already cached in the state, it is returned as is.
-- * Otherwise @translations/\<full tag\>.yaml@ is read with 'readDataFile';
--   if that raises an error, @translations/\<language\>.yaml@ is tried.
--   A successfully parsed table is cached in the state.  On a parse
--   failure, or when both reads fail, 'CouldNotLoadTranslations' is
--   reported, 'stTranslations' is reset to 'Nothing', and 'mempty' is
--   returned.
getTranslations :: PandocMonad m => m Translations
getTranslations = do
  cached <- getsCommonState stTranslations
  case cached of
    Nothing               -> return mempty
    Just (_, Just loaded) -> return loaded
    Just (lang, Nothing)  -> do
      let langName     = renderLang lang
          yamlFor name = "translations/" <> name <> ".yaml"
          primaryPath  = yamlFor langName
          fallbackPath = yamlFor (langLanguage lang)
          store value  = modifyCommonState $ \st ->
                           st{ stTranslations = value }
          -- Report the failure, forget the language and yield nothing.
          giveUp detail = do
            report $ CouldNotLoadTranslations langName detail
            store Nothing
            return mempty
          load path = do
            raw <- readDataFile path
            case readTranslations (UTF8.toText raw) of
              Left err     -> giveUp (T.pack path <> ": " <> err)
              Right loaded -> do
                store (Just (lang, Just loaded))
                return loaded
          -- Only a missing data file gets an explanatory message.
          describe (PandocCouldNotFindDataFileError _) =
            "data file " <> fallbackPath <> " not found"
          describe _ = ""
      load (T.unpack primaryPath) `catchError` \_ ->
        load (T.unpack fallbackPath) `catchError` (giveUp . describe)
-- | Look up the translation of a term in the table returned by
-- 'getTranslations'.  When the term has no translation, 'NoTranslation'
-- is reported and the empty text is returned.
translateTerm :: PandocMonad m => Term -> m T.Text
translateTerm term = do
  table <- getTranslations
  maybe untranslated return (lookupTerm term table)
  where
    untranslated = do
      report (NoTranslation (T.pack (show term)))
      return ""
-- | A variant of 'parseURIReference' that rejects references whose scheme
-- consists of a single letter (presumably these are Windows drive letters
-- such as @C:@ rather than real URI schemes).
--
-- 'uriScheme' keeps the trailing colon, so a one-letter scheme is the
-- two-character string @"c:"@.  A scheme is therefore never exactly one
-- character long, and matching on @[_]@ would never reject anything.
parseURIReference' :: T.Text -> Maybe URI
parseURIReference' s = do
  u <- parseURIReference (T.unpack s)
  case uriScheme u of
    [_, ':'] -> Nothing
    _        -> Just u
-- | Store the user data directory (or 'Nothing') in the common state.
setUserDataDir :: PandocMonad m
               => Maybe FilePath
               -> m ()
setUserDataDir mbfp = modifyCommonState update
  where
    update st = st{ stUserDataDir = mbfp }
-- | Read the user data directory (if any) held in the common state.
getUserDataDir :: PandocMonad m
               => m (Maybe FilePath)
getUserDataDir =
  getsCommonState (\st -> stUserDataDir st)
-- | Fetch an item together with its MIME type (when known).  The media
-- bag is consulted first; only when the item is not found there is it
-- obtained via 'downloadOrRead'.
fetchItem :: PandocMonad m
          => T.Text
          -> m (B.ByteString, Maybe MimeType)
fetchItem s = do
  bag <- getMediaBag
  maybe (downloadOrRead s) fromBag (lookupMedia (T.unpack s) bag)
  where
    fromBag (mime, contents) = return (BL.toStrict contents, Just mime)
-- | Obtain a resource either over the network ('openURL') or from the
-- local file system.  Backslashes in the reference are turned into
-- slashes and the result is URI-escaped before it is inspected.
--
-- 1. If 'stSourceURL' is set and parses, the reference is resolved
--    relative to it and opened as a URL.
-- 2. Otherwise, a reference starting with @//@ whose third character is
--    not @?@ is resolved against a bare @http:@ scheme and opened as a
--    URL.
-- 3. Otherwise the reference is parsed as an absolute URI: a @file:@ URI
--    is read from disk, any other URI whose scheme string is longer than
--    two characters is opened as a URL, and everything else is read as a
--    local file (unescaped, with query and fragment stripped).
--
-- Local relative paths are searched for along the resource path.
downloadOrRead :: PandocMonad m
               => T.Text
               -> m (B.ByteString, Maybe MimeType)
downloadOrRead s = do
  mbSource <- getsCommonState stSourceURL
  let target = ensureEscaped s
  case mbSource >>= parseURIReference' . ensureEscaped of
    Just base -> openRelativeTo base target
    Nothing
      | isSchemeRelative (T.unpack target) -> openRelativeTo httpcolon target
      | otherwise ->
          case parseURI (T.unpack target) of
            Just uri
              | uriScheme uri == "file:" ->
                  readLocalFile $ uriPathToPath (T.pack $ uriPath uri)
              -- The scheme string includes its colon, so this skips
              -- one-letter schemes.
              | length (uriScheme uri) > 2 ->
                  openURL (T.pack $ show uri)
            _ -> readLocalFile fp
  where
    -- Resolve the reference against a base when it parses; open it
    -- verbatim otherwise.
    openRelativeTo base ref =
      openURL $ case parseURIReference' ref of
        Just uri -> T.pack $ show $ uri `nonStrictRelativeTo` base
        Nothing  -> ref
    isSchemeRelative ('/':'/':c:_) = c /= '?'
    isSchemeRelative _             = False
    readLocalFile path = do
      searchDirs <- getResourcePath
      contents <- if isRelative path
                    then withPaths searchDirs readFileStrict path
                    else readFileStrict path
      return (contents, mime)
    httpcolon = URI{ uriScheme = "http:",
                     uriAuthority = Nothing,
                     uriPath = "",
                     uriQuery = "",
                     uriFragment = "" }
    dropFragmentAndQuery = T.takeWhile (\c -> c /= '?' && c /= '#')
    fp = unEscapeString $ T.unpack $ dropFragmentAndQuery s
    -- The MIME type is guessed from the extension, looking through the
    -- .gz and .svgz compression suffixes.
    mime = getMimeType $ case takeExtension fp of
      ".gz"   -> dropExtension fp
      ".svgz" -> dropExtension fp ++ ".svg"
      x       -> x
    ensureEscaped txt =
      T.pack (escapeURIString isAllowedInURI (T.unpack (T.map convertSlash txt)))
    convertSlash '\\' = '/'
    convertSlash x    = x
-- | Build the reference docx archive.  If the user data directory is set
-- and contains a @reference.docx@, that file is read and used.  Otherwise
-- an archive is assembled from the individual @docx/...@ data files, each
-- stamped with the time at which its entry is created.
getDefaultReferenceDocx :: PandocMonad m => m Archive
getDefaultReferenceDocx = do
  userDir <- getUserDataDir
  userReference <- maybe (return Nothing) locate userDir
  case userReference of
    Just file -> toArchive <$> readFileLazy file
    Nothing   -> foldr addEntryToArchive emptyArchive <$>
                   mapM buildEntry entryPaths
  where
    locate dir = do
      let candidate = dir </> "reference.docx"
      found <- fileExists candidate
      return $ if found then Just candidate else Nothing
    buildEntry path = do
      now <- getCurrentTime
      raw <- readDataFile ("docx/" ++ path)
      return $ toEntry path (floor (utcTimeToPOSIXSeconds now))
                            (BL.fromStrict raw)
    entryPaths :: [FilePath]
    entryPaths =
      [ "[Content_Types].xml"
      , "_rels/.rels"
      , "docProps/app.xml"
      , "docProps/core.xml"
      , "docProps/custom.xml"
      , "word/document.xml"
      , "word/fontTable.xml"
      , "word/footnotes.xml"
      , "word/comments.xml"
      , "word/numbering.xml"
      , "word/settings.xml"
      , "word/webSettings.xml"
      , "word/styles.xml"
      , "word/_rels/document.xml.rels"
      , "word/_rels/footnotes.xml.rels"
      , "word/theme/theme1.xml"
      ]
-- | Build the reference ODT archive.  If the user data directory is set
-- and contains a @reference.odt@, that file is read and used.  Otherwise
-- an archive is assembled from the individual @odt/...@ data files, each
-- stamped with the time at which its entry is created.
getDefaultReferenceODT :: PandocMonad m => m Archive
getDefaultReferenceODT = do
  userDir <- getUserDataDir
  userReference <- maybe (return Nothing) locate userDir
  case userReference of
    Just file -> toArchive <$> readFileLazy file
    Nothing   -> foldr addEntryToArchive emptyArchive <$>
                   mapM buildEntry entryPaths
  where
    locate dir = do
      let candidate = dir </> "reference.odt"
      found <- fileExists candidate
      return $ if found then Just candidate else Nothing
    buildEntry path = do
      stamp <- floor <$> getPOSIXTime
      raw <- readDataFile ("odt/" ++ path)
      return $ toEntry path stamp (BL.fromStrict raw)
    entryPaths :: [FilePath]
    entryPaths =
      [ "mimetype"
      , "manifest.rdf"
      , "styles.xml"
      , "content.xml"
      , "meta.xml"
      , "settings.xml"
      , "Configurations2/accelerator/current.xml"
      , "Thumbnails/thumbnail.png"
      , "META-INF/manifest.xml"
      ]
-- | Build the reference pptx archive.  If the user data directory is set
-- and contains a @reference.pptx@, that file is read and used.  Otherwise
-- an archive is assembled from the individual @pptx/...@ data files, each
-- stamped with the time at which its entry is created.
getDefaultReferencePptx :: PandocMonad m => m Archive
getDefaultReferencePptx = do
  userDir <- getUserDataDir
  userReference <- maybe (return Nothing) locate userDir
  case userReference of
    Just file -> toArchive <$> readFileLazy file
    Nothing   -> foldr addEntryToArchive emptyArchive <$>
                   mapM buildEntry entryPaths
  where
    locate dir = do
      let candidate = dir </> "reference.pptx"
      found <- fileExists candidate
      return $ if found then Just candidate else Nothing
    buildEntry path = do
      now <- getCurrentTime
      raw <- readDataFile ("pptx/" ++ path)
      return $ toEntry path (floor (utcTimeToPOSIXSeconds now))
                            (BL.fromStrict raw)
    entryPaths :: [FilePath]
    entryPaths =
      [ "[Content_Types].xml"
      , "_rels/.rels"
      , "docProps/app.xml"
      , "docProps/core.xml"
      , "ppt/_rels/presentation.xml.rels"
      , "ppt/presProps.xml"
      , "ppt/presentation.xml"
      , "ppt/slideLayouts/_rels/slideLayout1.xml.rels"
      , "ppt/slideLayouts/_rels/slideLayout2.xml.rels"
      , "ppt/slideLayouts/_rels/slideLayout3.xml.rels"
      , "ppt/slideLayouts/_rels/slideLayout4.xml.rels"
      , "ppt/slideLayouts/_rels/slideLayout5.xml.rels"
      , "ppt/slideLayouts/_rels/slideLayout6.xml.rels"
      , "ppt/slideLayouts/_rels/slideLayout7.xml.rels"
      , "ppt/slideLayouts/_rels/slideLayout8.xml.rels"
      , "ppt/slideLayouts/_rels/slideLayout9.xml.rels"
      , "ppt/slideLayouts/_rels/slideLayout10.xml.rels"
      , "ppt/slideLayouts/_rels/slideLayout11.xml.rels"
      , "ppt/slideLayouts/slideLayout1.xml"
      , "ppt/slideLayouts/slideLayout10.xml"
      , "ppt/slideLayouts/slideLayout11.xml"
      , "ppt/slideLayouts/slideLayout2.xml"
      , "ppt/slideLayouts/slideLayout3.xml"
      , "ppt/slideLayouts/slideLayout4.xml"
      , "ppt/slideLayouts/slideLayout5.xml"
      , "ppt/slideLayouts/slideLayout6.xml"
      , "ppt/slideLayouts/slideLayout7.xml"
      , "ppt/slideLayouts/slideLayout8.xml"
      , "ppt/slideLayouts/slideLayout9.xml"
      , "ppt/slideMasters/_rels/slideMaster1.xml.rels"
      , "ppt/slideMasters/slideMaster1.xml"
      , "ppt/slides/_rels/slide1.xml.rels"
      , "ppt/slides/slide1.xml"
      , "ppt/slides/_rels/slide2.xml.rels"
      , "ppt/slides/slide2.xml"
      , "ppt/slides/_rels/slide3.xml.rels"
      , "ppt/slides/slide3.xml"
      , "ppt/slides/_rels/slide4.xml.rels"
      , "ppt/slides/slide4.xml"
      , "ppt/tableStyles.xml"
      , "ppt/theme/theme1.xml"
      , "ppt/viewProps.xml"
      , "ppt/notesMasters/notesMaster1.xml"
      , "ppt/notesMasters/_rels/notesMaster1.xml.rels"
      , "ppt/notesSlides/notesSlide1.xml"
      , "ppt/notesSlides/_rels/notesSlide1.xml.rels"
      , "ppt/notesSlides/notesSlide2.xml"
      , "ppt/notesSlides/_rels/notesSlide2.xml.rels"
      , "ppt/theme/theme2.xml"
      ]
-- | Read a data file.  When a user data directory is set and the file
-- exists inside it, that copy is read; in every other case the lookup
-- falls back to 'readDefaultDataFile'.
readDataFile :: PandocMonad m => FilePath -> m B.ByteString
readDataFile fname =
  getUserDataDir >>= maybe (readDefaultDataFile fname) fromUserDir
  where
    fromUserDir userDir = do
      let candidate = userDir </> fname
      found <- fileExists candidate
      if found
        then readFileStrict candidate
        else readDefaultDataFile fname
-- | Read a file from the default data files, ignoring the user data
-- directory at this level.  The three @reference.*@ names are not looked
-- up directly: they are produced by serialising the archive returned by
-- the matching @getDefaultReference*@ function.  When @EMBED_DATA_FILES@
-- is defined, every other name is canonicalised and looked up in
-- 'dataFiles'; otherwise it is resolved with 'getDataFileName' (under
-- @data/@, except for @MANUAL.txt@) and read from disk.  A file that
-- cannot be found raises 'PandocCouldNotFindDataFileError'.
readDefaultDataFile :: PandocMonad m => FilePath -> m B.ByteString
readDefaultDataFile "reference.docx" =
  BL.toStrict . fromArchive <$> getDefaultReferenceDocx
readDefaultDataFile "reference.pptx" =
  BL.toStrict . fromArchive <$> getDefaultReferencePptx
readDefaultDataFile "reference.odt" =
  BL.toStrict . fromArchive <$> getDefaultReferenceODT
readDefaultDataFile fname =
#ifdef EMBED_DATA_FILES
  maybe (throwError $ PandocCouldNotFindDataFileError $ T.pack fname)
        return
        (lookup (makeCanonical fname) dataFiles)
#else
  do located <- getDataFileName relativeName
     verified <- checkExistence located
     readFileStrict verified
  where
    relativeName
      | fname == "MANUAL.txt" = fname
      | otherwise             = "data" </> fname

-- | Return the path unchanged when the file exists; otherwise raise
-- 'PandocCouldNotFindDataFileError' carrying that path.
checkExistence :: PandocMonad m => FilePath -> m FilePath
checkExistence fn = do
  found <- fileExists fn
  when (not found) $
    throwError $ PandocCouldNotFindDataFileError $ T.pack fn
  return fn
#endif
-- | Canonicalise a file path: @.@ components are dropped and each @..@
-- cancels the component before it.  The result is joined with POSIX
-- separators.
--
-- A @..@ with nothing before it to cancel is kept, and further @..@
-- components stack on top of it instead of cancelling it, so
-- @..\/..\/foo@ stays @..\/..\/foo@ rather than collapsing to @foo@.
makeCanonical :: FilePath -> FilePath
makeCanonical = Posix.joinPath . reverse . collapse [] . splitDirectories
  where
    -- The accumulator holds the components kept so far, most recent
    -- first.  It is forced at every step so no chain of thunks builds up.
    collapse acc []            = acc
    collapse acc (part : rest) =
      let acc' = step acc part
      in  acc' `seq` collapse acc' rest
    step acc           "."  = acc
    step acc@(".." : _) ".." = ".." : acc   -- cannot cancel an unresolved ".."
    step (_ : acc)     ".." = acc
    step acc           part = part : acc
-- | Run an action on a file path, prefixed by each of the given
-- directories in turn, until one attempt succeeds.  Any error raised by
-- an attempt moves on to the next directory; once the list is exhausted,
-- 'PandocResourceNotFound' is raised with the unprefixed path.
withPaths :: PandocMonad m => [FilePath] -> (FilePath -> m a) -> FilePath -> m a
withPaths dirs action fp = foldr attempt notFound dirs
  where
    attempt dir next = action (dir </> fp) `catchError` const next
    notFound = throwError $ PandocResourceNotFound $ T.pack fp
-- | Fetch a resource with 'downloadOrRead' and give it a content-derived
-- file name: the SHA-1 digest of the contents, plus an extension.  The
-- extension comes from the MIME type when one is known and maps to an
-- extension, and from the source text otherwise.  Returns the file name,
-- the MIME type and the contents.
fetchMediaResource :: PandocMonad m
                   => T.Text -> m (FilePath, Maybe MimeType, BL.ByteString)
fetchMediaResource src = do
  (strictBytes, mimeType) <- downloadOrRead src
  let lazyBytes    = BL.fromChunks [strictBytes]
      srcExtension = T.pack (takeExtension (T.unpack src))
      extension    = fromMaybe srcExtension
                       (mimeType >>= extensionFromMimeType)
      fileName     = showDigest (sha1 lazyBytes) <.> T.unpack extension
  return (fileName, mimeType, lazyBytes)
-- | Walk a document and make sure every image is in the media bag.
-- Images already in the bag are left untouched.  The others are fetched,
-- inserted under their content-derived file name, and rewritten to point
-- at that name.  An image that cannot be fetched ('PandocResourceNotFound'
-- or 'PandocHttpError') is reported and replaced by a 'Span' of class
-- @image@ holding its description; any other error is rethrown.
fillMediaBag :: PandocMonad m => Pandoc -> m Pandoc
fillMediaBag = walkM handleImage
  where
    handleImage :: PandocMonad m => Inline -> m Inline
    handleImage (Image attr lab (src, tit)) =
      ensureInBag `catchError` recover
      where
        ensureInBag = do
          bag <- getMediaBag
          case lookupMedia (T.unpack src) bag of
            Just (_, _) -> return $ Image attr lab (src, tit)
            Nothing -> do
              (fname, mt, bs) <- fetchMediaResource src
              insertMedia fname mt bs
              return $ Image attr lab (T.pack fname, tit)
        -- What an unfetchable image is replaced with.
        placeholder = Span ("",["image"],[]) lab
        recover (PandocResourceNotFound _) = do
          report $ CouldNotFetchResource src
                     "replacing image with description"
          return placeholder
        recover (PandocHttpError u er) = do
          report $ CouldNotFetchResource u
                     (T.pack $ show er ++ "\rReplacing image with description.")
          return placeholder
        recover e = throwError e
    handleImage x = return x
-- | Any monad transformer stacked on a 'PandocMonad' is itself a
-- 'PandocMonad': every method defined here simply lifts the corresponding
-- operation of the underlying monad.
instance (MonadTrans t, PandocMonad m, Functor (t m),
          MonadError PandocError (t m), Monad (t m),
          Applicative (t m)) => PandocMonad (t m) where
  lookupEnv name           = lift (lookupEnv name)
  getCurrentTime           = lift getCurrentTime
  getCurrentTimeZone       = lift getCurrentTimeZone
  newStdGen                = lift newStdGen
  newUniqueHash            = lift newUniqueHash
  openURL url              = lift (openURL url)
  readFileLazy path        = lift (readFileLazy path)
  readFileStrict path      = lift (readFileStrict path)
  glob pattern             = lift (glob pattern)
  fileExists path          = lift (fileExists path)
  getDataFileName path     = lift (getDataFileName path)
  getModificationTime path = lift (getModificationTime path)
  getCommonState           = lift getCommonState
  putCommonState st        = lift (putCommonState st)
  logOutput msg            = lift (logOutput msg)
-- | Instance for parsers running over a 'PandocMonad'.  All operations
-- are lifted from the underlying monad except 'trace', which is
-- overridden so that the message also carries the parser's current line
-- number.
instance {-# OVERLAPS #-} PandocMonad m => PandocMonad (ParsecT s st m) where
  lookupEnv name           = lift (lookupEnv name)
  getCurrentTime           = lift getCurrentTime
  getCurrentTimeZone       = lift getCurrentTimeZone
  newStdGen                = lift newStdGen
  newUniqueHash            = lift newUniqueHash
  openURL url              = lift (openURL url)
  readFileLazy path        = lift (readFileLazy path)
  readFileStrict path      = lift (readFileStrict path)
  glob pattern             = lift (glob pattern)
  fileExists path          = lift (fileExists path)
  getDataFileName path     = lift (getDataFileName path)
  getModificationTime path = lift (getModificationTime path)
  getCommonState           = lift getCommonState
  putCommonState st        = lift (putCommonState st)
  trace msg = do
    enabled <- getsCommonState stTrace
    when enabled $ do
      pos <- getPosition
      let -- Positions whose source name is "chunk" get an extra note.
          chunkNote
            | sourceName pos == "chunk" = " of chunk"
            | otherwise                 = ""
          message = "[trace] Parsed " ++ T.unpack msg ++ " at line " ++
                    show (sourceLine pos) ++ chunkNote
      Debug.Trace.trace message (return ())
  logOutput msg            = lift (logOutput msg)