module Text.Pandoc.App (
convertWithOpts
, Opt(..)
, LineEnding(..)
, Filter(..)
, defaultOpts
, parseOptions
, options
, applyFilters
) where
import qualified Control.Exception as E
import Control.Monad
import Control.Monad.Except (catchError, throwError)
import Control.Monad.Trans
import Data.Aeson (defaultOptions)
import Data.Aeson.TH (deriveJSON)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Lazy as B
import Data.Char (toLower, toUpper)
import Data.List (find, intercalate, isPrefixOf, isSuffixOf, sort)
import qualified Data.Map as M
import Data.Maybe (fromMaybe, isJust, isNothing)
import Data.Monoid
import qualified Data.Set as Set
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TE
import qualified Data.Text.Encoding.Error as TE
import Data.Yaml (decode)
import qualified Data.Yaml as Yaml
import GHC.Generics
import Network.URI (URI (..), parseURI)
import Paths_pandoc (getDataDir)
import Data.Aeson.Encode.Pretty (encodePretty', Config(..), keyOrder,
defConfig, Indent(..), NumberFormat(..))
import Skylighting (Style, Syntax (..), defaultSyntaxMap, parseTheme,
pygments)
import Skylighting.Parser (addSyntaxDefinition, parseSyntaxDefinition)
import System.Console.GetOpt
import System.Directory (getAppUserDataDirectory)
import System.Environment (getArgs, getProgName)
import System.Exit (exitSuccess)
import System.FilePath
import System.IO (nativeNewline, stdout)
import qualified System.IO as IO (Newline (..))
import System.IO.Error (isDoesNotExistError)
import Text.Pandoc
import Text.Pandoc.BCP47 (Lang (..), parseBCP47)
import Text.Pandoc.Builder (setMeta, deleteMeta)
import Text.Pandoc.Filter (Filter (JSONFilter, LuaFilter), applyFilters)
import Text.Pandoc.Highlighting (highlightingStyles)
import Text.Pandoc.PDF (makePDF)
import Text.Pandoc.SelfContained (makeDataURI, makeSelfContained)
import Text.Pandoc.Shared (eastAsianLineBreakFilter, stripEmptyParagraphs,
headerShift, isURI, ordNub, safeRead, tabFilter)
import qualified Text.Pandoc.UTF8 as UTF8
import Text.Pandoc.Writers.Math (defaultKaTeXURL, defaultMathJaxURL)
import Text.Pandoc.XML (toEntities)
import Text.Printf
#ifndef _WINDOWS
import System.Posix.IO (stdOutput)
import System.Posix.Terminal (queryTerminal)
#endif
data LineEnding = LF | CRLF | Native deriving (Show, Generic)
parseOptions :: [OptDescr (Opt -> IO Opt)] -> Opt -> IO Opt
parseOptions options' defaults = do
rawArgs <- map UTF8.decodeArg <$> getArgs
prg <- getProgName
let (actions, args, unrecognizedOpts, errors) =
getOpt' Permute options' rawArgs
let unknownOptionErrors =
foldr (handleUnrecognizedOption . takeWhile (/= '=')) []
unrecognizedOpts
unless (null errors && null unknownOptionErrors) $
E.throwIO $ PandocOptionError $
concat errors ++ unlines unknownOptionErrors ++
("Try " ++ prg ++ " --help for more information.")
opts <- foldl (>>=) (return defaults) actions
return (opts{ optInputFiles = args })
latexEngines :: [String]
latexEngines = ["pdflatex", "lualatex", "xelatex"]
htmlEngines :: [String]
htmlEngines = ["wkhtmltopdf", "weasyprint", "prince"]
engines :: [(String, String)]
engines = map ("html",) htmlEngines ++
map ("html5",) htmlEngines ++
map ("latex",) latexEngines ++
map ("beamer",) latexEngines ++
[ ("ms", "pdfroff")
, ("context", "context")
]
pdfEngines :: [String]
pdfEngines = ordNub $ map snd engines
pdfWriterAndProg :: Maybe String
-> Maybe String
-> IO (String, Maybe String)
pdfWriterAndProg mWriter mEngine = do
let panErr msg = liftIO $ E.throwIO $ PandocAppError msg
case go mWriter mEngine of
Right (writ, prog) -> return (writ, Just prog)
Left err -> panErr err
where
go Nothing Nothing = Right ("latex", "pdflatex")
go (Just writer) Nothing = (writer,) <$> engineForWriter writer
go Nothing (Just engine) = (,engine) <$> writerForEngine engine
go (Just writer) (Just engine) =
case find (== (baseWriterName writer, engine)) engines of
Just _ -> Right (writer, engine)
Nothing -> Left $ "pdf-engine " ++ engine ++
" is not compatible with output format " ++ writer
writerForEngine eng = case [f | (f,e) <- engines, e == eng] of
fmt : _ -> Right fmt
[] -> Left $
"pdf-engine " ++ eng ++ " not known"
engineForWriter w = case [e | (f,e) <- engines, f == baseWriterName w] of
eng : _ -> Right eng
[] -> Left $
"cannot produce pdf output from " ++ w
convertWithOpts :: Opt -> IO ()
convertWithOpts opts = do
let outputFile = fromMaybe "-" (optOutputFile opts)
let filters = optFilters opts
let verbosity = optVerbosity opts
when (optDumpArgs opts) $
do UTF8.hPutStrLn stdout outputFile
mapM_ (UTF8.hPutStrLn stdout) (optInputFiles opts)
exitSuccess
epubMetadata <- case optEpubMetadata opts of
Nothing -> return Nothing
Just fp -> Just <$> UTF8.readFile fp
let isPandocCiteproc (JSONFilter f) = takeBaseName f == "pandoc-citeproc"
isPandocCiteproc _ = False
let needsCiteproc = isJust (lookup "bibliography" (optMetadata opts)) &&
optCiteMethod opts `notElem` [Natbib, Biblatex] &&
all (not . isPandocCiteproc) filters
let filters' = if needsCiteproc then JSONFilter "pandoc-citeproc" : filters
else filters
let sources = case optInputFiles opts of
[] -> ["-"]
xs | optIgnoreArgs opts -> ["-"]
| otherwise -> xs
datadir <- case optDataDir opts of
Nothing -> E.catch
(Just <$> getAppUserDataDirectory "pandoc")
(\e -> let _ = (e :: E.SomeException)
in return Nothing)
Just _ -> return $ optDataDir opts
let readerName = fromMaybe ( defaultReaderName
(if any isURI sources
then "html"
else "markdown") sources) (optReader opts)
let nonPdfWriterName Nothing = defaultWriterName outputFile
nonPdfWriterName (Just x) = x
let pdfOutput = map toLower (takeExtension outputFile) == ".pdf"
(writerName, maybePdfProg) <-
if pdfOutput
then pdfWriterAndProg (optWriter opts) (optPdfEngine opts)
else return (nonPdfWriterName $ optWriter opts, Nothing)
let format = baseWriterName
$ takeFileName writerName
(writer, writerExts) <-
if ".lua" `isSuffixOf` format
then return (TextWriter
(\o d -> writeCustom writerName o d)
:: Writer PandocIO, mempty)
else case getWriter writerName of
Left e -> E.throwIO $ PandocAppError $
if format == "pdf"
then e ++
"\nTo create a pdf using pandoc, use " ++
"-t latex|beamer|context|ms|html5" ++
"\nand specify an output file with " ++
".pdf extension (-o filename.pdf)."
else e
Right (w, es) -> return (w :: Writer PandocIO, es)
(reader, readerExts) <-
case getReader readerName of
Right (r, es) -> return (r :: Reader PandocIO, es)
Left e -> E.throwIO $ PandocAppError e'
where e' = case readerName of
"pdf" -> e ++
"\nPandoc can convert to PDF, but not from PDF."
"doc" -> e ++
"\nPandoc can convert from DOCX, but not from DOC.\nTry using Word to save your DOC file as DOCX, and convert that with pandoc."
_ -> e
let standalone = optStandalone opts || not (isTextFormat format) || pdfOutput
let addStringAsVariable varname s vars = return $ (varname, s) : vars
highlightStyle <- lookupHighlightStyle $ optHighlightStyle opts
let addSyntaxMap existingmap f = do
res <- parseSyntaxDefinition f
case res of
Left errstr -> E.throwIO $ PandocSyntaxMapError errstr
Right syn -> return $ addSyntaxDefinition syn existingmap
syntaxMap <- foldM addSyntaxMap defaultSyntaxMap
(optSyntaxDefinitions opts)
#ifdef _WINDOWS
let istty = True
#else
istty <- queryTerminal stdOutput
#endif
when (not (isTextFormat format) && istty && isNothing ( optOutputFile opts)) $
E.throwIO $ PandocAppError $
"Cannot write " ++ format ++ " output to terminal.\n" ++
"Specify an output file using the -o option, or " ++
"use '-o -' to force output to stdout."
let convertTabs = tabFilter (if optPreserveTabs opts || readerName == "t2t"
then 0
else optTabStop opts)
readSources :: [FilePath] -> PandocIO Text
readSources srcs = convertTabs . T.intercalate (T.pack "\n") <$>
mapM readSource srcs
let runIO' :: PandocIO a -> IO a
runIO' f = do
(res, reports) <- runIOorExplode $ do
setTrace (optTrace opts)
setVerbosity verbosity
x <- f
rs <- getLog
return (x, rs)
case optLogFile opts of
Nothing -> return ()
Just logfile -> B.writeFile logfile (encodeLogMessages reports)
let isWarning msg = messageVerbosity msg == WARNING
when (optFailIfWarnings opts && any isWarning reports) $
E.throwIO PandocFailOnWarningError
return res
let eol = case optEol opts of
CRLF -> IO.CRLF
LF -> IO.LF
Native -> nativeNewline
let withList _ [] vars = return vars
withList f (x:xs) vars = f x vars >>= withList f xs
let addContentsAsVariable varname fp vars = do
s <- UTF8.toString <$> readFileStrict fp
return $ (varname, s) : vars
runIO' $ do
setUserDataDir datadir
setInputFiles (optInputFiles opts)
setOutputFile (optOutputFile opts)
variables <-
withList (addStringAsVariable "sourcefile")
(reverse $ optInputFiles opts)
(("outputfile", fromMaybe "-" (optOutputFile opts))
: optVariables opts)
>>=
withList (addContentsAsVariable "include-before")
(optIncludeBeforeBody opts)
>>=
withList (addContentsAsVariable "include-after")
(optIncludeAfterBody opts)
>>=
withList (addContentsAsVariable "header-includes")
(optIncludeInHeader opts)
>>=
withList (addStringAsVariable "css") (optCss opts)
>>=
maybe return (addStringAsVariable "title-prefix")
(optTitlePrefix opts)
>>=
maybe return (addStringAsVariable "epub-cover-image")
(optEpubCoverImage opts)
>>=
(\vars -> case optHTMLMathMethod opts of
LaTeXMathML Nothing -> do
s <- UTF8.toString <$> readDataFile "LaTeXMathML.js"
return $ ("mathml-script", s) : vars
_ -> return vars)
>>=
(\vars -> if format == "dzslides"
then do
dztempl <- UTF8.toString <$> readDataFile
("dzslides" </> "template.html")
let dzline = "<!-- {{{{ dzslides core"
let dzcore = unlines
$ dropWhile (not . (dzline `isPrefixOf`))
$ lines dztempl
return $ ("dzslides-core", dzcore) : vars
else return vars)
abbrevs <- (Set.fromList . filter (not . null) . lines) <$>
case optAbbreviations opts of
Nothing -> UTF8.toString <$> readDataFile "abbreviations"
Just f -> UTF8.toString <$> readFileStrict f
templ <- case optTemplate opts of
_ | not standalone -> return Nothing
Nothing -> Just <$> getDefaultTemplate format
Just tp -> do
let tp' = case takeExtension tp of
"" -> tp <.> format
_ -> tp
Just . UTF8.toString <$>
(readFileStrict tp' `catchError`
(\e ->
case e of
PandocIOError _ e' |
isDoesNotExistError e' ->
readDataFile ("templates" </> tp')
_ -> throwError e))
metadata <- if format == "jats" &&
isNothing (lookup "csl" (optMetadata opts)) &&
isNothing (lookup "citation-style" (optMetadata opts))
then do
jatsCSL <- readDataFile "jats.csl"
let jatsEncoded = makeDataURI
("application/xml", jatsCSL)
return $ ("csl", jatsEncoded) : optMetadata opts
else return $ optMetadata opts
case lookup "lang" (optMetadata opts) of
Just l -> case parseBCP47 l of
Left _ -> return ()
Right l' -> setTranslations l'
Nothing -> setTranslations $ Lang "en" "" "US" []
let writerOptions = def {
writerTemplate = templ
, writerVariables = variables
, writerTabStop = optTabStop opts
, writerTableOfContents = optTableOfContents opts
, writerHTMLMathMethod = optHTMLMathMethod opts
, writerIncremental = optIncremental opts
, writerCiteMethod = optCiteMethod opts
, writerNumberSections = optNumberSections opts
, writerNumberOffset = optNumberOffset opts
, writerSectionDivs = optSectionDivs opts
, writerExtensions = writerExts
, writerReferenceLinks = optReferenceLinks opts
, writerReferenceLocation = optReferenceLocation opts
, writerDpi = optDpi opts
, writerWrapText = optWrapText opts
, writerColumns = optColumns opts
, writerEmailObfuscation = optEmailObfuscation opts
, writerIdentifierPrefix = optIdentifierPrefix opts
, writerHtmlQTags = optHtmlQTags opts
, writerTopLevelDivision = optTopLevelDivision opts
, writerListings = optListings opts
, writerSlideLevel = optSlideLevel opts
, writerHighlightStyle = highlightStyle
, writerSetextHeaders = optSetextHeaders opts
, writerEpubSubdirectory = optEpubSubdirectory opts
, writerEpubMetadata = epubMetadata
, writerEpubFonts = optEpubFonts opts
, writerEpubChapterLevel = optEpubChapterLevel opts
, writerTOCDepth = optTOCDepth opts
, writerReferenceDoc = optReferenceDoc opts
, writerSyntaxMap = syntaxMap
}
let readerOpts = def{
readerStandalone = standalone
, readerColumns = optColumns opts
, readerTabStop = optTabStop opts
, readerIndentedCodeClasses = optIndentedCodeClasses opts
, readerDefaultImageExtension =
optDefaultImageExtension opts
, readerTrackChanges = optTrackChanges opts
, readerAbbreviations = abbrevs
, readerExtensions = readerExts
, readerStripComments = optStripComments opts
}
let transforms = (case optBaseHeaderLevel opts of
x | x > 1 -> (headerShift (x 1) :)
| otherwise -> id) .
(if optStripEmptyParagraphs opts
then (stripEmptyParagraphs :)
else id) .
(if extensionEnabled Ext_east_asian_line_breaks
readerExts &&
not (extensionEnabled Ext_east_asian_line_breaks
writerExts &&
writerWrapText writerOptions == WrapPreserve)
then (eastAsianLineBreakFilter :)
else id) $
[]
let sourceToDoc :: [FilePath] -> PandocIO Pandoc
sourceToDoc sources' =
case reader of
TextReader r
| optFileScope opts || readerName == "json" ->
mconcat <$> mapM (readSource >=> r readerOpts) sources
| otherwise ->
readSources sources' >>= r readerOpts
ByteStringReader r ->
mconcat <$> mapM (readFile' >=> r readerOpts) sources
when (readerName == "markdown_github" ||
writerName == "markdown_github") $
report $ Deprecated "markdown_github" "Use gfm instead."
setResourcePath (optResourcePath opts)
mapM_ (uncurry setRequestHeader) (optRequestHeaders opts)
doc <- sourceToDoc sources >>=
( (if isJust (optExtractMedia opts)
then fillMediaBag
else return)
>=> return . addMetadata metadata
>=> applyTransforms transforms
>=> applyFilters readerOpts filters' [format]
>=> maybe return extractMedia (optExtractMedia opts)
)
case writer of
ByteStringWriter f -> f writerOptions doc >>= writeFnBinary outputFile
TextWriter f -> case maybePdfProg of
Just pdfProg -> do
res <- makePDF pdfProg (optPdfEngineArgs opts) f
writerOptions doc
case res of
Right pdf -> writeFnBinary outputFile pdf
Left err' -> liftIO $
E.throwIO $ PandocPDFError $
TL.unpack (TE.decodeUtf8With TE.lenientDecode err')
Nothing -> do
let htmlFormat = format `elem`
["html","html4","html5","s5","slidy",
"slideous","dzslides","revealjs"]
handleEntities = if (htmlFormat ||
format == "docbook4" ||
format == "docbook5" ||
format == "docbook") && optAscii opts
then toEntities
else id
addNl = if standalone
then id
else (<> T.singleton '\n')
output <- (addNl . handleEntities) <$> f writerOptions doc
writerFn eol outputFile =<<
if optSelfContained opts && htmlFormat
then T.pack <$> makeSelfContained (T.unpack output)
else return output
type Transform = Pandoc -> Pandoc
isTextFormat :: String -> Bool
isTextFormat s = s `notElem` ["odt","docx","epub2","epub3","epub","pptx"]
data Opt = Opt
{ optTabStop :: Int
, optPreserveTabs :: Bool
, optStandalone :: Bool
, optReader :: Maybe String
, optWriter :: Maybe String
, optTableOfContents :: Bool
, optBaseHeaderLevel :: Int
, optTemplate :: Maybe FilePath
, optVariables :: [(String,String)]
, optMetadata :: [(String, String)]
, optOutputFile :: Maybe FilePath
, optInputFiles :: [FilePath]
, optNumberSections :: Bool
, optNumberOffset :: [Int]
, optSectionDivs :: Bool
, optIncremental :: Bool
, optSelfContained :: Bool
, optHtmlQTags :: Bool
, optHighlightStyle :: Maybe String
, optSyntaxDefinitions :: [FilePath]
, optTopLevelDivision :: TopLevelDivision
, optHTMLMathMethod :: HTMLMathMethod
, optAbbreviations :: Maybe FilePath
, optReferenceDoc :: Maybe FilePath
, optEpubSubdirectory :: String
, optEpubMetadata :: Maybe FilePath
, optEpubFonts :: [FilePath]
, optEpubChapterLevel :: Int
, optEpubCoverImage :: Maybe FilePath
, optTOCDepth :: Int
, optDumpArgs :: Bool
, optIgnoreArgs :: Bool
, optVerbosity :: Verbosity
, optTrace :: Bool
, optLogFile :: Maybe FilePath
, optFailIfWarnings :: Bool
, optReferenceLinks :: Bool
, optReferenceLocation :: ReferenceLocation
, optDpi :: Int
, optWrapText :: WrapOption
, optColumns :: Int
, optFilters :: [Filter]
, optEmailObfuscation :: ObfuscationMethod
, optIdentifierPrefix :: String
, optStripEmptyParagraphs :: Bool
, optIndentedCodeClasses :: [String]
, optDataDir :: Maybe FilePath
, optCiteMethod :: CiteMethod
, optListings :: Bool
, optPdfEngine :: Maybe String
, optPdfEngineArgs :: [String]
, optSlideLevel :: Maybe Int
, optSetextHeaders :: Bool
, optAscii :: Bool
, optDefaultImageExtension :: String
, optExtractMedia :: Maybe FilePath
, optTrackChanges :: TrackChanges
, optFileScope :: Bool
, optTitlePrefix :: Maybe String
, optCss :: [FilePath]
, optIncludeBeforeBody :: [FilePath]
, optIncludeAfterBody :: [FilePath]
, optIncludeInHeader :: [FilePath]
, optResourcePath :: [FilePath]
, optRequestHeaders :: [(String, String)]
, optEol :: LineEnding
, optStripComments :: Bool
} deriving (Generic, Show)
defaultOpts :: Opt
defaultOpts = Opt
{ optTabStop = 4
, optPreserveTabs = False
, optStandalone = False
, optReader = Nothing
, optWriter = Nothing
, optTableOfContents = False
, optBaseHeaderLevel = 1
, optTemplate = Nothing
, optVariables = []
, optMetadata = []
, optOutputFile = Nothing
, optInputFiles = []
, optNumberSections = False
, optNumberOffset = [0,0,0,0,0,0]
, optSectionDivs = False
, optIncremental = False
, optSelfContained = False
, optHtmlQTags = False
, optHighlightStyle = Just "pygments"
, optSyntaxDefinitions = []
, optTopLevelDivision = TopLevelDefault
, optHTMLMathMethod = PlainMath
, optAbbreviations = Nothing
, optReferenceDoc = Nothing
, optEpubSubdirectory = "EPUB"
, optEpubMetadata = Nothing
, optEpubFonts = []
, optEpubChapterLevel = 1
, optEpubCoverImage = Nothing
, optTOCDepth = 3
, optDumpArgs = False
, optIgnoreArgs = False
, optVerbosity = WARNING
, optTrace = False
, optLogFile = Nothing
, optFailIfWarnings = False
, optReferenceLinks = False
, optReferenceLocation = EndOfDocument
, optDpi = 96
, optWrapText = WrapAuto
, optColumns = 72
, optFilters = []
, optEmailObfuscation = NoObfuscation
, optIdentifierPrefix = ""
, optStripEmptyParagraphs = False
, optIndentedCodeClasses = []
, optDataDir = Nothing
, optCiteMethod = Citeproc
, optListings = False
, optPdfEngine = Nothing
, optPdfEngineArgs = []
, optSlideLevel = Nothing
, optSetextHeaders = True
, optAscii = False
, optDefaultImageExtension = ""
, optExtractMedia = Nothing
, optTrackChanges = AcceptChanges
, optFileScope = False
, optTitlePrefix = Nothing
, optCss = []
, optIncludeBeforeBody = []
, optIncludeAfterBody = []
, optIncludeInHeader = []
, optResourcePath = ["."]
, optRequestHeaders = []
, optEol = Native
, optStripComments = False
}
addMetadata :: [(String, String)] -> Pandoc -> Pandoc
addMetadata kvs pdc = foldr addMeta (removeMetaKeys kvs pdc) kvs
addMeta :: (String, String) -> Pandoc -> Pandoc
addMeta (k, v) (Pandoc meta bs) = Pandoc meta' bs
where meta' = case lookupMeta k meta of
Nothing -> setMeta k v' meta
Just (MetaList xs) ->
setMeta k (MetaList (xs ++ [v'])) meta
Just x -> setMeta k (MetaList [x, v']) meta
v' = readMetaValue v
removeMetaKeys :: [(String,String)] -> Pandoc -> Pandoc
removeMetaKeys kvs pdc = foldr (deleteMeta . fst) pdc kvs
readMetaValue :: String -> MetaValue
readMetaValue s = case decode (UTF8.fromString s) of
Just (Yaml.String t) -> MetaString $ T.unpack t
Just (Yaml.Bool b) -> MetaBool b
_ -> MetaString s
defaultReaderName :: String -> [FilePath] -> String
defaultReaderName fallback [] = fallback
defaultReaderName fallback (x:xs) =
case takeExtension (map toLower x) of
".xhtml" -> "html"
".html" -> "html"
".htm" -> "html"
".md" -> "markdown"
".markdown" -> "markdown"
".muse" -> "muse"
".tex" -> "latex"
".latex" -> "latex"
".ltx" -> "latex"
".rst" -> "rst"
".org" -> "org"
".lhs" -> "markdown+lhs"
".db" -> "docbook"
".opml" -> "opml"
".wiki" -> "mediawiki"
".dokuwiki" -> "dokuwiki"
".textile" -> "textile"
".native" -> "native"
".json" -> "json"
".docx" -> "docx"
".t2t" -> "t2t"
".epub" -> "epub"
".odt" -> "odt"
".pdf" -> "pdf"
".doc" -> "doc"
_ -> defaultReaderName fallback xs
defaultWriterName :: FilePath -> String
defaultWriterName "-" = "html"
defaultWriterName x =
case takeExtension (map toLower x) of
"" -> "markdown"
".tex" -> "latex"
".latex" -> "latex"
".ltx" -> "latex"
".context" -> "context"
".ctx" -> "context"
".rtf" -> "rtf"
".rst" -> "rst"
".s5" -> "s5"
".native" -> "native"
".json" -> "json"
".txt" -> "markdown"
".text" -> "markdown"
".md" -> "markdown"
".muse" -> "muse"
".markdown" -> "markdown"
".textile" -> "textile"
".lhs" -> "markdown+lhs"
".texi" -> "texinfo"
".texinfo" -> "texinfo"
".db" -> "docbook"
".odt" -> "odt"
".docx" -> "docx"
".epub" -> "epub"
".org" -> "org"
".asciidoc" -> "asciidoc"
".adoc" -> "asciidoc"
".fb2" -> "fb2"
".opml" -> "opml"
".icml" -> "icml"
".tei.xml" -> "tei"
".tei" -> "tei"
".ms" -> "ms"
".roff" -> "ms"
".pptx" -> "pptx"
['.',y] | y `elem` ['1'..'9'] -> "man"
_ -> "html"
applyTransforms :: Monad m => [Transform] -> Pandoc -> m Pandoc
applyTransforms transforms d = return $ foldr ($) d transforms
readSource :: FilePath -> PandocIO Text
readSource "-" = liftIO (UTF8.toText <$> BS.getContents)
readSource src = case parseURI src of
Just u | uriScheme u `elem` ["http:","https:"] ->
readURI src
| uriScheme u == "file:" ->
liftIO $ UTF8.toText <$>
BS.readFile (uriPath u)
_ -> liftIO $ UTF8.toText <$>
BS.readFile src
readURI :: FilePath -> PandocIO Text
readURI src = UTF8.toText . fst <$> openURL src
readFile' :: MonadIO m => FilePath -> m B.ByteString
readFile' "-" = liftIO B.getContents
readFile' f = liftIO $ B.readFile f
writeFnBinary :: MonadIO m => FilePath -> B.ByteString -> m ()
writeFnBinary "-" = liftIO . B.putStr
writeFnBinary f = liftIO . B.writeFile (UTF8.encodePath f)
writerFn :: MonadIO m => IO.Newline -> FilePath -> Text -> m ()
writerFn eol "-" = liftIO . UTF8.putStrWith eol . T.unpack
writerFn eol f = liftIO . UTF8.writeFileWith eol f . T.unpack
lookupHighlightStyle :: Maybe String -> IO (Maybe Style)
lookupHighlightStyle Nothing = return Nothing
lookupHighlightStyle (Just s)
| takeExtension s == ".theme" =
do contents <- B.readFile s
case parseTheme contents of
Left _ -> E.throwIO $ PandocOptionError $
"Could not read highlighting theme " ++ s
Right sty -> return (Just sty)
| otherwise =
case lookup (map toLower s) highlightingStyles of
Just sty -> return (Just sty)
Nothing -> E.throwIO $ PandocOptionError $
"Unknown highlight-style " ++ s
options :: [OptDescr (Opt -> IO Opt)]
options =
[ Option "fr" ["from","read"]
(ReqArg
(\arg opt -> return opt { optReader =
Just (map toLower arg) })
"FORMAT")
""
, Option "tw" ["to","write"]
(ReqArg
(\arg opt -> return opt { optWriter =
Just (map toLower arg) })
"FORMAT")
""
, Option "o" ["output"]
(ReqArg
(\arg opt -> return opt { optOutputFile = Just arg })
"FILE")
""
, Option "" ["data-dir"]
(ReqArg
(\arg opt -> return opt { optDataDir = Just arg })
"DIRECTORY")
""
, Option "" ["base-header-level"]
(ReqArg
(\arg opt ->
case safeRead arg of
Just t | t > 0 && t < 6 ->
return opt{ optBaseHeaderLevel = t }
_ -> E.throwIO $ PandocOptionError
"base-header-level must be 1-5")
"NUMBER")
""
, Option "" ["strip-empty-paragraphs"]
(NoArg
(\opt -> do
deprecatedOption "--stripEmptyParagraphs"
"Use +empty_paragraphs extension."
return opt{ optStripEmptyParagraphs = True }))
""
, Option "" ["indented-code-classes"]
(ReqArg
(\arg opt -> return opt { optIndentedCodeClasses = words $
map (\c -> if c == ',' then ' ' else c) arg })
"STRING")
""
, Option "F" ["filter"]
(ReqArg
(\arg opt -> return opt { optFilters =
JSONFilter arg : optFilters opt })
"PROGRAM")
""
, Option "" ["lua-filter"]
(ReqArg
(\arg opt -> return opt { optFilters =
LuaFilter arg : optFilters opt })
"SCRIPTPATH")
""
, Option "p" ["preserve-tabs"]
(NoArg
(\opt -> return opt { optPreserveTabs = True }))
""
, Option "" ["tab-stop"]
(ReqArg
(\arg opt ->
case safeRead arg of
Just t | t > 0 -> return opt { optTabStop = t }
_ -> E.throwIO $ PandocOptionError
"tab-stop must be a number greater than 0")
"NUMBER")
""
, Option "" ["track-changes"]
(ReqArg
(\arg opt -> do
action <- case arg of
"accept" -> return AcceptChanges
"reject" -> return RejectChanges
"all" -> return AllChanges
_ -> E.throwIO $ PandocOptionError
("Unknown option for track-changes: " ++ arg)
return opt { optTrackChanges = action })
"accept|reject|all")
""
, Option "" ["file-scope"]
(NoArg
(\opt -> return opt { optFileScope = True }))
""
, Option "" ["extract-media"]
(ReqArg
(\arg opt ->
return opt { optExtractMedia = Just arg })
"PATH")
""
, Option "s" ["standalone"]
(NoArg
(\opt -> return opt { optStandalone = True }))
""
, Option "" ["template"]
(ReqArg
(\arg opt ->
return opt{ optTemplate = Just arg,
optStandalone = True })
"FILE")
""
, Option "M" ["metadata"]
(ReqArg
(\arg opt -> do
let (key, val) = splitField arg
return opt{ optMetadata = (key, val) : optMetadata opt })
"KEY[:VALUE]")
""
, Option "V" ["variable"]
(ReqArg
(\arg opt -> do
let (key, val) = splitField arg
return opt{ optVariables = (key, val) : optVariables opt })
"KEY[:VALUE]")
""
, Option "D" ["print-default-template"]
(ReqArg
(\arg _ -> do
templ <- runIO $ do
setUserDataDir Nothing
getDefaultTemplate arg
case templ of
Right t -> UTF8.hPutStr stdout t
Left e -> E.throwIO e
exitSuccess)
"FORMAT")
""
, Option "" ["print-default-data-file"]
(ReqArg
(\arg _ -> do
runIOorExplode $
readDefaultDataFile arg >>= liftIO . BS.hPutStr stdout
exitSuccess)
"FILE")
""
, Option "" ["print-highlight-style"]
(ReqArg
(\arg _ -> do
sty <- fromMaybe pygments <$>
lookupHighlightStyle (Just arg)
B.putStr $ encodePretty'
defConfig{confIndent = Spaces 4
,confCompare = keyOrder
(map T.pack
["text-color"
,"background-color"
,"line-number-color"
,"line-number-background-color"
,"bold"
,"italic"
,"underline"
,"text-styles"])
,confNumFormat = Generic
,confTrailingNewline = True} sty
exitSuccess)
"STYLE|FILE")
""
, Option "" ["dpi"]
(ReqArg
(\arg opt ->
case safeRead arg of
Just t | t > 0 -> return opt { optDpi = t }
_ -> E.throwIO $ PandocOptionError
"dpi must be a number greater than 0")
"NUMBER")
""
, Option "" ["eol"]
(ReqArg
(\arg opt ->
case toLower <$> arg of
"crlf" -> return opt { optEol = CRLF }
"lf" -> return opt { optEol = LF }
"native" -> return opt { optEol = Native }
_ -> E.throwIO $ PandocOptionError
"--eol must be crlf, lf, or native")
"crlf|lf|native")
""
, Option "" ["wrap"]
(ReqArg
(\arg opt ->
case safeRead ("Wrap" ++ uppercaseFirstLetter arg) of
Just o -> return opt { optWrapText = o }
Nothing -> E.throwIO $ PandocOptionError
"--wrap must be auto, none, or preserve")
"auto|none|preserve")
""
, Option "" ["columns"]
(ReqArg
(\arg opt ->
case safeRead arg of
Just t | t > 0 -> return opt { optColumns = t }
_ -> E.throwIO $ PandocOptionError
"columns must be a number greater than 0")
"NUMBER")
""
, Option "" ["strip-comments"]
(NoArg
(\opt -> return opt { optStripComments = True }))
""
, Option "" ["toc", "table-of-contents"]
(NoArg
(\opt -> return opt { optTableOfContents = True }))
""
, Option "" ["toc-depth"]
(ReqArg
(\arg opt ->
case safeRead arg of
Just t | t >= 1 && t <= 6 ->
return opt { optTOCDepth = t }
_ -> E.throwIO $ PandocOptionError
"TOC level must be a number between 1 and 6")
"NUMBER")
""
, Option "" ["no-highlight"]
(NoArg
(\opt -> return opt { optHighlightStyle = Nothing }))
""
, Option "" ["highlight-style"]
(ReqArg
(\arg opt -> return opt{ optHighlightStyle = Just arg })
"STYLE|FILE")
""
, Option "" ["syntax-definition"]
(ReqArg
(\arg opt -> return opt{ optSyntaxDefinitions = arg :
optSyntaxDefinitions opt })
"FILE")
""
, Option "H" ["include-in-header"]
(ReqArg
(\arg opt -> return opt{ optIncludeInHeader =
arg : optIncludeInHeader opt,
optStandalone = True })
"FILE")
""
, Option "B" ["include-before-body"]
(ReqArg
(\arg opt -> return opt{ optIncludeBeforeBody =
arg : optIncludeBeforeBody opt,
optStandalone = True })
"FILE")
""
, Option "A" ["include-after-body"]
(ReqArg
(\arg opt -> return opt{ optIncludeAfterBody =
arg : optIncludeAfterBody opt,
optStandalone = True })
"FILE")
""
, Option "" ["resource-path"]
(ReqArg
(\arg opt -> return opt { optResourcePath =
splitSearchPath arg })
"SEARCHPATH")
""
, Option "" ["request-header"]
(ReqArg
(\arg opt -> do
let (key, val) = splitField arg
return opt{ optRequestHeaders =
(key, val) : optRequestHeaders opt })
"NAME:VALUE")
""
, Option "" ["self-contained"]
(NoArg
(\opt -> return opt { optSelfContained = True,
optStandalone = True }))
""
, Option "" ["html-q-tags"]
(NoArg
(\opt ->
return opt { optHtmlQTags = True }))
""
, Option "" ["ascii"]
(NoArg
(\opt -> return opt { optAscii = True }))
""
, Option "" ["reference-links"]
(NoArg
(\opt -> return opt { optReferenceLinks = True } ))
""
, Option "" ["reference-location"]
(ReqArg
(\arg opt -> do
action <- case arg of
"block" -> return EndOfBlock
"section" -> return EndOfSection
"document" -> return EndOfDocument
_ -> E.throwIO $ PandocOptionError
("Unknown option for reference-location: " ++ arg)
return opt { optReferenceLocation = action })
"block|section|document")
""
, Option "" ["atx-headers"]
(NoArg
(\opt -> return opt { optSetextHeaders = False } ))
""
, Option "" ["top-level-division"]
(ReqArg
(\arg opt -> do
let tldName = "TopLevel" ++ uppercaseFirstLetter arg
case safeRead tldName of
Just tlDiv -> return opt { optTopLevelDivision = tlDiv }
_ -> E.throwIO $ PandocOptionError
("Top-level division must be " ++
"section, chapter, part, or default"))
"section|chapter|part")
""
, Option "N" ["number-sections"]
(NoArg
(\opt -> return opt { optNumberSections = True }))
""
, Option "" ["number-offset"]
(ReqArg
(\arg opt ->
case safeRead ('[':arg ++ "]") of
Just ns -> return opt { optNumberOffset = ns,
optNumberSections = True }
_ -> E.throwIO $ PandocOptionError
"could not parse number-offset")
"NUMBERS")
""
, Option "" ["listings"]
(NoArg
(\opt -> return opt { optListings = True }))
""
, Option "i" ["incremental"]
(NoArg
(\opt -> return opt { optIncremental = True }))
""
, Option "" ["slide-level"]
(ReqArg
(\arg opt ->
case safeRead arg of
Just t | t >= 1 && t <= 6 ->
return opt { optSlideLevel = Just t }
_ -> E.throwIO $ PandocOptionError
"slide level must be a number between 1 and 6")
"NUMBER")
""
, Option "" ["section-divs"]
(NoArg
(\opt -> return opt { optSectionDivs = True }))
""
, Option "" ["default-image-extension"]
(ReqArg
(\arg opt -> return opt { optDefaultImageExtension = arg })
"extension")
""
, Option "" ["email-obfuscation"]
(ReqArg
(\arg opt -> do
method <- case arg of
"references" -> return ReferenceObfuscation
"javascript" -> return JavascriptObfuscation
"none" -> return NoObfuscation
_ -> E.throwIO $ PandocOptionError
("Unknown obfuscation method: " ++ arg)
return opt { optEmailObfuscation = method })
"none|javascript|references")
""
, Option "" ["id-prefix"]
(ReqArg
(\arg opt -> return opt { optIdentifierPrefix = arg })
"STRING")
""
, Option "T" ["title-prefix"]
(ReqArg
(\arg opt -> do
let newvars = ("title-prefix", arg) : optVariables opt
return opt { optVariables = newvars,
optStandalone = True })
"STRING")
""
, Option "c" ["css"]
(ReqArg
(\arg opt -> return opt{ optCss = arg : optCss opt })
"URL")
""
, Option "" ["reference-doc"]
(ReqArg
(\arg opt ->
return opt { optReferenceDoc = Just arg })
"FILE")
""
, Option "" ["epub-subdirectory"]
(ReqArg
(\arg opt ->
return opt { optEpubSubdirectory = arg })
"DIRNAME")
""
, Option "" ["epub-cover-image"]
(ReqArg
(\arg opt ->
return opt { optVariables =
("epub-cover-image", arg) : optVariables opt })
"FILE")
""
, Option "" ["epub-metadata"]
(ReqArg
(\arg opt -> return opt { optEpubMetadata = Just arg })
"FILE")
""
, Option "" ["epub-embed-font"]
(ReqArg
(\arg opt ->
return opt{ optEpubFonts = arg : optEpubFonts opt })
"FILE")
""
, Option "" ["epub-chapter-level"]
(ReqArg
(\arg opt ->
case safeRead arg of
Just t | t >= 1 && t <= 6 ->
return opt { optEpubChapterLevel = t }
_ -> E.throwIO $ PandocOptionError
"chapter level must be a number between 1 and 6")
"NUMBER")
""
, Option "" ["pdf-engine"]
(ReqArg
(\arg opt -> do
let b = takeBaseName arg
if b `elem` pdfEngines
then return opt { optPdfEngine = Just arg }
else E.throwIO $ PandocOptionError $ "pdf-engine must be one of "
++ intercalate ", " pdfEngines)
"PROGRAM")
""
, Option "" ["pdf-engine-opt"]
(ReqArg
(\arg opt -> do
let oldArgs = optPdfEngineArgs opt
return opt { optPdfEngineArgs = oldArgs ++ [arg]})
"STRING")
""
, Option "" ["bibliography"]
(ReqArg
(\arg opt -> return opt{ optMetadata =
("bibliography", arg) : optMetadata opt })
"FILE")
""
, Option "" ["csl"]
(ReqArg
(\arg opt ->
return opt{ optMetadata =
("csl", arg) : optMetadata opt })
"FILE")
""
, Option "" ["citation-abbreviations"]
(ReqArg
(\arg opt ->
return opt{ optMetadata =
("citation-abbreviations", arg): optMetadata opt })
"FILE")
""
, Option "" ["natbib"]
(NoArg
(\opt -> return opt { optCiteMethod = Natbib }))
""
, Option "" ["biblatex"]
(NoArg
(\opt -> return opt { optCiteMethod = Biblatex }))
""
, Option "" ["mathml"]
(NoArg
(\opt ->
return opt { optHTMLMathMethod = MathML }))
""
, Option "" ["webtex"]
(OptArg
(\arg opt -> do
let url' = fromMaybe "https://latex.codecogs.com/png.latex?" arg
return opt { optHTMLMathMethod = WebTeX url' })
"URL")
""
, Option "" ["mathjax"]
(OptArg
(\arg opt -> do
let url' = fromMaybe (defaultMathJaxURL ++
"MathJax.js?config=TeX-AMS_CHTML-full") arg
return opt { optHTMLMathMethod = MathJax url'})
"URL")
""
, Option "" ["katex"]
(OptArg
(\arg opt ->
return opt
{ optHTMLMathMethod = KaTeX $
fromMaybe defaultKaTeXURL arg })
"URL")
""
, Option "m" ["latexmathml", "asciimathml"]
(OptArg
(\arg opt -> do
deprecatedOption "--latexmathml, --asciimathml, -m" ""
return opt { optHTMLMathMethod = LaTeXMathML arg })
"URL")
""
, Option "" ["mimetex"]
(OptArg
(\arg opt -> do
deprecatedOption "--mimetex" ""
let url' = case arg of
Just u -> u ++ "?"
Nothing -> "/cgi-bin/mimetex.cgi?"
return opt { optHTMLMathMethod = WebTeX url' })
"URL")
""
, Option "" ["jsmath"]
(OptArg
(\arg opt -> do
deprecatedOption "--jsmath" ""
return opt { optHTMLMathMethod = JsMath arg})
"URL")
""
, Option "" ["gladtex"]
(NoArg
(\opt -> do
deprecatedOption "--gladtex" ""
return opt { optHTMLMathMethod = GladTeX }))
""
, Option "" ["abbreviations"]
(ReqArg
(\arg opt -> return opt { optAbbreviations = Just arg })
"FILE")
""
, Option "" ["trace"]
(NoArg
(\opt -> return opt { optTrace = True }))
""
, Option "" ["dump-args"]
(NoArg
(\opt -> return opt { optDumpArgs = True }))
""
, Option "" ["ignore-args"]
(NoArg
(\opt -> return opt { optIgnoreArgs = True }))
""
, Option "" ["verbose"]
(NoArg
(\opt -> return opt { optVerbosity = INFO }))
""
, Option "" ["quiet"]
(NoArg
(\opt -> return opt { optVerbosity = ERROR }))
""
, Option "" ["fail-if-warnings"]
(NoArg
(\opt -> return opt { optFailIfWarnings = True }))
""
, Option "" ["log"]
(ReqArg
(\arg opt -> return opt{ optLogFile = Just arg })
"FILE")
""
, Option "" ["bash-completion"]
(NoArg
(\_ -> do
ddir <- getDataDir
tpl <- runIOorExplode $
UTF8.toString <$>
readDefaultDataFile "bash_completion.tpl"
let optnames (Option shorts longs _ _) =
map (\c -> ['-',c]) shorts ++
map ("--" ++) longs
let allopts = unwords (concatMap optnames options)
UTF8.hPutStrLn stdout $ printf tpl allopts
(unwords readersNames)
(unwords writersNames)
(unwords $ map fst highlightingStyles)
ddir
exitSuccess ))
""
, Option "" ["list-input-formats"]
(NoArg
(\_ -> do
mapM_ (UTF8.hPutStrLn stdout) readersNames
exitSuccess ))
""
, Option "" ["list-output-formats"]
(NoArg
(\_ -> do
mapM_ (UTF8.hPutStrLn stdout) writersNames
exitSuccess ))
""
, Option "" ["list-extensions"]
(OptArg
(\arg _ -> do
let exts = getDefaultExtensions (fromMaybe "markdown" arg)
let showExt x = (if extensionEnabled x exts
then '+'
else '-') : drop 4 (show x)
mapM_ (UTF8.hPutStrLn stdout . showExt)
([minBound..maxBound] :: [Extension])
exitSuccess )
"FORMAT")
""
, Option "" ["list-highlight-languages"]
(NoArg
(\_ -> do
let langs = [ T.unpack (T.toLower (sShortname s))
| s <- M.elems defaultSyntaxMap
, sShortname s `notElem`
[T.pack "Alert", T.pack "Alert_indent"]
]
mapM_ (UTF8.hPutStrLn stdout) langs
exitSuccess ))
""
, Option "" ["list-highlight-styles"]
(NoArg
(\_ -> do
mapM_ (UTF8.hPutStrLn stdout . fst) highlightingStyles
exitSuccess ))
""
, Option "v" ["version"]
(NoArg
(\_ -> do
prg <- getProgName
defaultDatadir <- E.catch
(getAppUserDataDirectory "pandoc")
(\e -> let _ = (e :: E.SomeException)
in return "")
UTF8.hPutStrLn stdout (prg ++ " " ++ pandocVersion ++
compileInfo ++ "\nDefault user data directory: " ++
defaultDatadir ++ copyrightMessage)
exitSuccess ))
""
, Option "h" ["help"]
(NoArg
(\_ -> do
prg <- getProgName
UTF8.hPutStr stdout (usageMessage prg options)
exitSuccess ))
""
]
usageMessage :: String -> [OptDescr (Opt -> IO Opt)] -> String
usageMessage programName = usageInfo (programName ++ " [OPTIONS] [FILES]")
copyrightMessage :: String
copyrightMessage = intercalate "\n" [
"",
"Copyright (C) 2006-2018 John MacFarlane",
"Web: http://pandoc.org",
"This is free software; see the source for copying conditions.",
"There is no warranty, not even for merchantability or fitness",
"for a particular purpose." ]
compileInfo :: String
compileInfo =
"\nCompiled with pandoc-types " ++ VERSION_pandoc_types ++ ", texmath " ++
VERSION_texmath ++ ", skylighting " ++ VERSION_skylighting
handleUnrecognizedOption :: String -> [String] -> [String]
handleUnrecognizedOption "--smart" =
(("--smart/-S has been removed. Use +smart or -smart extension instead.\n" ++
"For example: pandoc -f markdown+smart -t markdown-smart.") :)
handleUnrecognizedOption "--normalize" =
("--normalize has been removed. Normalization is now automatic." :)
handleUnrecognizedOption "-S" = handleUnrecognizedOption "--smart"
handleUnrecognizedOption "--old-dashes" =
("--old-dashes has been removed. Use +old_dashes extension instead." :)
handleUnrecognizedOption "--no-wrap" =
("--no-wrap has been removed. Use --wrap=none instead." :)
handleUnrecognizedOption "--latex-engine" =
("--latex-engine has been removed. Use --pdf-engine instead." :)
handleUnrecognizedOption "--latex-engine-opt" =
("--latex-engine-opt has been removed. Use --pdf-engine-opt instead." :)
handleUnrecognizedOption "--chapters" =
("--chapters has been removed. Use --top-level-division=chapter instead." :)
handleUnrecognizedOption "--reference-docx" =
("--reference-docx has been removed. Use --reference-doc instead." :)
handleUnrecognizedOption "--reference-odt" =
("--reference-odt has been removed. Use --reference-doc instead." :)
handleUnrecognizedOption "--parse-raw" =
("--parse-raw/-R has been removed. Use +raw_html or +raw_tex extension.\n" :)
handleUnrecognizedOption "--epub-stylesheet" =
("--epub-stylesheet has been removed. Use --css instead.\n" :)
handleUnrecognizedOption "-R" = handleUnrecognizedOption "--parse-raw"
handleUnrecognizedOption x =
(("Unknown option " ++ x ++ ".") :)
uppercaseFirstLetter :: String -> String
uppercaseFirstLetter (c:cs) = toUpper c : cs
uppercaseFirstLetter [] = []
readersNames :: [String]
readersNames = sort (map fst (readers :: [(String, Reader PandocIO)]))
writersNames :: [String]
writersNames = sort (map fst (writers :: [(String, Writer PandocIO)]))
splitField :: String -> (String, String)
splitField s =
case break (`elem` ":=") s of
(k,_:v) -> (k,v)
(k,[]) -> (k,"true")
baseWriterName :: String -> String
baseWriterName = takeWhile (\c -> c /= '+' && c /= '-')
deprecatedOption :: String -> String -> IO ()
deprecatedOption o msg =
runIO (report $ Deprecated o msg) >>=
\r -> case r of
Right () -> return ()
Left e -> E.throwIO e
$(deriveJSON defaultOptions ''LineEnding)
$(deriveJSON defaultOptions ''Opt)