module Text.Pandoc.Shared (
splitBy,
splitByIndices,
splitStringByIndices,
substitute,
backslashEscapes,
escapeStringUsing,
stripTrailingNewlines,
trim,
triml,
trimr,
stripFirstAndLast,
camelCaseToHyphenated,
toRomanNumeral,
escapeURI,
tabFilter,
normalizeDate,
orderedListMarkers,
normalizeSpaces,
normalize,
stringify,
compactify,
compactify',
Element (..),
hierarchicalize,
uniqueIdent,
isHeaderBlock,
headerShift,
isTightList,
renderTags',
inDirectory,
readDataFile,
readDataFileUTF8,
fetchItem,
openURL,
err,
warn,
safeRead
) where
import Text.Pandoc.Definition
import Text.Pandoc.Generic
import Text.Pandoc.Builder (Blocks)
import qualified Text.Pandoc.Builder as B
import qualified Text.Pandoc.UTF8 as UTF8
import System.Environment (getProgName)
import System.Exit (exitWith, ExitCode(..))
import Data.Char ( toLower, isLower, isUpper, isAlpha,
isLetter, isDigit, isSpace )
import Data.List ( find, isPrefixOf, intercalate )
import Network.URI ( escapeURIString, isAbsoluteURI, parseURI )
import System.Directory
import Text.Pandoc.MIME (getMimeType)
import System.FilePath ( (</>), takeExtension, dropExtension )
import Data.Generics (Typeable, Data)
import qualified Control.Monad.State as S
import Control.Monad (msum)
import Text.Pandoc.Pretty (charWidth)
import System.Locale (defaultTimeLocale)
import Data.Time
import System.IO (stderr)
import Text.HTML.TagSoup (renderTagsOptions, RenderOptions(..), Tag(..),
renderOptions)
import qualified Data.ByteString as B
import Network.HTTP (findHeader, rspBody, simpleHTTP, RequestMethod(..),
HeaderName(..), mkRequest)
#ifdef EMBED_DATA_FILES
import Data.FileEmbed
#else
import Paths_pandoc (getDataFileName)
#endif
splitBy :: (a -> Bool) -> [a] -> [[a]]
splitBy _ [] = []
splitBy isSep lst =
let (first, rest) = break isSep lst
rest' = dropWhile isSep rest
in first:(splitBy isSep rest')
splitByIndices :: [Int] -> [a] -> [[a]]
splitByIndices [] lst = [lst]
splitByIndices (x:xs) lst = first:(splitByIndices (map (\y -> y x) xs) rest)
where (first, rest) = splitAt x lst
splitStringByIndices :: [Int] -> [Char] -> [[Char]]
splitStringByIndices [] lst = [lst]
splitStringByIndices (x:xs) lst =
let (first, rest) = splitAt' x lst in
first : (splitStringByIndices (map (\y -> y x) xs) rest)
splitAt' :: Int -> [Char] -> ([Char],[Char])
splitAt' _ [] = ([],[])
splitAt' n xs | n <= 0 = ([],xs)
splitAt' n (x:xs) = (x:ys,zs)
where (ys,zs) = splitAt' (n charWidth x) xs
substitute :: (Eq a) => [a] -> [a] -> [a] -> [a]
substitute _ _ [] = []
substitute [] _ xs = xs
substitute target replacement lst@(x:xs) =
if target `isPrefixOf` lst
then replacement ++ substitute target replacement (drop (length target) lst)
else x : substitute target replacement xs
backslashEscapes :: [Char]
-> [(Char, String)]
backslashEscapes = map (\ch -> (ch, ['\\',ch]))
escapeStringUsing :: [(Char, String)] -> String -> String
escapeStringUsing _ [] = ""
escapeStringUsing escapeTable (x:xs) =
case (lookup x escapeTable) of
Just str -> str ++ rest
Nothing -> x:rest
where rest = escapeStringUsing escapeTable xs
stripTrailingNewlines :: String -> String
stripTrailingNewlines = reverse . dropWhile (== '\n') . reverse
trim :: String -> String
trim = triml . trimr
triml :: String -> String
triml = dropWhile (`elem` " \r\n\t")
trimr :: String -> String
trimr = reverse . triml . reverse
stripFirstAndLast :: String -> String
stripFirstAndLast str =
drop 1 $ take ((length str) 1) str
camelCaseToHyphenated :: String -> String
camelCaseToHyphenated [] = ""
camelCaseToHyphenated (a:b:rest) | isLower a && isUpper b =
a:'-':(toLower b):(camelCaseToHyphenated rest)
camelCaseToHyphenated (a:rest) = (toLower a):(camelCaseToHyphenated rest)
toRomanNumeral :: Int -> String
toRomanNumeral x =
if x >= 4000 || x < 0
then "?"
else case x of
_ | x >= 1000 -> "M" ++ toRomanNumeral (x 1000)
_ | x >= 900 -> "CM" ++ toRomanNumeral (x 900)
_ | x >= 500 -> "D" ++ toRomanNumeral (x 500)
_ | x >= 400 -> "CD" ++ toRomanNumeral (x 400)
_ | x >= 100 -> "C" ++ toRomanNumeral (x 100)
_ | x >= 90 -> "XC" ++ toRomanNumeral (x 90)
_ | x >= 50 -> "L" ++ toRomanNumeral (x 50)
_ | x >= 40 -> "XL" ++ toRomanNumeral (x 40)
_ | x >= 10 -> "X" ++ toRomanNumeral (x 10)
_ | x >= 9 -> "IX" ++ toRomanNumeral (x 5)
_ | x >= 5 -> "V" ++ toRomanNumeral (x 5)
_ | x >= 4 -> "IV" ++ toRomanNumeral (x 4)
_ | x >= 1 -> "I" ++ toRomanNumeral (x 1)
_ -> ""
escapeURI :: String -> String
escapeURI = escapeURIString (not . isSpace)
tabFilter :: Int
-> String
-> String
tabFilter tabStop =
let go _ [] = ""
go _ ('\n':xs) = '\n' : go tabStop xs
go _ ('\r':'\n':xs) = '\n' : go tabStop xs
go _ ('\r':xs) = '\n' : go tabStop xs
go spsToNextStop ('\t':xs) =
if tabStop == 0
then '\t' : go tabStop xs
else replicate spsToNextStop ' ' ++ go tabStop xs
go 1 (x:xs) =
x : go tabStop xs
go spsToNextStop (x:xs) =
x : go (spsToNextStop 1) xs
in go tabStop
normalizeDate :: String -> Maybe String
normalizeDate s = fmap (formatTime defaultTimeLocale "%F")
(msum $ map (\fs -> parsetimeWith fs s) formats :: Maybe Day)
where parsetimeWith = parseTime defaultTimeLocale
formats = ["%x","%m/%d/%Y", "%D","%F", "%d %b %Y",
"%d %B %Y", "%b. %d, %Y", "%B %d, %Y"]
orderedListMarkers :: (Int, ListNumberStyle, ListNumberDelim) -> [String]
orderedListMarkers (start, numstyle, numdelim) =
let singleton c = [c]
nums = case numstyle of
DefaultStyle -> map show [start..]
Example -> map show [start..]
Decimal -> map show [start..]
UpperAlpha -> drop (start 1) $ cycle $
map singleton ['A'..'Z']
LowerAlpha -> drop (start 1) $ cycle $
map singleton ['a'..'z']
UpperRoman -> map toRomanNumeral [start..]
LowerRoman -> map (map toLower . toRomanNumeral) [start..]
inDelim str = case numdelim of
DefaultDelim -> str ++ "."
Period -> str ++ "."
OneParen -> str ++ ")"
TwoParens -> "(" ++ str ++ ")"
in map inDelim nums
normalizeSpaces :: [Inline] -> [Inline]
normalizeSpaces = cleanup . dropWhile isSpaceOrEmpty
where cleanup [] = []
cleanup (Space:rest) = case dropWhile isSpaceOrEmpty rest of
[] -> []
(x:xs) -> Space : x : cleanup xs
cleanup ((Str ""):rest) = cleanup rest
cleanup (x:rest) = x : cleanup rest
isSpaceOrEmpty :: Inline -> Bool
isSpaceOrEmpty Space = True
isSpaceOrEmpty (Str "") = True
isSpaceOrEmpty _ = False
normalize :: (Eq a, Data a) => a -> a
normalize = topDown removeEmptyBlocks .
topDown consolidateInlines .
bottomUp (removeEmptyInlines . removeTrailingInlineSpaces)
removeEmptyBlocks :: [Block] -> [Block]
removeEmptyBlocks (Null : xs) = removeEmptyBlocks xs
removeEmptyBlocks (BulletList [] : xs) = removeEmptyBlocks xs
removeEmptyBlocks (OrderedList _ [] : xs) = removeEmptyBlocks xs
removeEmptyBlocks (DefinitionList [] : xs) = removeEmptyBlocks xs
removeEmptyBlocks (RawBlock _ [] : xs) = removeEmptyBlocks xs
removeEmptyBlocks (x:xs) = x : removeEmptyBlocks xs
removeEmptyBlocks [] = []
removeEmptyInlines :: [Inline] -> [Inline]
removeEmptyInlines (Emph [] : zs) = removeEmptyInlines zs
removeEmptyInlines (Strong [] : zs) = removeEmptyInlines zs
removeEmptyInlines (Subscript [] : zs) = removeEmptyInlines zs
removeEmptyInlines (Superscript [] : zs) = removeEmptyInlines zs
removeEmptyInlines (SmallCaps [] : zs) = removeEmptyInlines zs
removeEmptyInlines (Strikeout [] : zs) = removeEmptyInlines zs
removeEmptyInlines (RawInline _ [] : zs) = removeEmptyInlines zs
removeEmptyInlines (Code _ [] : zs) = removeEmptyInlines zs
removeEmptyInlines (Str "" : zs) = removeEmptyInlines zs
removeEmptyInlines (x : xs) = x : removeEmptyInlines xs
removeEmptyInlines [] = []
removeTrailingInlineSpaces :: [Inline] -> [Inline]
removeTrailingInlineSpaces = reverse . removeLeadingInlineSpaces . reverse
removeLeadingInlineSpaces :: [Inline] -> [Inline]
removeLeadingInlineSpaces = dropWhile isSpaceOrEmpty
consolidateInlines :: [Inline] -> [Inline]
consolidateInlines (Str x : ys) =
case concat (x : map fromStr strs) of
"" -> consolidateInlines rest
n -> Str n : consolidateInlines rest
where
(strs, rest) = span isStr ys
isStr (Str _) = True
isStr _ = False
fromStr (Str z) = z
fromStr _ = error "consolidateInlines - fromStr - not a Str"
consolidateInlines (Space : ys) = Space : rest
where isSp Space = True
isSp _ = False
rest = consolidateInlines $ dropWhile isSp ys
consolidateInlines (Emph xs : Emph ys : zs) = consolidateInlines $
Emph (xs ++ ys) : zs
consolidateInlines (Strong xs : Strong ys : zs) = consolidateInlines $
Strong (xs ++ ys) : zs
consolidateInlines (Subscript xs : Subscript ys : zs) = consolidateInlines $
Subscript (xs ++ ys) : zs
consolidateInlines (Superscript xs : Superscript ys : zs) = consolidateInlines $
Superscript (xs ++ ys) : zs
consolidateInlines (SmallCaps xs : SmallCaps ys : zs) = consolidateInlines $
SmallCaps (xs ++ ys) : zs
consolidateInlines (Strikeout xs : Strikeout ys : zs) = consolidateInlines $
Strikeout (xs ++ ys) : zs
consolidateInlines (RawInline f x : RawInline f' y : zs) | f == f' =
consolidateInlines $ RawInline f (x ++ y) : zs
consolidateInlines (Code a1 x : Code a2 y : zs) | a1 == a2 =
consolidateInlines $ Code a1 (x ++ y) : zs
consolidateInlines (x : xs) = x : consolidateInlines xs
consolidateInlines [] = []
stringify :: [Inline] -> String
stringify = queryWith go
where go :: Inline -> [Char]
go Space = " "
go (Str x) = x
go (Code _ x) = x
go (Math _ x) = x
go LineBreak = " "
go _ = ""
compactify :: [[Block]]
-> [[Block]]
compactify [] = []
compactify items =
case (init items, last items) of
(_,[]) -> items
(others, final) ->
case last final of
Para a -> case (filter isPara $ concat items) of
[_] -> others ++ [init final ++ [Plain a]]
_ -> items
_ -> items
compactify' :: [Blocks]
-> [Blocks]
compactify' [] = []
compactify' items =
let (others, final) = (init items, last items)
in case reverse (B.toList final) of
(Para a:xs) -> case [Para x | Para x <- concatMap B.toList items] of
[_] -> others ++ [B.fromList (reverse $ Plain a : xs)]
_ -> items
_ -> items
isPara :: Block -> Bool
isPara (Para _) = True
isPara _ = False
data Element = Blk Block
| Sec Int [Int] String [Inline] [Element]
deriving (Eq, Read, Show, Typeable, Data)
inlineListToIdentifier :: [Inline] -> String
inlineListToIdentifier =
dropWhile (not . isAlpha) . intercalate "-" . words .
map (nbspToSp . toLower) .
filter (\c -> isLetter c || isDigit c || c `elem` "_-. ") .
stringify
where nbspToSp '\160' = ' '
nbspToSp x = x
hierarchicalize :: [Block] -> [Element]
hierarchicalize blocks = S.evalState (hierarchicalizeWithIds blocks) []
hierarchicalizeWithIds :: [Block] -> S.State [Int] [Element]
hierarchicalizeWithIds [] = return []
hierarchicalizeWithIds ((Header level (ident,_,_) title'):xs) = do
lastnum <- S.get
let lastnum' = take level lastnum
let newnum = if length lastnum' >= level
then init lastnum' ++ [last lastnum' + 1]
else lastnum ++ replicate (level length lastnum 1) 0 ++ [1]
S.put newnum
let (sectionContents, rest) = break (headerLtEq level) xs
sectionContents' <- hierarchicalizeWithIds sectionContents
rest' <- hierarchicalizeWithIds rest
return $ Sec level newnum ident title' sectionContents' : rest'
hierarchicalizeWithIds (x:rest) = do
rest' <- hierarchicalizeWithIds rest
return $ (Blk x) : rest'
headerLtEq :: Int -> Block -> Bool
headerLtEq level (Header l _ _) = l <= level
headerLtEq _ _ = False
uniqueIdent :: [Inline] -> [String] -> String
uniqueIdent title' usedIdents =
let baseIdent = case inlineListToIdentifier title' of
"" -> "section"
x -> x
numIdent n = baseIdent ++ "-" ++ show n
in if baseIdent `elem` usedIdents
then case find (\x -> numIdent x `notElem` usedIdents) ([1..60000] :: [Int]) of
Just x -> numIdent x
Nothing -> baseIdent
else baseIdent
isHeaderBlock :: Block -> Bool
isHeaderBlock (Header _ _ _) = True
isHeaderBlock _ = False
headerShift :: Int -> Pandoc -> Pandoc
headerShift n = bottomUp shift
where shift :: Block -> Block
shift (Header level attr inner) = Header (level + n) attr inner
shift x = x
isTightList :: [[Block]] -> Bool
isTightList = and . map firstIsPlain
where firstIsPlain (Plain _ : _) = True
firstIsPlain _ = False
renderTags' :: [Tag String] -> String
renderTags' = renderTagsOptions
renderOptions{ optMinimize = \x ->
let y = map toLower x
in y == "hr" || y == "br" ||
y == "img" || y == "meta" ||
y == "link"
, optRawTag = \x ->
let y = map toLower x
in y == "script" || y == "style" }
inDirectory :: FilePath -> IO a -> IO a
inDirectory path action = do
oldDir <- getCurrentDirectory
setCurrentDirectory path
result <- action
setCurrentDirectory oldDir
return result
#ifdef EMBED_DATA_FILES
dataFiles :: [(FilePath, B.ByteString)]
dataFiles = $(embedDir "data")
#endif
readDefaultDataFile :: FilePath -> IO B.ByteString
readDefaultDataFile fname =
#ifdef EMBED_DATA_FILES
case lookup fname dataFiles of
Nothing -> ioError $ userError
$ "Data file `" ++ fname ++ "' does not exist"
Just contents -> return contents
#else
getDataFileName ("data" </> fname) >>= B.readFile
#endif
readDataFile :: Maybe FilePath -> FilePath -> IO B.ByteString
readDataFile Nothing fname = readDefaultDataFile fname
readDataFile (Just userDir) fname = do
exists <- doesFileExist (userDir </> fname)
if exists
then B.readFile (userDir </> fname)
else readDefaultDataFile fname
readDataFileUTF8 :: Maybe FilePath -> FilePath -> IO String
readDataFileUTF8 userDir fname =
UTF8.toString `fmap` readDataFile userDir fname
fetchItem :: String -> String -> IO (B.ByteString, Maybe String)
fetchItem sourceDir s =
case s of
_ | isAbsoluteURI s -> openURL s
| isAbsoluteURI sourceDir -> openURL $ sourceDir ++ "/" ++ s
| otherwise -> do
let mime = case takeExtension s of
".gz" -> getMimeType $ dropExtension s
x -> getMimeType x
let f = sourceDir </> s
cont <- B.readFile f
return (cont, mime)
openURL :: String -> IO (B.ByteString, Maybe String)
openURL u = getBodyAndMimeType =<< simpleHTTP (getReq u)
where getReq v = case parseURI v of
Nothing -> error $ "Could not parse URI: " ++ v
Just u' -> mkRequest GET u'
getBodyAndMimeType (Left e) = fail (show e)
getBodyAndMimeType (Right r) = return (rspBody r, findHeader HdrContentType r)
err :: Int -> String -> IO a
err exitCode msg = do
name <- getProgName
UTF8.hPutStrLn stderr $ name ++ ": " ++ msg
exitWith $ ExitFailure exitCode
return undefined
warn :: String -> IO ()
warn msg = do
name <- getProgName
UTF8.hPutStrLn stderr $ name ++ ": " ++ msg
safeRead :: (Monad m, Read a) => String -> m a
safeRead s = case reads s of
(d,x):_
| all isSpace x -> return d
_ -> fail $ "Could not read `" ++ s ++ "'"