{-# LANGUAGE NoImplicitPrelude #-}
module Text.Pandoc.XML ( escapeCharForXML,
escapeStringForXML,
inTags,
selfClosingTag,
inTagsSimple,
inTagsIndented,
toEntities,
toHtml5Entities,
fromEntities ) where
import Prelude
import Data.Char (isAscii, isSpace, ord)
import Data.Text (Text)
import qualified Data.Text as T
import Text.HTML.TagSoup.Entity (lookupEntity, htmlEntities)
import Text.Pandoc.Pretty
import qualified Data.Map as M
escapeCharForXML :: Char -> String
escapeCharForXML x = case x of
'&' -> "&"
'<' -> "<"
'>' -> ">"
'"' -> """
c -> [c]
escapeStringForXML :: String -> String
escapeStringForXML = concatMap escapeCharForXML . filter isLegalXMLChar
where isLegalXMLChar c = c == '\t' || c == '\n' || c == '\r' ||
(c >= '\x20' && c <= '\xD7FF') ||
(c >= '\xE000' && c <= '\xFFFD') ||
(c >= '\x10000' && c <= '\x10FFFF')
escapeNls :: String -> String
escapeNls (x:xs)
| x == '\n' = " " ++ escapeNls xs
| otherwise = x : escapeNls xs
escapeNls [] = []
attributeList :: [(String, String)] -> Doc
attributeList = hcat . map
(\(a, b) -> text (' ' : escapeStringForXML a ++ "=\"" ++
escapeNls (escapeStringForXML b) ++ "\""))
inTags:: Bool -> String -> [(String, String)] -> Doc -> Doc
inTags isIndented tagType attribs contents =
let openTag = char '<' <> text tagType <> attributeList attribs <>
char '>'
closeTag = text "</" <> text tagType <> char '>'
in if isIndented
then openTag $$ nest 2 contents $$ closeTag
else openTag <> contents <> closeTag
selfClosingTag :: String -> [(String, String)] -> Doc
selfClosingTag tagType attribs =
char '<' <> text tagType <> attributeList attribs <> text " />"
inTagsSimple :: String -> Doc -> Doc
inTagsSimple tagType = inTags False tagType []
inTagsIndented :: String -> Doc -> Doc
inTagsIndented tagType = inTags True tagType []
toEntities :: Text -> Text
toEntities = T.concatMap go
where go c | isAscii c = T.singleton c
| otherwise = T.pack ("&#" ++ show (ord c) ++ ";")
toHtml5Entities :: Text -> Text
toHtml5Entities = T.concatMap go
where go c | isAscii c = T.singleton c
| otherwise =
case M.lookup c html5EntityMap of
Just t -> T.singleton '&' <> t <> T.singleton ';'
Nothing -> T.pack ("&#" ++ show (ord c) ++ ";")
html5EntityMap :: M.Map Char Text
html5EntityMap = foldr go mempty htmlEntities
where go (ent, s) entmap =
case s of
[c] -> M.insertWith
(\new old -> if T.length new > T.length old
then old
else new) c ent' entmap
where ent' = T.takeWhile (/=';') (T.pack ent)
_ -> entmap
fromEntities :: String -> String
fromEntities ('&':xs) =
case lookupEntity ent' of
Just c -> c ++ fromEntities rest
Nothing -> '&' : fromEntities xs
where (ent, rest) = case break (\c -> isSpace c || c == ';') xs of
(zs,';':ys) -> (zs,ys)
(zs, ys) -> (zs,ys)
ent' = case ent of
'#':'X':ys -> '#':'x':ys
'#':_ -> ent
_ -> ent ++ ";"
fromEntities (x:xs) = x : fromEntities xs
fromEntities [] = []