module Text.Pandoc.XML ( escapeCharForXML,
escapeStringForXML,
inTags,
selfClosingTag,
inTagsSimple,
inTagsIndented,
toEntities,
fromEntities ) where
import Data.Char (isAscii, isSpace, ord)
import Data.Text (Text)
import qualified Data.Text as T
import Text.HTML.TagSoup.Entity (lookupEntity)
import Text.Pandoc.Pretty
escapeCharForXML :: Char -> String
escapeCharForXML x = case x of
'&' -> "&"
'<' -> "<"
'>' -> ">"
'"' -> """
c -> [c]
escapeStringForXML :: String -> String
escapeStringForXML = concatMap escapeCharForXML
escapeNls :: String -> String
escapeNls (x:xs)
| x == '\n' = " " ++ escapeNls xs
| otherwise = x : escapeNls xs
escapeNls [] = []
attributeList :: [(String, String)] -> Doc
attributeList = hcat . map
(\(a, b) -> text (' ' : escapeStringForXML a ++ "=\"" ++
escapeNls (escapeStringForXML b) ++ "\""))
inTags:: Bool -> String -> [(String, String)] -> Doc -> Doc
inTags isIndented tagType attribs contents =
let openTag = char '<' <> text tagType <> attributeList attribs <>
char '>'
closeTag = text "</" <> text tagType <> char '>'
in if isIndented
then openTag $$ nest 2 contents $$ closeTag
else openTag <> contents <> closeTag
selfClosingTag :: String -> [(String, String)] -> Doc
selfClosingTag tagType attribs =
char '<' <> text tagType <> attributeList attribs <> text " />"
inTagsSimple :: String -> Doc -> Doc
inTagsSimple tagType = inTags False tagType []
inTagsIndented :: String -> Doc -> Doc
inTagsIndented tagType = inTags True tagType []
toEntities :: Text -> Text
toEntities = T.concatMap go
where go c | isAscii c = T.singleton c
| otherwise = T.pack ("&#" ++ show (ord c) ++ ";")
fromEntities :: String -> String
fromEntities ('&':xs) =
case lookupEntity ent' of
Just c -> c ++ fromEntities rest
Nothing -> '&' : fromEntities xs
where (ent, rest) = case break (\c -> isSpace c || c == ';') xs of
(zs,';':ys) -> (zs,ys)
(zs, ys) -> (zs,ys)
ent' = case ent of
'#':'X':ys -> '#':'x':ys
'#':_ -> ent
_ -> ent ++ ";"
fromEntities (x:xs) = x : fromEntities xs
fromEntities [] = []