{-# LANGUAGE OverloadedStrings #-}
module Data.XML.Parser.Low
( module Data.XML.Parser.Low.Entity
, module Data.XML.Parser.Low.Name
, module Data.XML.Parser.Low
) where
import Control.Applicative
import Control.Arrow ((>>>))
import Control.Monad
import Data.Char
import Data.Functor
import Data.Text (Text)
import qualified Data.Text as Text
import Data.XML.Parser.Low.Entity
import Data.XML.Parser.Low.Name
import Numeric
import Text.Parser.Char
import Text.Parser.Combinators
data Content = ContentText Text | ContentReference Reference
deriving (Eq, Ord, Read, Show)
expandContent :: Alternative m => EntityDecoder -> Content -> m Text
expandContent _ (ContentText t) = pure t
expandContent f (ContentReference r) = expandReference f r
expandContents :: Alternative m => Monad m => EntityDecoder -> [Content] -> m Text
expandContents f contents = mconcat <$> mapM (expandContent f) contents
data Reference = EntityRef Text | CharRef Char
deriving (Eq, Ord, Read, Show)
expandReference :: Alternative m => EntityDecoder -> Reference -> m Text
expandReference _ (CharRef c) = pure $ Text.pack [c]
expandReference f (EntityRef name) = maybe empty pure $ runEntityDecoder f name
expandReference' :: Reference -> Maybe Text
expandReference' = expandReference decodePredefinedEntities
tokenSingleQuote :: CharParsing m => m Char
tokenSingleQuote = char '\''
tokenDoubleQuote :: CharParsing m => m Char
tokenDoubleQuote = char '"'
tokenQuote :: CharParsing m => m Char
tokenQuote = tokenSingleQuote <|> tokenDoubleQuote
tokenWhitespace :: CharParsing m => m String
tokenWhitespace = some (satisfy isXmlSpace) where
isXmlSpace ' ' = True
isXmlSpace '\t' = True
isXmlSpace '\r' = True
isXmlSpace '\n' = True
isXmlSpace _ = False
tokenEqual :: CharParsing m => Monad m => m ()
tokenEqual = do
optional tokenWhitespace
char '='
optional tokenWhitespace
return ()
tokenReference :: CharParsing m => Monad m => m Reference
tokenReference = (EntityRef <$> entityRef) <|> (CharRef <$> decCharRef) <|> (CharRef <$> hexCharRef) where
entityRef = char '&' *> tokenName <* char ';'
decCharRef = between (string "&#") (char ';') $
some digit >>= (readDec >>> liftParser "decimal") <&> chr
hexCharRef = between (string "&#x") (char ';') $
some hexDigit >>= (readHex >>> liftParser "hexadecimal") <&> chr
liftParser _ ((result, _):_) = return result
liftParser message _ = unexpected $ "Failed to parse " <> message
tokenContent :: CharParsing m => Monad m => String -> m Content
tokenContent forbiddenChars = (ContentText . Text.pack <$> some (noneOf $ '&':forbiddenChars))
<|> (ContentReference <$> tokenReference)
tokenEntityDeclarationOpen :: CharParsing m => m ()
tokenEntityDeclarationOpen = void $ string "<!ENTITY"
tokenInstructionOpen :: CharParsing m => Monad m => m Text
tokenInstructionOpen = do
string "<?"
name <- tokenName
guard $ Text.toLower name /= "xml"
return name
tokenInstructionClose :: CharParsing m => m ()
tokenInstructionClose = void $ string "?>"
tokenCdataOpen :: CharParsing m => m ()
tokenCdataOpen = void $ string "<![CDATA["
tokenCdataClose :: CharParsing m => m ()
tokenCdataClose = void $ string "]]>"
tokenCommentOpen :: CharParsing m => m ()
tokenCommentOpen = void $ string "<!--"
tokenCommentClose :: CharParsing m => m ()
tokenCommentClose = void $ string "-->"
tokenDoctypeOpen :: CharParsing m => m ()
tokenDoctypeOpen = void $ string "<!DOCTYPE"
tokenXmlDeclarationOpen :: CharParsing m => m ()
tokenXmlDeclarationOpen = void $ string "<?xml"
tokenXmlDeclarationClose :: CharParsing m => m ()
tokenXmlDeclarationClose = void $ string "?>"
tokenEmptyElementTagClose :: CharParsing m => m ()
tokenEmptyElementTagClose = void $ string "/>"
tokenStartTagOpen :: CharParsing m => Monad m => m QName
tokenStartTagOpen = char '<' *> tokenQualifiedName
tokenEndTagOpen :: CharParsing m => Monad m => m QName
tokenEndTagOpen = string "</" *> tokenQualifiedName
tokenElementClose :: CharParsing m => m ()
tokenElementClose = void $ char '>'