-- | Document type declaration parsers. -- -- -- -- All documentation examples assume the following setup: -- -- > :set -XOverloadedStrings -- > import Data.Attoparsec.ByteString module Data.XML.Parser.Mid.Doctype ( ExternalID(..) , externalID , GeneralEntityDeclaration(..) , generalEntityDeclaration , Doctype(..) , doctype ) where import Control.Applicative import Data.Maybe import qualified Data.Text as Text import Data.Text (Text) import Data.XML.Parser.Low import Text.Parser.Char import Text.Parser.Combinators -- $setup -- >>> :set -XOverloadedStrings -- >>> import Data.Attoparsec.ByteString -- | External entity identifier -- -- data ExternalID = PublicID Text Text | SystemID Text deriving (Eq, Ord, Read, Show) -- | data GeneralEntityDeclaration = GeneralEntityDeclaration Text [Content] deriving (Eq, Ord, Read, Show) -- | data Doctype = Doctype Text (Maybe ExternalID) [GeneralEntityDeclaration] deriving (Eq, Ord, Read, Show) -- | -- -- >>> parseOnly generalEntityDeclaration "" -- Right (GeneralEntityDeclaration "d" [ContentReference (CharRef '\r')]) -- >>> parseOnly generalEntityDeclaration "" -- Right (GeneralEntityDeclaration "da" [ContentReference (CharRef '\r'),ContentReference (CharRef '\n')]) -- >>> parseOnly generalEntityDeclaration "" -- Right (GeneralEntityDeclaration "Pub-Status" [ContentText "This is a pre-release of the specification."]) generalEntityDeclaration :: CharParsing m => Monad m => m GeneralEntityDeclaration generalEntityDeclaration = do tokenEntityDeclarationOpen tokenWhitespace name <- tokenName tokenWhitespace quote <- tokenQuote definition <- many (tokenContent $ quote:"%") char quote optional tokenWhitespace tokenElementClose return $ GeneralEntityDeclaration name definition -- | -- -- >>> parseOnly externalID "PUBLIC '-//Textuality//TEXT Standard open-hatch boilerplate//EN' 'http://www.textuality.com/boilerplate/OpenHatch.xml'" -- Right (PublicID "-//Textuality//TEXT Standard open-hatch boilerplate//EN" "http://www.textuality.com/boilerplate/OpenHatch.xml") -- >>> parseOnly externalID "SYSTEM '../grafix/OpenHatch.gif'" -- Right (SystemID "../grafix/OpenHatch.gif") externalID :: CharParsing m => Monad m => m ExternalID externalID = publicID <|> systemID where publicID = do string "PUBLIC" tokenWhitespace a <- systemLiteral tokenWhitespace b <- systemLiteral return $ PublicID a b systemID = string "SYSTEM" *> tokenWhitespace *> (SystemID <$> systemLiteral) systemLiteral = Text.pack <$> manyQuoted anyChar -- | -- -- >>> parseOnly doctype "" -- Right (Doctype "greeting" (Just (SystemID "hello.dtd")) []) -- >>> parseOnly doctype " ]>" -- Right (Doctype "foo" Nothing [GeneralEntityDeclaration "x" [ContentReference (EntityRef "lt")]]) doctype :: CharParsing m => Monad m => m Doctype doctype = do tokenDoctypeOpen tokenWhitespace name <- tokenName externalID <- optional $ tokenWhitespace >> externalID optional tokenWhitespace entities <- fromMaybe mempty <$> optional (between (char '[' >> optional tokenWhitespace) (optional tokenWhitespace >> char ']') $ many generalEntityDeclaration) tokenElementClose return $ Doctype name externalID entities quoted :: CharParsing m => Monad m => m a -> m a quoted x = x `surroundedBy` tokenSingleQuote <|> x `surroundedBy` tokenDoubleQuote manyQuoted :: CharParsing m => Monad m => m a -> m [a] manyQuoted x = manyQuotedBy tokenSingleQuote x <|> manyQuotedBy tokenDoubleQuote x where manyQuotedBy quote x = do quote manyTill x (try quote)