module Data.DTD.Parse.Unresolved
(
dtd
, textDecl
, dtdComponent
, entityDecl
, entityValue
, pERef
, notation
, notationSrc
, elementDecl
, contentDecl
, contentModel
, repeatChar
, attList
, attDecl
, attDeclPERef
, attType
, attDefault
, instruction
, comment
, externalID
, name
, nameSS
, quoted
, skipWS
, ws
)
where
import Data.DTD.Types.Unresolved
import Data.XML.Types (ExternalID(PublicID, SystemID),
Instruction(Instruction))
import Data.Attoparsec.Text (Parser, try, satisfy, takeTill,
anyChar, char, digit)
import qualified Data.Attoparsec.Text as A
import Data.Attoparsec.Combinator (manyTill, choice, sepBy1)
import Data.Functor ((<$>))
import Control.Applicative (pure, optional, (<*>), (<*), (*>), (<|>),
Applicative, many)
import Control.Monad (guard)
import Data.Text (Text)
import Data.Char (isSpace)
import qualified Data.Text as T
(<*.) :: Parser a -> T.Text -> Parser a
a <*. b = a <* A.string b
(.*>) :: T.Text -> Parser a -> Parser a
a .*> b = A.string a *> b
data PreParse =
PPERef PERef
| PInstruction Instruction
| PComment Text
| PMarkup [MarkupText]
deriving (Eq, Show)
data MarkupText = MTUnquoted Text | MTQuoted Text | MTPERef PERef
deriving (Eq, Show)
dtd :: Parser DTD
dtd = DTD <$> (skipWS *> optional (textDecl <* skipWS)) <*>
many (dtdComponent <* skipWS)
textDecl :: Parser DTDTextDecl
textDecl = do
"<?" .*> xml *> ws *> skipWS
enc1 <- optional $ try encoding
ver <- optional $ try (maybeSpace version enc1)
enc <- maybe (maybeSpace encoding ver) return enc1
skipWS *> "?>" .*> pure (DTDTextDecl ver enc)
where
xml = (A.string "X" <|> A.string "x") *>
(A.string "M" <|> A.string "m") *>
(A.string "L" <|> A.string "l")
version = attr "version" $ const versionNum
versionNum = T.append <$> A.string "1." <*> (T.singleton <$> digit)
encoding = attr "encoding" $ takeTill . (==)
attr name' val = try (attrQ '"' name' val) <|> attrQ '\'' name' val
attrQ q name' val = name' .*> skipWS *> "=" .*> skipWS *>
char q *> val q <* char q
maybeSpace p = maybe p (const $ ws *> skipWS *> p)
dtdComponent :: Parser DTDComponent
dtdComponent = choice $ map try
[ DTDPERef <$> pERef
, DTDEntityDecl <$> entityDecl
, DTDElementDecl <$> elementDecl
, DTDAttList <$> attList
, DTDNotation <$> notation
, DTDInstruction <$> instruction
, DTDCondSecBegin <$> condSecBegin
, condSecEnd
] ++
[ DTDComment <$> comment
]
condSecBegin :: Parser (Either PERef Bool)
condSecBegin = "INCLUDE" .*> pure (Right True)
<|> "IGNORE" .*> pure (Right False)
<|> "<![" .*> (Left <$> pERef) <*. "["
condSecEnd :: Parser DTDComponent
condSecEnd = "]]>" .*> return DTDCondSecEnd
instruction :: Parser Instruction
instruction = Instruction <$> ("<?" .*> skipWS *> nameSS) <*>
idata <*. "?>"
where
idata = T.concat . concat <$> manyTillS chunk (A.string "?>")
chunk = list2 . T.singleton <$> anyChar <*> takeTill (== '?')
entityDecl :: Parser EntityDecl
entityDecl = "<!ENTITY" .*> ws *> skipWS *>
choice [try internalParam, try externalParam,
try internalGen, externalGen]
<* skipWS <*. ">"
where
internalParam = InternalParameterEntityDecl <$>
(param *> nameSS) <*> entityValue
externalParam = ExternalParameterEntityDecl <$>
(param *> nameSS) <*> externalID
internalGen = InternalGeneralEntityDecl <$> nameSS <*> entityValue
externalGen = ExternalGeneralEntityDecl <$>
nameSS <*> externalID <*> optional (try ndata)
param = "%" .*> ws *> skipWS
ndata = skipWS *> "NDATA" .*> ws *> skipWS *> name
name :: Parser Text
name = nonNull $ takeTill notNameChar
where
notNameChar c = isSpace c || c `elem` syntaxChars
syntaxChars = "()[]<>!%&;'\"?*+|,="
nonNull parser = do
text <- parser
guard . not . T.null $ text
return text
nameSS :: Parser Text
nameSS = name <* skipWS
nameSSP :: Parser (Either PERef Text)
nameSSP = ((Left <$> pERef) <|> (Right <$> name)) <* skipWS
entityValue :: Parser [EntityValue]
entityValue = try (quotedVal '"') <|> quotedVal '\''
where
quotedVal q = char q *> manyTill (content q) (char q)
content q = EntityPERef <$> try pERef <|> EntityText <$> text q
text q = takeTill $ \c -> c == '%' || c == q
entityValueUnquoted :: Parser [EntityValue]
entityValueUnquoted = many $
((EntityPERef <$> pERef) <|>
(EntityText <$> (A.takeWhile1 $ not . flip elem "%>")))
pERef :: Parser PERef
pERef = "%" .*> name <*. ";"
elementDecl :: Parser ElementDecl
elementDecl = ElementDecl <$> ("<!ELEMENT" .*> ws *> skipWS *> nameSSP) <*>
contentDecl <* skipWS <*. ">"
contentDecl :: Parser ContentDecl
contentDecl = choice $ map try
[ pure ContentEmpty <*. "EMPTY"
, pure ContentAny <*. "ANY"
, ContentMixed <$> pcdata
, ContentPERef <$> pERef
] ++
[ ContentElement <$> entityValueUnquoted
]
where
pcdata = "(" .*> skipWS *> "#PCDATA" .*> skipWS *>
(try tags <|> noTagsNoStar)
tags = many ("|" .*> skipWS *> nameSS) <*. ")*"
noTagsNoStar = ")" .*> pure []
contentModel :: Parser ContentModel
contentModel = choice $ map (<*> repeatChar)
[ CMChoice <$> try (cmList '|')
, CMSeq <$> try (cmList ',')
, CMName <$> name
]
where
cmList sep = "(" .*> skipWS *>
((contentModel <* skipWS) `sepBy1` (char sep *> skipWS)) <*. ")"
repeatChar :: Parser Repeat
repeatChar = choice
[ char '?' *> pure ZeroOrOne
, char '*' *> pure ZeroOrMore
, char '+' *> pure OneOrMore
, pure One
]
attList :: Parser AttList
attList = AttList <$> ("<!ATTLIST" .*> ws *> skipWS *> nameSSP) <*>
many attDeclPERef <*. ">"
attDeclPERef :: Parser AttDeclPERef
attDeclPERef = (ADPPERef <$> pERef <* skipWS) <|> (ADPDecl <$> attDecl)
attDecl :: Parser AttDecl
attDecl = AttDecl <$>
nameSS <*> attTypePERef <* skipWS <*> attDefault <* skipWS
attTypePERef :: Parser AttTypePERef
attTypePERef = (ATPPERef <$> pERef) <|> (ATPType <$> attType)
attType :: Parser AttType
attType = choice $ map try
[ "CDATA" .*> ws *> pure AttStringType
, "ID" .*> ws *> pure AttIDType
, "IDREF" .*> ws *> pure AttIDRefType
, "IDREFS" .*> ws *> pure AttIDRefsType
, "ENTITY" .*> ws *> pure AttEntityType
, "ENTITIES" .*> ws *> pure AttEntitiesType
, "NMTOKEN" .*> ws *> pure AttNmTokenType
, "NMTOKENS" .*> ws *> pure AttNmTokensType
, AttEnumType <$> enumType
] ++
[ AttNotationType <$> notationType
]
where
enumType = nameList
notationType = "NOTATION" .*> ws *> skipWS *> nameList
nameList = "(" .*> skipWS *>
(nameSS `sepBy1` ("|" .*> skipWS)) <*. ")"
attDefault :: Parser AttDefault
attDefault = choice $ map try
[ "#REQUIRED" .*> pure AttRequired
, "#IMPLIED" .*> pure AttImplied
, AttFixed <$> ("#FIXED" .*> ws *> skipWS *> quoted)
] ++
[ AttDefaultValue <$> quoted
]
quoted :: Parser Text
quoted = quotedWith '"' <|> quotedWith '\''
where
quotedWith q = char q *> takeTill (== q) <* char q
notation :: Parser Notation
notation = Notation <$>
("<!NOTATION" .*> ws *> skipWS *> name) <* ws <* skipWS <*>
notationSrc <*. ">"
notationSrc :: Parser NotationSource
notationSrc = try system <|> public
where
system = NotationSysID <$>
("SYSTEM" .*> ws *> skipWS *> quoted <* ws <* skipWS)
public = mkPublic <$>
("PUBLIC" .*> ws *> skipWS *> quoted) <*>
optional (try $ ws *> skipWS *> quoted) <* skipWS
mkPublic pubID = maybe (NotationPubID pubID) (NotationPubSysID pubID)
externalID :: Parser ExternalID
externalID = try system <|> public
where
system = SystemID <$> ("SYSTEM" .*> ws *> skipWS *> quoted)
public = PublicID <$> ("PUBLIC" .*> ws *> skipWS *> quoted) <*
ws <* skipWS <*> quoted
comment :: Parser Text
comment = "<!--" .*> (T.concat . concat <$> manyTillS chunk (A.string "--")) <*. ">"
where
chunk = list2 . T.singleton <$> anyChar <*> takeTill (== '-')
isXMLSpace :: Char -> Bool
isXMLSpace = (`elem` "\x20\x9\xD\xA")
ws :: Parser Char
ws = satisfy isXMLSpace
skipWS :: Parser ()
skipWS = A.skipWhile isXMLSpace
manyTillS :: Parser a -> Parser Text -> Parser [a]
manyTillS = manyTill
list2 :: a -> a -> [a]
list2 x y = [x, y]