module Text.XML.HXT.Parser.XmlDTDParser
( parseXmlDTDdecl
, parseXmlDTDdeclPart
, parseXmlDTDEntityValue
, elementDecl
, attlistDecl
, entityDecl
, notationDecl
)
where
import Data.Maybe
import Text.ParserCombinators.Parsec
import Text.ParserCombinators.Parsec.Pos
import Text.XML.HXT.DOM.Interface
import Text.XML.HXT.DOM.ShowXml
( xshow
)
import Text.XML.HXT.DOM.XmlNode ( mkDTDElem'
, mkText'
, mkError'
, isText
, isDTD
, getText
, getDTDPart
, getDTDAttrl
, getChildren
, setChildren
)
import qualified Text.XML.HXT.Parser.XmlTokenParser as XT
import Text.XML.HXT.Parser.XmlCharParser ( XParser
, XPState(..)
, withoutNormNewline
)
import qualified Text.XML.HXT.Parser.XmlCharParser as XC
( xmlSpaceChar )
import qualified Text.XML.HXT.Parser.XmlDTDTokenParser as XD
( dtdToken )
type LocalState = (Int, [(Int, String, SourcePos)])
type SParser a = XParser LocalState a
initialState :: SourcePos -> XPState LocalState
initialState p = withoutNormNewline (0, [(0, sourceName p, p)])
updateLocalState :: (LocalState -> LocalState) -> SParser ()
updateLocalState upd
= updateState $ \ xps -> xps { xps_userState = upd $ xps_userState xps }
pushPar :: String -> SParser ()
pushPar n = do
p <- getPosition
updateLocalState (\ (i, s) -> (i+1, (i+1, n, p) : s))
setPosition ( newPos (sourceName p ++ " (line " ++ show (sourceLine p) ++ ", column " ++ show (sourceColumn p) ++ ") in content of parameter entity ref %" ++ n ++ ";") 1 1)
popPar :: SParser ()
popPar = do
oldPos <- getPos
updateLocalState pop
setPosition oldPos
where
pop (i, [(_, s, p)]) = (i+1, [(i+1, s, p)])
pop (i, _t:s) = (i, s)
pop (_i, []) = undefined
getParNo :: SParser Int
getParNo = do
s <- getState
let (_i, (top, _n, _p) : _s) = xps_userState s
return top
getPos :: SParser SourcePos
getPos = do
s <- getState
let (_i, (_top, _n, p) : _s) = xps_userState s
return p
delPE :: SParser ()
delPE = do
_ <- char '\0'
return ()
startPE :: SParser ()
startPE
= do
try ( do
delPE
n <- many1 (satisfy (/= '\0'))
delPE
pushPar n
)
endPE :: SParser ()
endPE
= do
try (do
delPE
delPE
popPar
)
inSamePE :: SParser a -> SParser a
inSamePE p
= do
i <- getParNo
r <- p
j <- getParNo
if (i == j)
then return r
else fail $ "parameter entity contents does not fit into the structure of a DTD declarations"
xmlSpaceChar :: SParser ()
xmlSpaceChar = ( XC.xmlSpaceChar
>>
return ()
)
<|>
startPE
<|>
endPE
<?> "white space"
skipS :: SParser ()
skipS
= skipMany1 xmlSpaceChar
>>
return ()
skipS0 :: SParser ()
skipS0
= skipMany xmlSpaceChar
>>
return ()
name :: SParser XmlTree
name
= do
n <- XT.name
return (mkDTDElem' NAME [(a_name, n)] [])
nmtoken :: SParser XmlTree
nmtoken
= do
n <- XT.nmtoken
return (mkDTDElem' NAME [(a_name, n)] [])
elementDecl :: SParser XmlTrees
elementDecl
= between (try $ string "<!ELEMENT") (char '>') elementDeclBody
elementDeclBody :: SParser XmlTrees
elementDeclBody
= do
skipS
n <- XT.name
skipS
(al, cl) <- contentspec
skipS0
return [mkDTDElem' ELEMENT ((a_name, n) : al) cl]
contentspec :: SParser (Attributes, XmlTrees)
contentspec
= simplespec k_empty v_empty
<|>
simplespec k_any v_any
<|>
inSamePE mixed
<|>
inSamePE children
<?> "content specification"
where
simplespec kw v
= do
_ <- XT.keyword kw
return ([(a_type, v)], [])
children :: SParser (Attributes, XmlTrees)
children
= ( do
(al, cl) <- choiceOrSeq
modifier <- optOrRep
return ([(a_type, v_children)], [mkDTDElem' CONTENT (modifier ++ al) cl])
)
<?> "element content"
optOrRep :: SParser Attributes
optOrRep
= do
m <- option "" (XT.mkList (oneOf "?*+"))
return [(a_modifier, m)]
choiceOrSeq :: SParser (Attributes, XmlTrees)
choiceOrSeq
= inSamePE $
do
cl <- try ( do
lpar
choiceOrSeqBody
)
rpar
return cl
choiceOrSeqBody :: SParser (Attributes, XmlTrees)
choiceOrSeqBody
= do
cp1 <- cp
choiceOrSeq1 cp1
where
choiceOrSeq1 :: XmlTree -> SParser (Attributes, XmlTrees)
choiceOrSeq1 c1
= ( do
bar
c2 <- cp
cl <- many ( do
bar
cp
)
return ([(a_kind, v_choice)], (c1 : c2 : cl))
)
<|>
( do
cl <- many ( do
comma
cp
)
return ([(a_kind, v_seq)], (c1 : cl))
)
<?> "sequence or choice"
cp :: SParser XmlTree
cp
= ( do
n <- name
m <- optOrRep
return ( case m of
[(_, "")] -> n
_ -> mkDTDElem' CONTENT (m ++ [(a_kind, v_seq)]) [n]
)
)
<|>
( do
(al, cl) <- choiceOrSeq
m <- optOrRep
return (mkDTDElem' CONTENT (m ++ al) cl)
)
mixed :: SParser (Attributes, XmlTrees)
mixed
= ( do
_ <- try ( do
lpar
string k_pcdata
)
nl <- many ( do
bar
name
)
rpar
if null nl
then do
_ <- option ' ' (char '*')
return ( [ (a_type, v_pcdata) ]
, []
)
else do
_ <- char '*' <?> "closing parent for mixed content (\")*\")"
return ( [ (a_type, v_mixed) ]
, [ mkDTDElem' CONTENT [ (a_modifier, "*")
, (a_kind, v_choice)
] nl
]
)
)
<?> "mixed content"
attlistDecl :: SParser XmlTrees
attlistDecl
= between (try $ string "<!ATTLIST") (char '>') attlistDeclBody
attlistDeclBody :: SParser XmlTrees
attlistDeclBody
= do
skipS
n <- XT.name
al <- many attDef
skipS0
return (map (mkDTree n) al)
where
mkDTree n' (al, cl)
= mkDTDElem' ATTLIST ((a_name, n') : al) cl
attDef :: SParser (Attributes, XmlTrees)
attDef
= do
n <- try ( do
skipS
XT.name
) <?> "attribute name"
skipS
(t, cl) <- attType
skipS
d <- defaultDecl
return (((a_value, n) : d) ++ t, cl)
attType :: SParser (Attributes, XmlTrees)
attType
= tokenizedOrStringType
<|>
enumeration
<|>
notationType
<?> "attribute type"
tokenizedOrStringType :: SParser (Attributes, XmlTrees)
tokenizedOrStringType
= do
n <- choice $ map XT.keyword typl
return ([(a_type, n)], [])
where
typl = [ k_cdata
, k_idrefs
, k_idref
, k_id
, k_entity
, k_entities
, k_nmtokens
, k_nmtoken
]
enumeration :: SParser (Attributes, XmlTrees)
enumeration
= do
nl <- inSamePE (between lpar rpar (sepBy1 nmtoken bar))
return ([(a_type, k_enumeration)], nl)
notationType :: SParser (Attributes, XmlTrees)
notationType
= do
_ <- XT.keyword k_notation
skipS
nl <- inSamePE (between lpar rpar ( sepBy1 name bar ))
return ([(a_type, k_notation)], nl)
defaultDecl :: SParser Attributes
defaultDecl
= ( do
str <- try $ string k_required
return [(a_kind, str)]
)
<|>
( do
str <- try $ string k_implied
return [(a_kind, str)]
)
<|>
( do
l <- fixed
v <- XT.attrValueT
return ((a_default, xshow v) : l)
)
<?> "default declaration"
where
fixed = option [(a_kind, k_default)]
( do
_ <- try $ string k_fixed
skipS
return [(a_kind, k_fixed)]
)
entityDecl :: SParser XmlTrees
entityDecl
= between ( try $ string "<!ENTITY" ) (char '>') entityDeclBody
entityDeclBody :: SParser XmlTrees
entityDeclBody
= do
skipS
( peDecl
<|>
geDecl
<?> "entity declaration" )
geDecl :: SParser XmlTrees
geDecl
= do
n <- XT.name
skipS
(al, cl) <- entityDef
skipS0
return [mkDTDElem' ENTITY ((a_name, n) : al) cl]
entityDef :: SParser (Attributes, XmlTrees)
entityDef
= entityValue
<|>
externalEntitySpec
externalEntitySpec :: SParser (Attributes, XmlTrees)
externalEntitySpec
= do
al <- externalID
nd <- option [] nDataDecl
return ((al ++ nd), [])
peDecl :: SParser XmlTrees
peDecl
= do
_ <- char '%'
skipS
n <- XT.name
skipS
(al, cs) <- peDef
skipS0
return [mkDTDElem' PENTITY ((a_name, n) : al) cs]
peDef :: SParser (Attributes, XmlTrees)
peDef
= entityValue
<|>
do
al <- externalID
return (al, [])
entityValue :: XParser s (Attributes, XmlTrees)
entityValue
= do
v <- XT.entityValueT
return ([], v)
externalID :: SParser Attributes
externalID
= ( do
_ <- XT.keyword k_system
skipS
lit <- XT.systemLiteral
return [(k_system, lit)]
)
<|>
( do
_ <- XT.keyword k_public
skipS
pl <- XT.pubidLiteral
skipS
sl <- XT.systemLiteral
return [ (k_system, sl)
, (k_public, pl) ]
)
<?> "SYSTEM or PUBLIC declaration"
nDataDecl :: SParser Attributes
nDataDecl
= do
_ <- try ( do
skipS
XT.keyword k_ndata
)
skipS
n <- XT.name
return [(k_ndata, n)]
notationDecl :: SParser XmlTrees
notationDecl
= between (try $ string "<!NOTATION") (char '>' <?> "notation declaration") notationDeclBody
notationDeclBody :: SParser XmlTrees
notationDeclBody
= do
skipS
n <- XT.name
skipS
eid <- ( try externalID
<|>
publicID
)
skipS0
return [mkDTDElem' NOTATION ((a_name, n) : eid) []]
publicID :: SParser Attributes
publicID
= do
_ <- XT.keyword k_public
skipS
l <- XT.pubidLiteral
return [(k_public, l)]
condSectCondBody :: SParser XmlTrees
condSectCondBody
= do
skipS0
n <- XT.name
skipS0
let n' = stringToUpper n
if n' `elem` [k_include, k_ignore]
then return [mkText' n']
else fail $ "INCLUDE or IGNORE expected in conditional section"
separator :: Char -> SParser ()
separator c
= do
_ <- try ( do
skipS0
char c
)
skipS0
<?> [c]
bar, comma, lpar, rpar :: SParser ()
bar = separator '|'
comma = separator ','
lpar
= do
_ <- char '('
skipS0
rpar
= do
skipS0
_ <- char ')'
return ()
parseXmlDTDEntityValue :: XmlTree -> XmlTrees
parseXmlDTDEntityValue t
| isDTDPEref t
= ( either
( (:[]) . mkError' c_err . (++ "\n") . show )
( \cl' -> if null cl'
then [mkText' ""]
else cl'
)
.
runParser parser (withoutNormNewline ()) source
) input
| otherwise
= []
where
al = fromMaybe [] . getDTDAttrl $ t
cl = getChildren t
parser = XT.entityTokensT "%&"
source = "value of parameter entity " ++ lookupDef "" a_peref al
input = xshow cl
parseXmlDTDdeclPart :: XmlTree -> XmlTrees
parseXmlDTDdeclPart t
| isDTDPEref t
= ( (:[])
.
either
( mkError' c_err . (++ "\n") . show )
( flip setChildren $ t )
.
runParser parser (withoutNormNewline ()) source
) input
| otherwise
= []
where
al = fromMaybe [] . getDTDAttrl $ t
cl = getChildren t
parser = many XD.dtdToken
source = "value of parameter entity " ++ lookupDef "" a_peref al
input = xshow cl
parseXmlDTDdecl :: XmlTree -> XmlTrees
parseXmlDTDdecl t
| isDTD t
= ( either ((:[]) . mkError' c_err . (++ "\n") . show) id
.
runParser parser (initialState pos) source
) input
| otherwise
= []
where
dtdElem = fromJust . getDTDPart $ t
al = fromMaybe [] . getDTDAttrl $ t
cl = getChildren t
dtdParsers
= [ (ELEMENT, elementDeclBody)
, (ATTLIST, attlistDeclBody)
, (ENTITY, entityDeclBody)
, (NOTATION, notationDeclBody)
, (CONDSECT, condSectCondBody)
]
source = lookupDef "DTD declaration" a_source al
line = lookupDef "1" a_line al
column = lookupDef "1" a_column al
pos = newPos source (read line) (read column)
parser = do
setPosition pos
res <- fromJust . lookup dtdElem $ dtdParsers
eof
return res
input = concatMap collectText cl
collectText :: XmlTree -> String
collectText t
| isText t
= fromMaybe "" . getText $ t
| isDTDPEref t
= prefixPe ++ concatMap collectText (getChildren t) ++ suffixPe
| otherwise
= ""
where
al = fromMaybe [] . getDTDAttrl $ t
delPe = "\0"
prefixPe = delPe ++ lookupDef "???" a_peref al ++ delPe
suffixPe = delPe ++ delPe
isDTDPEref :: XmlTree -> Bool
isDTDPEref
= maybe False (== PEREF) . getDTDPart