{-# LANGUAGE CPP #-} -- | A non-validating XML parser. For the input grammar, see -- . module Text.XML.HaXml.Parse ( -- * Parse a whole document xmlParse, xmlParse' -- * Parse just a DTD , dtdParse, dtdParse' -- * Parse a partial document , xmlParseWith -- * Individual parsers for use with /xmlParseWith/ and module SAX , document, element, content , comment, cdsect, chardata , reference, doctypedecl , processinginstruction , elemtag, qname, name, tok , elemOpenTag, elemCloseTag , emptySTs, XParser -- * These general utility functions don't belong here , fst3, snd3, thd3 ) where -- An XML parser, written using a slightly extended version of the -- Hutton/Meijer parser combinators. The input is tokenised internally -- by the lexer xmlLex. Whilst parsing, we gather a symbol -- table of entity references. PERefs must be defined before use, so we -- expand their uses as we encounter them, forcing the remainder of the -- input to be re-lexed and re-parsed. GERefs are simply stored for -- later retrieval. import Prelude hiding (either,maybe,sequence) import qualified Prelude (either) import Data.Maybe hiding (maybe) import Data.List (intercalate) import Data.Char (isSpace,isDigit,isHexDigit) import Control.Monad hiding (sequence) import Numeric (readDec,readHex) import Text.XML.HaXml.Types import Text.XML.HaXml.Namespaces import Text.XML.HaXml.Posn import Text.XML.HaXml.Lex import Text.ParserCombinators.Poly.State import System.FilePath (combine, dropFileName) #if ( defined(__GLASGOW_HASKELL__) && __GLASGOW_HASKELL__ > 502 ) || \ ( defined(__NHC__) && __NHC__ > 114 ) || defined(__HUGS__) import System.IO.Unsafe (unsafePerformIO) #elif defined(__GLASGOW_HASKELL__) import IOExts (unsafePerformIO) #elif defined(__NHC__) import IOExtras (unsafePerformIO) #elif defined(__HBC__) import UnsafePerformIO #endif -- #define DEBUG #if defined(DEBUG) # if ( defined(__GLASGOW_HASKELL__) && __GLASGOW_HASKELL__ > 502 ) || \ ( defined(__NHC__) && __NHC__ > 114 ) || defined(__HUGS__) import Debug.Trace(trace) # elif defined(__GLASGOW_HASKELL__) import IOExts(trace) # elif defined(__NHC__) || defined(__HBC__) import NonStdTrace # endif v `debug` s = trace s v #else v `debug` s = v #endif debug :: a -> String -> a -- | To parse a whole document, @xmlParse file content@ takes a filename -- (for generating error reports) and the string content of that file. -- A parse error causes program failure, with message to stderr. xmlParse :: String -> String -> Document Posn -- | To parse a whole document, @xmlParse' file content@ takes a filename -- (for generating error reports) and the string content of that file. -- Any parse error message is passed back to the caller through the -- @Either@ type. xmlParse' :: String -> String -> Either String (Document Posn) -- | To parse just a DTD, @dtdParse file content@ takes a filename -- (for generating error reports) and the string content of that -- file. If no DTD was found, you get @Nothing@ rather than an error. -- However, if a DTD is found but contains errors, the program crashes. dtdParse :: String -> String -> Maybe DocTypeDecl -- | To parse just a DTD, @dtdParse' file content@ takes a filename -- (for generating error reports) and the string content of that -- file. If no DTD was found, you get @Right Nothing@. -- If a DTD was found but contains errors, you get a @Left message@. dtdParse' :: String -> String -> Either String (Maybe DocTypeDecl) xmlParse name = Prelude.either error id . xmlParse' name dtdParse name = Prelude.either error id . dtdParse' name xmlParse' name = fst3 . runParser (toEOF document) emptySTs . xmlLex name dtdParse' name = fst3 . runParser justDTD emptySTs . xmlLex name toEOF :: XParser a -> XParser a toEOF = id -- there are other possible implementations... -- | To parse a partial document, e.g. from an XML-based stream protocol, -- where you may later want to get more document elements from the same -- stream. Arguments are: a parser for the item you want, and the -- already-lexed input to parse from. Returns the item you wanted -- (or an error message), plus the remainder of the input. xmlParseWith :: XParser a -> [(Posn,TokenT)] -> (Either String a, [(Posn,TokenT)]) xmlParseWith p = (\(v,_,s)->(v,s)) . runParser p emptySTs ---- Symbol table stuff ---- type SymTabs = (SymTab PEDef, SymTab EntityDef) -- | Some empty symbol tables for GE and PE references. emptySTs :: SymTabs emptySTs = (emptyST, emptyST) addPE :: String -> PEDef -> SymTabs -> SymTabs addPE n v (pe,ge) = (addST n v pe, ge) addGE :: String -> EntityDef -> SymTabs -> SymTabs addGE n v (pe,ge) = let newge = addST n v ge in newge `seq` (pe, newge) lookupPE :: String -> SymTabs -> Maybe PEDef lookupPE s (pe,_ge) = lookupST s pe flattenEV :: EntityValue -> String flattenEV (EntityValue evs) = concatMap flatten evs where flatten (EVString s) = s flatten (EVRef (RefEntity r)) = "&" ++r++";" flatten (EVRef (RefChar r)) = "&#"++show r++";" -- flatten (EVPERef n) = "%" ++n++";" ---- Misc ---- fst3 :: (a,b,c) -> a snd3 :: (a,b,c) -> b thd3 :: (a,b,c) -> c fst3 (a,_,_) = a snd3 (_,a,_) = a thd3 (_,_,a) = a ---- Auxiliary Parsing Functions ---- -- | XParser is just a specialisation of the PolyState parser. type XParser a = Parser SymTabs (Posn,TokenT) a -- | Return the next token from the input only if it matches the given token. tok :: TokenT -> XParser TokenT tok t = do (p,t') <- next case t' of TokError _ -> report failBad (show t) p t' _ | t'==t -> return t | otherwise -> report fail (show t) p t' nottok :: [TokenT] -> XParser TokenT nottok ts = do (p,t) <- next if t`elem`ts then report fail ("no "++show t) p t else return t -- | Return a qualified name (although the namespace qualification is not -- processed here; this is merely to get the correct type). qname :: XParser QName qname = fmap N name -- | Return just a name, e.g. element name, attribute name. name :: XParser Name name = do (p,tok) <- next case tok of TokName s -> return s TokError _ -> report failBad "a name" p tok _ -> report fail "a name" p tok string, freetext :: XParser String string = do (p,t) <- next case t of TokName s -> return s _ -> report fail "text" p t freetext = do (p,t) <- next case t of TokFreeText s -> return s _ -> report fail "text" p t maybe :: XParser a -> XParser (Maybe a) maybe p = ( Just <$> p) `onFail` return Nothing either :: XParser a -> XParser b -> XParser (Either a b) either p q = ( Left <$> p) `onFail` ( Right <$> q) word :: String -> XParser () word s = do { x <- next ; case x of (_p,TokName n) | s==n -> return () (_p,TokFreeText n) | s==n -> return () ( p,t@(TokError _)) -> report failBad (show s) p t ( p,t) -> report fail (show s) p t } posn :: XParser Posn posn = do { x@(p,_) <- next ; reparse [x] ; return p } nmtoken :: XParser NmToken nmtoken = string `onFail` freetext failP, failBadP :: String -> XParser a failP msg = do { p <- posn; fail (msg++"\n at "++show p) } failBadP msg = do { p <- posn; failBad (msg++"\n at "++show p) } report :: (String->XParser a) -> String -> Posn -> TokenT -> XParser a report fail expect p t = fail ("Expected "++expect++" but found "++show t ++"\n in "++show p) adjustErrP :: XParser a -> (String->String) -> XParser a p `adjustErrP` f = p `onFail` do pn <- posn (p `adjustErr` f) `adjustErr` (++show pn) peRef :: XParser a -> XParser a peRef p = p `onFail` do pn <- posn n <- pereference tr <- stQuery (lookupPE n) `debug` ("Looking up %"++n) case tr of Just (PEDefEntityValue ev) -> do reparse (xmlReLex (posInNewCxt ("macro %"++n++";") (Just pn)) (flattenEV ev)) `debug` (" defn: "++flattenEV ev) peRef p Just (PEDefExternalID (PUBLIC _ (SystemLiteral f))) -> do let f' = combine (dropFileName $ posnFilename pn) f val = unsafePerformIO (readFile f') reparse (xmlReLex (posInNewCxt f' (Just pn)) val) `debug` (" reading from file "++f') peRef p Just (PEDefExternalID (SYSTEM (SystemLiteral f))) -> do let f' = combine (dropFileName $ posnFilename pn) f val = unsafePerformIO (readFile f') reparse (xmlReLex (posInNewCxt f' (Just pn)) val) `debug` (" reading from file "++f') peRef p Nothing -> fail ("PEReference use before definition: "++"%"++n++";" ++"\n at "++show pn) blank :: XParser a -> XParser a blank p = p `onFail` do n <- pereference tr <- stQuery (lookupPE n) `debug` ("Looking up %"++n++" (is blank?)") case tr of Just (PEDefEntityValue ev) | all isSpace (flattenEV ev) -> do blank p `debug` "Empty macro definition" Just _ -> failP ("expected a blank PERef macro: "++"%"++n++";") Nothing -> failP ("PEReference use before definition: "++"%"++n++";") ---- XML Parsing Functions ---- justDTD :: XParser (Maybe DocTypeDecl) justDTD = do (ExtSubset _ ds) <- extsubset `debug` "Trying external subset" if null ds then fail "empty" else return (Just (DTD (N "extsubset") Nothing (concatMap extract ds))) `onFail` do (Prolog _ _ dtd _) <- prolog return dtd where extract (ExtMarkupDecl m) = [m] extract (ExtConditionalSect (IncludeSect i)) = concatMap extract i extract (ExtConditionalSect (IgnoreSect _i)) = [] -- | Return an entire XML document including prolog and trailing junk. document :: XParser (Document Posn) document = do p <- prolog `adjustErr` ("unrecognisable XML prolog\n"++) e <- element ms <- many misc (_,ge) <- stGet return (Document p ge e ms) -- | Return an XML comment. comment :: XParser Comment comment = do bracket (tok TokCommentOpen) (tok TokCommentClose) freetext -- tok TokCommentOpen -- commit $ do -- c <- freetext -- tok TokCommentClose -- return c -- | Parse a processing instruction. processinginstruction :: XParser ProcessingInstruction processinginstruction = do tok TokPIOpen commit $ do n <- string `onFail` failP "processing instruction has no target" f <- freetext tok TokPIClose `onFail` failP ("missing ?> in in " raise ((runParser aux emptySTs . xmlReLex p) s) where aux = do v <- versioninfo `onFail` failP "missing XML version info" e <- maybe encodingdecl s <- maybe sddecl return (XMLDecl v e s) raise (Left err, _, _) = failP err raise (Right ok, _, _) = return ok versioninfo :: XParser VersionInfo versioninfo = do word "version" `onFail` word "VERSION" tok TokEqual bracket (tok TokQuote) (commit $ tok TokQuote) freetext misc :: XParser Misc misc = oneOf' [ ("", Comment <$> comment) , ("", PI <$> processinginstruction) ] -- | Return a DOCTYPE decl, indicating a DTD. doctypedecl :: XParser DocTypeDecl doctypedecl = do tok TokSpecialOpen tok (TokSpecial DOCTYPEx) commit $ do n <- qname eid <- maybe externalid es <- maybe (bracket (tok TokSqOpen) (commit $ tok TokSqClose) (many (peRef markupdecl))) blank (tok TokAnyClose) `onFail` failP "missing > in DOCTYPE decl" return (DTD n eid (fromMaybe [] es)) -- | Return a DTD markup decl, e.g. ELEMENT, ATTLIST, etc markupdecl :: XParser MarkupDecl markupdecl = oneOf' [ ("ELEMENT", Element <$> elementdecl ) , ("ATTLIST", AttList <$> attlistdecl ) , ("ENTITY", Entity <$> entitydecl ) , ("NOTATION", Notation <$> notationdecl) , ("misc", MarkupMisc <$> misc ) ] `adjustErrP` ("when looking for a markup decl,\n"++) -- (\ (ELEMENT, ATTLIST, ENTITY, NOTATION, , or ") extsubset :: XParser ExtSubset extsubset = do td <- maybe textdecl ds <- many (peRef extsubsetdecl) return (ExtSubset td ds) extsubsetdecl :: XParser ExtSubsetDecl extsubsetdecl = ( ExtMarkupDecl <$> markupdecl) `onFail` ( ExtConditionalSect <$> conditionalsect) sddecl :: XParser SDDecl sddecl = do word "standalone" `onFail` word "STANDALONE" commit $ do tok TokEqual `onFail` failP "missing = in 'standalone' decl" bracket (tok TokQuote) (commit $ tok TokQuote) ( (word "yes" >> return True) `onFail` (word "no" >> return False) `onFail` failP "'standalone' decl requires 'yes' or 'no' value" ) {- element :: XParser (Element Posn) element = do tok TokAnyOpen (ElemTag n as) <- elemtag oneOf' [ ("self-closing tag <"++n++"/>" , do tok TokEndClose return (Elem n as [])) , ("after open tag <"++n++">" , do tok TokAnyClose cs <- many content p <- posn m <- bracket (tok TokEndOpen) (commit $ tok TokAnyClose) qname checkmatch p n m return (Elem n as cs)) ] `adjustErr` (("in element tag "++n++",\n")++) -} -- | Return a complete element including all its inner content. element :: XParser (Element Posn) element = do tok TokAnyOpen (ElemTag n as) <- elemtag ( do tok TokEndClose commit (return (Elem n as [])) `onFail` do tok TokAnyClose commit $ do return (Elem n as) `apply` manyFinally content (do p <- posn m <- bracket (tok TokEndOpen) (commit $ tok TokAnyClose) qname checkmatch p n m) ) `adjustErrBad` (("in element tag "++printableName n++",\n")++) checkmatch :: Posn -> QName -> QName -> XParser () checkmatch p n m = if n == m then return () else failBad ("tag <"++printableName n++"> terminated by \n at "++show p) -- | Parse only the parts between angle brackets in an element tag. elemtag :: XParser ElemTag elemtag = do n <- qname `adjustErrBad` ("malformed element tag\n"++) as <- many attribute return (ElemTag n as) -- | For use with stream parsers - returns the complete opening element tag. elemOpenTag :: XParser ElemTag elemOpenTag = do tok TokAnyOpen e <- elemtag tok TokAnyClose return e -- | For use with stream parsers - accepts a closing tag, provided it -- matches the given element name. elemCloseTag :: QName -> XParser () elemCloseTag n = do tok TokEndOpen p <- posn m <- qname tok TokAnyClose checkmatch p n m attribute :: XParser Attribute attribute = do n <- qname `adjustErr` ("malformed attribute name\n"++) tok TokEqual `onFail` failBadP "missing = in attribute" v <- attvalue `onFail` failBadP "missing attvalue" return (n,v) -- | Return a content particle, e.g. text, element, reference, etc content :: XParser (Content Posn) content = do { p <- posn ; c' <- content' ; return (c' p) } where content' = oneOf' [ ("element", CElem <$> element ) , ("chardata", CString False <$> chardata ) , ("reference", CRef <$> reference) , ("CDATA", CString True <$> cdsect ) , ("misc", CMisc <$> misc ) ] `adjustErrP` ("when looking for a content item,\n"++) -- (\ (element, text, reference, CDATA section, , or ") elementdecl :: XParser ElementDecl elementdecl = do tok TokSpecialOpen tok (TokSpecial ELEMENTx) n <- peRef qname `adjustErrBad` ("expecting identifier in ELEMENT decl\n"++) c <- peRef contentspec `adjustErrBad` (("in content spec of ELEMENT decl: " ++printableName n++"\n")++) blank (tok TokAnyClose) `onFail` failBadP ("expected > terminating ELEMENT decl" ++"\n element name was "++show (printableName n) ++"\n contentspec was "++(\ (ContentSpec p)-> debugShowCP p) c) return (ElementDecl n c) contentspec :: XParser ContentSpec contentspec = oneOf' [ ("EMPTY", peRef (word "EMPTY") >> return EMPTY) , ("ANY", peRef (word "ANY") >> return ANY) , ("mixed", Mixed <$> peRef mixed) , ("simple", ContentSpec <$> peRef cp) ] -- `adjustErr` ("when looking for content spec,\n"++) -- `adjustErr` (++"\nLooking for content spec (EMPTY, ANY, mixed, etc)") choice :: XParser [CP] choice = do bracket (tok TokBraOpen `debug` "Trying choice") (blank (tok TokBraClose `debug` "Succeeded with choice")) (peRef cp `sepBy1` blank (tok TokPipe)) sequence :: XParser [CP] sequence = do bracket (tok TokBraOpen `debug` "Trying sequence") (blank (tok TokBraClose `debug` "Succeeded with sequence")) (peRef cp `sepBy1` blank (tok TokComma)) cp :: XParser CP cp = oneOf [ do n <- qname m <- modifier let c = TagName n m return c `debug` ("ContentSpec: name "++debugShowCP c) , do ss <- sequence m <- modifier let c = Seq ss m return c `debug` ("ContentSpec: sequence "++debugShowCP c) , do cs <- choice m <- modifier let c = Choice cs m return c `debug` ("ContentSpec: choice "++debugShowCP c) ] `adjustErr` (++"\nwhen looking for a content particle") modifier :: XParser Modifier modifier = oneOf [ tok TokStar >> return Star , tok TokQuery >> return Query , tok TokPlus >> return Plus , return None ] -- just for debugging debugShowCP :: CP -> String debugShowCP cp = case cp of TagName n m -> printableName n++debugShowModifier m Choice cps m -> '(': intercalate "|" (map debugShowCP cps)++")"++debugShowModifier m Seq cps m -> '(': intercalate "," (map debugShowCP cps)++")"++debugShowModifier m debugShowModifier :: Modifier -> String debugShowModifier modifier = case modifier of None -> "" Query -> "?" Star -> "*" Plus -> "+" ---- mixed :: XParser Mixed mixed = do tok TokBraOpen peRef (do tok TokHash word "PCDATA") commit $ oneOf [ do cs <- many (peRef (do tok TokPipe peRef qname)) blank (tok TokBraClose >> tok TokStar) return (PCDATAplus cs) , blank (tok TokBraClose >> tok TokStar) >> return PCDATA , blank (tok TokBraClose) >> return PCDATA ] `adjustErrP` (++"\nLooking for mixed content spec (#PCDATA | ...)*\n") attlistdecl :: XParser AttListDecl attlistdecl = do tok TokSpecialOpen tok (TokSpecial ATTLISTx) n <- peRef qname `adjustErrBad` ("expecting identifier in ATTLIST\n"++) ds <- peRef (many1 (peRef attdef)) blank (tok TokAnyClose) `onFail` failBadP "missing > terminating ATTLIST" return (AttListDecl n ds) attdef :: XParser AttDef attdef = do n <- peRef qname `adjustErr` ("expecting attribute name\n"++) t <- peRef atttype `adjustErr` (("within attlist defn: " ++printableName n++",\n")++) d <- peRef defaultdecl `adjustErr` (("in attlist defn: " ++printableName n++",\n")++) return (AttDef n t d) atttype :: XParser AttType atttype = oneOf' [ ("CDATA", word "CDATA" >> return StringType) , ("tokenized", TokenizedType <$> tokenizedtype) , ("enumerated", EnumeratedType <$> enumeratedtype) ] `adjustErr` ("looking for ATTTYPE,\n"++) -- `adjustErr` (++"\nLooking for ATTTYPE (CDATA, tokenized, or enumerated") tokenizedtype :: XParser TokenizedType tokenizedtype = oneOf [ word "ID" >> return ID , word "IDREF" >> return IDREF , word "IDREFS" >> return IDREFS , word "ENTITY" >> return ENTITY , word "ENTITIES" >> return ENTITIES , word "NMTOKEN" >> return NMTOKEN , word "NMTOKENS" >> return NMTOKENS ] `onFail` do { t <- next ; failP ("Expected one of" ++" (ID, IDREF, IDREFS, ENTITY, ENTITIES, NMTOKEN, NMTOKENS)" ++"\nbut got "++show t) } enumeratedtype :: XParser EnumeratedType enumeratedtype = oneOf' [ ("NOTATION", NotationType <$> notationtype) , ("enumerated", Enumeration <$> enumeration) ] `adjustErr` ("looking for an enumerated or NOTATION type,\n"++) notationtype :: XParser NotationType notationtype = do word "NOTATION" bracket (tok TokBraOpen) (commit $ blank $ tok TokBraClose) (peRef name `sepBy1` peRef (tok TokPipe)) enumeration :: XParser Enumeration enumeration = bracket (tok TokBraOpen) (commit $ blank $ tok TokBraClose) (peRef nmtoken `sepBy1` blank (peRef (tok TokPipe))) defaultdecl :: XParser DefaultDecl defaultdecl = oneOf' [ ("REQUIRED", tok TokHash >> word "REQUIRED" >> return REQUIRED) , ("IMPLIED", tok TokHash >> word "IMPLIED" >> return IMPLIED) , ("FIXED", do f <- maybe (tok TokHash >> word "FIXED" >> return FIXED) a <- peRef attvalue return (DefaultTo a f) ) ] `adjustErr` ("looking for an attribute default decl,\n"++) conditionalsect :: XParser ConditionalSect conditionalsect = oneOf' [ ( "INCLUDE" , do tok TokSectionOpen peRef (tok (TokSection INCLUDEx)) p <- posn tok TokSqOpen `onFail` failBadP "missing [ after INCLUDE" i <- many (peRef extsubsetdecl) tok TokSectionClose `onFail` failBadP ("missing ]]> for INCLUDE section" ++"\n begun at "++show p) return (IncludeSect i)) , ( "IGNORE" , do tok TokSectionOpen peRef (tok (TokSection IGNOREx)) p <- posn tok TokSqOpen `onFail` failBadP "missing [ after IGNORE" many newIgnore -- many ignoresectcontents tok TokSectionClose `onFail` failBadP ("missing ]]> for IGNORE section" ++"\n begun at "++show p) return (IgnoreSect [])) ] `adjustErr` ("in a conditional section,\n"++) newIgnore :: XParser Ignore newIgnore = ( do tok TokSectionOpen many newIgnore `debug` "IGNORING conditional section" tok TokSectionClose return Ignore `debug` "end of IGNORED conditional section") `onFail` ( do t <- nottok [TokSectionOpen,TokSectionClose] return Ignore `debug` ("ignoring: "++show t)) --- obsolete? --ignoresectcontents :: XParser IgnoreSectContents --ignoresectcontents = do -- i <- ignore -- is <- many (do tok TokSectionOpen -- ic <- ignoresectcontents -- tok TokSectionClose -- ig <- ignore -- return (ic,ig)) -- return (IgnoreSectContents i is) -- --ignore :: XParser Ignore --ignore = do -- is <- many1 (nottok [TokSectionOpen,TokSectionClose]) -- return Ignore `debug` ("ignored all of: "++show is) ---- -- | Return either a general entity reference, or a character reference. reference :: XParser Reference reference = do bracket (tok TokAmp) (tok TokSemi) (freetext >>= val) where val ('#':'x':i) | all isHexDigit i = return . RefChar . fst . head . readHex $ i val ('#':i) | all isDigit i = return . RefChar . fst . head . readDec $ i val name = return . RefEntity $ name {- -- following is incorrect reference = ( RefChar <$> charref) `onFail` ( RefEntity <$> entityref) entityref :: XParser EntityRef entityref = do bracket (tok TokAmp) (commit $ tok TokSemi) name charref :: XParser CharRef charref = do bracket (tok TokAmp) (commit $ tok TokSemi) (freetext >>= readCharVal) where readCharVal ('#':'x':i) = return . fst . head . readHex $ i readCharVal ('#':i) = return . fst . head . readDec $ i readCharVal _ = mzero -} pereference :: XParser PEReference pereference = do bracket (tok TokPercent) (tok TokSemi) nmtoken entitydecl :: XParser EntityDecl entitydecl = ( EntityGEDecl <$> gedecl) `onFail` ( EntityPEDecl <$> pedecl) gedecl :: XParser GEDecl gedecl = do tok TokSpecialOpen tok (TokSpecial ENTITYx) n <- name e <- entitydef `adjustErrBad` (("in general entity defn "++n++",\n")++) tok TokAnyClose `onFail` failBadP ("expected > terminating G ENTITY decl "++n) stUpdate (addGE n e) `debug` ("added GE defn &"++n++";") return (GEDecl n e) pedecl :: XParser PEDecl pedecl = do tok TokSpecialOpen tok (TokSpecial ENTITYx) tok TokPercent n <- name e <- pedef `adjustErrBad` (("in parameter entity defn "++n++",\n")++) tok TokAnyClose `onFail` failBadP ("expected > terminating P ENTITY decl "++n) stUpdate (addPE n e) `debug` ("added PE defn %"++n++";\n"++show e) return (PEDecl n e) entitydef :: XParser EntityDef entitydef = oneOf' [ ("entityvalue", DefEntityValue <$> entityvalue) , ("external", do eid <- externalid ndd <- maybe ndatadecl return (DefExternalID eid ndd)) ] pedef :: XParser PEDef pedef = oneOf' [ ("entityvalue", PEDefEntityValue <$> entityvalue) , ("externalid", PEDefExternalID <$> externalid ) ] externalid :: XParser ExternalID externalid = oneOf' [ ("SYSTEM", do word "SYSTEM" SYSTEM <$> systemliteral) , ("PUBLIC", do word "PUBLIC" p <- pubidliteral PUBLIC p <$> systemliteral) ] `adjustErr` ("looking for an external id,\n"++) ndatadecl :: XParser NDataDecl ndatadecl = do word "NDATA" NDATA <$> name textdecl :: XParser TextDecl textdecl = do tok TokPIOpen word "xml" `onFail` word "XML" v <- maybe versioninfo e <- encodingdecl tok TokPIClose `onFail` failP "expected ?> terminating text decl" return (TextDecl v e) --extparsedent :: XParser (ExtParsedEnt Posn) --extparsedent = do -- t <- maybe textdecl -- c <- content -- return (ExtParsedEnt t c) -- --extpe :: XParser ExtPE --extpe = do -- t <- maybe textdecl -- e <- many (peRef extsubsetdecl) -- return (ExtPE t e) encodingdecl :: XParser EncodingDecl encodingdecl = do word "encoding" `onFail` word "ENCODING" tok TokEqual `onFail` failBadP "expected = in 'encoding' decl" f <- bracket (tok TokQuote) (commit $ tok TokQuote) freetext return (EncodingDecl f) notationdecl :: XParser NotationDecl notationdecl = do tok TokSpecialOpen tok (TokSpecial NOTATIONx) n <- name e <- either externalid publicid tok TokAnyClose `onFail` failBadP ("expected > terminating NOTATION decl "++n) return (NOTATION n e) publicid :: XParser PublicID publicid = do word "PUBLIC" PUBLICID <$> pubidliteral entityvalue :: XParser EntityValue entityvalue = do -- evs <- bracket (tok TokQuote) (commit $ tok TokQuote) (many (peRef ev)) tok TokQuote pn <- posn evs <- many ev tok TokQuote `onFail` failBadP "expected quote to terminate entityvalue" -- quoted text must be rescanned for possible PERefs st <- stGet Prelude.either failBad (return . EntityValue) . fst3 $ runParser (many ev) st (reLexEntityValue (\s-> stringify (lookupPE s st)) pn (flattenEV (EntityValue evs))) where stringify (Just (PEDefEntityValue ev)) = Just (flattenEV ev) stringify _ = Nothing ev :: XParser EV ev = oneOf' [ ("string", EVString <$> (string`onFail`freetext)) , ("reference", EVRef <$> reference) ] `adjustErr` ("looking for entity value,\n"++) attvalue :: XParser AttValue attvalue = do avs <- bracket (tok TokQuote) (commit $ tok TokQuote) (many (either freetext reference)) return (AttValue avs) systemliteral :: XParser SystemLiteral systemliteral = do s <- bracket (tok TokQuote) (commit $ tok TokQuote) freetext return (SystemLiteral s) -- note: refs &...; not permitted pubidliteral :: XParser PubidLiteral pubidliteral = do s <- bracket (tok TokQuote) (commit $ tok TokQuote) freetext return (PubidLiteral s) -- note: freetext is too liberal here -- | Return parsed freetext (i.e. until the next markup) chardata :: XParser CharData chardata = freetext