-- | This is a parser for HTML documents.  Unlike for XML documents, it
--   must include a certain amount of error-correction to account for
--   HTML features like self-terminating tags, unterminated tags, and
--   incorrect nesting.  The input is tokenised by the
--   XML lexer (a separate lexer is not required for HTML).

-- It uses a slightly extended version of the Hutton/Meijer parser
-- combinators.

module Text.XML.HaXml.Html.Parse
  ( htmlParse
  , htmlParse'
  ) where

import Prelude hiding (either,maybe,sequence)
import qualified Prelude (either)
import Data.Maybe hiding (maybe)
import Data.Char (toLower, {-isSpace,-} isDigit, isHexDigit)
import Numeric (readDec,readHex)
import Control.Monad

import Text.XML.HaXml.Types
import Text.XML.HaXml.Namespaces
import Text.XML.HaXml.Lex
import Text.XML.HaXml.Posn
import Text.ParserCombinators.Poly.Plain

--  #define DEBUG

#if defined(DEBUG)
#  if ( defined(__GLASGOW_HASKELL__) && __GLASGOW_HASKELL__ > 502 ) || \
      ( defined(__NHC__) && __NHC__ > 114 ) || defined(__HUGS__)
import Debug.Trace(trace)
#  elif defined(__GLASGOW_HASKELL__)
import IOExts(trace)
#  elif defined(__NHC__) || defined(__HBC__)
import NonStdTrace
#  endif
debug :: Monad m => String -> m ()
debug s = trace s (return ())
#else
debug :: Monad m => String -> m ()
debug :: forall (m :: * -> *). Monad m => String -> m ()
debug String
_ = forall (m :: * -> *) a. Monad m => a -> m a
return ()
#endif


-- | The first argument is the name of the file, the second is the string
--   contents of the file.  The result is the generic representation of
--   an XML document.  Any errors cause program failure with message to stderr.
htmlParse :: String -> String -> Document Posn
htmlParse :: String -> String -> Document Posn
htmlParse String
file = forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
Prelude.either forall a. HasCallStack => String -> a
error forall a. a -> a
id forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String -> Either String (Document Posn)
htmlParse' String
file

-- | The first argument is the name of the file, the second is the string
--   contents of the file.  The result is the generic representation of
--   an XML document.  Any parsing errors are returned in the @Either@ type.
htmlParse' :: String -> String -> Either String (Document Posn)
htmlParse' :: String -> String -> Either String (Document Posn)
htmlParse' String
file = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall i. Document i -> Document i
simplify forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a, b) -> a
fst
                  forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall t a. Parser t a -> [t] -> (Either String a, [t])
runParser HParser (Document Posn)
document forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String -> [(Posn, TokenT)]
xmlLex String
file

---- Document simplification ----

simplify :: Document i -> Document i
simplify :: forall i. Document i -> Document i
simplify (Document Prolog
p SymTab EntityDef
st (Elem QName
n [Attribute]
avs [Content i]
cs) [Misc]
ms) =
    forall i.
Prolog -> SymTab EntityDef -> Element i -> [Misc] -> Document i
Document Prolog
p SymTab EntityDef
st (forall i. QName -> [Attribute] -> [Content i] -> Element i
Elem QName
n [Attribute]
avs (forall {i}. (Content i -> Bool) -> [Content i] -> [Content i]
deepfilter forall {i}. Content i -> Bool
simp [Content i]
cs)) [Misc]
ms
  where
    simp :: Content i -> Bool
simp (CElem (Elem (N String
"null") [] []) i
_) = Bool
False
    simp (CElem (Elem QName
t [Attribute]
_ []) i
_)
        | QName -> String
localName QName
t forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"font",String
"p",String
"i",String
"b",String
"em",String
"tt",String
"big",String
"small"]
                                           = Bool
False
 -- simp (CString False s _) | all isSpace s = False
    simp Content i
_ = Bool
True
    deepfilter :: (Content i -> Bool) -> [Content i] -> [Content i]
deepfilter Content i -> Bool
f =
        forall a. (a -> Bool) -> [a] -> [a]
filter Content i -> Bool
f forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a -> b) -> [a] -> [b]
map (\Content i
c-> case Content i
c of
                          CElem (Elem QName
t [Attribute]
avs [Content i]
cs) i
i
                                  -> forall i. Element i -> i -> Content i
CElem (forall i. QName -> [Attribute] -> [Content i] -> Element i
Elem QName
t [Attribute]
avs ((Content i -> Bool) -> [Content i] -> [Content i]
deepfilter Content i -> Bool
f [Content i]
cs)) i
i
                          Content i
_       -> Content i
c)

-- opening any of these, they close again immediately
selfclosingtags :: [String]
selfclosingtags :: [String]
selfclosingtags = [String
"img",String
"hr",String
"br",String
"meta",String
"col",String
"link",String
"base"
                  ,String
"param",String
"area",String
"frame",String
"input"]

-- closing this, implicitly closes any of those which are contained in it
closeInnerTags :: [(String,[String])]
closeInnerTags :: [(String, [String])]
closeInnerTags =
  [ (String
"ul",      [String
"li"])
  , (String
"ol",      [String
"li"])
  , (String
"dl",      [String
"dt",String
"dd"])
  , (String
"tr",      [String
"th",String
"td"])
  , (String
"div",     [String
"p"])
  , (String
"thead",   [String
"th",String
"tr",String
"td"])
  , (String
"tfoot",   [String
"th",String
"tr",String
"td"])
  , (String
"tbody",   [String
"th",String
"tr",String
"td"])
  , (String
"table",   [String
"th",String
"tr",String
"td",String
"thead",String
"tfoot",String
"tbody"])
  , (String
"caption", [String
"p"])
  , (String
"th",      [String
"p"])
  , (String
"td",      [String
"p"])
  , (String
"li",      [String
"p"])
  , (String
"dt",      [String
"p"])
  , (String
"dd",      [String
"p"])
  , (String
"object",  [String
"p"])
  , (String
"map",     [String
"p"])
  , (String
"body",    [String
"p"])
  ]

-- opening this, implicitly closes that
closes :: Name -> Name -> Bool
String
"a"     closes :: String -> String -> Bool
`closes` String
"a"      =  Bool
True
String
"li"    `closes` String
"li"     =  Bool
True
String
"th"    `closes`  String
t       | String
t forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"th",String
"td"]      =  Bool
True
String
"td"    `closes`  String
t       | String
t forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"th",String
"td"]      =  Bool
True
String
"tr"    `closes`  String
t       | String
t forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"th",String
"td",String
"tr"] =  Bool
True
String
"dt"    `closes`  String
t       | String
t forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"dt",String
"dd"]      =  Bool
True
String
"dd"    `closes`  String
t       | String
t forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"dt",String
"dd"]      =  Bool
True
String
"form"  `closes` String
"form"   = Bool
True
String
"label" `closes` String
"label"  = Bool
True
String
_       `closes` String
"option" = Bool
True
String
"thead" `closes` String
t        | String
t forall a. Eq a => a -> a -> Bool
== String
"colgroup"                = Bool
True
String
"tfoot" `closes` String
t        | String
t forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"thead",String
"colgroup"]  = Bool
True
String
"tbody" `closes` String
t        | String
t forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"tbody",String
"tfoot",String
"thead",String
"colgroup"] = Bool
True
String
"colgroup" `closes` String
"colgroup"  = Bool
True
String
t `closes` String
"p"
    | String
t forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"p",String
"h1",String
"h2",String
"h3",String
"h4",String
"h5",String
"h6"
               ,String
"hr",String
"div",String
"ul",String
"dl",String
"ol",String
"table"]  =  Bool
True
String
_ `closes` String
_ = Bool
False


---- Auxiliary Parsing Functions ----

type HParser a = Parser (Posn,TokenT) a

tok :: TokenT -> HParser TokenT
tok :: TokenT -> HParser TokenT
tok TokenT
t = do (Posn
p,TokenT
t') <- forall t. Parser t t
next
           case TokenT
t' of TokError String
_    -> forall a.
(String -> HParser a) -> String -> Posn -> TokenT -> HParser a
report forall (p :: * -> *) a. PolyParse p => String -> p a
failBad (forall a. Show a => a -> String
show TokenT
t) Posn
p TokenT
t'
                      TokenT
_ | TokenT
t'forall a. Eq a => a -> a -> Bool
==TokenT
t     -> forall (m :: * -> *) a. Monad m => a -> m a
return TokenT
t
                        | Bool
otherwise -> forall a.
(String -> HParser a) -> String -> Posn -> TokenT -> HParser a
report forall (m :: * -> *) a. MonadFail m => String -> m a
fail (forall a. Show a => a -> String
show TokenT
t) Posn
p TokenT
t'

qname :: HParser QName
qname :: HParser QName
qname = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap String -> QName
N HParser String
name

name :: HParser Name
--name = do {(p,TokName s) <- next; return s}
name :: HParser String
name = do (Posn
p,TokenT
tok) <- forall t. Parser t t
next
          case TokenT
tok of
            TokName String
s  -> forall (m :: * -> *) a. Monad m => a -> m a
return String
s
            TokError String
_ -> forall a.
(String -> HParser a) -> String -> Posn -> TokenT -> HParser a
report forall (p :: * -> *) a. PolyParse p => String -> p a
failBad String
"a name" Posn
p TokenT
tok
            TokenT
_          -> forall a.
(String -> HParser a) -> String -> Posn -> TokenT -> HParser a
report forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
"a name" Posn
p TokenT
tok

string, freetext :: HParser String
string :: HParser String
string   = do (Posn
p,TokenT
t) <- forall t. Parser t t
next
              case TokenT
t of TokName String
s -> forall (m :: * -> *) a. Monad m => a -> m a
return String
s
                        TokenT
_         -> forall a.
(String -> HParser a) -> String -> Posn -> TokenT -> HParser a
report forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
"text" Posn
p TokenT
t
freetext :: HParser String
freetext = do (Posn
p,TokenT
t) <- forall t. Parser t t
next
              case TokenT
t of TokFreeText String
s -> forall (m :: * -> *) a. Monad m => a -> m a
return String
s
                        TokenT
_             -> forall a.
(String -> HParser a) -> String -> Posn -> TokenT -> HParser a
report forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
"text" Posn
p TokenT
t

maybe :: HParser a -> HParser (Maybe a)
maybe :: forall a. HParser a -> HParser (Maybe a)
maybe HParser a
p =
    (forall a. a -> Maybe a
Just forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> HParser a
p) forall t a. Parser t a -> Parser t a -> Parser t a
`onFail`
    forall (m :: * -> *) a. Monad m => a -> m a
return forall a. Maybe a
Nothing

either :: HParser a -> HParser b -> HParser (Either a b)
either :: forall a b. HParser a -> HParser b -> HParser (Either a b)
either HParser a
p HParser b
q =
    (forall a b. a -> Either a b
Left forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> HParser a
p) forall t a. Parser t a -> Parser t a -> Parser t a
`onFail`
    (forall a b. b -> Either a b
Right forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> HParser b
q)

word :: String -> HParser ()
word :: String -> HParser ()
word String
s = do { (Posn, TokenT)
x <- forall t. Parser t t
next
            ; case (Posn, TokenT)
x of
                (Posn
_p,TokName String
n)     | String
sforall a. Eq a => a -> a -> Bool
==String
n -> forall (m :: * -> *) a. Monad m => a -> m a
return ()
                (Posn
_p,TokFreeText String
n) | String
sforall a. Eq a => a -> a -> Bool
==String
n -> forall (m :: * -> *) a. Monad m => a -> m a
return ()
                ( Posn
p,t :: TokenT
t@(TokError String
_)) -> forall a.
(String -> HParser a) -> String -> Posn -> TokenT -> HParser a
report forall (p :: * -> *) a. PolyParse p => String -> p a
failBad (forall a. Show a => a -> String
show String
s) Posn
p TokenT
t
                ( Posn
p,TokenT
t) -> forall a.
(String -> HParser a) -> String -> Posn -> TokenT -> HParser a
report forall (m :: * -> *) a. MonadFail m => String -> m a
fail (forall a. Show a => a -> String
show String
s) Posn
p TokenT
t
            }

posn :: HParser Posn
posn :: HParser Posn
posn = do { x :: (Posn, TokenT)
x@(Posn
p,TokenT
_) <- forall t. Parser t t
next
          ; forall t. [t] -> Parser t ()
reparse [(Posn, TokenT)
x]
          ; forall (m :: * -> *) a. Monad m => a -> m a
return Posn
p
          } forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` forall (m :: * -> *) a. Monad m => a -> m a
return Posn
noPos

nmtoken :: HParser NmToken
nmtoken :: HParser String
nmtoken = HParser String
string forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` HParser String
freetext

failP, failBadP :: String -> HParser a
failP :: forall a. String -> HParser a
failP String
msg    = do { Posn
p <- HParser Posn
posn; forall (m :: * -> *) a. MonadFail m => String -> m a
fail (String
msgforall a. [a] -> [a] -> [a]
++String
"\n    at "forall a. [a] -> [a] -> [a]
++forall a. Show a => a -> String
show Posn
p) }
failBadP :: forall a. String -> HParser a
failBadP String
msg = do { Posn
p <- HParser Posn
posn; forall (p :: * -> *) a. PolyParse p => String -> p a
failBad (String
msgforall a. [a] -> [a] -> [a]
++String
"\n    at "forall a. [a] -> [a] -> [a]
++forall a. Show a => a -> String
show Posn
p) }

report :: (String->HParser a) -> String -> Posn -> TokenT -> HParser a
report :: forall a.
(String -> HParser a) -> String -> Posn -> TokenT -> HParser a
report String -> HParser a
fail String
expect Posn
p TokenT
t = String -> HParser a
fail (String
"Expected "forall a. [a] -> [a] -> [a]
++forall a. Show a => a -> String
show String
expectforall a. [a] -> [a] -> [a]
++String
" but found "forall a. [a] -> [a] -> [a]
++forall a. Show a => a -> String
show TokenT
t
                               forall a. [a] -> [a] -> [a]
++String
"\n  at "forall a. [a] -> [a] -> [a]
++forall a. Show a => a -> String
show Posn
p)

adjustErrP :: HParser a -> (String->String) -> HParser a
HParser a
p adjustErrP :: forall a. HParser a -> (String -> String) -> HParser a
`adjustErrP` String -> String
f = HParser a
p forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` do Posn
pn <- HParser Posn
posn
                                 (HParser a
p forall (p :: * -> *) a.
Commitment p =>
p a -> (String -> String) -> p a
`adjustErr` String -> String
f) forall (p :: * -> *) a.
Commitment p =>
p a -> (String -> String) -> p a
`adjustErr` (forall a. [a] -> [a] -> [a]
++forall a. Show a => a -> String
show Posn
pn)

---- XML Parsing Functions ----

document :: HParser (Document Posn)
document :: HParser (Document Posn)
document = do
    Prolog
p     <- Parser (Posn, TokenT) Prolog
prolog forall (p :: * -> *) a.
Commitment p =>
p a -> (String -> String) -> p a
`adjustErr` (String
"unrecognisable XML prolog\n"forall a. [a] -> [a] -> [a]
++)
    [(Stack, Element Posn)]
ht    <- forall (p :: * -> *) a. PolyParse p => p a -> p [a]
many1 (String -> HParser (Stack, Element Posn)
element String
"HTML document")
    [Misc]
ms    <- forall (f :: * -> *) a. Alternative f => f a -> f [a]
many HParser Misc
misc
    forall (m :: * -> *) a. Monad m => a -> m a
return (forall i.
Prolog -> SymTab EntityDef -> Element i -> [Misc] -> Document i
Document Prolog
p forall a. SymTab a
emptyST (case forall a b. (a -> b) -> [a] -> [b]
map forall a b. (a, b) -> b
snd [(Stack, Element Posn)]
ht of
                                  [Element Posn
e] -> Element Posn
e
                                  [Element Posn]
es  -> forall i. QName -> [Attribute] -> [Content i] -> Element i
Elem (String -> QName
N String
"html") [] (forall a b. (a -> b) -> [a] -> [b]
map Element Posn -> Content Posn
mkCElem [Element Posn]
es))
                               [Misc]
ms)
  where mkCElem :: Element Posn -> Content Posn
mkCElem Element Posn
e = forall i. Element i -> i -> Content i
CElem Element Posn
e Posn
noPos

comment :: HParser Comment
comment :: HParser String
comment = do
    forall (p :: * -> *) bra ket a.
PolyParse p =>
p bra -> p ket -> p a -> p a
bracket (TokenT -> HParser TokenT
tok TokenT
TokCommentOpen) (TokenT -> HParser TokenT
tok TokenT
TokCommentClose) HParser String
freetext

processinginstruction :: HParser ProcessingInstruction
processinginstruction :: HParser ProcessingInstruction
processinginstruction = do
    TokenT -> HParser TokenT
tok TokenT
TokPIOpen
    forall (p :: * -> *) a. Commitment p => p a -> p a
commit forall a b. (a -> b) -> a -> b
$ do
      String
n <- HParser String
string  forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` forall a. String -> HParser a
failP String
"processing instruction has no target"
      String
f <- HParser String
freetext
      (TokenT -> HParser TokenT
tok TokenT
TokPIClose forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` TokenT -> HParser TokenT
tok TokenT
TokAnyClose) forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` forall a. String -> HParser a
failP String
"missing ?> or >"
      forall (m :: * -> *) a. Monad m => a -> m a
return (String
n, String
f)

cdsect :: HParser CDSect
cdsect :: HParser String
cdsect = do
    TokenT -> HParser TokenT
tok TokenT
TokSectionOpen
    forall (p :: * -> *) bra ket a.
PolyParse p =>
p bra -> p ket -> p a -> p a
bracket (TokenT -> HParser TokenT
tok (Section -> TokenT
TokSection Section
CDATAx)) (forall (p :: * -> *) a. Commitment p => p a -> p a
commit forall a b. (a -> b) -> a -> b
$ TokenT -> HParser TokenT
tok TokenT
TokSectionClose) HParser String
chardata

prolog :: HParser Prolog
prolog :: Parser (Posn, TokenT) Prolog
prolog = do
    Maybe XMLDecl
x <- forall a. HParser a -> HParser (Maybe a)
maybe HParser XMLDecl
xmldecl
    [Misc]
m1 <- forall (f :: * -> *) a. Alternative f => f a -> f [a]
many HParser Misc
misc
    Maybe DocTypeDecl
dtd <- forall a. HParser a -> HParser (Maybe a)
maybe HParser DocTypeDecl
doctypedecl
    [Misc]
m2 <- forall (f :: * -> *) a. Alternative f => f a -> f [a]
many HParser Misc
misc
    forall (m :: * -> *) a. Monad m => a -> m a
return (Maybe XMLDecl -> [Misc] -> Maybe DocTypeDecl -> [Misc] -> Prolog
Prolog Maybe XMLDecl
x [Misc]
m1 Maybe DocTypeDecl
dtd [Misc]
m2)

xmldecl :: HParser XMLDecl
xmldecl :: HParser XMLDecl
xmldecl = do
    TokenT -> HParser TokenT
tok TokenT
TokPIOpen
    String -> HParser ()
word String
"xml" forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` String -> HParser ()
word String
"XML"
    Posn
p <- HParser Posn
posn
    String
s <- HParser String
freetext
    TokenT -> HParser TokenT
tok TokenT
TokPIClose forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` forall a. String -> HParser a
failBadP String
"missing ?> in <?xml ...?>"
    (forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
Prelude.either forall a. String -> HParser a
failP forall (m :: * -> *) a. Monad m => a -> m a
return forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a, b) -> a
fst forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall t a. Parser t a -> [t] -> (Either String a, [t])
runParser HParser XMLDecl
aux forall b c a. (b -> c) -> (a -> b) -> a -> c
. Posn -> String -> [(Posn, TokenT)]
xmlReLex Posn
p) String
s
  where
    aux :: HParser XMLDecl
aux = do
      String
v <- HParser String
versioninfo  forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` forall a. String -> HParser a
failP String
"missing XML version info"
      Maybe EncodingDecl
e <- forall a. HParser a -> HParser (Maybe a)
maybe HParser EncodingDecl
encodingdecl
      Maybe Bool
s <- forall a. HParser a -> HParser (Maybe a)
maybe HParser Bool
sddecl
      forall (m :: * -> *) a. Monad m => a -> m a
return (String -> Maybe EncodingDecl -> Maybe Bool -> XMLDecl
XMLDecl String
v Maybe EncodingDecl
e Maybe Bool
s)

versioninfo :: HParser VersionInfo
versioninfo :: HParser String
versioninfo = do
    String -> HParser ()
word String
"version" forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` String -> HParser ()
word String
"VERSION"
    TokenT -> HParser TokenT
tok TokenT
TokEqual
    forall (p :: * -> *) bra ket a.
PolyParse p =>
p bra -> p ket -> p a -> p a
bracket (TokenT -> HParser TokenT
tok TokenT
TokQuote) (forall (p :: * -> *) a. Commitment p => p a -> p a
commit forall a b. (a -> b) -> a -> b
$ TokenT -> HParser TokenT
tok TokenT
TokQuote) HParser String
freetext

misc :: HParser Misc
misc :: HParser Misc
misc =
    forall (p :: * -> *) a. Commitment p => [(String, p a)] -> p a
oneOf' [ (String
"<!--comment-->", String -> Misc
Comment forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> HParser String
comment)
           , (String
"<?PI?>",         ProcessingInstruction -> Misc
PI forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> HParser ProcessingInstruction
processinginstruction)
           ]


-- Question: for HTML, should we disallow in-line DTDs, allowing only externals?
-- Answer: I think so.

doctypedecl :: HParser DocTypeDecl
doctypedecl :: HParser DocTypeDecl
doctypedecl = do
    TokenT -> HParser TokenT
tok TokenT
TokSpecialOpen
    TokenT -> HParser TokenT
tok (Special -> TokenT
TokSpecial Special
DOCTYPEx)
    forall (p :: * -> *) a. Commitment p => p a -> p a
commit forall a b. (a -> b) -> a -> b
$ do
      QName
n <- HParser QName
qname
      Maybe ExternalID
eid <- forall a. HParser a -> HParser (Maybe a)
maybe HParser ExternalID
externalid
--    es <- maybe (bracket (tok TokSqOpen) (commit $ tok TokSqClose)) (many markupdecl)
      TokenT -> HParser TokenT
tok TokenT
TokAnyClose  forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` forall a. String -> HParser a
failP String
"missing > in DOCTYPE decl"
--    return (DTD n eid (case es of { Nothing -> []; Just e -> e }))
      forall (m :: * -> *) a. Monad m => a -> m a
return (QName -> Maybe ExternalID -> [MarkupDecl] -> DocTypeDecl
DTD QName
n Maybe ExternalID
eid [])

--markupdecl :: HParser MarkupDecl
--markupdecl =
--    (Element <$> elementdecl) `onFail`
--    (AttList <$> attlistdecl) `onFail`
--    (Entity <$> entitydecl) `onFail`
--    (Notation <$> notationdecl) `onFail`
--    (MarkupMisc <$> misc) `onFail`
--    PEREF(MarkupPE,markupdecl)
--
--extsubset :: HParser ExtSubset
--extsubset = do
--    td <- maybe textdecl
--    ds <- many extsubsetdecl
--    return (ExtSubset td ds)
--
--extsubsetdecl :: HParser ExtSubsetDecl
--extsubsetdecl =
--    (ExtMarkupDecl <$> markupdecl) `onFail`
--    (ExtConditionalSect <$> conditionalsect) `onFail`
--    PEREF(ExtPEReference,extsubsetdecl)

sddecl :: HParser SDDecl
sddecl :: HParser Bool
sddecl = do
    String -> HParser ()
word String
"standalone" forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` String -> HParser ()
word String
"STANDALONE"
    forall (p :: * -> *) a. Commitment p => p a -> p a
commit forall a b. (a -> b) -> a -> b
$ do
      TokenT -> HParser TokenT
tok TokenT
TokEqual forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` forall a. String -> HParser a
failP String
"missing = in 'standalone' decl"
      forall (p :: * -> *) bra ket a.
PolyParse p =>
p bra -> p ket -> p a -> p a
bracket (TokenT -> HParser TokenT
tok TokenT
TokQuote) (forall (p :: * -> *) a. Commitment p => p a -> p a
commit forall a b. (a -> b) -> a -> b
$ TokenT -> HParser TokenT
tok TokenT
TokQuote)
              ( (String -> HParser ()
word String
"yes" forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return Bool
True) forall t a. Parser t a -> Parser t a -> Parser t a
`onFail`
                (String -> HParser ()
word String
"no" forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return Bool
False) forall t a. Parser t a -> Parser t a -> Parser t a
`onFail`
                forall a. String -> HParser a
failP String
"'standalone' decl requires 'yes' or 'no' value" )




----
-- VERY IMPORTANT NOTE: The stack returned here contains those tags which
-- have been closed implicitly and need to be reopened again at the
-- earliest opportunity.
type Stack = [(Name,[Attribute])]

element :: Name -> HParser (Stack,Element Posn)
element :: String -> HParser (Stack, Element Posn)
element String
ctx =
  do
    TokenT -> HParser TokenT
tok TokenT
TokAnyOpen
    (ElemTag (N String
e) [Attribute]
avs) <- HParser ElemTag
elemtag
    ( if String
e String -> String -> Bool
`closes` String
ctx then
         -- insert the missing close-tag, fail forward, and reparse.
         ( do forall (m :: * -> *). Monad m => String -> m ()
debug String
"/"
              [TokenT] -> HParser ()
unparse ([TokenT
TokEndOpen, String -> TokenT
TokName String
ctx, TokenT
TokAnyClose,
                        TokenT
TokAnyOpen, String -> TokenT
TokName String
e] forall a. [a] -> [a] -> [a]
++ [Attribute] -> [TokenT]
reformatAttrs [Attribute]
avs)
              forall (m :: * -> *) a. Monad m => a -> m a
return ([], forall i. QName -> [Attribute] -> [Content i] -> Element i
Elem (String -> QName
N String
"null") [] []))
      else if String
e forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String]
selfclosingtags then
         -- complete the parse straightaway.
         ( do TokenT -> HParser TokenT
tok TokenT
TokEndClose   -- self-closing <tag />
              forall (m :: * -> *). Monad m => String -> m ()
debug (String
eforall a. [a] -> [a] -> [a]
++String
"[+]")
              forall (m :: * -> *) a. Monad m => a -> m a
return ([], forall i. QName -> [Attribute] -> [Content i] -> Element i
Elem (String -> QName
N String
e) [Attribute]
avs [])) forall t a. Parser t a -> Parser t a -> Parser t a
`onFail`
     --  ( do tok TokAnyClose   -- sequence <tag></tag> (**not HTML?**)
     --       debug (e++"[+")
     --       n <- bracket (tok TokEndOpen) (commit $ tok TokAnyClose) qname
     --       debug "]"
     --       if e == (map toLower n :: Name)
     --         then return ([], Elem e avs [])
     --         else return (error "no nesting in empty tag")) `onFail`
         ( do TokenT -> HParser TokenT
tok TokenT
TokAnyClose   -- <tag> with no close (e.g. <IMG>)
              forall (m :: * -> *). Monad m => String -> m ()
debug (String
eforall a. [a] -> [a] -> [a]
++String
"[+]")
              forall (m :: * -> *) a. Monad m => a -> m a
return ([], forall i. QName -> [Attribute] -> [Content i] -> Element i
Elem (String -> QName
N String
e) [Attribute]
avs []))
      else
         ( do TokenT -> HParser TokenT
tok TokenT
TokEndClose
              forall (m :: * -> *). Monad m => String -> m ()
debug (String
eforall a. [a] -> [a] -> [a]
++String
"[]")
              forall (m :: * -> *) a. Monad m => a -> m a
return ([], forall i. QName -> [Attribute] -> [Content i] -> Element i
Elem (String -> QName
N String
e) [Attribute]
avs [])) forall t a. Parser t a -> Parser t a -> Parser t a
`onFail`
         ( do TokenT -> HParser TokenT
tok TokenT
TokAnyClose forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` forall a. String -> HParser a
failP String
"missing > or /> in element tag"
              forall (m :: * -> *). Monad m => String -> m ()
debug (String
eforall a. [a] -> [a] -> [a]
++String
"[")
           -- zz <- many (content e)
           -- n <- bracket (tok TokEndOpen) (commit $ tok TokAnyClose) qname
              [(Stack, Content Posn)]
zz <- forall (p :: * -> *) a z. PolyParse p => p a -> p z -> p [a]
manyFinally (String -> HParser (Stack, Content Posn)
content String
e)
                                (TokenT -> HParser TokenT
tok TokenT
TokEndOpen)
              (N String
n) <- HParser QName
qname
              forall (p :: * -> *) a. Commitment p => p a -> p a
commit (TokenT -> HParser TokenT
tok TokenT
TokAnyClose)
              forall (m :: * -> *). Monad m => String -> m ()
debug String
"]"
              let ([Stack]
ss,[Content Posn]
cs) = forall a b. [(a, b)] -> ([a], [b])
unzip [(Stack, Content Posn)]
zz
              let s :: Stack
s       = if forall (t :: * -> *) a. Foldable t => t a -> Bool
null [Stack]
ss then [] else forall a. [a] -> a
last [Stack]
ss
              ( if String
e forall a. Eq a => a -> a -> Bool
== (forall a b. (a -> b) -> [a] -> [b]
map Char -> Char
toLower String
n :: Name) then
                  do [TokenT] -> HParser ()
unparse (Stack -> [TokenT]
reformatTags (String -> Stack -> Stack
closeInner String
e Stack
s))
                     forall (m :: * -> *). Monad m => String -> m ()
debug String
"^"
                     forall (m :: * -> *) a. Monad m => a -> m a
return ([], forall i. QName -> [Attribute] -> [Content i] -> Element i
Elem (String -> QName
N String
e) [Attribute]
avs [Content Posn]
cs)
                else
                  do [TokenT] -> HParser ()
unparse [TokenT
TokEndOpen, String -> TokenT
TokName String
n, TokenT
TokAnyClose]
                     forall (m :: * -> *). Monad m => String -> m ()
debug String
"-"
                     forall (m :: * -> *) a. Monad m => a -> m a
return ((String
e,[Attribute]
avs)forall a. a -> [a] -> [a]
:Stack
s, forall i. QName -> [Attribute] -> [Content i] -> Element i
Elem (String -> QName
N String
e) [Attribute]
avs [Content Posn]
cs))
         ) forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` forall a. String -> HParser a
failP (String
"failed to repair non-matching tags in context: "forall a. [a] -> [a] -> [a]
++String
ctx))

closeInner :: Name -> [(Name,[Attribute])] -> [(Name,[Attribute])]
closeInner :: String -> Stack -> Stack
closeInner String
c Stack
ts =
    case forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup String
c [(String, [String])]
closeInnerTags of
      (Just [String]
these) -> forall a. (a -> Bool) -> [a] -> [a]
filter ((forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` [String]
these)forall b c a. (b -> c) -> (a -> b) -> a -> c
.forall a b. (a, b) -> a
fst) Stack
ts
      Maybe [String]
Nothing      -> Stack
ts

unparse :: [TokenT] -> Parser (Posn, TokenT) ()
unparse :: [TokenT] -> HParser ()
unparse [TokenT]
ts = do Posn
p <- HParser Posn
posn
                forall t. [t] -> Parser t ()
reparse (forall a b. [a] -> [b] -> [(a, b)]
zip (forall a. a -> [a]
repeat Posn
p) [TokenT]
ts)

reformatAttrs :: [(QName, AttValue)] -> [TokenT]
reformatAttrs :: [Attribute] -> [TokenT]
reformatAttrs = forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap Attribute -> [TokenT]
f0
    where f0 :: Attribute -> [TokenT]
f0 (QName
a, v :: AttValue
v@(AttValue [Either String Reference]
_)) = [ String -> TokenT
TokName (QName -> String
printableName QName
a), TokenT
TokEqual
                                   , TokenT
TokQuote, String -> TokenT
TokFreeText (forall a. Show a => a -> String
show AttValue
v), TokenT
TokQuote ]

reformatTags :: [(String, [(QName, AttValue)])] -> [TokenT]
reformatTags :: Stack -> [TokenT]
reformatTags = forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap (String, [Attribute]) -> [TokenT]
f0
    where f0 :: (String, [Attribute]) -> [TokenT]
f0 (String
t,[Attribute]
avs) = [TokenT
TokAnyOpen, String -> TokenT
TokName String
t]forall a. [a] -> [a] -> [a]
++[Attribute] -> [TokenT]
reformatAttrs [Attribute]
avsforall a. [a] -> [a] -> [a]
++[TokenT
TokAnyClose]

content :: Name -> HParser (Stack,Content Posn)
content :: String -> HParser (Stack, Content Posn)
content String
ctx = do { Posn
p <- HParser Posn
posn ; Posn -> HParser (Stack, Content Posn)
content' Posn
p }
  where content' :: Posn -> HParser (Stack, Content Posn)
content' Posn
p = forall (p :: * -> *) a. Commitment p => [(String, p a)] -> p a
oneOf'
          [ ( String
"element", String -> HParser (Stack, Element Posn)
element String
ctx forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \(Stack
s,Element Posn
e)-> forall (m :: * -> *) a. Monad m => a -> m a
return (Stack
s, forall i. Element i -> i -> Content i
CElem Element Posn
e Posn
p))
          , ( String
"chardata", HParser String
chardata forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \String
s-> forall (m :: * -> *) a. Monad m => a -> m a
return ([], forall i. Bool -> String -> i -> Content i
CString Bool
False String
s Posn
p))
          , ( String
"reference", HParser Reference
reference forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \Reference
r-> forall (m :: * -> *) a. Monad m => a -> m a
return ([], forall i. Reference -> i -> Content i
CRef Reference
r Posn
p))
          , ( String
"cdsect", HParser String
cdsect forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \String
c-> forall (m :: * -> *) a. Monad m => a -> m a
return ([], forall i. Bool -> String -> i -> Content i
CString Bool
True String
c Posn
p))
          , ( String
"misc", HParser Misc
misc forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \Misc
m->  forall (m :: * -> *) a. Monad m => a -> m a
return ([], forall i. Misc -> i -> Content i
CMisc Misc
m Posn
p))
          ] forall a. HParser a -> (String -> String) -> HParser a
`adjustErrP` (String
"when looking for a content item,\n"forall a. [a] -> [a] -> [a]
++)

----
elemtag :: HParser ElemTag
elemtag :: HParser ElemTag
elemtag = do
    (N String
n) <- HParser QName
qname forall (p :: * -> *) a.
PolyParse p =>
p a -> (String -> String) -> p a
`adjustErrBad` (String
"malformed element tag\n"forall a. [a] -> [a] -> [a]
++)
    [Attribute]
as <- forall (f :: * -> *) a. Alternative f => f a -> f [a]
many HParser Attribute
attribute
    forall (m :: * -> *) a. Monad m => a -> m a
return (QName -> [Attribute] -> ElemTag
ElemTag (String -> QName
N  forall a b. (a -> b) -> a -> b
$ forall a b. (a -> b) -> [a] -> [b]
map Char -> Char
toLower String
n) [Attribute]
as)

attribute :: HParser Attribute
attribute :: HParser Attribute
attribute = do
    (N String
n) <- HParser QName
qname
    AttValue
v <- (do TokenT -> HParser TokenT
tok TokenT
TokEqual
             Parser (Posn, TokenT) AttValue
attvalue) forall t a. Parser t a -> Parser t a -> Parser t a
`onFail`
         forall (m :: * -> *) a. Monad m => a -> m a
return ([Either String Reference] -> AttValue
AttValue [forall a b. a -> Either a b
Left String
"TRUE"])
    forall (m :: * -> *) a. Monad m => a -> m a
return (String -> QName
N forall a b. (a -> b) -> a -> b
$ forall a b. (a -> b) -> [a] -> [b]
map Char -> Char
toLower String
n, AttValue
v)

--elementdecl :: HParser ElementDecl
--elementdecl = do
--    tok TokSpecialOpen
--    tok (TokSpecial ELEMENTx)
--    n <- qname `onFail` failP "missing identifier in ELEMENT decl"
--    c <- contentspec `onFail` failP "missing content spec in ELEMENT decl"
--    tok TokAnyClose `onFail` failP "expected > terminating ELEMENT decl"
--    return (ElementDecl n c)
--
--contentspec :: HParser ContentSpec
--contentspec =
--    ( word "EMPTY" >> return EMPTY) `onFail`
--    ( word "ANY" >> return ANY) `onFail`
--    (Mixed <$> mixed) `onFail`
--    (ContentSpec <$> cp) `onFail`
--    PEREF(ContentPE,contentspec)
--
--choice :: HParser [CP]
--choice = do
--    bracket (tok TokBraOpen) (tok TokBraClose)
--            (cp `sepby1` (tok TokPipe))
--
--sequence :: HParser [CP]
--sequence = do
--    bracket (tok TokBraOpen) (tok TokBraClose)
--            (cp `sepby1` (tok TokComma))
--
--cp :: HParser CP
--cp =
--    ( do n <- qname
--         m <- modifier
--         return (TagName n m)) `onFail`
--    ( do ss <- sequence
--         m <- modifier
--         return (Seq ss m)) `onFail`
--    ( do cs <- choice
--         m <- modifier
--         return (Choice cs m)) `onFail`
--    PEREF(CPPE,cp)
--
--modifier :: HParser Modifier
--modifier =
--    ( tok TokStar >> return Star) `onFail`
--    ( tok TokQuery >> return Query) `onFail`
--    ( tok TokPlus >> return Plus) `onFail`
--    ( return None)
--
--mixed :: HParser Mixed
--mixed = do
--    tok TokBraOpen
--    tok TokHash
--    word "PCDATA"
--    cont
--  where
--    cont = ( tok TokBraClose >> return PCDATA) `onFail`
--           ( do cs <- many ( do tok TokPipe
--                                n <- qname
--                                return n)
--                tok TokBraClose
--                tok TokStar
--                return (PCDATAplus cs))
--
--attlistdecl :: HParser AttListDecl
--attlistdecl = do
--    tok TokSpecialOpen
--    tok (TokSpecial ATTLISTx)
--    n <- qname `onFail` failP "missing identifier in ATTLIST"
--    ds <- many attdef
--    tok TokAnyClose `onFail` failP "missing > terminating ATTLIST"
--    return (AttListDecl n ds)
--
--attdef :: HParser AttDef
--attdef = do
--    n <- qname
--    t <- atttype `onFail` failP "missing attribute type in attlist defn"
--    d <- defaultdecl
--    return (AttDef n t d)
--
--atttype :: HParser AttType
--atttype =
--    ( word "CDATA" >> return StringType) `onFail`
--    (TokenizedType <$> tokenizedtype) `onFail`
--    (EnumeratedType <$> enumeratedtype)
--
--tokenizedtype :: HParser TokenizedType
--tokenizedtype =
--    ( word "ID" >> return ID) `onFail`
--    ( word "IDREF" >> return IDREF) `onFail`
--    ( word "IDREFS" >> return IDREFS) `onFail`
--    ( word "ENTITY" >> return ENTITY) `onFail`
--    ( word "ENTITIES" >> return ENTITIES) `onFail`
--    ( word "NMTOKEN" >> return NMTOKEN) `onFail`
--    ( word "NMTOKENS" >> return NMTOKENS)
--
--enumeratedtype :: HParser EnumeratedType
--enumeratedtype =
--    (NotationType <$> notationtype) `onFail`
--    (Enumeration <$> enumeration)
--
--notationtype :: HParser NotationType
--notationtype = do
--    word "NOTATION"
--    bracket (tok TokBraOpen) (tok TokBraClose)
--            (name `sepby1` (tok TokPipe))
--
--enumeration :: HParser Enumeration
--enumeration =
--    bracket (tok TokBraOpen) (tok TokBraClose)
--            (nmtoken `sepby1` (tok TokPipe))
--
--defaultdecl :: HParser DefaultDecl
--defaultdecl =
--    ( tok TokHash >> word "REQUIRED" >> return REQUIRED) `onFail`
--    ( tok TokHash >> word "IMPLIED" >> return IMPLIED) `onFail`
--    ( do f <- maybe (tok TokHash >> word "FIXED" >> return FIXED)
--         a <- attvalue
--         return (DefaultTo a f))
--
--conditionalsect :: HParser ConditionalSect
--conditionalsect =
--    ( do tok TokSectionOpen
--         tok (TokSection INCLUDEx)
--         tok TokSqOpen `onFail` failP "missing [ after INCLUDE"
--         i <- extsubsetdecl `onFail` failP "missing ExtSubsetDecl in INCLUDE"
--         tok TokSectionClose `onFail` failP "missing ] after INCLUDE"
--         return (IncludeSect i)) `onFail`
--    ( do tok TokSectionOpen
--         tok (TokSection IGNOREx)
--         tok TokSqOpen `onFail` failP "missing [ after IGNORE"
--         i <- many ignoresectcontents
--         tok TokSectionClose `onFail` failP "missing ] after IGNORE"
--         return (IgnoreSect i))
--
--ignoresectcontents :: HParser IgnoreSectContents
--ignoresectcontents = do
--    i <- ignore
--    is <- many (do tok TokSectionOpen
--                   ic <- ignoresectcontents
--                   tok TokSectionClose
--                   ig <- ignore
--                   return (ic,ig))
--    return (IgnoreSectContents i is)
--
--ignore :: HParser Ignore
--ignore = Ignore <$> freetext

reference :: HParser Reference
reference :: HParser Reference
reference = do
    forall (p :: * -> *) bra ket a.
PolyParse p =>
p bra -> p ket -> p a -> p a
bracket (TokenT -> HParser TokenT
tok TokenT
TokAmp) (TokenT -> HParser TokenT
tok TokenT
TokSemi) (HParser String
freetext forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= forall {m :: * -> *}. Monad m => String -> m Reference
val)
  where
    val :: String -> m Reference
val (Char
'#':Char
'x':String
i) | forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Char -> Bool
isHexDigit String
i
                    = forall (m :: * -> *) a. Monad m => a -> m a
return forall b c a. (b -> c) -> (a -> b) -> a -> c
. CharRef -> Reference
RefChar forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a, b) -> a
fst forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. [a] -> a
head forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. (Eq a, Num a) => ReadS a
readHex forall a b. (a -> b) -> a -> b
$ String
i
    val (Char
'#':String
i)     | forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Char -> Bool
isDigit String
i
                    = forall (m :: * -> *) a. Monad m => a -> m a
return forall b c a. (b -> c) -> (a -> b) -> a -> c
. CharRef -> Reference
RefChar forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a, b) -> a
fst forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. [a] -> a
head forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. (Eq a, Num a) => ReadS a
readDec forall a b. (a -> b) -> a -> b
$ String
i
    val String
ent         = forall (m :: * -> *) a. Monad m => a -> m a
return forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Reference
RefEntity forall a b. (a -> b) -> a -> b
$ String
ent

{-
reference :: HParser Reference
reference =
    (RefChar <$> charref) `onFail`
    (RefEntity <$> entityref)

entityref :: HParser EntityRef
entityref = do
    n <- bracket (tok TokAmp) (commit $ tok TokSemi) name
    return n

charref :: HParser CharRef
charref = do
    bracket (tok TokAmp) (commit $ tok TokSemi) (freetext >>= readCharVal)
  where
    readCharVal ('#':'x':i) = return . fst . head . readHex $ i
    readCharVal ('#':i)     = return . fst . head . readDec $ i
    readCharVal _           = mzero
-}

--pereference :: HParser PEReference
--pereference = do
--    bracket (tok TokPercent) (tok TokSemi) nmtoken
--
--entitydecl :: HParser EntityDecl
--entitydecl =
--    (EntityGEDecl <$> gedecl) `onFail`
--    (EntityPEDecl <$> pedecl)
--
--gedecl :: HParser GEDecl
--gedecl = do
--    tok TokSpecialOpen
--    tok (TokSpecial ENTITYx)
--    n <- name
--    e <- entitydef `onFail` failP "missing entity defn in G ENTITY decl"
--    tok TokAnyClose `onFail` failP "expected > terminating G ENTITY decl"
--    return (GEDecl n e)
--
--pedecl :: HParser PEDecl
--pedecl = do
--    tok TokSpecialOpen
--    tok (TokSpecial ENTITYx)
--    tok TokPercent
--    n <- name
--    e <- pedef `onFail` failP "missing entity defn in P ENTITY decl"
--    tok TokAnyClose `onFail` failP "expected > terminating P ENTITY decl"
--    return (PEDecl n e)
--
--entitydef :: HParser EntityDef
--entitydef =
--    (DefEntityValue <$> entityvalue) `onFail`
--    ( do eid <- externalid
--         ndd <- maybe ndatadecl
--         return (DefExternalID eid ndd))
--
--pedef :: HParser PEDef
--pedef =
--    (PEDefEntityValue <$> entityvalue) `onFail`
--    (PEDefExternalID <$> externalid)

externalid :: HParser ExternalID
externalid :: HParser ExternalID
externalid =
    ( do String -> HParser ()
word String
"SYSTEM"
         SystemLiteral -> ExternalID
SYSTEM forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> HParser SystemLiteral
systemliteral) forall t a. Parser t a -> Parser t a -> Parser t a
`onFail`
    ( do String -> HParser ()
word String
"PUBLIC"
         PubidLiteral
p <- HParser PubidLiteral
pubidliteral
         PubidLiteral -> SystemLiteral -> ExternalID
PUBLIC PubidLiteral
p forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> HParser SystemLiteral
systemliteral forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` forall (m :: * -> *) a. Monad m => a -> m a
return (String -> SystemLiteral
SystemLiteral String
"")
    )

--ndatadecl :: HParser NDataDecl
--ndatadecl = do
--    word "NDATA"
--    n <- name
--    return (NDATA n)

--textdecl :: HParser TextDecl
--textdecl = do
--    tok TokPIOpen
--    (word "xml" `onFail` word "XML")
--    v <- maybe versioninfo
--    e <- encodingdecl
--    tok TokPIClose `onFail` failP "expected ?> terminating text decl"
--    return (TextDecl v e)

--extparsedent :: HParser ExtParsedEnt
--extparsedent = do
--    t <- maybe textdecl
--    (_,c) <- (content "")
--    return (ExtParsedEnt t c)
--
--extpe :: HParser ExtPE
--extpe = do
--    t <- maybe textdecl
--    e <- extsubsetdecl
--    return (ExtPE t e)

encodingdecl :: HParser EncodingDecl
encodingdecl :: HParser EncodingDecl
encodingdecl = do
    String -> HParser ()
word String
"encoding" forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` String -> HParser ()
word String
"ENCODING"
    TokenT -> HParser TokenT
tok TokenT
TokEqual forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` forall a. String -> HParser a
failBadP String
"expected = in 'encoding' decl"
    String
f <- forall (p :: * -> *) bra ket a.
PolyParse p =>
p bra -> p ket -> p a -> p a
bracket (TokenT -> HParser TokenT
tok TokenT
TokQuote) (forall (p :: * -> *) a. Commitment p => p a -> p a
commit forall a b. (a -> b) -> a -> b
$ TokenT -> HParser TokenT
tok TokenT
TokQuote) HParser String
freetext
    forall (m :: * -> *) a. Monad m => a -> m a
return (String -> EncodingDecl
EncodingDecl String
f)

--notationdecl :: HParser NotationDecl
--notationdecl = do
--    tok TokSpecialOpen
--    word "NOTATION"
--    n <- name
--    e <- either externalid publicid
--    tok TokAnyClose `onFail` failP "expected > terminating NOTATION decl"
--    return (NOTATION n e)

--publicid :: HParser PublicID
--publicid = do
--    word "PUBLICID"
--    p <- pubidliteral
--    return (PUBLICID p)

--entityvalue :: HParser EntityValue
--entityvalue = do
--    evs <- bracket (tok TokQuote) (commit $ tok TokQuote) (many ev)
--    return (EntityValue evs)

--ev :: HParser EV
--ev =
--    (EVString <$> freetext) `onFail`
-- -- PEREF(EVPERef,ev) `onFail`
--    (EVRef <$> reference)

attvalue :: HParser AttValue
attvalue :: Parser (Posn, TokenT) AttValue
attvalue =
  ( do [Either String Reference]
avs <- forall (p :: * -> *) bra ket a.
PolyParse p =>
p bra -> p ket -> p a -> p a
bracket (TokenT -> HParser TokenT
tok TokenT
TokQuote) (forall (p :: * -> *) a. Commitment p => p a -> p a
commit forall a b. (a -> b) -> a -> b
$ TokenT -> HParser TokenT
tok TokenT
TokQuote)
                      (forall (f :: * -> *) a. Alternative f => f a -> f [a]
many (forall a b. HParser a -> HParser b -> HParser (Either a b)
either HParser String
freetext HParser Reference
reference))
       forall (m :: * -> *) a. Monad m => a -> m a
return ([Either String Reference] -> AttValue
AttValue [Either String Reference]
avs) ) forall t a. Parser t a -> Parser t a -> Parser t a
`onFail`
  ( do String
v <- HParser String
nmtoken
       String
s <- (TokenT -> HParser TokenT
tok TokenT
TokPercent forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return String
"%") forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` forall (m :: * -> *) a. Monad m => a -> m a
return String
""
       forall (m :: * -> *) a. Monad m => a -> m a
return ([Either String Reference] -> AttValue
AttValue [forall a b. a -> Either a b
Left (String
vforall a. [a] -> [a] -> [a]
++String
s)]) ) forall t a. Parser t a -> Parser t a -> Parser t a
`onFail`
  ( do String
s <- forall (p :: * -> *) a. PolyParse p => [p a] -> p a
oneOf [ TokenT -> HParser TokenT
tok TokenT
TokPlus forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return String
"+"
                  , TokenT -> HParser TokenT
tok TokenT
TokHash forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return String
"#"
                  ]
       String
v <- HParser String
nmtoken
       forall (m :: * -> *) a. Monad m => a -> m a
return ([Either String Reference] -> AttValue
AttValue [forall a b. a -> Either a b
Left (String
sforall a. [a] -> [a] -> [a]
++String
v)]) ) forall t a. Parser t a -> Parser t a -> Parser t a
`onFail`
  forall a. String -> HParser a
failP String
"Badly formatted attribute value"

systemliteral :: HParser SystemLiteral
systemliteral :: HParser SystemLiteral
systemliteral = do
    String
s <- forall (p :: * -> *) bra ket a.
PolyParse p =>
p bra -> p ket -> p a -> p a
bracket (TokenT -> HParser TokenT
tok TokenT
TokQuote) (forall (p :: * -> *) a. Commitment p => p a -> p a
commit forall a b. (a -> b) -> a -> b
$ TokenT -> HParser TokenT
tok TokenT
TokQuote) HParser String
freetext
    forall (m :: * -> *) a. Monad m => a -> m a
return (String -> SystemLiteral
SystemLiteral String
s)            -- note: need to fold &...; escapes

pubidliteral :: HParser PubidLiteral
pubidliteral :: HParser PubidLiteral
pubidliteral = do
    String
s <- forall (p :: * -> *) bra ket a.
PolyParse p =>
p bra -> p ket -> p a -> p a
bracket (TokenT -> HParser TokenT
tok TokenT
TokQuote) (forall (p :: * -> *) a. Commitment p => p a -> p a
commit forall a b. (a -> b) -> a -> b
$ TokenT -> HParser TokenT
tok TokenT
TokQuote) HParser String
freetext
    forall (m :: * -> *) a. Monad m => a -> m a
return (String -> PubidLiteral
PubidLiteral String
s)             -- note: need to fold &...; escapes

chardata :: HParser CharData
chardata :: HParser String
chardata = HParser String
freetext -- <&> CharData