-- | This is a parser for HTML documents.  Unlike for XML documents, it
--   must include a certain amount of error-correction to account for
--   HTML features like self-terminating tags, unterminated tags, and
--   incorrect nesting.  The input is tokenised by the
--   XML lexer (a separate lexer is not required for HTML).

-- It uses a slightly extended version of the Hutton/Meijer parser
-- combinators.

module Text.XML.HaXml.Html.Parse
  ( htmlParse
  , htmlParse'
  ) where

import Prelude hiding (either,maybe,sequence)
import qualified Prelude (either)
import Data.Maybe hiding (maybe)
import Data.Char (toLower, {-isSpace,-} isDigit, isHexDigit)
import Numeric (readDec,readHex)
import Control.Monad

import Text.XML.HaXml.Types
import Text.XML.HaXml.Namespaces
import Text.XML.HaXml.Lex
import Text.XML.HaXml.Posn
import Text.ParserCombinators.Poly.Plain

--  #define DEBUG

#if defined(DEBUG)
#  if ( defined(__GLASGOW_HASKELL__) && __GLASGOW_HASKELL__ > 502 ) || \
      ( defined(__NHC__) && __NHC__ > 114 ) || defined(__HUGS__)
import Debug.Trace(trace)
#  elif defined(__GLASGOW_HASKELL__)
import IOExts(trace)
#  elif defined(__NHC__) || defined(__HBC__)
import NonStdTrace
#  endif
debug :: Monad m => String -> m ()
debug s = trace s (return ())
#else
debug :: Monad m => String -> m ()
debug :: forall (m :: * -> *). Monad m => String -> m ()
debug String
_ = () -> m ()
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return ()
#endif


-- | The first argument is the name of the file, the second is the string
--   contents of the file.  The result is the generic representation of
--   an XML document.  Any errors cause program failure with message to stderr.
htmlParse :: String -> String -> Document Posn
htmlParse :: String -> String -> Document Posn
htmlParse String
file = (String -> Document Posn)
-> (Document Posn -> Document Posn)
-> Either String (Document Posn)
-> Document Posn
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
Prelude.either String -> Document Posn
forall a. HasCallStack => String -> a
error Document Posn -> Document Posn
forall a. a -> a
id (Either String (Document Posn) -> Document Posn)
-> (String -> Either String (Document Posn))
-> String
-> Document Posn
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String -> Either String (Document Posn)
htmlParse' String
file

-- | The first argument is the name of the file, the second is the string
--   contents of the file.  The result is the generic representation of
--   an XML document.  Any parsing errors are returned in the @Either@ type.
htmlParse' :: String -> String -> Either String (Document Posn)
htmlParse' :: String -> String -> Either String (Document Posn)
htmlParse' String
file = (Document Posn -> Document Posn)
-> Either String (Document Posn) -> Either String (Document Posn)
forall a b. (a -> b) -> Either String a -> Either String b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Document Posn -> Document Posn
forall i. Document i -> Document i
simplify (Either String (Document Posn) -> Either String (Document Posn))
-> (String -> Either String (Document Posn))
-> String
-> Either String (Document Posn)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Either String (Document Posn), [(Posn, TokenT)])
-> Either String (Document Posn)
forall a b. (a, b) -> a
fst
                  ((Either String (Document Posn), [(Posn, TokenT)])
 -> Either String (Document Posn))
-> (String -> (Either String (Document Posn), [(Posn, TokenT)]))
-> String
-> Either String (Document Posn)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Parser (Posn, TokenT) (Document Posn)
-> [(Posn, TokenT)]
-> (Either String (Document Posn), [(Posn, TokenT)])
forall t a. Parser t a -> [t] -> (Either String a, [t])
runParser Parser (Posn, TokenT) (Document Posn)
document ([(Posn, TokenT)]
 -> (Either String (Document Posn), [(Posn, TokenT)]))
-> (String -> [(Posn, TokenT)])
-> String
-> (Either String (Document Posn), [(Posn, TokenT)])
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String -> [(Posn, TokenT)]
xmlLex String
file

---- Document simplification ----

simplify :: Document i -> Document i
simplify :: forall i. Document i -> Document i
simplify (Document Prolog
p SymTab EntityDef
st (Elem QName
n [Attribute]
avs [Content i]
cs) [Misc]
ms) =
    Prolog -> SymTab EntityDef -> Element i -> [Misc] -> Document i
forall i.
Prolog -> SymTab EntityDef -> Element i -> [Misc] -> Document i
Document Prolog
p SymTab EntityDef
st (QName -> [Attribute] -> [Content i] -> Element i
forall i. QName -> [Attribute] -> [Content i] -> Element i
Elem QName
n [Attribute]
avs ((Content i -> Bool) -> [Content i] -> [Content i]
forall {i}. (Content i -> Bool) -> [Content i] -> [Content i]
deepfilter Content i -> Bool
forall {i}. Content i -> Bool
simp [Content i]
cs)) [Misc]
ms
  where
    simp :: Content i -> Bool
simp (CElem (Elem (N String
"null") [] []) i
_) = Bool
False
    simp (CElem (Elem QName
t [Attribute]
_ []) i
_)
        | QName -> String
localName QName
t String -> [String] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"font",String
"p",String
"i",String
"b",String
"em",String
"tt",String
"big",String
"small"]
                                           = Bool
False
 -- simp (CString False s _) | all isSpace s = False
    simp Content i
_ = Bool
True
    deepfilter :: (Content i -> Bool) -> [Content i] -> [Content i]
deepfilter Content i -> Bool
f =
        (Content i -> Bool) -> [Content i] -> [Content i]
forall a. (a -> Bool) -> [a] -> [a]
filter Content i -> Bool
f ([Content i] -> [Content i])
-> ([Content i] -> [Content i]) -> [Content i] -> [Content i]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Content i -> Content i) -> [Content i] -> [Content i]
forall a b. (a -> b) -> [a] -> [b]
map (\Content i
c-> case Content i
c of
                          CElem (Elem QName
t [Attribute]
avs [Content i]
cs) i
i
                                  -> Element i -> i -> Content i
forall i. Element i -> i -> Content i
CElem (QName -> [Attribute] -> [Content i] -> Element i
forall i. QName -> [Attribute] -> [Content i] -> Element i
Elem QName
t [Attribute]
avs ((Content i -> Bool) -> [Content i] -> [Content i]
deepfilter Content i -> Bool
f [Content i]
cs)) i
i
                          Content i
_       -> Content i
c)

-- opening any of these, they close again immediately
selfclosingtags :: [String]
selfclosingtags :: [String]
selfclosingtags = [String
"img",String
"hr",String
"br",String
"meta",String
"col",String
"link",String
"base"
                  ,String
"param",String
"area",String
"frame",String
"input"]

-- closing this, implicitly closes any of those which are contained in it
closeInnerTags :: [(String,[String])]
closeInnerTags :: [(String, [String])]
closeInnerTags =
  [ (String
"ul",      [String
"li"])
  , (String
"ol",      [String
"li"])
  , (String
"dl",      [String
"dt",String
"dd"])
  , (String
"tr",      [String
"th",String
"td"])
  , (String
"div",     [String
"p"])
  , (String
"thead",   [String
"th",String
"tr",String
"td"])
  , (String
"tfoot",   [String
"th",String
"tr",String
"td"])
  , (String
"tbody",   [String
"th",String
"tr",String
"td"])
  , (String
"table",   [String
"th",String
"tr",String
"td",String
"thead",String
"tfoot",String
"tbody"])
  , (String
"caption", [String
"p"])
  , (String
"th",      [String
"p"])
  , (String
"td",      [String
"p"])
  , (String
"li",      [String
"p"])
  , (String
"dt",      [String
"p"])
  , (String
"dd",      [String
"p"])
  , (String
"object",  [String
"p"])
  , (String
"map",     [String
"p"])
  , (String
"body",    [String
"p"])
  ]

-- opening this, implicitly closes that
closes :: Name -> Name -> Bool
String
"a"     closes :: String -> String -> Bool
`closes` String
"a"      =  Bool
True
String
"li"    `closes` String
"li"     =  Bool
True
String
"th"    `closes`  String
t       | String
t String -> [String] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"th",String
"td"]      =  Bool
True
String
"td"    `closes`  String
t       | String
t String -> [String] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"th",String
"td"]      =  Bool
True
String
"tr"    `closes`  String
t       | String
t String -> [String] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"th",String
"td",String
"tr"] =  Bool
True
String
"dt"    `closes`  String
t       | String
t String -> [String] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"dt",String
"dd"]      =  Bool
True
String
"dd"    `closes`  String
t       | String
t String -> [String] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"dt",String
"dd"]      =  Bool
True
String
"form"  `closes` String
"form"   = Bool
True
String
"label" `closes` String
"label"  = Bool
True
String
_       `closes` String
"option" = Bool
True
String
"thead" `closes` String
t        | String
t String -> String -> Bool
forall a. Eq a => a -> a -> Bool
== String
"colgroup"                = Bool
True
String
"tfoot" `closes` String
t        | String
t String -> [String] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"thead",String
"colgroup"]  = Bool
True
String
"tbody" `closes` String
t        | String
t String -> [String] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"tbody",String
"tfoot",String
"thead",String
"colgroup"] = Bool
True
String
"colgroup" `closes` String
"colgroup"  = Bool
True
String
t `closes` String
"p"
    | String
t String -> [String] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"p",String
"h1",String
"h2",String
"h3",String
"h4",String
"h5",String
"h6"
               ,String
"hr",String
"div",String
"ul",String
"dl",String
"ol",String
"table"]  =  Bool
True
String
_ `closes` String
_ = Bool
False


---- Auxiliary Parsing Functions ----

type HParser a = Parser (Posn,TokenT) a

tok :: TokenT -> HParser TokenT
tok :: TokenT -> HParser TokenT
tok TokenT
t = do (Posn
p,TokenT
t') <- Parser (Posn, TokenT) (Posn, TokenT)
forall t. Parser t t
next
           case TokenT
t' of TokError String
_    -> (String -> HParser TokenT)
-> String -> Posn -> TokenT -> HParser TokenT
forall a.
(String -> HParser a) -> String -> Posn -> TokenT -> HParser a
report String -> HParser TokenT
forall (p :: * -> *) a. PolyParse p => String -> p a
failBad (TokenT -> String
forall a. Show a => a -> String
show TokenT
t) Posn
p TokenT
t'
                      TokenT
_ | TokenT
t'TokenT -> TokenT -> Bool
forall a. Eq a => a -> a -> Bool
==TokenT
t     -> TokenT -> HParser TokenT
forall a. a -> Parser (Posn, TokenT) a
forall (m :: * -> *) a. Monad m => a -> m a
return TokenT
t
                        | Bool
otherwise -> (String -> HParser TokenT)
-> String -> Posn -> TokenT -> HParser TokenT
forall a.
(String -> HParser a) -> String -> Posn -> TokenT -> HParser a
report String -> HParser TokenT
forall a. String -> Parser (Posn, TokenT) a
forall (m :: * -> *) a. MonadFail m => String -> m a
fail (TokenT -> String
forall a. Show a => a -> String
show TokenT
t) Posn
p TokenT
t'

qname :: HParser QName
qname :: HParser QName
qname = (String -> QName) -> Parser (Posn, TokenT) String -> HParser QName
forall a b.
(a -> b) -> Parser (Posn, TokenT) a -> Parser (Posn, TokenT) b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap String -> QName
N Parser (Posn, TokenT) String
name

name :: HParser Name
--name = do {(p,TokName s) <- next; return s}
name :: Parser (Posn, TokenT) String
name = do (Posn
p,TokenT
tok) <- Parser (Posn, TokenT) (Posn, TokenT)
forall t. Parser t t
next
          case TokenT
tok of
            TokName String
s  -> String -> Parser (Posn, TokenT) String
forall a. a -> Parser (Posn, TokenT) a
forall (m :: * -> *) a. Monad m => a -> m a
return String
s
            TokError String
_ -> (String -> Parser (Posn, TokenT) String)
-> String -> Posn -> TokenT -> Parser (Posn, TokenT) String
forall a.
(String -> HParser a) -> String -> Posn -> TokenT -> HParser a
report String -> Parser (Posn, TokenT) String
forall (p :: * -> *) a. PolyParse p => String -> p a
failBad String
"a name" Posn
p TokenT
tok
            TokenT
_          -> (String -> Parser (Posn, TokenT) String)
-> String -> Posn -> TokenT -> Parser (Posn, TokenT) String
forall a.
(String -> HParser a) -> String -> Posn -> TokenT -> HParser a
report String -> Parser (Posn, TokenT) String
forall a. String -> Parser (Posn, TokenT) a
forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
"a name" Posn
p TokenT
tok

string, freetext :: HParser String
string :: Parser (Posn, TokenT) String
string   = do (Posn
p,TokenT
t) <- Parser (Posn, TokenT) (Posn, TokenT)
forall t. Parser t t
next
              case TokenT
t of TokName String
s -> String -> Parser (Posn, TokenT) String
forall a. a -> Parser (Posn, TokenT) a
forall (m :: * -> *) a. Monad m => a -> m a
return String
s
                        TokenT
_         -> (String -> Parser (Posn, TokenT) String)
-> String -> Posn -> TokenT -> Parser (Posn, TokenT) String
forall a.
(String -> HParser a) -> String -> Posn -> TokenT -> HParser a
report String -> Parser (Posn, TokenT) String
forall a. String -> Parser (Posn, TokenT) a
forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
"text" Posn
p TokenT
t
freetext :: Parser (Posn, TokenT) String
freetext = do (Posn
p,TokenT
t) <- Parser (Posn, TokenT) (Posn, TokenT)
forall t. Parser t t
next
              case TokenT
t of TokFreeText String
s -> String -> Parser (Posn, TokenT) String
forall a. a -> Parser (Posn, TokenT) a
forall (m :: * -> *) a. Monad m => a -> m a
return String
s
                        TokenT
_             -> (String -> Parser (Posn, TokenT) String)
-> String -> Posn -> TokenT -> Parser (Posn, TokenT) String
forall a.
(String -> HParser a) -> String -> Posn -> TokenT -> HParser a
report String -> Parser (Posn, TokenT) String
forall a. String -> Parser (Posn, TokenT) a
forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
"text" Posn
p TokenT
t

maybe :: HParser a -> HParser (Maybe a)
maybe :: forall a. HParser a -> HParser (Maybe a)
maybe HParser a
p =
    (a -> Maybe a
forall a. a -> Maybe a
Just (a -> Maybe a) -> HParser a -> Parser (Posn, TokenT) (Maybe a)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> HParser a
p) Parser (Posn, TokenT) (Maybe a)
-> Parser (Posn, TokenT) (Maybe a)
-> Parser (Posn, TokenT) (Maybe a)
forall t a. Parser t a -> Parser t a -> Parser t a
`onFail`
    Maybe a -> Parser (Posn, TokenT) (Maybe a)
forall a. a -> Parser (Posn, TokenT) a
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe a
forall a. Maybe a
Nothing

either :: HParser a -> HParser b -> HParser (Either a b)
either :: forall a b. HParser a -> HParser b -> HParser (Either a b)
either HParser a
p HParser b
q =
    (a -> Either a b
forall a b. a -> Either a b
Left (a -> Either a b)
-> HParser a -> Parser (Posn, TokenT) (Either a b)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> HParser a
p) Parser (Posn, TokenT) (Either a b)
-> Parser (Posn, TokenT) (Either a b)
-> Parser (Posn, TokenT) (Either a b)
forall t a. Parser t a -> Parser t a -> Parser t a
`onFail`
    (b -> Either a b
forall a b. b -> Either a b
Right (b -> Either a b)
-> HParser b -> Parser (Posn, TokenT) (Either a b)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> HParser b
q)

word :: String -> HParser ()
word :: String -> HParser ()
word String
s = do { (Posn, TokenT)
x <- Parser (Posn, TokenT) (Posn, TokenT)
forall t. Parser t t
next
            ; case (Posn, TokenT)
x of
                (Posn
_p,TokName String
n)     | String
sString -> String -> Bool
forall a. Eq a => a -> a -> Bool
==String
n -> () -> HParser ()
forall a. a -> Parser (Posn, TokenT) a
forall (m :: * -> *) a. Monad m => a -> m a
return ()
                (Posn
_p,TokFreeText String
n) | String
sString -> String -> Bool
forall a. Eq a => a -> a -> Bool
==String
n -> () -> HParser ()
forall a. a -> Parser (Posn, TokenT) a
forall (m :: * -> *) a. Monad m => a -> m a
return ()
                ( Posn
p,t :: TokenT
t@(TokError String
_)) -> (String -> HParser ()) -> String -> Posn -> TokenT -> HParser ()
forall a.
(String -> HParser a) -> String -> Posn -> TokenT -> HParser a
report String -> HParser ()
forall (p :: * -> *) a. PolyParse p => String -> p a
failBad (String -> String
forall a. Show a => a -> String
show String
s) Posn
p TokenT
t
                ( Posn
p,TokenT
t) -> (String -> HParser ()) -> String -> Posn -> TokenT -> HParser ()
forall a.
(String -> HParser a) -> String -> Posn -> TokenT -> HParser a
report String -> HParser ()
forall a. String -> Parser (Posn, TokenT) a
forall (m :: * -> *) a. MonadFail m => String -> m a
fail (String -> String
forall a. Show a => a -> String
show String
s) Posn
p TokenT
t
            }

posn :: HParser Posn
posn :: HParser Posn
posn = do { x :: (Posn, TokenT)
x@(Posn
p,TokenT
_) <- Parser (Posn, TokenT) (Posn, TokenT)
forall t. Parser t t
next
          ; [(Posn, TokenT)] -> HParser ()
forall t. [t] -> Parser t ()
reparse [(Posn, TokenT)
x]
          ; Posn -> HParser Posn
forall a. a -> Parser (Posn, TokenT) a
forall (m :: * -> *) a. Monad m => a -> m a
return Posn
p
          } HParser Posn -> HParser Posn -> HParser Posn
forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` Posn -> HParser Posn
forall a. a -> Parser (Posn, TokenT) a
forall (m :: * -> *) a. Monad m => a -> m a
return Posn
noPos

nmtoken :: HParser NmToken
nmtoken :: Parser (Posn, TokenT) String
nmtoken = Parser (Posn, TokenT) String
string Parser (Posn, TokenT) String
-> Parser (Posn, TokenT) String -> Parser (Posn, TokenT) String
forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` Parser (Posn, TokenT) String
freetext

failP, failBadP :: String -> HParser a
failP :: forall a. String -> Parser (Posn, TokenT) a
failP String
msg    = do { Posn
p <- HParser Posn
posn; String -> HParser a
forall a. String -> Parser (Posn, TokenT) a
forall (m :: * -> *) a. MonadFail m => String -> m a
fail (String
msgString -> String -> String
forall a. [a] -> [a] -> [a]
++String
"\n    at "String -> String -> String
forall a. [a] -> [a] -> [a]
++Posn -> String
forall a. Show a => a -> String
show Posn
p) }
failBadP :: forall a. String -> Parser (Posn, TokenT) a
failBadP String
msg = do { Posn
p <- HParser Posn
posn; String -> HParser a
forall (p :: * -> *) a. PolyParse p => String -> p a
failBad (String
msgString -> String -> String
forall a. [a] -> [a] -> [a]
++String
"\n    at "String -> String -> String
forall a. [a] -> [a] -> [a]
++Posn -> String
forall a. Show a => a -> String
show Posn
p) }

report :: (String->HParser a) -> String -> Posn -> TokenT -> HParser a
report :: forall a.
(String -> HParser a) -> String -> Posn -> TokenT -> HParser a
report String -> HParser a
fail String
expect Posn
p TokenT
t = String -> HParser a
fail (String
"Expected "String -> String -> String
forall a. [a] -> [a] -> [a]
++String -> String
forall a. Show a => a -> String
show String
expectString -> String -> String
forall a. [a] -> [a] -> [a]
++String
" but found "String -> String -> String
forall a. [a] -> [a] -> [a]
++TokenT -> String
forall a. Show a => a -> String
show TokenT
t
                               String -> String -> String
forall a. [a] -> [a] -> [a]
++String
"\n  at "String -> String -> String
forall a. [a] -> [a] -> [a]
++Posn -> String
forall a. Show a => a -> String
show Posn
p)

adjustErrP :: HParser a -> (String->String) -> HParser a
HParser a
p adjustErrP :: forall a. HParser a -> (String -> String) -> HParser a
`adjustErrP` String -> String
f = HParser a
p HParser a -> HParser a -> HParser a
forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` do Posn
pn <- HParser Posn
posn
                                 (HParser a
p HParser a -> (String -> String) -> HParser a
forall a. HParser a -> (String -> String) -> HParser a
forall (p :: * -> *) a.
Commitment p =>
p a -> (String -> String) -> p a
`adjustErr` String -> String
f) HParser a -> (String -> String) -> HParser a
forall a. HParser a -> (String -> String) -> HParser a
forall (p :: * -> *) a.
Commitment p =>
p a -> (String -> String) -> p a
`adjustErr` (String -> String -> String
forall a. [a] -> [a] -> [a]
++Posn -> String
forall a. Show a => a -> String
show Posn
pn)

---- XML Parsing Functions ----

document :: HParser (Document Posn)
document :: Parser (Posn, TokenT) (Document Posn)
document = do
    Prolog
p     <- HParser Prolog
prolog HParser Prolog -> (String -> String) -> HParser Prolog
forall a. HParser a -> (String -> String) -> HParser a
forall (p :: * -> *) a.
Commitment p =>
p a -> (String -> String) -> p a
`adjustErr` (String
"unrecognisable XML prolog\n"String -> String -> String
forall a. [a] -> [a] -> [a]
++)
    [(Stack, Element Posn)]
ht    <- Parser (Posn, TokenT) (Stack, Element Posn)
-> Parser (Posn, TokenT) [(Stack, Element Posn)]
forall (p :: * -> *) a. PolyParse p => p a -> p [a]
many1 (String -> Parser (Posn, TokenT) (Stack, Element Posn)
element String
"HTML document")
    [Misc]
ms    <- Parser (Posn, TokenT) Misc -> Parser (Posn, TokenT) [Misc]
forall a. Parser (Posn, TokenT) a -> Parser (Posn, TokenT) [a]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many Parser (Posn, TokenT) Misc
misc
    Document Posn -> Parser (Posn, TokenT) (Document Posn)
forall a. a -> Parser (Posn, TokenT) a
forall (m :: * -> *) a. Monad m => a -> m a
return (Prolog
-> SymTab EntityDef -> Element Posn -> [Misc] -> Document Posn
forall i.
Prolog -> SymTab EntityDef -> Element i -> [Misc] -> Document i
Document Prolog
p SymTab EntityDef
forall a. SymTab a
emptyST (case ((Stack, Element Posn) -> Element Posn)
-> [(Stack, Element Posn)] -> [Element Posn]
forall a b. (a -> b) -> [a] -> [b]
map (Stack, Element Posn) -> Element Posn
forall a b. (a, b) -> b
snd [(Stack, Element Posn)]
ht of
                                  [Element Posn
e] -> Element Posn
e
                                  [Element Posn]
es  -> QName -> [Attribute] -> [Content Posn] -> Element Posn
forall i. QName -> [Attribute] -> [Content i] -> Element i
Elem (String -> QName
N String
"html") [] ((Element Posn -> Content Posn) -> [Element Posn] -> [Content Posn]
forall a b. (a -> b) -> [a] -> [b]
map Element Posn -> Content Posn
mkCElem [Element Posn]
es))
                               [Misc]
ms)
  where mkCElem :: Element Posn -> Content Posn
mkCElem Element Posn
e = Element Posn -> Posn -> Content Posn
forall i. Element i -> i -> Content i
CElem Element Posn
e Posn
noPos

comment :: HParser Comment
comment :: Parser (Posn, TokenT) String
comment = do
    HParser TokenT
-> HParser TokenT
-> Parser (Posn, TokenT) String
-> Parser (Posn, TokenT) String
forall (p :: * -> *) bra ket a.
PolyParse p =>
p bra -> p ket -> p a -> p a
bracket (TokenT -> HParser TokenT
tok TokenT
TokCommentOpen) (TokenT -> HParser TokenT
tok TokenT
TokCommentClose) Parser (Posn, TokenT) String
freetext

processinginstruction :: HParser ProcessingInstruction
processinginstruction :: HParser ProcessingInstruction
processinginstruction = do
    TokenT -> HParser TokenT
tok TokenT
TokPIOpen
    HParser ProcessingInstruction -> HParser ProcessingInstruction
forall a. Parser (Posn, TokenT) a -> Parser (Posn, TokenT) a
forall (p :: * -> *) a. Commitment p => p a -> p a
commit (HParser ProcessingInstruction -> HParser ProcessingInstruction)
-> HParser ProcessingInstruction -> HParser ProcessingInstruction
forall a b. (a -> b) -> a -> b
$ do
      String
n <- Parser (Posn, TokenT) String
string  Parser (Posn, TokenT) String
-> Parser (Posn, TokenT) String -> Parser (Posn, TokenT) String
forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` String -> Parser (Posn, TokenT) String
forall a. String -> Parser (Posn, TokenT) a
failP String
"processing instruction has no target"
      String
f <- Parser (Posn, TokenT) String
freetext
      (TokenT -> HParser TokenT
tok TokenT
TokPIClose HParser TokenT -> HParser TokenT -> HParser TokenT
forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` TokenT -> HParser TokenT
tok TokenT
TokAnyClose) HParser TokenT -> HParser TokenT -> HParser TokenT
forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` String -> HParser TokenT
forall a. String -> Parser (Posn, TokenT) a
failP String
"missing ?> or >"
      ProcessingInstruction -> HParser ProcessingInstruction
forall a. a -> Parser (Posn, TokenT) a
forall (m :: * -> *) a. Monad m => a -> m a
return (String
n, String
f)

cdsect :: HParser CDSect
cdsect :: Parser (Posn, TokenT) String
cdsect = do
    TokenT -> HParser TokenT
tok TokenT
TokSectionOpen
    HParser TokenT
-> HParser TokenT
-> Parser (Posn, TokenT) String
-> Parser (Posn, TokenT) String
forall (p :: * -> *) bra ket a.
PolyParse p =>
p bra -> p ket -> p a -> p a
bracket (TokenT -> HParser TokenT
tok (Section -> TokenT
TokSection Section
CDATAx)) (HParser TokenT -> HParser TokenT
forall a. Parser (Posn, TokenT) a -> Parser (Posn, TokenT) a
forall (p :: * -> *) a. Commitment p => p a -> p a
commit (HParser TokenT -> HParser TokenT)
-> HParser TokenT -> HParser TokenT
forall a b. (a -> b) -> a -> b
$ TokenT -> HParser TokenT
tok TokenT
TokSectionClose) Parser (Posn, TokenT) String
chardata

prolog :: HParser Prolog
prolog :: HParser Prolog
prolog = do
    Maybe XMLDecl
x <- HParser XMLDecl -> HParser (Maybe XMLDecl)
forall a. HParser a -> HParser (Maybe a)
maybe HParser XMLDecl
xmldecl
    [Misc]
m1 <- Parser (Posn, TokenT) Misc -> Parser (Posn, TokenT) [Misc]
forall a. Parser (Posn, TokenT) a -> Parser (Posn, TokenT) [a]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many Parser (Posn, TokenT) Misc
misc
    Maybe DocTypeDecl
dtd <- HParser DocTypeDecl -> HParser (Maybe DocTypeDecl)
forall a. HParser a -> HParser (Maybe a)
maybe HParser DocTypeDecl
doctypedecl
    [Misc]
m2 <- Parser (Posn, TokenT) Misc -> Parser (Posn, TokenT) [Misc]
forall a. Parser (Posn, TokenT) a -> Parser (Posn, TokenT) [a]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many Parser (Posn, TokenT) Misc
misc
    Prolog -> HParser Prolog
forall a. a -> Parser (Posn, TokenT) a
forall (m :: * -> *) a. Monad m => a -> m a
return (Maybe XMLDecl -> [Misc] -> Maybe DocTypeDecl -> [Misc] -> Prolog
Prolog Maybe XMLDecl
x [Misc]
m1 Maybe DocTypeDecl
dtd [Misc]
m2)

xmldecl :: HParser XMLDecl
xmldecl :: HParser XMLDecl
xmldecl = do
    TokenT -> HParser TokenT
tok TokenT
TokPIOpen
    String -> HParser ()
word String
"xml" HParser () -> HParser () -> HParser ()
forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` String -> HParser ()
word String
"XML"
    Posn
p <- HParser Posn
posn
    String
s <- Parser (Posn, TokenT) String
freetext
    TokenT -> HParser TokenT
tok TokenT
TokPIClose HParser TokenT -> HParser TokenT -> HParser TokenT
forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` String -> HParser TokenT
forall a. String -> Parser (Posn, TokenT) a
failBadP String
"missing ?> in <?xml ...?>"
    ((String -> HParser XMLDecl)
-> (XMLDecl -> HParser XMLDecl)
-> Either String XMLDecl
-> HParser XMLDecl
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
Prelude.either String -> HParser XMLDecl
forall a. String -> Parser (Posn, TokenT) a
failP XMLDecl -> HParser XMLDecl
forall a. a -> Parser (Posn, TokenT) a
forall (m :: * -> *) a. Monad m => a -> m a
return (Either String XMLDecl -> HParser XMLDecl)
-> (String -> Either String XMLDecl) -> String -> HParser XMLDecl
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Either String XMLDecl, [(Posn, TokenT)]) -> Either String XMLDecl
forall a b. (a, b) -> a
fst ((Either String XMLDecl, [(Posn, TokenT)])
 -> Either String XMLDecl)
-> (String -> (Either String XMLDecl, [(Posn, TokenT)]))
-> String
-> Either String XMLDecl
forall b c a. (b -> c) -> (a -> b) -> a -> c
. HParser XMLDecl
-> [(Posn, TokenT)] -> (Either String XMLDecl, [(Posn, TokenT)])
forall t a. Parser t a -> [t] -> (Either String a, [t])
runParser HParser XMLDecl
aux ([(Posn, TokenT)] -> (Either String XMLDecl, [(Posn, TokenT)]))
-> (String -> [(Posn, TokenT)])
-> String
-> (Either String XMLDecl, [(Posn, TokenT)])
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Posn -> String -> [(Posn, TokenT)]
xmlReLex Posn
p) String
s
  where
    aux :: HParser XMLDecl
aux = do
      String
v <- Parser (Posn, TokenT) String
versioninfo  Parser (Posn, TokenT) String
-> Parser (Posn, TokenT) String -> Parser (Posn, TokenT) String
forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` String -> Parser (Posn, TokenT) String
forall a. String -> Parser (Posn, TokenT) a
failP String
"missing XML version info"
      Maybe EncodingDecl
e <- HParser EncodingDecl -> HParser (Maybe EncodingDecl)
forall a. HParser a -> HParser (Maybe a)
maybe HParser EncodingDecl
encodingdecl
      Maybe Bool
s <- HParser Bool -> HParser (Maybe Bool)
forall a. HParser a -> HParser (Maybe a)
maybe HParser Bool
sddecl
      XMLDecl -> HParser XMLDecl
forall a. a -> Parser (Posn, TokenT) a
forall (m :: * -> *) a. Monad m => a -> m a
return (String -> Maybe EncodingDecl -> Maybe Bool -> XMLDecl
XMLDecl String
v Maybe EncodingDecl
e Maybe Bool
s)

versioninfo :: HParser VersionInfo
versioninfo :: Parser (Posn, TokenT) String
versioninfo = do
    String -> HParser ()
word String
"version" HParser () -> HParser () -> HParser ()
forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` String -> HParser ()
word String
"VERSION"
    TokenT -> HParser TokenT
tok TokenT
TokEqual
    HParser TokenT
-> HParser TokenT
-> Parser (Posn, TokenT) String
-> Parser (Posn, TokenT) String
forall (p :: * -> *) bra ket a.
PolyParse p =>
p bra -> p ket -> p a -> p a
bracket (TokenT -> HParser TokenT
tok TokenT
TokQuote) (HParser TokenT -> HParser TokenT
forall a. Parser (Posn, TokenT) a -> Parser (Posn, TokenT) a
forall (p :: * -> *) a. Commitment p => p a -> p a
commit (HParser TokenT -> HParser TokenT)
-> HParser TokenT -> HParser TokenT
forall a b. (a -> b) -> a -> b
$ TokenT -> HParser TokenT
tok TokenT
TokQuote) Parser (Posn, TokenT) String
freetext

misc :: HParser Misc
misc :: Parser (Posn, TokenT) Misc
misc =
    [(String, Parser (Posn, TokenT) Misc)]
-> Parser (Posn, TokenT) Misc
forall a.
[(String, Parser (Posn, TokenT) a)] -> Parser (Posn, TokenT) a
forall (p :: * -> *) a. Commitment p => [(String, p a)] -> p a
oneOf' [ (String
"<!--comment-->", String -> Misc
Comment (String -> Misc)
-> Parser (Posn, TokenT) String -> Parser (Posn, TokenT) Misc
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser (Posn, TokenT) String
comment)
           , (String
"<?PI?>",         ProcessingInstruction -> Misc
PI (ProcessingInstruction -> Misc)
-> HParser ProcessingInstruction -> Parser (Posn, TokenT) Misc
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> HParser ProcessingInstruction
processinginstruction)
           ]


-- Question: for HTML, should we disallow in-line DTDs, allowing only externals?
-- Answer: I think so.

doctypedecl :: HParser DocTypeDecl
doctypedecl :: HParser DocTypeDecl
doctypedecl = do
    TokenT -> HParser TokenT
tok TokenT
TokSpecialOpen
    TokenT -> HParser TokenT
tok (Special -> TokenT
TokSpecial Special
DOCTYPEx)
    HParser DocTypeDecl -> HParser DocTypeDecl
forall a. Parser (Posn, TokenT) a -> Parser (Posn, TokenT) a
forall (p :: * -> *) a. Commitment p => p a -> p a
commit (HParser DocTypeDecl -> HParser DocTypeDecl)
-> HParser DocTypeDecl -> HParser DocTypeDecl
forall a b. (a -> b) -> a -> b
$ do
      QName
n <- HParser QName
qname
      Maybe ExternalID
eid <- HParser ExternalID -> HParser (Maybe ExternalID)
forall a. HParser a -> HParser (Maybe a)
maybe HParser ExternalID
externalid
--    es <- maybe (bracket (tok TokSqOpen) (commit $ tok TokSqClose)) (many markupdecl)
      TokenT -> HParser TokenT
tok TokenT
TokAnyClose  HParser TokenT -> HParser TokenT -> HParser TokenT
forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` String -> HParser TokenT
forall a. String -> Parser (Posn, TokenT) a
failP String
"missing > in DOCTYPE decl"
--    return (DTD n eid (case es of { Nothing -> []; Just e -> e }))
      DocTypeDecl -> HParser DocTypeDecl
forall a. a -> Parser (Posn, TokenT) a
forall (m :: * -> *) a. Monad m => a -> m a
return (QName -> Maybe ExternalID -> [MarkupDecl] -> DocTypeDecl
DTD QName
n Maybe ExternalID
eid [])

--markupdecl :: HParser MarkupDecl
--markupdecl =
--    (Element <$> elementdecl) `onFail`
--    (AttList <$> attlistdecl) `onFail`
--    (Entity <$> entitydecl) `onFail`
--    (Notation <$> notationdecl) `onFail`
--    (MarkupMisc <$> misc) `onFail`
--    PEREF(MarkupPE,markupdecl)
--
--extsubset :: HParser ExtSubset
--extsubset = do
--    td <- maybe textdecl
--    ds <- many extsubsetdecl
--    return (ExtSubset td ds)
--
--extsubsetdecl :: HParser ExtSubsetDecl
--extsubsetdecl =
--    (ExtMarkupDecl <$> markupdecl) `onFail`
--    (ExtConditionalSect <$> conditionalsect) `onFail`
--    PEREF(ExtPEReference,extsubsetdecl)

sddecl :: HParser SDDecl
sddecl :: HParser Bool
sddecl = do
    String -> HParser ()
word String
"standalone" HParser () -> HParser () -> HParser ()
forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` String -> HParser ()
word String
"STANDALONE"
    HParser Bool -> HParser Bool
forall a. Parser (Posn, TokenT) a -> Parser (Posn, TokenT) a
forall (p :: * -> *) a. Commitment p => p a -> p a
commit (HParser Bool -> HParser Bool) -> HParser Bool -> HParser Bool
forall a b. (a -> b) -> a -> b
$ do
      TokenT -> HParser TokenT
tok TokenT
TokEqual HParser TokenT -> HParser TokenT -> HParser TokenT
forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` String -> HParser TokenT
forall a. String -> Parser (Posn, TokenT) a
failP String
"missing = in 'standalone' decl"
      HParser TokenT -> HParser TokenT -> HParser Bool -> HParser Bool
forall (p :: * -> *) bra ket a.
PolyParse p =>
p bra -> p ket -> p a -> p a
bracket (TokenT -> HParser TokenT
tok TokenT
TokQuote) (HParser TokenT -> HParser TokenT
forall a. Parser (Posn, TokenT) a -> Parser (Posn, TokenT) a
forall (p :: * -> *) a. Commitment p => p a -> p a
commit (HParser TokenT -> HParser TokenT)
-> HParser TokenT -> HParser TokenT
forall a b. (a -> b) -> a -> b
$ TokenT -> HParser TokenT
tok TokenT
TokQuote)
              ( (String -> HParser ()
word String
"yes" HParser () -> HParser Bool -> HParser Bool
forall a b.
Parser (Posn, TokenT) a
-> Parser (Posn, TokenT) b -> Parser (Posn, TokenT) b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Bool -> HParser Bool
forall a. a -> Parser (Posn, TokenT) a
forall (m :: * -> *) a. Monad m => a -> m a
return Bool
True) HParser Bool -> HParser Bool -> HParser Bool
forall t a. Parser t a -> Parser t a -> Parser t a
`onFail`
                (String -> HParser ()
word String
"no" HParser () -> HParser Bool -> HParser Bool
forall a b.
Parser (Posn, TokenT) a
-> Parser (Posn, TokenT) b -> Parser (Posn, TokenT) b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Bool -> HParser Bool
forall a. a -> Parser (Posn, TokenT) a
forall (m :: * -> *) a. Monad m => a -> m a
return Bool
False) HParser Bool -> HParser Bool -> HParser Bool
forall t a. Parser t a -> Parser t a -> Parser t a
`onFail`
                String -> HParser Bool
forall a. String -> Parser (Posn, TokenT) a
failP String
"'standalone' decl requires 'yes' or 'no' value" )




----
-- VERY IMPORTANT NOTE: The stack returned here contains those tags which
-- have been closed implicitly and need to be reopened again at the
-- earliest opportunity.
type Stack = [(Name,[Attribute])]

element :: Name -> HParser (Stack,Element Posn)
element :: String -> Parser (Posn, TokenT) (Stack, Element Posn)
element String
ctx =
  do
    TokenT -> HParser TokenT
tok TokenT
TokAnyOpen
    (ElemTag (N String
e) [Attribute]
avs) <- HParser ElemTag
elemtag
    ( if String
e String -> String -> Bool
`closes` String
ctx then
         -- insert the missing close-tag, fail forward, and reparse.
         ( do String -> HParser ()
forall (m :: * -> *). Monad m => String -> m ()
debug String
"/"
              [TokenT] -> HParser ()
unparse ([TokenT
TokEndOpen, String -> TokenT
TokName String
ctx, TokenT
TokAnyClose,
                        TokenT
TokAnyOpen, String -> TokenT
TokName String
e] [TokenT] -> [TokenT] -> [TokenT]
forall a. [a] -> [a] -> [a]
++ [Attribute] -> [TokenT]
reformatAttrs [Attribute]
avs)
              (Stack, Element Posn)
-> Parser (Posn, TokenT) (Stack, Element Posn)
forall a. a -> Parser (Posn, TokenT) a
forall (m :: * -> *) a. Monad m => a -> m a
return ([], QName -> [Attribute] -> [Content Posn] -> Element Posn
forall i. QName -> [Attribute] -> [Content i] -> Element i
Elem (String -> QName
N String
"null") [] []))
      else if String
e String -> [String] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String]
selfclosingtags then
         -- complete the parse straightaway.
         ( do TokenT -> HParser TokenT
tok TokenT
TokEndClose   -- self-closing <tag />
              String -> HParser ()
forall (m :: * -> *). Monad m => String -> m ()
debug (String
eString -> String -> String
forall a. [a] -> [a] -> [a]
++String
"[+]")
              (Stack, Element Posn)
-> Parser (Posn, TokenT) (Stack, Element Posn)
forall a. a -> Parser (Posn, TokenT) a
forall (m :: * -> *) a. Monad m => a -> m a
return ([], QName -> [Attribute] -> [Content Posn] -> Element Posn
forall i. QName -> [Attribute] -> [Content i] -> Element i
Elem (String -> QName
N String
e) [Attribute]
avs [])) Parser (Posn, TokenT) (Stack, Element Posn)
-> Parser (Posn, TokenT) (Stack, Element Posn)
-> Parser (Posn, TokenT) (Stack, Element Posn)
forall t a. Parser t a -> Parser t a -> Parser t a
`onFail`
     --  ( do tok TokAnyClose   -- sequence <tag></tag> (**not HTML?**)
     --       debug (e++"[+")
     --       n <- bracket (tok TokEndOpen) (commit $ tok TokAnyClose) qname
     --       debug "]"
     --       if e == (map toLower n :: Name)
     --         then return ([], Elem e avs [])
     --         else return (error "no nesting in empty tag")) `onFail`
         ( do TokenT -> HParser TokenT
tok TokenT
TokAnyClose   -- <tag> with no close (e.g. <IMG>)
              String -> HParser ()
forall (m :: * -> *). Monad m => String -> m ()
debug (String
eString -> String -> String
forall a. [a] -> [a] -> [a]
++String
"[+]")
              (Stack, Element Posn)
-> Parser (Posn, TokenT) (Stack, Element Posn)
forall a. a -> Parser (Posn, TokenT) a
forall (m :: * -> *) a. Monad m => a -> m a
return ([], QName -> [Attribute] -> [Content Posn] -> Element Posn
forall i. QName -> [Attribute] -> [Content i] -> Element i
Elem (String -> QName
N String
e) [Attribute]
avs []))
      else
         ( do TokenT -> HParser TokenT
tok TokenT
TokEndClose
              String -> HParser ()
forall (m :: * -> *). Monad m => String -> m ()
debug (String
eString -> String -> String
forall a. [a] -> [a] -> [a]
++String
"[]")
              (Stack, Element Posn)
-> Parser (Posn, TokenT) (Stack, Element Posn)
forall a. a -> Parser (Posn, TokenT) a
forall (m :: * -> *) a. Monad m => a -> m a
return ([], QName -> [Attribute] -> [Content Posn] -> Element Posn
forall i. QName -> [Attribute] -> [Content i] -> Element i
Elem (String -> QName
N String
e) [Attribute]
avs [])) Parser (Posn, TokenT) (Stack, Element Posn)
-> Parser (Posn, TokenT) (Stack, Element Posn)
-> Parser (Posn, TokenT) (Stack, Element Posn)
forall t a. Parser t a -> Parser t a -> Parser t a
`onFail`
         ( do TokenT -> HParser TokenT
tok TokenT
TokAnyClose HParser TokenT -> HParser TokenT -> HParser TokenT
forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` String -> HParser TokenT
forall a. String -> Parser (Posn, TokenT) a
failP String
"missing > or /> in element tag"
              String -> HParser ()
forall (m :: * -> *). Monad m => String -> m ()
debug (String
eString -> String -> String
forall a. [a] -> [a] -> [a]
++String
"[")
           -- zz <- many (content e)
           -- n <- bracket (tok TokEndOpen) (commit $ tok TokAnyClose) qname
              [(Stack, Content Posn)]
zz <- Parser (Posn, TokenT) (Stack, Content Posn)
-> HParser TokenT -> Parser (Posn, TokenT) [(Stack, Content Posn)]
forall (p :: * -> *) a z. PolyParse p => p a -> p z -> p [a]
manyFinally (String -> Parser (Posn, TokenT) (Stack, Content Posn)
content String
e)
                                (TokenT -> HParser TokenT
tok TokenT
TokEndOpen)
              (N String
n) <- HParser QName
qname
              HParser TokenT -> HParser TokenT
forall a. Parser (Posn, TokenT) a -> Parser (Posn, TokenT) a
forall (p :: * -> *) a. Commitment p => p a -> p a
commit (TokenT -> HParser TokenT
tok TokenT
TokAnyClose)
              String -> HParser ()
forall (m :: * -> *). Monad m => String -> m ()
debug String
"]"
              let ([Stack]
ss,[Content Posn]
cs) = [(Stack, Content Posn)] -> ([Stack], [Content Posn])
forall a b. [(a, b)] -> ([a], [b])
unzip [(Stack, Content Posn)]
zz
              let s :: Stack
s       = if [Stack] -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [Stack]
ss then [] else [Stack] -> Stack
forall a. HasCallStack => [a] -> a
last [Stack]
ss
              ( if String
e String -> String -> Bool
forall a. Eq a => a -> a -> Bool
== ((Char -> Char) -> String -> String
forall a b. (a -> b) -> [a] -> [b]
map Char -> Char
toLower String
n :: Name) then
                  do [TokenT] -> HParser ()
unparse (Stack -> [TokenT]
reformatTags (String -> Stack -> Stack
closeInner String
e Stack
s))
                     String -> HParser ()
forall (m :: * -> *). Monad m => String -> m ()
debug String
"^"
                     (Stack, Element Posn)
-> Parser (Posn, TokenT) (Stack, Element Posn)
forall a. a -> Parser (Posn, TokenT) a
forall (m :: * -> *) a. Monad m => a -> m a
return ([], QName -> [Attribute] -> [Content Posn] -> Element Posn
forall i. QName -> [Attribute] -> [Content i] -> Element i
Elem (String -> QName
N String
e) [Attribute]
avs [Content Posn]
cs)
                else
                  do [TokenT] -> HParser ()
unparse [TokenT
TokEndOpen, String -> TokenT
TokName String
n, TokenT
TokAnyClose]
                     String -> HParser ()
forall (m :: * -> *). Monad m => String -> m ()
debug String
"-"
                     (Stack, Element Posn)
-> Parser (Posn, TokenT) (Stack, Element Posn)
forall a. a -> Parser (Posn, TokenT) a
forall (m :: * -> *) a. Monad m => a -> m a
return ((String
e,[Attribute]
avs)(String, [Attribute]) -> Stack -> Stack
forall a. a -> [a] -> [a]
:Stack
s, QName -> [Attribute] -> [Content Posn] -> Element Posn
forall i. QName -> [Attribute] -> [Content i] -> Element i
Elem (String -> QName
N String
e) [Attribute]
avs [Content Posn]
cs))
         ) Parser (Posn, TokenT) (Stack, Element Posn)
-> Parser (Posn, TokenT) (Stack, Element Posn)
-> Parser (Posn, TokenT) (Stack, Element Posn)
forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` String -> Parser (Posn, TokenT) (Stack, Element Posn)
forall a. String -> Parser (Posn, TokenT) a
failP (String
"failed to repair non-matching tags in context: "String -> String -> String
forall a. [a] -> [a] -> [a]
++String
ctx))

closeInner :: Name -> [(Name,[Attribute])] -> [(Name,[Attribute])]
closeInner :: String -> Stack -> Stack
closeInner String
c Stack
ts =
    case String -> [(String, [String])] -> Maybe [String]
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup String
c [(String, [String])]
closeInnerTags of
      (Just [String]
these) -> ((String, [Attribute]) -> Bool) -> Stack -> Stack
forall a. (a -> Bool) -> [a] -> [a]
filter ((String -> [String] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` [String]
these)(String -> Bool)
-> ((String, [Attribute]) -> String)
-> (String, [Attribute])
-> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
.(String, [Attribute]) -> String
forall a b. (a, b) -> a
fst) Stack
ts
      Maybe [String]
Nothing      -> Stack
ts

unparse :: [TokenT] -> Parser (Posn, TokenT) ()
unparse :: [TokenT] -> HParser ()
unparse [TokenT]
ts = do Posn
p <- HParser Posn
posn
                [(Posn, TokenT)] -> HParser ()
forall t. [t] -> Parser t ()
reparse ([Posn] -> [TokenT] -> [(Posn, TokenT)]
forall a b. [a] -> [b] -> [(a, b)]
zip (Posn -> [Posn]
forall a. a -> [a]
repeat Posn
p) [TokenT]
ts)

reformatAttrs :: [(QName, AttValue)] -> [TokenT]
reformatAttrs :: [Attribute] -> [TokenT]
reformatAttrs = (Attribute -> [TokenT]) -> [Attribute] -> [TokenT]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap Attribute -> [TokenT]
f0
    where f0 :: Attribute -> [TokenT]
f0 (QName
a, v :: AttValue
v@(AttValue [Either String Reference]
_)) = [ String -> TokenT
TokName (QName -> String
printableName QName
a), TokenT
TokEqual
                                   , TokenT
TokQuote, String -> TokenT
TokFreeText (AttValue -> String
forall a. Show a => a -> String
show AttValue
v), TokenT
TokQuote ]

reformatTags :: [(String, [(QName, AttValue)])] -> [TokenT]
reformatTags :: Stack -> [TokenT]
reformatTags = ((String, [Attribute]) -> [TokenT]) -> Stack -> [TokenT]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap (String, [Attribute]) -> [TokenT]
f0
    where f0 :: (String, [Attribute]) -> [TokenT]
f0 (String
t,[Attribute]
avs) = [TokenT
TokAnyOpen, String -> TokenT
TokName String
t][TokenT] -> [TokenT] -> [TokenT]
forall a. [a] -> [a] -> [a]
++[Attribute] -> [TokenT]
reformatAttrs [Attribute]
avs[TokenT] -> [TokenT] -> [TokenT]
forall a. [a] -> [a] -> [a]
++[TokenT
TokAnyClose]

content :: Name -> HParser (Stack,Content Posn)
content :: String -> Parser (Posn, TokenT) (Stack, Content Posn)
content String
ctx = do { Posn
p <- HParser Posn
posn ; Posn -> Parser (Posn, TokenT) (Stack, Content Posn)
content' Posn
p }
  where content' :: Posn -> Parser (Posn, TokenT) (Stack, Content Posn)
content' Posn
p = [(String, Parser (Posn, TokenT) (Stack, Content Posn))]
-> Parser (Posn, TokenT) (Stack, Content Posn)
forall a.
[(String, Parser (Posn, TokenT) a)] -> Parser (Posn, TokenT) a
forall (p :: * -> *) a. Commitment p => [(String, p a)] -> p a
oneOf'
          [ ( String
"element", String -> Parser (Posn, TokenT) (Stack, Element Posn)
element String
ctx Parser (Posn, TokenT) (Stack, Element Posn)
-> ((Stack, Element Posn)
    -> Parser (Posn, TokenT) (Stack, Content Posn))
-> Parser (Posn, TokenT) (Stack, Content Posn)
forall a b.
Parser (Posn, TokenT) a
-> (a -> Parser (Posn, TokenT) b) -> Parser (Posn, TokenT) b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \(Stack
s,Element Posn
e)-> (Stack, Content Posn)
-> Parser (Posn, TokenT) (Stack, Content Posn)
forall a. a -> Parser (Posn, TokenT) a
forall (m :: * -> *) a. Monad m => a -> m a
return (Stack
s, Element Posn -> Posn -> Content Posn
forall i. Element i -> i -> Content i
CElem Element Posn
e Posn
p))
          , ( String
"chardata", Parser (Posn, TokenT) String
chardata Parser (Posn, TokenT) String
-> (String -> Parser (Posn, TokenT) (Stack, Content Posn))
-> Parser (Posn, TokenT) (Stack, Content Posn)
forall a b.
Parser (Posn, TokenT) a
-> (a -> Parser (Posn, TokenT) b) -> Parser (Posn, TokenT) b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \String
s-> (Stack, Content Posn)
-> Parser (Posn, TokenT) (Stack, Content Posn)
forall a. a -> Parser (Posn, TokenT) a
forall (m :: * -> *) a. Monad m => a -> m a
return ([], Bool -> String -> Posn -> Content Posn
forall i. Bool -> String -> i -> Content i
CString Bool
False String
s Posn
p))
          , ( String
"reference", HParser Reference
reference HParser Reference
-> (Reference -> Parser (Posn, TokenT) (Stack, Content Posn))
-> Parser (Posn, TokenT) (Stack, Content Posn)
forall a b.
Parser (Posn, TokenT) a
-> (a -> Parser (Posn, TokenT) b) -> Parser (Posn, TokenT) b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \Reference
r-> (Stack, Content Posn)
-> Parser (Posn, TokenT) (Stack, Content Posn)
forall a. a -> Parser (Posn, TokenT) a
forall (m :: * -> *) a. Monad m => a -> m a
return ([], Reference -> Posn -> Content Posn
forall i. Reference -> i -> Content i
CRef Reference
r Posn
p))
          , ( String
"cdsect", Parser (Posn, TokenT) String
cdsect Parser (Posn, TokenT) String
-> (String -> Parser (Posn, TokenT) (Stack, Content Posn))
-> Parser (Posn, TokenT) (Stack, Content Posn)
forall a b.
Parser (Posn, TokenT) a
-> (a -> Parser (Posn, TokenT) b) -> Parser (Posn, TokenT) b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \String
c-> (Stack, Content Posn)
-> Parser (Posn, TokenT) (Stack, Content Posn)
forall a. a -> Parser (Posn, TokenT) a
forall (m :: * -> *) a. Monad m => a -> m a
return ([], Bool -> String -> Posn -> Content Posn
forall i. Bool -> String -> i -> Content i
CString Bool
True String
c Posn
p))
          , ( String
"misc", Parser (Posn, TokenT) Misc
misc Parser (Posn, TokenT) Misc
-> (Misc -> Parser (Posn, TokenT) (Stack, Content Posn))
-> Parser (Posn, TokenT) (Stack, Content Posn)
forall a b.
Parser (Posn, TokenT) a
-> (a -> Parser (Posn, TokenT) b) -> Parser (Posn, TokenT) b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \Misc
m->  (Stack, Content Posn)
-> Parser (Posn, TokenT) (Stack, Content Posn)
forall a. a -> Parser (Posn, TokenT) a
forall (m :: * -> *) a. Monad m => a -> m a
return ([], Misc -> Posn -> Content Posn
forall i. Misc -> i -> Content i
CMisc Misc
m Posn
p))
          ] Parser (Posn, TokenT) (Stack, Content Posn)
-> (String -> String)
-> Parser (Posn, TokenT) (Stack, Content Posn)
forall a. HParser a -> (String -> String) -> HParser a
`adjustErrP` (String
"when looking for a content item,\n"String -> String -> String
forall a. [a] -> [a] -> [a]
++)

----
elemtag :: HParser ElemTag
elemtag :: HParser ElemTag
elemtag = do
    (N String
n) <- HParser QName
qname HParser QName -> (String -> String) -> HParser QName
forall (p :: * -> *) a.
PolyParse p =>
p a -> (String -> String) -> p a
`adjustErrBad` (String
"malformed element tag\n"String -> String -> String
forall a. [a] -> [a] -> [a]
++)
    [Attribute]
as <- Parser (Posn, TokenT) Attribute
-> Parser (Posn, TokenT) [Attribute]
forall a. Parser (Posn, TokenT) a -> Parser (Posn, TokenT) [a]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many Parser (Posn, TokenT) Attribute
attribute
    ElemTag -> HParser ElemTag
forall a. a -> Parser (Posn, TokenT) a
forall (m :: * -> *) a. Monad m => a -> m a
return (QName -> [Attribute] -> ElemTag
ElemTag (String -> QName
N  (String -> QName) -> String -> QName
forall a b. (a -> b) -> a -> b
$ (Char -> Char) -> String -> String
forall a b. (a -> b) -> [a] -> [b]
map Char -> Char
toLower String
n) [Attribute]
as)

attribute :: HParser Attribute
attribute :: Parser (Posn, TokenT) Attribute
attribute = do
    (N String
n) <- HParser QName
qname
    AttValue
v <- (do TokenT -> HParser TokenT
tok TokenT
TokEqual
             Parser (Posn, TokenT) AttValue
attvalue) Parser (Posn, TokenT) AttValue
-> Parser (Posn, TokenT) AttValue -> Parser (Posn, TokenT) AttValue
forall t a. Parser t a -> Parser t a -> Parser t a
`onFail`
         AttValue -> Parser (Posn, TokenT) AttValue
forall a. a -> Parser (Posn, TokenT) a
forall (m :: * -> *) a. Monad m => a -> m a
return ([Either String Reference] -> AttValue
AttValue [String -> Either String Reference
forall a b. a -> Either a b
Left String
"TRUE"])
    Attribute -> Parser (Posn, TokenT) Attribute
forall a. a -> Parser (Posn, TokenT) a
forall (m :: * -> *) a. Monad m => a -> m a
return (String -> QName
N (String -> QName) -> String -> QName
forall a b. (a -> b) -> a -> b
$ (Char -> Char) -> String -> String
forall a b. (a -> b) -> [a] -> [b]
map Char -> Char
toLower String
n, AttValue
v)

--elementdecl :: HParser ElementDecl
--elementdecl = do
--    tok TokSpecialOpen
--    tok (TokSpecial ELEMENTx)
--    n <- qname `onFail` failP "missing identifier in ELEMENT decl"
--    c <- contentspec `onFail` failP "missing content spec in ELEMENT decl"
--    tok TokAnyClose `onFail` failP "expected > terminating ELEMENT decl"
--    return (ElementDecl n c)
--
--contentspec :: HParser ContentSpec
--contentspec =
--    ( word "EMPTY" >> return EMPTY) `onFail`
--    ( word "ANY" >> return ANY) `onFail`
--    (Mixed <$> mixed) `onFail`
--    (ContentSpec <$> cp) `onFail`
--    PEREF(ContentPE,contentspec)
--
--choice :: HParser [CP]
--choice = do
--    bracket (tok TokBraOpen) (tok TokBraClose)
--            (cp `sepby1` (tok TokPipe))
--
--sequence :: HParser [CP]
--sequence = do
--    bracket (tok TokBraOpen) (tok TokBraClose)
--            (cp `sepby1` (tok TokComma))
--
--cp :: HParser CP
--cp =
--    ( do n <- qname
--         m <- modifier
--         return (TagName n m)) `onFail`
--    ( do ss <- sequence
--         m <- modifier
--         return (Seq ss m)) `onFail`
--    ( do cs <- choice
--         m <- modifier
--         return (Choice cs m)) `onFail`
--    PEREF(CPPE,cp)
--
--modifier :: HParser Modifier
--modifier =
--    ( tok TokStar >> return Star) `onFail`
--    ( tok TokQuery >> return Query) `onFail`
--    ( tok TokPlus >> return Plus) `onFail`
--    ( return None)
--
--mixed :: HParser Mixed
--mixed = do
--    tok TokBraOpen
--    tok TokHash
--    word "PCDATA"
--    cont
--  where
--    cont = ( tok TokBraClose >> return PCDATA) `onFail`
--           ( do cs <- many ( do tok TokPipe
--                                n <- qname
--                                return n)
--                tok TokBraClose
--                tok TokStar
--                return (PCDATAplus cs))
--
--attlistdecl :: HParser AttListDecl
--attlistdecl = do
--    tok TokSpecialOpen
--    tok (TokSpecial ATTLISTx)
--    n <- qname `onFail` failP "missing identifier in ATTLIST"
--    ds <- many attdef
--    tok TokAnyClose `onFail` failP "missing > terminating ATTLIST"
--    return (AttListDecl n ds)
--
--attdef :: HParser AttDef
--attdef = do
--    n <- qname
--    t <- atttype `onFail` failP "missing attribute type in attlist defn"
--    d <- defaultdecl
--    return (AttDef n t d)
--
--atttype :: HParser AttType
--atttype =
--    ( word "CDATA" >> return StringType) `onFail`
--    (TokenizedType <$> tokenizedtype) `onFail`
--    (EnumeratedType <$> enumeratedtype)
--
--tokenizedtype :: HParser TokenizedType
--tokenizedtype =
--    ( word "ID" >> return ID) `onFail`
--    ( word "IDREF" >> return IDREF) `onFail`
--    ( word "IDREFS" >> return IDREFS) `onFail`
--    ( word "ENTITY" >> return ENTITY) `onFail`
--    ( word "ENTITIES" >> return ENTITIES) `onFail`
--    ( word "NMTOKEN" >> return NMTOKEN) `onFail`
--    ( word "NMTOKENS" >> return NMTOKENS)
--
--enumeratedtype :: HParser EnumeratedType
--enumeratedtype =
--    (NotationType <$> notationtype) `onFail`
--    (Enumeration <$> enumeration)
--
--notationtype :: HParser NotationType
--notationtype = do
--    word "NOTATION"
--    bracket (tok TokBraOpen) (tok TokBraClose)
--            (name `sepby1` (tok TokPipe))
--
--enumeration :: HParser Enumeration
--enumeration =
--    bracket (tok TokBraOpen) (tok TokBraClose)
--            (nmtoken `sepby1` (tok TokPipe))
--
--defaultdecl :: HParser DefaultDecl
--defaultdecl =
--    ( tok TokHash >> word "REQUIRED" >> return REQUIRED) `onFail`
--    ( tok TokHash >> word "IMPLIED" >> return IMPLIED) `onFail`
--    ( do f <- maybe (tok TokHash >> word "FIXED" >> return FIXED)
--         a <- attvalue
--         return (DefaultTo a f))
--
--conditionalsect :: HParser ConditionalSect
--conditionalsect =
--    ( do tok TokSectionOpen
--         tok (TokSection INCLUDEx)
--         tok TokSqOpen `onFail` failP "missing [ after INCLUDE"
--         i <- extsubsetdecl `onFail` failP "missing ExtSubsetDecl in INCLUDE"
--         tok TokSectionClose `onFail` failP "missing ] after INCLUDE"
--         return (IncludeSect i)) `onFail`
--    ( do tok TokSectionOpen
--         tok (TokSection IGNOREx)
--         tok TokSqOpen `onFail` failP "missing [ after IGNORE"
--         i <- many ignoresectcontents
--         tok TokSectionClose `onFail` failP "missing ] after IGNORE"
--         return (IgnoreSect i))
--
--ignoresectcontents :: HParser IgnoreSectContents
--ignoresectcontents = do
--    i <- ignore
--    is <- many (do tok TokSectionOpen
--                   ic <- ignoresectcontents
--                   tok TokSectionClose
--                   ig <- ignore
--                   return (ic,ig))
--    return (IgnoreSectContents i is)
--
--ignore :: HParser Ignore
--ignore = Ignore <$> freetext

reference :: HParser Reference
reference :: HParser Reference
reference = do
    HParser TokenT
-> HParser TokenT -> HParser Reference -> HParser Reference
forall (p :: * -> *) bra ket a.
PolyParse p =>
p bra -> p ket -> p a -> p a
bracket (TokenT -> HParser TokenT
tok TokenT
TokAmp) (TokenT -> HParser TokenT
tok TokenT
TokSemi) (Parser (Posn, TokenT) String
freetext Parser (Posn, TokenT) String
-> (String -> HParser Reference) -> HParser Reference
forall a b.
Parser (Posn, TokenT) a
-> (a -> Parser (Posn, TokenT) b) -> Parser (Posn, TokenT) b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= String -> HParser Reference
forall {m :: * -> *}. Monad m => String -> m Reference
val)
  where
    val :: String -> m Reference
val (Char
'#':Char
'x':String
i) | (Char -> Bool) -> String -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Char -> Bool
isHexDigit String
i
                    = Reference -> m Reference
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return (Reference -> m Reference)
-> (String -> Reference) -> String -> m Reference
forall b c a. (b -> c) -> (a -> b) -> a -> c
. CharRef -> Reference
RefChar (CharRef -> Reference)
-> (String -> CharRef) -> String -> Reference
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (CharRef, String) -> CharRef
forall a b. (a, b) -> a
fst ((CharRef, String) -> CharRef)
-> (String -> (CharRef, String)) -> String -> CharRef
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [(CharRef, String)] -> (CharRef, String)
forall a. HasCallStack => [a] -> a
head ([(CharRef, String)] -> (CharRef, String))
-> (String -> [(CharRef, String)]) -> String -> (CharRef, String)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [(CharRef, String)]
forall a. (Eq a, Num a) => ReadS a
readHex (String -> m Reference) -> String -> m Reference
forall a b. (a -> b) -> a -> b
$ String
i
    val (Char
'#':String
i)     | (Char -> Bool) -> String -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Char -> Bool
isDigit String
i
                    = Reference -> m Reference
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return (Reference -> m Reference)
-> (String -> Reference) -> String -> m Reference
forall b c a. (b -> c) -> (a -> b) -> a -> c
. CharRef -> Reference
RefChar (CharRef -> Reference)
-> (String -> CharRef) -> String -> Reference
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (CharRef, String) -> CharRef
forall a b. (a, b) -> a
fst ((CharRef, String) -> CharRef)
-> (String -> (CharRef, String)) -> String -> CharRef
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [(CharRef, String)] -> (CharRef, String)
forall a. HasCallStack => [a] -> a
head ([(CharRef, String)] -> (CharRef, String))
-> (String -> [(CharRef, String)]) -> String -> (CharRef, String)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [(CharRef, String)]
forall a. (Eq a, Num a) => ReadS a
readDec (String -> m Reference) -> String -> m Reference
forall a b. (a -> b) -> a -> b
$ String
i
    val String
ent         = Reference -> m Reference
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return (Reference -> m Reference)
-> (String -> Reference) -> String -> m Reference
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Reference
RefEntity (String -> m Reference) -> String -> m Reference
forall a b. (a -> b) -> a -> b
$ String
ent

{-
reference :: HParser Reference
reference =
    (RefChar <$> charref) `onFail`
    (RefEntity <$> entityref)

entityref :: HParser EntityRef
entityref = do
    n <- bracket (tok TokAmp) (commit $ tok TokSemi) name
    return n

charref :: HParser CharRef
charref = do
    bracket (tok TokAmp) (commit $ tok TokSemi) (freetext >>= readCharVal)
  where
    readCharVal ('#':'x':i) = return . fst . head . readHex $ i
    readCharVal ('#':i)     = return . fst . head . readDec $ i
    readCharVal _           = mzero
-}

--pereference :: HParser PEReference
--pereference = do
--    bracket (tok TokPercent) (tok TokSemi) nmtoken
--
--entitydecl :: HParser EntityDecl
--entitydecl =
--    (EntityGEDecl <$> gedecl) `onFail`
--    (EntityPEDecl <$> pedecl)
--
--gedecl :: HParser GEDecl
--gedecl = do
--    tok TokSpecialOpen
--    tok (TokSpecial ENTITYx)
--    n <- name
--    e <- entitydef `onFail` failP "missing entity defn in G ENTITY decl"
--    tok TokAnyClose `onFail` failP "expected > terminating G ENTITY decl"
--    return (GEDecl n e)
--
--pedecl :: HParser PEDecl
--pedecl = do
--    tok TokSpecialOpen
--    tok (TokSpecial ENTITYx)
--    tok TokPercent
--    n <- name
--    e <- pedef `onFail` failP "missing entity defn in P ENTITY decl"
--    tok TokAnyClose `onFail` failP "expected > terminating P ENTITY decl"
--    return (PEDecl n e)
--
--entitydef :: HParser EntityDef
--entitydef =
--    (DefEntityValue <$> entityvalue) `onFail`
--    ( do eid <- externalid
--         ndd <- maybe ndatadecl
--         return (DefExternalID eid ndd))
--
--pedef :: HParser PEDef
--pedef =
--    (PEDefEntityValue <$> entityvalue) `onFail`
--    (PEDefExternalID <$> externalid)

externalid :: HParser ExternalID
externalid :: HParser ExternalID
externalid =
    ( do String -> HParser ()
word String
"SYSTEM"
         SystemLiteral -> ExternalID
SYSTEM (SystemLiteral -> ExternalID)
-> Parser (Posn, TokenT) SystemLiteral -> HParser ExternalID
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser (Posn, TokenT) SystemLiteral
systemliteral) HParser ExternalID -> HParser ExternalID -> HParser ExternalID
forall t a. Parser t a -> Parser t a -> Parser t a
`onFail`
    ( do String -> HParser ()
word String
"PUBLIC"
         PubidLiteral
p <- HParser PubidLiteral
pubidliteral
         PubidLiteral -> SystemLiteral -> ExternalID
PUBLIC PubidLiteral
p (SystemLiteral -> ExternalID)
-> Parser (Posn, TokenT) SystemLiteral -> HParser ExternalID
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser (Posn, TokenT) SystemLiteral
systemliteral Parser (Posn, TokenT) SystemLiteral
-> Parser (Posn, TokenT) SystemLiteral
-> Parser (Posn, TokenT) SystemLiteral
forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` SystemLiteral -> Parser (Posn, TokenT) SystemLiteral
forall a. a -> Parser (Posn, TokenT) a
forall (m :: * -> *) a. Monad m => a -> m a
return (String -> SystemLiteral
SystemLiteral String
"")
    )

--ndatadecl :: HParser NDataDecl
--ndatadecl = do
--    word "NDATA"
--    n <- name
--    return (NDATA n)

--textdecl :: HParser TextDecl
--textdecl = do
--    tok TokPIOpen
--    (word "xml" `onFail` word "XML")
--    v <- maybe versioninfo
--    e <- encodingdecl
--    tok TokPIClose `onFail` failP "expected ?> terminating text decl"
--    return (TextDecl v e)

--extparsedent :: HParser ExtParsedEnt
--extparsedent = do
--    t <- maybe textdecl
--    (_,c) <- (content "")
--    return (ExtParsedEnt t c)
--
--extpe :: HParser ExtPE
--extpe = do
--    t <- maybe textdecl
--    e <- extsubsetdecl
--    return (ExtPE t e)

encodingdecl :: HParser EncodingDecl
encodingdecl :: HParser EncodingDecl
encodingdecl = do
    String -> HParser ()
word String
"encoding" HParser () -> HParser () -> HParser ()
forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` String -> HParser ()
word String
"ENCODING"
    TokenT -> HParser TokenT
tok TokenT
TokEqual HParser TokenT -> HParser TokenT -> HParser TokenT
forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` String -> HParser TokenT
forall a. String -> Parser (Posn, TokenT) a
failBadP String
"expected = in 'encoding' decl"
    String
f <- HParser TokenT
-> HParser TokenT
-> Parser (Posn, TokenT) String
-> Parser (Posn, TokenT) String
forall (p :: * -> *) bra ket a.
PolyParse p =>
p bra -> p ket -> p a -> p a
bracket (TokenT -> HParser TokenT
tok TokenT
TokQuote) (HParser TokenT -> HParser TokenT
forall a. Parser (Posn, TokenT) a -> Parser (Posn, TokenT) a
forall (p :: * -> *) a. Commitment p => p a -> p a
commit (HParser TokenT -> HParser TokenT)
-> HParser TokenT -> HParser TokenT
forall a b. (a -> b) -> a -> b
$ TokenT -> HParser TokenT
tok TokenT
TokQuote) Parser (Posn, TokenT) String
freetext
    EncodingDecl -> HParser EncodingDecl
forall a. a -> Parser (Posn, TokenT) a
forall (m :: * -> *) a. Monad m => a -> m a
return (String -> EncodingDecl
EncodingDecl String
f)

--notationdecl :: HParser NotationDecl
--notationdecl = do
--    tok TokSpecialOpen
--    word "NOTATION"
--    n <- name
--    e <- either externalid publicid
--    tok TokAnyClose `onFail` failP "expected > terminating NOTATION decl"
--    return (NOTATION n e)

--publicid :: HParser PublicID
--publicid = do
--    word "PUBLICID"
--    p <- pubidliteral
--    return (PUBLICID p)

--entityvalue :: HParser EntityValue
--entityvalue = do
--    evs <- bracket (tok TokQuote) (commit $ tok TokQuote) (many ev)
--    return (EntityValue evs)

--ev :: HParser EV
--ev =
--    (EVString <$> freetext) `onFail`
-- -- PEREF(EVPERef,ev) `onFail`
--    (EVRef <$> reference)

attvalue :: HParser AttValue
attvalue :: Parser (Posn, TokenT) AttValue
attvalue =
  ( do [Either String Reference]
avs <- HParser TokenT
-> HParser TokenT
-> Parser (Posn, TokenT) [Either String Reference]
-> Parser (Posn, TokenT) [Either String Reference]
forall (p :: * -> *) bra ket a.
PolyParse p =>
p bra -> p ket -> p a -> p a
bracket (TokenT -> HParser TokenT
tok TokenT
TokQuote) (HParser TokenT -> HParser TokenT
forall a. Parser (Posn, TokenT) a -> Parser (Posn, TokenT) a
forall (p :: * -> *) a. Commitment p => p a -> p a
commit (HParser TokenT -> HParser TokenT)
-> HParser TokenT -> HParser TokenT
forall a b. (a -> b) -> a -> b
$ TokenT -> HParser TokenT
tok TokenT
TokQuote)
                      (Parser (Posn, TokenT) (Either String Reference)
-> Parser (Posn, TokenT) [Either String Reference]
forall a. Parser (Posn, TokenT) a -> Parser (Posn, TokenT) [a]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many (Parser (Posn, TokenT) String
-> HParser Reference
-> Parser (Posn, TokenT) (Either String Reference)
forall a b. HParser a -> HParser b -> HParser (Either a b)
either Parser (Posn, TokenT) String
freetext HParser Reference
reference))
       AttValue -> Parser (Posn, TokenT) AttValue
forall a. a -> Parser (Posn, TokenT) a
forall (m :: * -> *) a. Monad m => a -> m a
return ([Either String Reference] -> AttValue
AttValue [Either String Reference]
avs) ) Parser (Posn, TokenT) AttValue
-> Parser (Posn, TokenT) AttValue -> Parser (Posn, TokenT) AttValue
forall t a. Parser t a -> Parser t a -> Parser t a
`onFail`
  ( do String
v <- Parser (Posn, TokenT) String
nmtoken
       String
s <- (TokenT -> HParser TokenT
tok TokenT
TokPercent HParser TokenT
-> Parser (Posn, TokenT) String -> Parser (Posn, TokenT) String
forall a b.
Parser (Posn, TokenT) a
-> Parser (Posn, TokenT) b -> Parser (Posn, TokenT) b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> String -> Parser (Posn, TokenT) String
forall a. a -> Parser (Posn, TokenT) a
forall (m :: * -> *) a. Monad m => a -> m a
return String
"%") Parser (Posn, TokenT) String
-> Parser (Posn, TokenT) String -> Parser (Posn, TokenT) String
forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` String -> Parser (Posn, TokenT) String
forall a. a -> Parser (Posn, TokenT) a
forall (m :: * -> *) a. Monad m => a -> m a
return String
""
       AttValue -> Parser (Posn, TokenT) AttValue
forall a. a -> Parser (Posn, TokenT) a
forall (m :: * -> *) a. Monad m => a -> m a
return ([Either String Reference] -> AttValue
AttValue [String -> Either String Reference
forall a b. a -> Either a b
Left (String
vString -> String -> String
forall a. [a] -> [a] -> [a]
++String
s)]) ) Parser (Posn, TokenT) AttValue
-> Parser (Posn, TokenT) AttValue -> Parser (Posn, TokenT) AttValue
forall t a. Parser t a -> Parser t a -> Parser t a
`onFail`
  ( do String
s <- [Parser (Posn, TokenT) String] -> Parser (Posn, TokenT) String
forall (p :: * -> *) a. PolyParse p => [p a] -> p a
oneOf [ TokenT -> HParser TokenT
tok TokenT
TokPlus HParser TokenT
-> Parser (Posn, TokenT) String -> Parser (Posn, TokenT) String
forall a b.
Parser (Posn, TokenT) a
-> Parser (Posn, TokenT) b -> Parser (Posn, TokenT) b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> String -> Parser (Posn, TokenT) String
forall a. a -> Parser (Posn, TokenT) a
forall (m :: * -> *) a. Monad m => a -> m a
return String
"+"
                  , TokenT -> HParser TokenT
tok TokenT
TokHash HParser TokenT
-> Parser (Posn, TokenT) String -> Parser (Posn, TokenT) String
forall a b.
Parser (Posn, TokenT) a
-> Parser (Posn, TokenT) b -> Parser (Posn, TokenT) b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> String -> Parser (Posn, TokenT) String
forall a. a -> Parser (Posn, TokenT) a
forall (m :: * -> *) a. Monad m => a -> m a
return String
"#"
                  ]
       String
v <- Parser (Posn, TokenT) String
nmtoken
       AttValue -> Parser (Posn, TokenT) AttValue
forall a. a -> Parser (Posn, TokenT) a
forall (m :: * -> *) a. Monad m => a -> m a
return ([Either String Reference] -> AttValue
AttValue [String -> Either String Reference
forall a b. a -> Either a b
Left (String
sString -> String -> String
forall a. [a] -> [a] -> [a]
++String
v)]) ) Parser (Posn, TokenT) AttValue
-> Parser (Posn, TokenT) AttValue -> Parser (Posn, TokenT) AttValue
forall t a. Parser t a -> Parser t a -> Parser t a
`onFail`
  String -> Parser (Posn, TokenT) AttValue
forall a. String -> Parser (Posn, TokenT) a
failP String
"Badly formatted attribute value"

systemliteral :: HParser SystemLiteral
systemliteral :: Parser (Posn, TokenT) SystemLiteral
systemliteral = do
    String
s <- HParser TokenT
-> HParser TokenT
-> Parser (Posn, TokenT) String
-> Parser (Posn, TokenT) String
forall (p :: * -> *) bra ket a.
PolyParse p =>
p bra -> p ket -> p a -> p a
bracket (TokenT -> HParser TokenT
tok TokenT
TokQuote) (HParser TokenT -> HParser TokenT
forall a. Parser (Posn, TokenT) a -> Parser (Posn, TokenT) a
forall (p :: * -> *) a. Commitment p => p a -> p a
commit (HParser TokenT -> HParser TokenT)
-> HParser TokenT -> HParser TokenT
forall a b. (a -> b) -> a -> b
$ TokenT -> HParser TokenT
tok TokenT
TokQuote) Parser (Posn, TokenT) String
freetext
    SystemLiteral -> Parser (Posn, TokenT) SystemLiteral
forall a. a -> Parser (Posn, TokenT) a
forall (m :: * -> *) a. Monad m => a -> m a
return (String -> SystemLiteral
SystemLiteral String
s)            -- note: need to fold &...; escapes

pubidliteral :: HParser PubidLiteral
pubidliteral :: HParser PubidLiteral
pubidliteral = do
    String
s <- HParser TokenT
-> HParser TokenT
-> Parser (Posn, TokenT) String
-> Parser (Posn, TokenT) String
forall (p :: * -> *) bra ket a.
PolyParse p =>
p bra -> p ket -> p a -> p a
bracket (TokenT -> HParser TokenT
tok TokenT
TokQuote) (HParser TokenT -> HParser TokenT
forall a. Parser (Posn, TokenT) a -> Parser (Posn, TokenT) a
forall (p :: * -> *) a. Commitment p => p a -> p a
commit (HParser TokenT -> HParser TokenT)
-> HParser TokenT -> HParser TokenT
forall a b. (a -> b) -> a -> b
$ TokenT -> HParser TokenT
tok TokenT
TokQuote) Parser (Posn, TokenT) String
freetext
    PubidLiteral -> HParser PubidLiteral
forall a. a -> Parser (Posn, TokenT) a
forall (m :: * -> *) a. Monad m => a -> m a
return (String -> PubidLiteral
PubidLiteral String
s)             -- note: need to fold &...; escapes

chardata :: HParser CharData
chardata :: Parser (Posn, TokenT) String
chardata = Parser (Posn, TokenT) String
freetext -- <&> CharData