module Text.XML.HaXml.Html.Parse
( htmlParse
, htmlParse'
) where
import Prelude hiding (either,maybe,sequence)
import qualified Prelude (either)
import Data.Maybe hiding (maybe)
import Data.Char (toLower, isDigit, isHexDigit)
import Numeric (readDec,readHex)
import Control.Monad
import Text.XML.HaXml.Types
import Text.XML.HaXml.Namespaces
import Text.XML.HaXml.Lex
import Text.XML.HaXml.Posn
import Text.ParserCombinators.Poly.Plain
#if defined(DEBUG)
# if ( defined(__GLASGOW_HASKELL__) && __GLASGOW_HASKELL__ > 502 ) || \
( defined(__NHC__) && __NHC__ > 114 ) || defined(__HUGS__)
import Debug.Trace(trace)
# elif defined(__GLASGOW_HASKELL__)
import IOExts(trace)
# elif defined(__NHC__) || defined(__HBC__)
import NonStdTrace
# endif
debug :: Monad m => String -> m ()
debug s = trace s (return ())
#else
debug :: Monad m => String -> m ()
debug :: forall (m :: * -> *). Monad m => String -> m ()
debug String
_ = forall (m :: * -> *) a. Monad m => a -> m a
return ()
#endif
htmlParse :: String -> String -> Document Posn
htmlParse :: String -> String -> Document Posn
htmlParse String
file = forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
Prelude.either forall a. HasCallStack => String -> a
error forall a. a -> a
id forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String -> Either String (Document Posn)
htmlParse' String
file
htmlParse' :: String -> String -> Either String (Document Posn)
htmlParse' :: String -> String -> Either String (Document Posn)
htmlParse' String
file = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall i. Document i -> Document i
simplify forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a, b) -> a
fst
forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall t a. Parser t a -> [t] -> (Either String a, [t])
runParser HParser (Document Posn)
document forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String -> [(Posn, TokenT)]
xmlLex String
file
simplify :: Document i -> Document i
simplify :: forall i. Document i -> Document i
simplify (Document Prolog
p SymTab EntityDef
st (Elem QName
n [Attribute]
avs [Content i]
cs) [Misc]
ms) =
forall i.
Prolog -> SymTab EntityDef -> Element i -> [Misc] -> Document i
Document Prolog
p SymTab EntityDef
st (forall i. QName -> [Attribute] -> [Content i] -> Element i
Elem QName
n [Attribute]
avs (forall {i}. (Content i -> Bool) -> [Content i] -> [Content i]
deepfilter forall {i}. Content i -> Bool
simp [Content i]
cs)) [Misc]
ms
where
simp :: Content i -> Bool
simp (CElem (Elem (N String
"null") [] []) i
_) = Bool
False
simp (CElem (Elem QName
t [Attribute]
_ []) i
_)
| QName -> String
localName QName
t forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"font",String
"p",String
"i",String
"b",String
"em",String
"tt",String
"big",String
"small"]
= Bool
False
simp Content i
_ = Bool
True
deepfilter :: (Content i -> Bool) -> [Content i] -> [Content i]
deepfilter Content i -> Bool
f =
forall a. (a -> Bool) -> [a] -> [a]
filter Content i -> Bool
f forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a -> b) -> [a] -> [b]
map (\Content i
c-> case Content i
c of
CElem (Elem QName
t [Attribute]
avs [Content i]
cs) i
i
-> forall i. Element i -> i -> Content i
CElem (forall i. QName -> [Attribute] -> [Content i] -> Element i
Elem QName
t [Attribute]
avs ((Content i -> Bool) -> [Content i] -> [Content i]
deepfilter Content i -> Bool
f [Content i]
cs)) i
i
Content i
_ -> Content i
c)
selfclosingtags :: [String]
selfclosingtags :: [String]
selfclosingtags = [String
"img",String
"hr",String
"br",String
"meta",String
"col",String
"link",String
"base"
,String
"param",String
"area",String
"frame",String
"input"]
closeInnerTags :: [(String,[String])]
closeInnerTags :: [(String, [String])]
closeInnerTags =
[ (String
"ul", [String
"li"])
, (String
"ol", [String
"li"])
, (String
"dl", [String
"dt",String
"dd"])
, (String
"tr", [String
"th",String
"td"])
, (String
"div", [String
"p"])
, (String
"thead", [String
"th",String
"tr",String
"td"])
, (String
"tfoot", [String
"th",String
"tr",String
"td"])
, (String
"tbody", [String
"th",String
"tr",String
"td"])
, (String
"table", [String
"th",String
"tr",String
"td",String
"thead",String
"tfoot",String
"tbody"])
, (String
"caption", [String
"p"])
, (String
"th", [String
"p"])
, (String
"td", [String
"p"])
, (String
"li", [String
"p"])
, (String
"dt", [String
"p"])
, (String
"dd", [String
"p"])
, (String
"object", [String
"p"])
, (String
"map", [String
"p"])
, (String
"body", [String
"p"])
]
closes :: Name -> Name -> Bool
String
"a" closes :: String -> String -> Bool
`closes` String
"a" = Bool
True
String
"li" `closes` String
"li" = Bool
True
String
"th" `closes` String
t | String
t forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"th",String
"td"] = Bool
True
String
"td" `closes` String
t | String
t forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"th",String
"td"] = Bool
True
String
"tr" `closes` String
t | String
t forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"th",String
"td",String
"tr"] = Bool
True
String
"dt" `closes` String
t | String
t forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"dt",String
"dd"] = Bool
True
String
"dd" `closes` String
t | String
t forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"dt",String
"dd"] = Bool
True
String
"form" `closes` String
"form" = Bool
True
String
"label" `closes` String
"label" = Bool
True
String
_ `closes` String
"option" = Bool
True
String
"thead" `closes` String
t | String
t forall a. Eq a => a -> a -> Bool
== String
"colgroup" = Bool
True
String
"tfoot" `closes` String
t | String
t forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"thead",String
"colgroup"] = Bool
True
String
"tbody" `closes` String
t | String
t forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"tbody",String
"tfoot",String
"thead",String
"colgroup"] = Bool
True
String
"colgroup" `closes` String
"colgroup" = Bool
True
String
t `closes` String
"p"
| String
t forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"p",String
"h1",String
"h2",String
"h3",String
"h4",String
"h5",String
"h6"
,String
"hr",String
"div",String
"ul",String
"dl",String
"ol",String
"table"] = Bool
True
String
_ `closes` String
_ = Bool
False
type HParser a = Parser (Posn,TokenT) a
tok :: TokenT -> HParser TokenT
tok :: TokenT -> HParser TokenT
tok TokenT
t = do (Posn
p,TokenT
t') <- forall t. Parser t t
next
case TokenT
t' of TokError String
_ -> forall a.
(String -> HParser a) -> String -> Posn -> TokenT -> HParser a
report forall (p :: * -> *) a. PolyParse p => String -> p a
failBad (forall a. Show a => a -> String
show TokenT
t) Posn
p TokenT
t'
TokenT
_ | TokenT
t'forall a. Eq a => a -> a -> Bool
==TokenT
t -> forall (m :: * -> *) a. Monad m => a -> m a
return TokenT
t
| Bool
otherwise -> forall a.
(String -> HParser a) -> String -> Posn -> TokenT -> HParser a
report forall (m :: * -> *) a. MonadFail m => String -> m a
fail (forall a. Show a => a -> String
show TokenT
t) Posn
p TokenT
t'
qname :: HParser QName
qname :: HParser QName
qname = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap String -> QName
N HParser String
name
name :: HParser Name
name :: HParser String
name = do (Posn
p,TokenT
tok) <- forall t. Parser t t
next
case TokenT
tok of
TokName String
s -> forall (m :: * -> *) a. Monad m => a -> m a
return String
s
TokError String
_ -> forall a.
(String -> HParser a) -> String -> Posn -> TokenT -> HParser a
report forall (p :: * -> *) a. PolyParse p => String -> p a
failBad String
"a name" Posn
p TokenT
tok
TokenT
_ -> forall a.
(String -> HParser a) -> String -> Posn -> TokenT -> HParser a
report forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
"a name" Posn
p TokenT
tok
string, freetext :: HParser String
string :: HParser String
string = do (Posn
p,TokenT
t) <- forall t. Parser t t
next
case TokenT
t of TokName String
s -> forall (m :: * -> *) a. Monad m => a -> m a
return String
s
TokenT
_ -> forall a.
(String -> HParser a) -> String -> Posn -> TokenT -> HParser a
report forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
"text" Posn
p TokenT
t
freetext :: HParser String
freetext = do (Posn
p,TokenT
t) <- forall t. Parser t t
next
case TokenT
t of TokFreeText String
s -> forall (m :: * -> *) a. Monad m => a -> m a
return String
s
TokenT
_ -> forall a.
(String -> HParser a) -> String -> Posn -> TokenT -> HParser a
report forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
"text" Posn
p TokenT
t
maybe :: HParser a -> HParser (Maybe a)
maybe :: forall a. HParser a -> HParser (Maybe a)
maybe HParser a
p =
(forall a. a -> Maybe a
Just forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> HParser a
p) forall t a. Parser t a -> Parser t a -> Parser t a
`onFail`
forall (m :: * -> *) a. Monad m => a -> m a
return forall a. Maybe a
Nothing
either :: HParser a -> HParser b -> HParser (Either a b)
either :: forall a b. HParser a -> HParser b -> HParser (Either a b)
either HParser a
p HParser b
q =
(forall a b. a -> Either a b
Left forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> HParser a
p) forall t a. Parser t a -> Parser t a -> Parser t a
`onFail`
(forall a b. b -> Either a b
Right forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> HParser b
q)
word :: String -> HParser ()
word :: String -> HParser ()
word String
s = do { (Posn, TokenT)
x <- forall t. Parser t t
next
; case (Posn, TokenT)
x of
(Posn
_p,TokName String
n) | String
sforall a. Eq a => a -> a -> Bool
==String
n -> forall (m :: * -> *) a. Monad m => a -> m a
return ()
(Posn
_p,TokFreeText String
n) | String
sforall a. Eq a => a -> a -> Bool
==String
n -> forall (m :: * -> *) a. Monad m => a -> m a
return ()
( Posn
p,t :: TokenT
t@(TokError String
_)) -> forall a.
(String -> HParser a) -> String -> Posn -> TokenT -> HParser a
report forall (p :: * -> *) a. PolyParse p => String -> p a
failBad (forall a. Show a => a -> String
show String
s) Posn
p TokenT
t
( Posn
p,TokenT
t) -> forall a.
(String -> HParser a) -> String -> Posn -> TokenT -> HParser a
report forall (m :: * -> *) a. MonadFail m => String -> m a
fail (forall a. Show a => a -> String
show String
s) Posn
p TokenT
t
}
posn :: HParser Posn
posn :: HParser Posn
posn = do { x :: (Posn, TokenT)
x@(Posn
p,TokenT
_) <- forall t. Parser t t
next
; forall t. [t] -> Parser t ()
reparse [(Posn, TokenT)
x]
; forall (m :: * -> *) a. Monad m => a -> m a
return Posn
p
} forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` forall (m :: * -> *) a. Monad m => a -> m a
return Posn
noPos
nmtoken :: HParser NmToken
nmtoken :: HParser String
nmtoken = HParser String
string forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` HParser String
freetext
failP, failBadP :: String -> HParser a
failP :: forall a. String -> HParser a
failP String
msg = do { Posn
p <- HParser Posn
posn; forall (m :: * -> *) a. MonadFail m => String -> m a
fail (String
msgforall a. [a] -> [a] -> [a]
++String
"\n at "forall a. [a] -> [a] -> [a]
++forall a. Show a => a -> String
show Posn
p) }
failBadP :: forall a. String -> HParser a
failBadP String
msg = do { Posn
p <- HParser Posn
posn; forall (p :: * -> *) a. PolyParse p => String -> p a
failBad (String
msgforall a. [a] -> [a] -> [a]
++String
"\n at "forall a. [a] -> [a] -> [a]
++forall a. Show a => a -> String
show Posn
p) }
report :: (String->HParser a) -> String -> Posn -> TokenT -> HParser a
report :: forall a.
(String -> HParser a) -> String -> Posn -> TokenT -> HParser a
report String -> HParser a
fail String
expect Posn
p TokenT
t = String -> HParser a
fail (String
"Expected "forall a. [a] -> [a] -> [a]
++forall a. Show a => a -> String
show String
expectforall a. [a] -> [a] -> [a]
++String
" but found "forall a. [a] -> [a] -> [a]
++forall a. Show a => a -> String
show TokenT
t
forall a. [a] -> [a] -> [a]
++String
"\n at "forall a. [a] -> [a] -> [a]
++forall a. Show a => a -> String
show Posn
p)
adjustErrP :: HParser a -> (String->String) -> HParser a
HParser a
p adjustErrP :: forall a. HParser a -> (String -> String) -> HParser a
`adjustErrP` String -> String
f = HParser a
p forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` do Posn
pn <- HParser Posn
posn
(HParser a
p forall (p :: * -> *) a.
Commitment p =>
p a -> (String -> String) -> p a
`adjustErr` String -> String
f) forall (p :: * -> *) a.
Commitment p =>
p a -> (String -> String) -> p a
`adjustErr` (forall a. [a] -> [a] -> [a]
++forall a. Show a => a -> String
show Posn
pn)
document :: HParser (Document Posn)
document :: HParser (Document Posn)
document = do
Prolog
p <- Parser (Posn, TokenT) Prolog
prolog forall (p :: * -> *) a.
Commitment p =>
p a -> (String -> String) -> p a
`adjustErr` (String
"unrecognisable XML prolog\n"forall a. [a] -> [a] -> [a]
++)
[(Stack, Element Posn)]
ht <- forall (p :: * -> *) a. PolyParse p => p a -> p [a]
many1 (String -> HParser (Stack, Element Posn)
element String
"HTML document")
[Misc]
ms <- forall (f :: * -> *) a. Alternative f => f a -> f [a]
many HParser Misc
misc
forall (m :: * -> *) a. Monad m => a -> m a
return (forall i.
Prolog -> SymTab EntityDef -> Element i -> [Misc] -> Document i
Document Prolog
p forall a. SymTab a
emptyST (case forall a b. (a -> b) -> [a] -> [b]
map forall a b. (a, b) -> b
snd [(Stack, Element Posn)]
ht of
[Element Posn
e] -> Element Posn
e
[Element Posn]
es -> forall i. QName -> [Attribute] -> [Content i] -> Element i
Elem (String -> QName
N String
"html") [] (forall a b. (a -> b) -> [a] -> [b]
map Element Posn -> Content Posn
mkCElem [Element Posn]
es))
[Misc]
ms)
where mkCElem :: Element Posn -> Content Posn
mkCElem Element Posn
e = forall i. Element i -> i -> Content i
CElem Element Posn
e Posn
noPos
comment :: HParser Comment
= do
forall (p :: * -> *) bra ket a.
PolyParse p =>
p bra -> p ket -> p a -> p a
bracket (TokenT -> HParser TokenT
tok TokenT
TokCommentOpen) (TokenT -> HParser TokenT
tok TokenT
TokCommentClose) HParser String
freetext
processinginstruction :: HParser ProcessingInstruction
processinginstruction :: HParser ProcessingInstruction
processinginstruction = do
TokenT -> HParser TokenT
tok TokenT
TokPIOpen
forall (p :: * -> *) a. Commitment p => p a -> p a
commit forall a b. (a -> b) -> a -> b
$ do
String
n <- HParser String
string forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` forall a. String -> HParser a
failP String
"processing instruction has no target"
String
f <- HParser String
freetext
(TokenT -> HParser TokenT
tok TokenT
TokPIClose forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` TokenT -> HParser TokenT
tok TokenT
TokAnyClose) forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` forall a. String -> HParser a
failP String
"missing ?> or >"
forall (m :: * -> *) a. Monad m => a -> m a
return (String
n, String
f)
cdsect :: HParser CDSect
cdsect :: HParser String
cdsect = do
TokenT -> HParser TokenT
tok TokenT
TokSectionOpen
forall (p :: * -> *) bra ket a.
PolyParse p =>
p bra -> p ket -> p a -> p a
bracket (TokenT -> HParser TokenT
tok (Section -> TokenT
TokSection Section
CDATAx)) (forall (p :: * -> *) a. Commitment p => p a -> p a
commit forall a b. (a -> b) -> a -> b
$ TokenT -> HParser TokenT
tok TokenT
TokSectionClose) HParser String
chardata
prolog :: HParser Prolog
prolog :: Parser (Posn, TokenT) Prolog
prolog = do
Maybe XMLDecl
x <- forall a. HParser a -> HParser (Maybe a)
maybe HParser XMLDecl
xmldecl
[Misc]
m1 <- forall (f :: * -> *) a. Alternative f => f a -> f [a]
many HParser Misc
misc
Maybe DocTypeDecl
dtd <- forall a. HParser a -> HParser (Maybe a)
maybe HParser DocTypeDecl
doctypedecl
[Misc]
m2 <- forall (f :: * -> *) a. Alternative f => f a -> f [a]
many HParser Misc
misc
forall (m :: * -> *) a. Monad m => a -> m a
return (Maybe XMLDecl -> [Misc] -> Maybe DocTypeDecl -> [Misc] -> Prolog
Prolog Maybe XMLDecl
x [Misc]
m1 Maybe DocTypeDecl
dtd [Misc]
m2)
xmldecl :: HParser XMLDecl
xmldecl :: HParser XMLDecl
xmldecl = do
TokenT -> HParser TokenT
tok TokenT
TokPIOpen
String -> HParser ()
word String
"xml" forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` String -> HParser ()
word String
"XML"
Posn
p <- HParser Posn
posn
String
s <- HParser String
freetext
TokenT -> HParser TokenT
tok TokenT
TokPIClose forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` forall a. String -> HParser a
failBadP String
"missing ?> in <?xml ...?>"
(forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
Prelude.either forall a. String -> HParser a
failP forall (m :: * -> *) a. Monad m => a -> m a
return forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a, b) -> a
fst forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall t a. Parser t a -> [t] -> (Either String a, [t])
runParser HParser XMLDecl
aux forall b c a. (b -> c) -> (a -> b) -> a -> c
. Posn -> String -> [(Posn, TokenT)]
xmlReLex Posn
p) String
s
where
aux :: HParser XMLDecl
aux = do
String
v <- HParser String
versioninfo forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` forall a. String -> HParser a
failP String
"missing XML version info"
Maybe EncodingDecl
e <- forall a. HParser a -> HParser (Maybe a)
maybe HParser EncodingDecl
encodingdecl
Maybe Bool
s <- forall a. HParser a -> HParser (Maybe a)
maybe HParser Bool
sddecl
forall (m :: * -> *) a. Monad m => a -> m a
return (String -> Maybe EncodingDecl -> Maybe Bool -> XMLDecl
XMLDecl String
v Maybe EncodingDecl
e Maybe Bool
s)
versioninfo :: HParser VersionInfo
versioninfo :: HParser String
versioninfo = do
String -> HParser ()
word String
"version" forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` String -> HParser ()
word String
"VERSION"
TokenT -> HParser TokenT
tok TokenT
TokEqual
forall (p :: * -> *) bra ket a.
PolyParse p =>
p bra -> p ket -> p a -> p a
bracket (TokenT -> HParser TokenT
tok TokenT
TokQuote) (forall (p :: * -> *) a. Commitment p => p a -> p a
commit forall a b. (a -> b) -> a -> b
$ TokenT -> HParser TokenT
tok TokenT
TokQuote) HParser String
freetext
misc :: HParser Misc
misc :: HParser Misc
misc =
forall (p :: * -> *) a. Commitment p => [(String, p a)] -> p a
oneOf' [ (String
"<!--comment-->", String -> Misc
Comment forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> HParser String
comment)
, (String
"<?PI?>", ProcessingInstruction -> Misc
PI forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> HParser ProcessingInstruction
processinginstruction)
]
doctypedecl :: HParser DocTypeDecl
doctypedecl :: HParser DocTypeDecl
doctypedecl = do
TokenT -> HParser TokenT
tok TokenT
TokSpecialOpen
TokenT -> HParser TokenT
tok (Special -> TokenT
TokSpecial Special
DOCTYPEx)
forall (p :: * -> *) a. Commitment p => p a -> p a
commit forall a b. (a -> b) -> a -> b
$ do
QName
n <- HParser QName
qname
Maybe ExternalID
eid <- forall a. HParser a -> HParser (Maybe a)
maybe HParser ExternalID
externalid
TokenT -> HParser TokenT
tok TokenT
TokAnyClose forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` forall a. String -> HParser a
failP String
"missing > in DOCTYPE decl"
forall (m :: * -> *) a. Monad m => a -> m a
return (QName -> Maybe ExternalID -> [MarkupDecl] -> DocTypeDecl
DTD QName
n Maybe ExternalID
eid [])
sddecl :: HParser SDDecl
sddecl :: HParser Bool
sddecl = do
String -> HParser ()
word String
"standalone" forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` String -> HParser ()
word String
"STANDALONE"
forall (p :: * -> *) a. Commitment p => p a -> p a
commit forall a b. (a -> b) -> a -> b
$ do
TokenT -> HParser TokenT
tok TokenT
TokEqual forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` forall a. String -> HParser a
failP String
"missing = in 'standalone' decl"
forall (p :: * -> *) bra ket a.
PolyParse p =>
p bra -> p ket -> p a -> p a
bracket (TokenT -> HParser TokenT
tok TokenT
TokQuote) (forall (p :: * -> *) a. Commitment p => p a -> p a
commit forall a b. (a -> b) -> a -> b
$ TokenT -> HParser TokenT
tok TokenT
TokQuote)
( (String -> HParser ()
word String
"yes" forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return Bool
True) forall t a. Parser t a -> Parser t a -> Parser t a
`onFail`
(String -> HParser ()
word String
"no" forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return Bool
False) forall t a. Parser t a -> Parser t a -> Parser t a
`onFail`
forall a. String -> HParser a
failP String
"'standalone' decl requires 'yes' or 'no' value" )
type Stack = [(Name,[Attribute])]
element :: Name -> HParser (Stack,Element Posn)
element :: String -> HParser (Stack, Element Posn)
element String
ctx =
do
TokenT -> HParser TokenT
tok TokenT
TokAnyOpen
(ElemTag (N String
e) [Attribute]
avs) <- HParser ElemTag
elemtag
( if String
e String -> String -> Bool
`closes` String
ctx then
( do forall (m :: * -> *). Monad m => String -> m ()
debug String
"/"
[TokenT] -> HParser ()
unparse ([TokenT
TokEndOpen, String -> TokenT
TokName String
ctx, TokenT
TokAnyClose,
TokenT
TokAnyOpen, String -> TokenT
TokName String
e] forall a. [a] -> [a] -> [a]
++ [Attribute] -> [TokenT]
reformatAttrs [Attribute]
avs)
forall (m :: * -> *) a. Monad m => a -> m a
return ([], forall i. QName -> [Attribute] -> [Content i] -> Element i
Elem (String -> QName
N String
"null") [] []))
else if String
e forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String]
selfclosingtags then
( do TokenT -> HParser TokenT
tok TokenT
TokEndClose
forall (m :: * -> *). Monad m => String -> m ()
debug (String
eforall a. [a] -> [a] -> [a]
++String
"[+]")
forall (m :: * -> *) a. Monad m => a -> m a
return ([], forall i. QName -> [Attribute] -> [Content i] -> Element i
Elem (String -> QName
N String
e) [Attribute]
avs [])) forall t a. Parser t a -> Parser t a -> Parser t a
`onFail`
( do TokenT -> HParser TokenT
tok TokenT
TokAnyClose
forall (m :: * -> *). Monad m => String -> m ()
debug (String
eforall a. [a] -> [a] -> [a]
++String
"[+]")
forall (m :: * -> *) a. Monad m => a -> m a
return ([], forall i. QName -> [Attribute] -> [Content i] -> Element i
Elem (String -> QName
N String
e) [Attribute]
avs []))
else
( do TokenT -> HParser TokenT
tok TokenT
TokEndClose
forall (m :: * -> *). Monad m => String -> m ()
debug (String
eforall a. [a] -> [a] -> [a]
++String
"[]")
forall (m :: * -> *) a. Monad m => a -> m a
return ([], forall i. QName -> [Attribute] -> [Content i] -> Element i
Elem (String -> QName
N String
e) [Attribute]
avs [])) forall t a. Parser t a -> Parser t a -> Parser t a
`onFail`
( do TokenT -> HParser TokenT
tok TokenT
TokAnyClose forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` forall a. String -> HParser a
failP String
"missing > or /> in element tag"
forall (m :: * -> *). Monad m => String -> m ()
debug (String
eforall a. [a] -> [a] -> [a]
++String
"[")
[(Stack, Content Posn)]
zz <- forall (p :: * -> *) a z. PolyParse p => p a -> p z -> p [a]
manyFinally (String -> HParser (Stack, Content Posn)
content String
e)
(TokenT -> HParser TokenT
tok TokenT
TokEndOpen)
(N String
n) <- HParser QName
qname
forall (p :: * -> *) a. Commitment p => p a -> p a
commit (TokenT -> HParser TokenT
tok TokenT
TokAnyClose)
forall (m :: * -> *). Monad m => String -> m ()
debug String
"]"
let ([Stack]
ss,[Content Posn]
cs) = forall a b. [(a, b)] -> ([a], [b])
unzip [(Stack, Content Posn)]
zz
let s :: Stack
s = if forall (t :: * -> *) a. Foldable t => t a -> Bool
null [Stack]
ss then [] else forall a. [a] -> a
last [Stack]
ss
( if String
e forall a. Eq a => a -> a -> Bool
== (forall a b. (a -> b) -> [a] -> [b]
map Char -> Char
toLower String
n :: Name) then
do [TokenT] -> HParser ()
unparse (Stack -> [TokenT]
reformatTags (String -> Stack -> Stack
closeInner String
e Stack
s))
forall (m :: * -> *). Monad m => String -> m ()
debug String
"^"
forall (m :: * -> *) a. Monad m => a -> m a
return ([], forall i. QName -> [Attribute] -> [Content i] -> Element i
Elem (String -> QName
N String
e) [Attribute]
avs [Content Posn]
cs)
else
do [TokenT] -> HParser ()
unparse [TokenT
TokEndOpen, String -> TokenT
TokName String
n, TokenT
TokAnyClose]
forall (m :: * -> *). Monad m => String -> m ()
debug String
"-"
forall (m :: * -> *) a. Monad m => a -> m a
return ((String
e,[Attribute]
avs)forall a. a -> [a] -> [a]
:Stack
s, forall i. QName -> [Attribute] -> [Content i] -> Element i
Elem (String -> QName
N String
e) [Attribute]
avs [Content Posn]
cs))
) forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` forall a. String -> HParser a
failP (String
"failed to repair non-matching tags in context: "forall a. [a] -> [a] -> [a]
++String
ctx))
closeInner :: Name -> [(Name,[Attribute])] -> [(Name,[Attribute])]
closeInner :: String -> Stack -> Stack
closeInner String
c Stack
ts =
case forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup String
c [(String, [String])]
closeInnerTags of
(Just [String]
these) -> forall a. (a -> Bool) -> [a] -> [a]
filter ((forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` [String]
these)forall b c a. (b -> c) -> (a -> b) -> a -> c
.forall a b. (a, b) -> a
fst) Stack
ts
Maybe [String]
Nothing -> Stack
ts
unparse :: [TokenT] -> Parser (Posn, TokenT) ()
unparse :: [TokenT] -> HParser ()
unparse [TokenT]
ts = do Posn
p <- HParser Posn
posn
forall t. [t] -> Parser t ()
reparse (forall a b. [a] -> [b] -> [(a, b)]
zip (forall a. a -> [a]
repeat Posn
p) [TokenT]
ts)
reformatAttrs :: [(QName, AttValue)] -> [TokenT]
reformatAttrs :: [Attribute] -> [TokenT]
reformatAttrs = forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap Attribute -> [TokenT]
f0
where f0 :: Attribute -> [TokenT]
f0 (QName
a, v :: AttValue
v@(AttValue [Either String Reference]
_)) = [ String -> TokenT
TokName (QName -> String
printableName QName
a), TokenT
TokEqual
, TokenT
TokQuote, String -> TokenT
TokFreeText (forall a. Show a => a -> String
show AttValue
v), TokenT
TokQuote ]
reformatTags :: [(String, [(QName, AttValue)])] -> [TokenT]
reformatTags :: Stack -> [TokenT]
reformatTags = forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap (String, [Attribute]) -> [TokenT]
f0
where f0 :: (String, [Attribute]) -> [TokenT]
f0 (String
t,[Attribute]
avs) = [TokenT
TokAnyOpen, String -> TokenT
TokName String
t]forall a. [a] -> [a] -> [a]
++[Attribute] -> [TokenT]
reformatAttrs [Attribute]
avsforall a. [a] -> [a] -> [a]
++[TokenT
TokAnyClose]
content :: Name -> HParser (Stack,Content Posn)
content :: String -> HParser (Stack, Content Posn)
content String
ctx = do { Posn
p <- HParser Posn
posn ; Posn -> HParser (Stack, Content Posn)
content' Posn
p }
where content' :: Posn -> HParser (Stack, Content Posn)
content' Posn
p = forall (p :: * -> *) a. Commitment p => [(String, p a)] -> p a
oneOf'
[ ( String
"element", String -> HParser (Stack, Element Posn)
element String
ctx forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \(Stack
s,Element Posn
e)-> forall (m :: * -> *) a. Monad m => a -> m a
return (Stack
s, forall i. Element i -> i -> Content i
CElem Element Posn
e Posn
p))
, ( String
"chardata", HParser String
chardata forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \String
s-> forall (m :: * -> *) a. Monad m => a -> m a
return ([], forall i. Bool -> String -> i -> Content i
CString Bool
False String
s Posn
p))
, ( String
"reference", HParser Reference
reference forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \Reference
r-> forall (m :: * -> *) a. Monad m => a -> m a
return ([], forall i. Reference -> i -> Content i
CRef Reference
r Posn
p))
, ( String
"cdsect", HParser String
cdsect forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \String
c-> forall (m :: * -> *) a. Monad m => a -> m a
return ([], forall i. Bool -> String -> i -> Content i
CString Bool
True String
c Posn
p))
, ( String
"misc", HParser Misc
misc forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \Misc
m-> forall (m :: * -> *) a. Monad m => a -> m a
return ([], forall i. Misc -> i -> Content i
CMisc Misc
m Posn
p))
] forall a. HParser a -> (String -> String) -> HParser a
`adjustErrP` (String
"when looking for a content item,\n"forall a. [a] -> [a] -> [a]
++)
elemtag :: HParser ElemTag
elemtag :: HParser ElemTag
elemtag = do
(N String
n) <- HParser QName
qname forall (p :: * -> *) a.
PolyParse p =>
p a -> (String -> String) -> p a
`adjustErrBad` (String
"malformed element tag\n"forall a. [a] -> [a] -> [a]
++)
[Attribute]
as <- forall (f :: * -> *) a. Alternative f => f a -> f [a]
many HParser Attribute
attribute
forall (m :: * -> *) a. Monad m => a -> m a
return (QName -> [Attribute] -> ElemTag
ElemTag (String -> QName
N forall a b. (a -> b) -> a -> b
$ forall a b. (a -> b) -> [a] -> [b]
map Char -> Char
toLower String
n) [Attribute]
as)
attribute :: HParser Attribute
attribute :: HParser Attribute
attribute = do
(N String
n) <- HParser QName
qname
AttValue
v <- (do TokenT -> HParser TokenT
tok TokenT
TokEqual
Parser (Posn, TokenT) AttValue
attvalue) forall t a. Parser t a -> Parser t a -> Parser t a
`onFail`
forall (m :: * -> *) a. Monad m => a -> m a
return ([Either String Reference] -> AttValue
AttValue [forall a b. a -> Either a b
Left String
"TRUE"])
forall (m :: * -> *) a. Monad m => a -> m a
return (String -> QName
N forall a b. (a -> b) -> a -> b
$ forall a b. (a -> b) -> [a] -> [b]
map Char -> Char
toLower String
n, AttValue
v)
reference :: HParser Reference
reference :: HParser Reference
reference = do
forall (p :: * -> *) bra ket a.
PolyParse p =>
p bra -> p ket -> p a -> p a
bracket (TokenT -> HParser TokenT
tok TokenT
TokAmp) (TokenT -> HParser TokenT
tok TokenT
TokSemi) (HParser String
freetext forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= forall {m :: * -> *}. Monad m => String -> m Reference
val)
where
val :: String -> m Reference
val (Char
'#':Char
'x':String
i) | forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Char -> Bool
isHexDigit String
i
= forall (m :: * -> *) a. Monad m => a -> m a
return forall b c a. (b -> c) -> (a -> b) -> a -> c
. CharRef -> Reference
RefChar forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a, b) -> a
fst forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. [a] -> a
head forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. (Eq a, Num a) => ReadS a
readHex forall a b. (a -> b) -> a -> b
$ String
i
val (Char
'#':String
i) | forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Char -> Bool
isDigit String
i
= forall (m :: * -> *) a. Monad m => a -> m a
return forall b c a. (b -> c) -> (a -> b) -> a -> c
. CharRef -> Reference
RefChar forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a, b) -> a
fst forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. [a] -> a
head forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. (Eq a, Num a) => ReadS a
readDec forall a b. (a -> b) -> a -> b
$ String
i
val String
ent = forall (m :: * -> *) a. Monad m => a -> m a
return forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Reference
RefEntity forall a b. (a -> b) -> a -> b
$ String
ent
externalid :: HParser ExternalID
externalid :: HParser ExternalID
externalid =
( do String -> HParser ()
word String
"SYSTEM"
SystemLiteral -> ExternalID
SYSTEM forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> HParser SystemLiteral
systemliteral) forall t a. Parser t a -> Parser t a -> Parser t a
`onFail`
( do String -> HParser ()
word String
"PUBLIC"
PubidLiteral
p <- HParser PubidLiteral
pubidliteral
PubidLiteral -> SystemLiteral -> ExternalID
PUBLIC PubidLiteral
p forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> HParser SystemLiteral
systemliteral forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` forall (m :: * -> *) a. Monad m => a -> m a
return (String -> SystemLiteral
SystemLiteral String
"")
)
encodingdecl :: HParser EncodingDecl
encodingdecl :: HParser EncodingDecl
encodingdecl = do
String -> HParser ()
word String
"encoding" forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` String -> HParser ()
word String
"ENCODING"
TokenT -> HParser TokenT
tok TokenT
TokEqual forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` forall a. String -> HParser a
failBadP String
"expected = in 'encoding' decl"
String
f <- forall (p :: * -> *) bra ket a.
PolyParse p =>
p bra -> p ket -> p a -> p a
bracket (TokenT -> HParser TokenT
tok TokenT
TokQuote) (forall (p :: * -> *) a. Commitment p => p a -> p a
commit forall a b. (a -> b) -> a -> b
$ TokenT -> HParser TokenT
tok TokenT
TokQuote) HParser String
freetext
forall (m :: * -> *) a. Monad m => a -> m a
return (String -> EncodingDecl
EncodingDecl String
f)
attvalue :: HParser AttValue
attvalue :: Parser (Posn, TokenT) AttValue
attvalue =
( do [Either String Reference]
avs <- forall (p :: * -> *) bra ket a.
PolyParse p =>
p bra -> p ket -> p a -> p a
bracket (TokenT -> HParser TokenT
tok TokenT
TokQuote) (forall (p :: * -> *) a. Commitment p => p a -> p a
commit forall a b. (a -> b) -> a -> b
$ TokenT -> HParser TokenT
tok TokenT
TokQuote)
(forall (f :: * -> *) a. Alternative f => f a -> f [a]
many (forall a b. HParser a -> HParser b -> HParser (Either a b)
either HParser String
freetext HParser Reference
reference))
forall (m :: * -> *) a. Monad m => a -> m a
return ([Either String Reference] -> AttValue
AttValue [Either String Reference]
avs) ) forall t a. Parser t a -> Parser t a -> Parser t a
`onFail`
( do String
v <- HParser String
nmtoken
String
s <- (TokenT -> HParser TokenT
tok TokenT
TokPercent forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return String
"%") forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` forall (m :: * -> *) a. Monad m => a -> m a
return String
""
forall (m :: * -> *) a. Monad m => a -> m a
return ([Either String Reference] -> AttValue
AttValue [forall a b. a -> Either a b
Left (String
vforall a. [a] -> [a] -> [a]
++String
s)]) ) forall t a. Parser t a -> Parser t a -> Parser t a
`onFail`
( do String
s <- forall (p :: * -> *) a. PolyParse p => [p a] -> p a
oneOf [ TokenT -> HParser TokenT
tok TokenT
TokPlus forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return String
"+"
, TokenT -> HParser TokenT
tok TokenT
TokHash forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return String
"#"
]
String
v <- HParser String
nmtoken
forall (m :: * -> *) a. Monad m => a -> m a
return ([Either String Reference] -> AttValue
AttValue [forall a b. a -> Either a b
Left (String
sforall a. [a] -> [a] -> [a]
++String
v)]) ) forall t a. Parser t a -> Parser t a -> Parser t a
`onFail`
forall a. String -> HParser a
failP String
"Badly formatted attribute value"
systemliteral :: HParser SystemLiteral
systemliteral :: HParser SystemLiteral
systemliteral = do
String
s <- forall (p :: * -> *) bra ket a.
PolyParse p =>
p bra -> p ket -> p a -> p a
bracket (TokenT -> HParser TokenT
tok TokenT
TokQuote) (forall (p :: * -> *) a. Commitment p => p a -> p a
commit forall a b. (a -> b) -> a -> b
$ TokenT -> HParser TokenT
tok TokenT
TokQuote) HParser String
freetext
forall (m :: * -> *) a. Monad m => a -> m a
return (String -> SystemLiteral
SystemLiteral String
s)
pubidliteral :: HParser PubidLiteral
pubidliteral :: HParser PubidLiteral
pubidliteral = do
String
s <- forall (p :: * -> *) bra ket a.
PolyParse p =>
p bra -> p ket -> p a -> p a
bracket (TokenT -> HParser TokenT
tok TokenT
TokQuote) (forall (p :: * -> *) a. Commitment p => p a -> p a
commit forall a b. (a -> b) -> a -> b
$ TokenT -> HParser TokenT
tok TokenT
TokQuote) HParser String
freetext
forall (m :: * -> *) a. Monad m => a -> m a
return (String -> PubidLiteral
PubidLiteral String
s)
chardata :: HParser CharData
chardata :: HParser String
chardata = HParser String
freetext