{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TupleSections #-}
module Text.XmlHtml.HTML.Parse where
import Control.Applicative
import Control.Monad
import Data.Char
import Data.List
import Data.Maybe
import Text.XmlHtml.Common
import Text.XmlHtml.HTML.Meta
import Text.XmlHtml.TextParser
import qualified Text.XmlHtml.XML.Parse as XML
import qualified Text.Parsec as P
import qualified Data.HashSet as S
import qualified Data.HashMap.Strict as M
import qualified Data.Map as Map
import Data.Text (Text)
import qualified Data.Text as T
docFragment :: Encoding -> Parser Document
docFragment :: Encoding -> Parser Document
docFragment Encoding
e = do
(Maybe DocType
dt, [Node]
nodes1) <- Parser (Maybe DocType, [Node])
prolog
([Node]
nodes2, ElemResult
Matched) <- Maybe Text -> Parser ([Node], ElemResult)
content forall a. Maybe a
Nothing
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Encoding -> Maybe DocType -> [Node] -> Document
HtmlDocument Encoding
e Maybe DocType
dt ([Node]
nodes1 forall a. [a] -> [a] -> [a]
++ [Node]
nodes2)
prolog :: Parser (Maybe DocType, [Node])
prolog :: Parser (Maybe DocType, [Node])
prolog = do
Maybe (Maybe Text)
_ <- forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser (Maybe Text)
XML.xmlDecl
[Maybe Node]
nodes1 <- forall (f :: * -> *) a. Alternative f => f a -> f [a]
many Parser (Maybe Node)
XML.misc
Maybe (DocType, [Maybe Node])
rest <- forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional forall a b. (a -> b) -> a -> b
$ do
DocType
dt <- Parser DocType
docTypeDecl
[Maybe Node]
nodes2 <- forall (f :: * -> *) a. Alternative f => f a -> f [a]
many Parser (Maybe Node)
XML.misc
forall (m :: * -> *) a. Monad m => a -> m a
return (DocType
dt, [Maybe Node]
nodes2)
case Maybe (DocType, [Maybe Node])
rest of
Maybe (DocType, [Maybe Node])
Nothing -> forall (m :: * -> *) a. Monad m => a -> m a
return (forall a. Maybe a
Nothing, forall a. [Maybe a] -> [a]
catMaybes [Maybe Node]
nodes1)
Just (DocType
dt, [Maybe Node]
nodes2) -> forall (m :: * -> *) a. Monad m => a -> m a
return (forall a. a -> Maybe a
Just DocType
dt, forall a. [Maybe a] -> [a]
catMaybes ([Maybe Node]
nodes1 forall a. [a] -> [a] -> [a]
++ [Maybe Node]
nodes2))
docTypeDecl :: Parser DocType
docTypeDecl :: Parser DocType
docTypeDecl = do
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
P.try forall a b. (a -> b) -> a -> b
$ do
Text
_ <- Text -> Parser Text
text Text
"<!"
Text
decl <- Parser Text
XML.name
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Text -> Text
T.toLower Text
decl forall a. Eq a => a -> a -> Bool
/= Text
"doctype") forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
"Expected DOCTYPE"
ParsecT Text () Identity ()
XML.whiteSpace
Text
tag <- Parser Text
XML.name
Maybe ()
_ <- forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional ParsecT Text () Identity ()
XML.whiteSpace
ExternalID
extid <- Parser ExternalID
externalID
Maybe ()
_ <- forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional ParsecT Text () Identity ()
XML.whiteSpace
InternalSubset
intsub <- Parser InternalSubset
XML.internalDoctype
Char
_ <- forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'>'
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> ExternalID -> InternalSubset -> DocType
DocType Text
tag ExternalID
extid InternalSubset
intsub)
externalID :: Parser ExternalID
externalID :: Parser ExternalID
externalID = do
Maybe Text
tok <- forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional forall a b. (a -> b) -> a -> b
$ Text -> Text
T.toLower forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text
XML.name
case Maybe Text
tok of
Just Text
"system" -> Parser ExternalID
systemID
Just Text
"public" -> Parser ExternalID
publicID
Just Text
_ -> forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
"Expected SYSTEM or PUBLIC"
Maybe Text
Nothing -> forall (m :: * -> *) a. Monad m => a -> m a
return ExternalID
NoExternalID
where
systemID :: Parser ExternalID
systemID = do
ParsecT Text () Identity ()
XML.whiteSpace
Text -> ExternalID
System forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text
XML.systemLiteral
publicID :: Parser ExternalID
publicID = do
ParsecT Text () Identity ()
XML.whiteSpace
Text
pid <- Parser Text
XML.pubIdLiteral
ParsecT Text () Identity ()
XML.whiteSpace
Text
sid <- Parser Text
XML.systemLiteral
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> Text -> ExternalID
Public Text
pid Text
sid)
data ElemResult = Matched
| ImplicitLast Text
| ImplicitNext Text Text [(Text, Text)] Bool
finishElement :: Text -> Text -> [(Text, Text)] -> Bool
-> Parser (Node, ElemResult)
finishElement :: Text -> Text -> [(Text, Text)] -> Bool -> Parser (Node, ElemResult)
finishElement Text
t Text
tbase [(Text, Text)]
a Bool
b = do
if Bool
b then forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> [(Text, Text)] -> [Node] -> Node
Element Text
t [(Text, Text)]
a [], ElemResult
Matched)
else Parser (Node, ElemResult)
nonEmptyElem
where
nonEmptyElem :: Parser (Node, ElemResult)
nonEmptyElem
| Text -> [(Text, Text)] -> Bool
isRawText Text
tbase [(Text, Text)]
a = do
Node
c <- forall a. String -> Parser a -> Parser Node
XML.cdata String
"<" forall a b. (a -> b) -> a -> b
$ forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
P.try (Text -> Parser ElemResult
endTag Text
t)
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> [(Text, Text)] -> [Node] -> Node
Element Text
t [(Text, Text)]
a [Node
c], ElemResult
Matched)
| Text
tbase forall a. (Eq a, Hashable a) => a -> HashSet a -> Bool
`S.member` HashSet Text
endOmittableLast = (Parser ElemResult -> ParsecT Text () Identity (Maybe ElemResult))
-> Parser (Node, ElemResult)
tagContents forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional
| Bool
otherwise = (Parser ElemResult -> ParsecT Text () Identity (Maybe ElemResult))
-> Parser (Node, ElemResult)
tagContents (forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall a. a -> Maybe a
Just)
tagContents :: (Parser ElemResult -> ParsecT Text () Identity (Maybe ElemResult))
-> Parser (Node, ElemResult)
tagContents Parser ElemResult -> ParsecT Text () Identity (Maybe ElemResult)
modifier = do
([Node]
c,ElemResult
r1) <- Maybe Text -> Parser ([Node], ElemResult)
content (forall a. a -> Maybe a
Just Text
tbase)
case ElemResult
r1 of
ElemResult
Matched -> do
Maybe ElemResult
r2 <- Parser ElemResult -> ParsecT Text () Identity (Maybe ElemResult)
modifier (Text -> Parser ElemResult
endTag Text
t)
case Maybe ElemResult
r2 of
Maybe ElemResult
Nothing -> forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> [(Text, Text)] -> [Node] -> Node
Element Text
t [(Text, Text)]
a [Node]
c, ElemResult
Matched)
Just ElemResult
rr -> forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> [(Text, Text)] -> [Node] -> Node
Element Text
t [(Text, Text)]
a [Node]
c, ElemResult
rr)
ImplicitLast Text
tag | Text -> Text
T.toCaseFold Text
tag forall a. Eq a => a -> a -> Bool
== Text -> Text
T.toCaseFold Text
t -> do
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> [(Text, Text)] -> [Node] -> Node
Element Text
t [(Text, Text)]
a [Node]
c, ElemResult
Matched)
ElemResult
end -> do
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> [(Text, Text)] -> [Node] -> Node
Element Text
t [(Text, Text)]
a [Node]
c, ElemResult
end)
emptyOrStartTag :: Parser (Text, Text, [(Text, Text)], Bool)
emptyOrStartTag :: Parser (Text, Text, [(Text, Text)], Bool)
emptyOrStartTag = do
Text
t <- forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
P.try forall a b. (a -> b) -> a -> b
$ forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'<' forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser Text
XML.name
let tbase :: Text
tbase = Text -> Text
T.toLower forall a b. (a -> b) -> a -> b
$ forall a b. (a, b) -> b
snd forall a b. (a -> b) -> a -> b
$ Text -> Text -> (Text, Text)
T.breakOnEnd Text
":" Text
t
[(Text, Text)]
a <- forall (f :: * -> *) a. Alternative f => f a -> f [a]
many forall a b. (a -> b) -> a -> b
$ forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
P.try forall a b. (a -> b) -> a -> b
$ do
ParsecT Text () Identity ()
XML.whiteSpace
ParsecT Text () Identity (Text, Text)
attribute
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (forall {a} {b}. Eq a => [(a, b)] -> Bool
hasDups [(Text, Text)]
a) forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
"Duplicate attribute names in element"
Maybe ()
_ <- forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional ParsecT Text () Identity ()
XML.whiteSpace
Bool
e <- forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall a. Maybe a -> Bool
isJust forall a b. (a -> b) -> a -> b
$ forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional (forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'/')
let e' :: Bool
e' = Bool
e Bool -> Bool -> Bool
|| (Text
tbase forall a. (Eq a, Hashable a) => a -> HashSet a -> Bool
`S.member` HashSet Text
voidTags)
Char
_ <- forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'>'
forall (m :: * -> *) a. Monad m => a -> m a
return (Text
t, Text
tbase, [(Text, Text)]
a, Bool
e')
where
hasDups :: [(a, b)] -> Bool
hasDups [(a, b)]
a = forall (t :: * -> *) a. Foldable t => t a -> Int
length (forall a. Eq a => [a] -> [a]
nub (forall a b. (a -> b) -> [a] -> [b]
map forall a b. (a, b) -> a
fst [(a, b)]
a)) forall a. Ord a => a -> a -> Bool
< forall (t :: * -> *) a. Foldable t => t a -> Int
length [(a, b)]
a
attrName :: Parser Text
attrName :: Parser Text
attrName = (Char -> Bool) -> Parser Text
takeWhile1 Char -> Bool
isAttrName
where isAttrName :: Char -> Bool
isAttrName Char
c | Char
c forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Char
'\0',Char
' ',Char
'"',Char
'\'',Char
'>',Char
'/',Char
'='] = Bool
False
| Char -> Bool
isControlChar Char
c = Bool
False
| Bool
otherwise = Bool
True
isControlChar :: Char -> Bool
isControlChar :: Char -> Bool
isControlChar Char
c | Char
c forall a. Ord a => a -> a -> Bool
>= Char
'\x007F' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\x009F' = Bool
True
| Char
c forall a. Ord a => a -> a -> Bool
>= Char
'\xFDD0' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\xFDEF' = Bool
True
| Bool
otherwise = Bool
False
quotedAttrValue :: Parser Text
quotedAttrValue :: Parser Text
quotedAttrValue = Parser Text
singleQuoted forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Text
doubleQuoted
where
singleQuoted :: Parser Text
singleQuoted = forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'\'' forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall {t :: * -> *}. Foldable t => t Char -> Parser Text
refTill [Char
'&',Char
'\''] forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'\''
doubleQuoted :: Parser Text
doubleQuoted = forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'"' forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall {t :: * -> *}. Foldable t => t Char -> Parser Text
refTill [Char
'&',Char
'"'] forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'"'
refTill :: t Char -> Parser Text
refTill t Char
end = [Text] -> Text
T.concat forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (f :: * -> *) a. Alternative f => f a -> f [a]
many
((Char -> Bool) -> Parser Text
takeWhile1 (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. (forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` t Char
end)) forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Text
reference)
unquotedAttrValue :: Parser Text
unquotedAttrValue :: Parser Text
unquotedAttrValue = forall {t :: * -> *}. Foldable t => t Char -> Parser Text
refTill [Char
' ',Char
'"',Char
'\'',Char
'=',Char
'<',Char
'>',Char
'&',Char
'`']
where
refTill :: t Char -> Parser Text
refTill t Char
end = [Text] -> Text
T.concat forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (f :: * -> *) a. Alternative f => f a -> f [a]
some
((Char -> Bool) -> Parser Text
takeWhile1 (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. (forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` t Char
end)) forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Text
reference)
attrValue :: Parser Text
attrValue :: Parser Text
attrValue = Parser Text
quotedAttrValue forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Text
unquotedAttrValue
attribute :: Parser (Text, Text)
attribute :: ParsecT Text () Identity (Text, Text)
attribute = do
Text
n <- Parser Text
attrName
Maybe Text
v <- forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional forall a b. (a -> b) -> a -> b
$ do
Char
_ <- forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
P.try forall a b. (a -> b) -> a -> b
$ do
Maybe ()
_ <- forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional ParsecT Text () Identity ()
XML.whiteSpace
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'='
Maybe ()
_ <- forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional ParsecT Text () Identity ()
XML.whiteSpace
Parser Text
attrValue
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ forall b a. b -> (a -> b) -> Maybe a -> b
maybe (Text
n,Text
"") (Text
n,) Maybe Text
v
endTag :: Text -> Parser ElemResult
endTag :: Text -> Parser ElemResult
endTag Text
s = do
Text
_ <- Text -> Parser Text
text Text
"</"
Text
t <- Parser Text
XML.name
let sbase :: Text
sbase = Text -> Text
T.toLower forall a b. (a -> b) -> a -> b
$ forall a b. (a, b) -> b
snd forall a b. (a -> b) -> a -> b
$ Text -> Text -> (Text, Text)
T.breakOnEnd Text
":" Text
s
ElemResult
r <- if (Text -> Text
T.toCaseFold Text
s forall a. Eq a => a -> a -> Bool
== Text -> Text
T.toCaseFold Text
t)
then forall (m :: * -> *) a. Monad m => a -> m a
return ElemResult
Matched
else if Text
sbase forall a. (Eq a, Hashable a) => a -> HashSet a -> Bool
`S.member` HashSet Text
endOmittableLast
then forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> ElemResult
ImplicitLast Text
t)
else forall (m :: * -> *) a. MonadFail m => String -> m a
fail forall a b. (a -> b) -> a -> b
$ String
"mismatched tags: </" forall a. [a] -> [a] -> [a]
++ Text -> String
T.unpack Text
t forall a. [a] -> [a] -> [a]
++
String
"> found inside <" forall a. [a] -> [a] -> [a]
++ Text -> String
T.unpack Text
s forall a. [a] -> [a] -> [a]
++ String
"> tag"
Maybe ()
_ <- forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional ParsecT Text () Identity ()
XML.whiteSpace
Text
_ <- Text -> Parser Text
text Text
">"
forall (m :: * -> *) a. Monad m => a -> m a
return ElemResult
r
content :: Maybe Text -> Parser ([Node], ElemResult)
content :: Maybe Text -> Parser ([Node], ElemResult)
content Maybe Text
parent = do
([Maybe Node]
ns, ElemResult
end) <- ParsecT Text () Identity ([Maybe Node], ElemResult)
readText
forall (m :: * -> *) a. Monad m => a -> m a
return ([Node] -> [Node]
coalesceText (forall a. [Maybe a] -> [a]
catMaybes [Maybe Node]
ns), ElemResult
end)
where
readText :: ParsecT Text () Identity ([Maybe Node], ElemResult)
readText = do
Maybe Node
s <- forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser Node
XML.charData
Maybe ([Maybe Node], ElemResult)
t <- forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional ParsecT Text () Identity ([Maybe Node], ElemResult)
whileMatched
case Maybe ([Maybe Node], ElemResult)
t of
Maybe ([Maybe Node], ElemResult)
Nothing -> forall (m :: * -> *) a. Monad m => a -> m a
return ([Maybe Node
s], ElemResult
Matched)
Just ([Maybe Node]
tt, ElemResult
m) -> forall (m :: * -> *) a. Monad m => a -> m a
return (Maybe Node
sforall a. a -> [a] -> [a]
:[Maybe Node]
tt, ElemResult
m)
whileMatched :: ParsecT Text () Identity ([Maybe Node], ElemResult)
whileMatched = do
([Maybe Node]
n,ElemResult
end) <- (,ElemResult
Matched) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (forall a. a -> [a] -> [a]
:[]) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall a. a -> Maybe a
Just forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Text -> Node
TextNode forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text
reference
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (,ElemResult
Matched) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (forall a. a -> [a] -> [a]
:[]) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser (Maybe Node)
XML.cdSect
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (,ElemResult
Matched) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (forall a. a -> [a] -> [a]
:[]) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser (Maybe Node)
XML.processingInstruction
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (,ElemResult
Matched) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (forall a. a -> [a] -> [a]
:[]) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser (Maybe Node)
XML.comment
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ParsecT Text () Identity ([Maybe Node], ElemResult)
doElement
case ElemResult
end of
ElemResult
Matched -> do
([Maybe Node]
ns, ElemResult
end') <- ParsecT Text () Identity ([Maybe Node], ElemResult)
readText
forall (m :: * -> *) a. Monad m => a -> m a
return ([Maybe Node]
n forall a. [a] -> [a] -> [a]
++ [Maybe Node]
ns, ElemResult
end')
ElemResult
_ -> do
forall (m :: * -> *) a. Monad m => a -> m a
return ([Maybe Node]
n, ElemResult
end)
doElement :: ParsecT Text () Identity ([Maybe Node], ElemResult)
doElement = do
(Text
t,Text
tb, [(Text, Text)]
a,Bool
b) <- Parser (Text, Text, [(Text, Text)], Bool)
emptyOrStartTag
Text
-> Text
-> [(Text, Text)]
-> Bool
-> ParsecT Text () Identity ([Maybe Node], ElemResult)
handle Text
t Text
tb [(Text, Text)]
a Bool
b
handle :: Text
-> Text
-> [(Text, Text)]
-> Bool
-> ParsecT Text () Identity ([Maybe Node], ElemResult)
handle Text
t Text
tb [(Text, Text)]
a Bool
b = do
if Text -> Maybe Text -> Bool
breaksTag Text
tb Maybe Text
parent
then forall (m :: * -> *) a. Monad m => a -> m a
return ([forall a. Maybe a
Nothing], Text -> Text -> [(Text, Text)] -> Bool -> ElemResult
ImplicitNext Text
t Text
tb [(Text, Text)]
a Bool
b)
else do
(Node
n,ElemResult
end) <- Text -> Text -> [(Text, Text)] -> Bool -> Parser (Node, ElemResult)
finishElement Text
t Text
tb [(Text, Text)]
a Bool
b
case ElemResult
end of
ImplicitNext Text
t' Text
tb' [(Text, Text)]
a' Bool
b' -> do
([Maybe Node]
ns,ElemResult
end') <- Text
-> Text
-> [(Text, Text)]
-> Bool
-> ParsecT Text () Identity ([Maybe Node], ElemResult)
handle Text
t' Text
tb' [(Text, Text)]
a' Bool
b'
forall (m :: * -> *) a. Monad m => a -> m a
return (forall a. a -> Maybe a
Just Node
n forall a. a -> [a] -> [a]
: [Maybe Node]
ns, ElemResult
end')
ElemResult
_ -> forall (m :: * -> *) a. Monad m => a -> m a
return ([forall a. a -> Maybe a
Just Node
n], ElemResult
end)
breaksTag :: Text -> Maybe Text -> Bool
breaksTag Text
_ Maybe Text
Nothing = Bool
False
breaksTag Text
child (Just Text
tag) = case forall k v. (Eq k, Hashable k) => k -> HashMap k v -> Maybe v
M.lookup Text
tag HashMap Text (HashSet Text)
endOmittableNext of
Maybe (HashSet Text)
Nothing -> Bool
False
Just HashSet Text
s -> forall a. (Eq a, Hashable a) => a -> HashSet a -> Bool
S.member Text
child HashSet Text
s
coalesceText :: [Node] -> [Node]
coalesceText (TextNode Text
s : TextNode Text
t : [Node]
ns)
= [Node] -> [Node]
coalesceText (Text -> Node
TextNode (Text -> Text -> Text
T.append Text
s Text
t) forall a. a -> [a] -> [a]
: [Node]
ns)
coalesceText (Node
n:[Node]
ns)
= Node
n forall a. a -> [a] -> [a]
: [Node] -> [Node]
coalesceText [Node]
ns
coalesceText []
= []
reference :: Parser Text
reference :: Parser Text
reference = do
Char
_ <- forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'&'
Either Char Text
r <- (forall a b. a -> Either a b
Left forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
P.try ParsecT Text () Identity Char
finishCharRef)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (forall a b. b -> Either a b
Right forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
P.try Parser Text
finishEntityRef)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (forall a b. a -> Either a b
Left forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) a. Monad m => a -> m a
return Char
'&')
case Either Char Text
r of
Left Char
c -> do
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Bool -> Bool
not (Char -> Bool
isValidChar Char
c)) forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *) a. MonadFail m => String -> m a
fail forall a b. (a -> b) -> a -> b
$
String
"Reference is not a valid character"
forall (m :: * -> *) a. Monad m => a -> m a
return (Char -> Text
T.singleton Char
c)
Right Text
nm -> case forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup Text
nm Map Text Text
predefinedRefs of
Maybe Text
Nothing -> forall (m :: * -> *) a. MonadFail m => String -> m a
fail forall a b. (a -> b) -> a -> b
$ String
"Unknown entity reference: " forall a. [a] -> [a] -> [a]
++ Text -> String
T.unpack Text
nm
Just Text
t -> forall (m :: * -> *) a. Monad m => a -> m a
return Text
t
finishCharRef :: Parser Char
finishCharRef :: ParsecT Text () Identity Char
finishCharRef = forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'#' forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> (forall {u}. ParsecT Text u Identity Char
hexCharRef forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall {u}. ParsecT Text u Identity Char
decCharRef)
where
decCharRef :: ParsecT Text u Identity Char
decCharRef = do
[Int]
ds <- forall (f :: * -> *) a. Alternative f => f a -> f [a]
some forall {u}. ParsecT Text u Identity Int
digit
Char
_ <- forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
';'
let c :: Char
c = Int -> Char
chr forall a b. (a -> b) -> a -> b
$ forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (\Int
a Int
b -> Int
10 forall a. Num a => a -> a -> a
* Int
a forall a. Num a => a -> a -> a
+ Int
b) Int
0 [Int]
ds
forall (m :: * -> *) a. Monad m => a -> m a
return Char
c
where
digit :: ParsecT Text u Identity Int
digit = do
Char
d <- forall s (m :: * -> *) u.
Stream s m Char =>
(Char -> Bool) -> ParsecT s u m Char
P.satisfy (\Char
c -> Char
c forall a. Ord a => a -> a -> Bool
>= Char
'0' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'9')
forall (m :: * -> *) a. Monad m => a -> m a
return (Char -> Int
ord Char
d forall a. Num a => a -> a -> a
- Char -> Int
ord Char
'0')
hexCharRef :: ParsecT Text u Identity Char
hexCharRef = do
Char
_ <- forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'x' forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'X'
[Int]
ds <- forall (f :: * -> *) a. Alternative f => f a -> f [a]
some forall {u}. ParsecT Text u Identity Int
digit
Char
_ <- forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
';'
let c :: Char
c = Int -> Char
chr forall a b. (a -> b) -> a -> b
$ forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (\Int
a Int
b -> Int
16 forall a. Num a => a -> a -> a
* Int
a forall a. Num a => a -> a -> a
+ Int
b) Int
0 [Int]
ds
forall (m :: * -> *) a. Monad m => a -> m a
return Char
c
where
digit :: ParsecT Text u Identity Int
digit = forall {u}. ParsecT Text u Identity Int
num forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall {u}. ParsecT Text u Identity Int
upper forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall {u}. ParsecT Text u Identity Int
lower
num :: ParsecT Text u Identity Int
num = do
Char
d <- forall s (m :: * -> *) u.
Stream s m Char =>
(Char -> Bool) -> ParsecT s u m Char
P.satisfy (\Char
c -> Char
c forall a. Ord a => a -> a -> Bool
>= Char
'0' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'9')
forall (m :: * -> *) a. Monad m => a -> m a
return (Char -> Int
ord Char
d forall a. Num a => a -> a -> a
- Char -> Int
ord Char
'0')
upper :: ParsecT Text u Identity Int
upper = do
Char
d <- forall s (m :: * -> *) u.
Stream s m Char =>
(Char -> Bool) -> ParsecT s u m Char
P.satisfy (\Char
c -> Char
c forall a. Ord a => a -> a -> Bool
>= Char
'A' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'F')
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
10 forall a. Num a => a -> a -> a
+ Char -> Int
ord Char
d forall a. Num a => a -> a -> a
- Char -> Int
ord Char
'A')
lower :: ParsecT Text u Identity Int
lower = do
Char
d <- forall s (m :: * -> *) u.
Stream s m Char =>
(Char -> Bool) -> ParsecT s u m Char
P.satisfy (\Char
c -> Char
c forall a. Ord a => a -> a -> Bool
>= Char
'a' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'f')
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
10 forall a. Num a => a -> a -> a
+ Char -> Int
ord Char
d forall a. Num a => a -> a -> a
- Char -> Int
ord Char
'a')
finishEntityRef :: Parser Text
finishEntityRef :: Parser Text
finishEntityRef = Parser Text
XML.name forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
';'