{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE MultiWayIf #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Zenacy.HTML.Internal.Parser
( Parser(..)
, ParserOptions(..)
, ParserResult(..)
, parseDocument
, parseFragment
) where
import Zenacy.HTML.Internal.BS
import Zenacy.HTML.Internal.Buffer
import Zenacy.HTML.Internal.Char
import Zenacy.HTML.Internal.Core
import Zenacy.HTML.Internal.DOM
import Zenacy.HTML.Internal.Lexer
import Zenacy.HTML.Internal.Token
import Zenacy.HTML.Internal.Types
import Control.Applicative
( liftA
)
import Control.Monad
( when
, unless
, void
)
import Control.Monad.Extra
( (||^)
, (&&^)
, anyM
, notM
, whenM
, whenJustM
, unlessM
)
import Control.Monad.ST
( ST
, runST
)
import Data.Default
( Default(..)
)
import Data.DList
( DList
)
import qualified Data.DList as D
( append
, empty
, snoc
, toList
)
import Data.IntMap
( IntMap
)
import qualified Data.IntMap as IntMap
( findWithDefault
, lookup
, insert
, map
, mapWithKey
)
import Data.List
( find
)
import Data.Map
( Map
)
import qualified Data.Map as Map
( fromList
, lookup
)
import Data.Maybe
( fromJust
, isJust
, isNothing
, listToMaybe
, mapMaybe
)
import Data.Monoid
( (<>)
)
import Data.Sequence
( Seq
)
import qualified Data.Sequence as Seq
( fromList
)
import Data.Set
( Set
)
import qualified Data.Set as Set
( fromList
, member
, notMember
, union
, unions
)
import Data.STRef
( STRef
, newSTRef
, readSTRef
, writeSTRef
)
import Data.Word
( Word8
)
data Parser s = Parser
{ parserLexer :: STRef s (Lexer s)
, parserDOM :: STRef s DOM
, parserElementStack :: STRef s [DOMID]
, parserActiveFormatList :: STRef s [ParserFormatItem]
, parserInsertionMode :: STRef s ParserMode
, parserOriginalMode :: STRef s ParserMode
, parserTemplateMode :: STRef s [ParserMode]
, parserContextElement :: STRef s (Maybe DOMID)
, parserHeadElement :: STRef s (Maybe DOMID)
, parserFormElement :: STRef s (Maybe DOMID)
, parserSelfClosingFlag :: STRef s Bool
, parserFragmentMode :: STRef s Bool
, parserFosterParenting :: STRef s Bool
, parserFrameSetOK :: STRef s Bool
, parserDone :: STRef s Bool
, parserTableChars :: STRef s [Token]
, parserAdoptionAgency :: STRef s (ParserAdoptionAgency s)
, parserErrors :: STRef s (DList BS)
, parserIFrameSrcDoc :: STRef s Bool
, parserTextMap :: STRef s (IntMap (STRef s (Buffer s)))
, parserLogErrors :: Bool
}
data ParserMode
= ModeInitial
| ModeBeforeHtml
| ModeBeforeHead
| ModeInHead
| ModeInHeadNoscript
| ModeAfterHead
| ModeInBody
| ModeText
| ModeInTable
| ModeInTableText
| ModeInCaption
| ModeInColumnGroup
| ModeInTableBody
| ModeInRow
| ModeInCell
| ModeInSelect
| ModeInSelectInTable
| ModeInTemplate
| ModeAfterBody
| ModeInFrameset
| ModeAfterFrameset
| ModeAfterAfterBody
| ModeAfterAfterFrameset
deriving (Eq, Ord, Show)
data ParserOptions = ParserOptions
{ parserOptionInput :: BS
, parserOptionLogErrors :: Bool
, parserOptionIgnoreEntities :: Bool
} deriving (Eq, Ord, Show)
data ParserResult = ParserResult
{ parserResultDOM :: DOM
, parserResultErrors :: [BS]
} deriving (Eq, Ord, Show)
data ParserFormatItem
= ParserFormatElement DOMID Token
| ParserFormatMarker
deriving (Eq, Ord, Show)
data ParserElementCategory
= ElementCategorySpecial
| ElementCategoryFormatting
| ElementCategoryOrdinary
deriving (Eq, Ord, Show)
data ElementDetails = ElementDetails
{ elementDetailsIndex :: Int
, elementDetailsID :: DOMID
, elementDetailsNode :: DOMNode
, elementDetailsType :: DOMType
} deriving (Eq, Ord, Show)
instance Default ParserOptions where
def = ParserOptions
{ parserOptionInput = bsEmpty
, parserOptionLogErrors = False
, parserOptionIgnoreEntities = False
}
instance Default ParserResult where
def = ParserResult
{ parserResultDOM = def
, parserResultErrors = []
}
parseDocument :: ParserOptions -> Either BS ParserResult
parseDocument x =
runST $ do
parserNew x >>= \case
Right p -> Right <$> parserRun p
Left e -> Left <$> pure e
parseFragment :: ParserOptions -> Either BS ParserResult
parseFragment x = Left "fragment support not yet implemented"
parserNew :: ParserOptions -> ST s (Either BS (Parser s))
parserNew o@ParserOptions{..} = do
a <- lexerNew def
{ lexerOptionInput = parserOptionInput
, lexerOptionLogErrors = parserOptionLogErrors
, lexerOptionIgnoreEntities = parserOptionIgnoreEntities
}
case a of
Right lex -> Right <$> parserMake o lex
Left err -> Left <$> pure err
parserMake :: ParserOptions -> Lexer s -> ST s (Parser s)
parserMake ParserOptions{..} lexer = do
lexerRef <- newSTRef lexer
dom <- newSTRef def
stack <- newSTRef []
fmtList <- newSTRef []
insMode <- newSTRef ModeInitial
orgMode <- newSTRef ModeInitial
tmpMode <- newSTRef []
ctxElem <- newSTRef Nothing
headElem <- newSTRef Nothing
formElem <- newSTRef Nothing
closing <- newSTRef False
fragMode <- newSTRef False
foster <- newSTRef False
frameSet <- newSTRef True
done <- newSTRef False
table <- newSTRef []
aa <- defaultAA
aaRef <- newSTRef aa
warn <- newSTRef def
iframe <- newSTRef False
textMap <- newSTRef def
pure $ Parser
{ parserLexer = lexerRef
, parserDOM = dom
, parserElementStack = stack
, parserActiveFormatList = fmtList
, parserInsertionMode = insMode
, parserOriginalMode = orgMode
, parserTemplateMode = tmpMode
, parserContextElement = ctxElem
, parserHeadElement = headElem
, parserFormElement = formElem
, parserSelfClosingFlag = closing
, parserFragmentMode = fragMode
, parserFosterParenting = foster
, parserFrameSetOK = frameSet
, parserDone = done
, parserTableChars = table
, parserAdoptionAgency = aaRef
, parserErrors = warn
, parserIFrameSrcDoc = iframe
, parserTextMap = textMap
, parserLogErrors = parserOptionLogErrors
}
parserRun :: Parser s -> ST s ParserResult
parserRun p @ Parser {..} = do
rref parserDone >>= \case
True -> do
Lexer{..} <- rref parserLexer
e <- D.append <$> rref lexerErrors <*> rref parserErrors
d <- textMapDOM p
pure $ ParserResult d $ D.toList e
False -> do
t <- rref parserLexer >>= lexerNext
selfClosingInit p t
dispatchTreeConstruction p t
whenM (selfClosingFlag p) $
parseError p (Just t) "self closing not acknowledged for token"
parserRun p
dispatchTreeConstruction :: Parser s -> Token -> ST s ()
dispatchTreeConstruction p @ Parser {..} t = do
e <- elementStackEmpty p
a <- adjustedCurrentNode p
b <- pure $ case a of
Just n -> domNodeIsHTML n
|| isMathMLIntegrationPoint n
&& isTokenStartNotNamed t ["mglyph", "malignmark"]
|| isMathMLIntegrationPoint n && tokenIsChar t
|| isMathMLElementNamed n "annotation-xml"
&& isTokenStartNamed t ["svg"]
|| isHtmlIntgrationPoint n && tokenIsStart t
|| isHtmlIntgrationPoint n && tokenIsChar t
Nothing -> False
if e || b || tokenIsEOF t
then doHtmlContent p t
else doForeignContent p t
where
tokenIsChar TChar {} = True
tokenIsChar _ = False
tokenIsStart TStart {} = True
tokenIsStart _ = False
tokenIsEOF TEOF = True
tokenIsEOF _ = False
doHtmlContent :: Parser s -> Token -> ST s ()
doHtmlContent p @ Parser {..} t = do
m <- rref parserInsertionMode
parserInserter m p t
reprocess :: Parser s -> Token -> ST s ()
reprocess = doHtmlContent
parserInserter :: ParserMode -> Parser s -> Token -> ST s ()
parserInserter = \case
ModeInitial -> doModeInitial
ModeBeforeHtml -> doModeBeforeHtml
ModeBeforeHead -> doModeBeforeHead
ModeInHead -> doModeInHead
ModeInHeadNoscript -> doModeInHeadNoscript
ModeAfterHead -> doModeAfterHead
ModeInBody -> doModeInBody
ModeText -> doModeText
ModeInTable -> doModeInTable
ModeInTableText -> doModeInTableText
ModeInCaption -> doModeInCaption
ModeInColumnGroup -> doModeInColumnGroup
ModeInTableBody -> doModeInTableBody
ModeInRow -> doModeInRow
ModeInCell -> doModeInCell
ModeInSelect -> doModeInSelect
ModeInSelectInTable -> doModeInSelectInTable
ModeInTemplate -> doModeInTemplate
ModeAfterBody -> doModeAfterBody
ModeInFrameset -> doModeInFrameset
ModeAfterFrameset -> doModeAfterFrameset
ModeAfterAfterBody -> doModeAfterAfterBody
ModeAfterAfterFrameset -> doModeAfterAfterFrameset
parseError :: Parser s -> Maybe Token -> BS -> ST s ()
parseError p @ Parser {..} t s =
when parserLogErrors $
uref parserErrors $ flip D.snoc e
where
e = s <> case t of
Just (TDoctype {..}) -> ",doctype"
Just (TStart {..}) -> ",tag-start," <> tStartName
Just (TEnd {..}) -> ",tag-end," <> tEndName
Just (TComment {..}) -> ",comment"
Just (TChar {..}) -> ",chr," <> bsOnly tCharData
Just TEOF -> ",eof"
Nothing -> bsEmpty
isTokenStartNamed :: Token -> [BS] -> Bool
isTokenStartNamed TStart {..} names = tStartName `elem` names
isTokenStartNamed _ _ = False
isTokenStartNotNamed :: Token -> [BS] -> Bool
isTokenStartNotNamed TStart {..} names = not $ tStartName `elem` names
isTokenStartNotNamed _ _ = False
isTokenEndNamed :: Token -> [BS] -> Bool
isTokenEndNamed TEnd {..} names = tEndName `elem` names
isTokenEndNamed _ _ = False
isTokenEndNotNamed :: Token -> [BS] -> Bool
isTokenEndNotNamed TEnd {..} names = not $ tEndName `elem` names
isTokenEndNotNamed _ _ = False
elementName :: BS -> DOMNode -> Bool
elementName x y = domNodeElementName y == x
elementNameNot :: BS -> DOMNode -> Bool
elementNameNot x = not . elementName x
elementNameIn :: [BS] -> DOMNode -> Bool
elementNameIn x y = domNodeElementName y `elem` x
elementNameNotIn :: [BS] -> DOMNode -> Bool
elementNameNotIn x = not . elementNameIn x
elementStack :: Parser s -> ST s [DOMID]
elementStack Parser {..} = readSTRef parserElementStack
elementStackEmpty :: Parser s -> ST s Bool
elementStackEmpty p @ Parser {..} = null <$> elementStack p
elementStackSize :: Parser s -> ST s Int
elementStackSize p @ Parser {..} = length <$> elementStack p
elementStackModify :: Parser s -> ([DOMID] -> [DOMID]) -> ST s ()
elementStackModify p @ Parser {..} f = uref parserElementStack f
elementStackPush :: Parser s -> DOMID -> ST s ()
elementStackPush p @ Parser {..} x = elementStackModify p $ (x:)
elementStackPop :: Parser s -> ST s ()
elementStackPop p @ Parser {..} = elementStackModify p $ drop 1
elementStackPopWhile :: Parser s -> (DOMNode -> Bool) -> ST s ()
elementStackPopWhile p @ Parser {..} f =
currentNode p >>= \case
Just a | f a -> elementStackPop p >> elementStackPopWhile p f
_ -> pure ()
elementStackPopIf :: Parser s -> (DOMNode -> Bool) -> ST s ()
elementStackPopIf p @ Parser {..} f =
currentNode p >>= \case
Just a | f a -> elementStackPop p
_ -> pure ()
elementStackPopUntil :: Parser s -> (DOMType -> Bool) -> ST s ()
elementStackPopUntil p @ Parser {..} f = do
elementStackPopWhile p (not . g)
elementStackPopIf p g
where
g = f . domNodeType
elementStackPopUntilID :: Parser s -> DOMID -> ST s ()
elementStackPopUntilID p x = elementStackModify p $ drop 1 . dropWhile (/=x)
elementStackPopUntilType :: Parser s -> DOMType -> ST s ()
elementStackPopUntilType p x = elementStackPopUntil p (==x)
elementStackPopUntilTypeIn :: Parser s -> [DOMType] -> ST s ()
elementStackPopUntilTypeIn p x = elementStackPopUntil p $ flip elem x
elementStackNodes :: Parser s -> ST s [DOMNode]
elementStackNodes p = domMapID <$> getDOM p <*> elementStack p
elementStackTypes :: Parser s -> ST s [DOMType]
elementStackTypes p = map domNodeType <$> elementStackNodes p
elementStackAny :: Parser s -> (DOMNode -> Bool) -> ST s Bool
elementStackAny p f = any f <$> elementStackNodes p
elementStackAll :: Parser s -> (DOMNode -> Bool) -> ST s Bool
elementStackAll p f = all f <$> elementStackNodes p
elementStackHasBody :: Parser s -> ST s Bool
elementStackHasBody p =
liftA reverse (elementStackTypes p) >>= pure . \case
(_:x:_) -> x == domMakeTypeHTML "body"
_otherwise -> False
elementStackHasTemplate :: Parser s -> ST s Bool
elementStackHasTemplate p = elementStackAny p domNodeIsTemplate
elementStackMissingTemplate :: Parser s -> ST s Bool
elementStackMissingTemplate p = elementStackAll p $ not . domNodeIsTemplate
elementStackRemove :: Parser s -> DOMID -> ST s ()
elementStackRemove p x = elementStackModify p $ filter (/=x)
elementStackReplace :: Parser s -> DOMID -> DOMID -> ST s ()
elementStackReplace p x y =
elementStackModify p $ map (\i -> if i == x then y else i)
elementStackSucc :: Parser s -> DOMID -> ST s (Maybe DOMID)
elementStackSucc p x = findSucc (==x) <$> elementStack p
elementStackInsertBefore :: Parser s -> DOMID -> DOMID -> ST s ()
elementStackInsertBefore p x y = elementStackModify p $ insertBefore (==x) y
elementStackDetails :: Parser s -> ST s [ElementDetails]
elementStackDetails p = g <$> getDOM p <*> elementStack p
where
g d x = mapMaybe (f d) $ zip [1..] x
f d (i, x) =
case domGetNode d x of
Nothing -> Nothing
Just a -> Just $ ElementDetails i x a $ domNodeType a
elementStackFind :: Parser s -> (ElementDetails -> Bool) -> ST s (Maybe ElementDetails)
elementStackFind p f = liftA (find f) $ elementStackDetails p
elementTypesSpecial :: Set DOMType
elementTypesSpecial = Set.unions
[ Set.fromList $ domTypesHTML
[ "address", "applet", "area", "article", "aside",
"base", "basefont", "bgsound", "blockquote", "body",
"br", "button", "caption", "center", "col", "colgroup",
"dd", "details", "dir", "div", "dl", "dt", "embed",
"fieldset", "figcaption", "figure", "footer", "form",
"frame", "frameset", "h1", "h2", "h3", "h4", "h5", "h6",
"head", "header", "hgroup", "hr", "html", "iframe",
"img", "input", "isindex", "li", "link", "listing",
"main", "marquee", "menu", "menuitem", "meta", "nav",
"noembed", "noframes", "noscript", "object", "ol",
"p", "param", "plaintext", "pre", "script", "section",
"select", "source", "style", "summary", "table",
"tbody", "td", "template", "textarea", "tfoot",
"th", "thead", "title", "tr", "track", "ul", "wbr" ]
, Set.fromList $ domTypesMathML
[ "mi", "mo", "mn", "ms", "mtext", "annotation-xml" ]
, Set.fromList $ domTypesSVG
[ "foreignObject", "desc", "title" ]
]
elementTypesFormatting :: Set DOMType
elementTypesFormatting =
Set.fromList $ domTypesHTML
[ "a", "b", "big", "code", "em", "font", "i", "nobr",
"s", "small", "strike", "strong", "tt", "u"]
elementCategory :: DOMType -> ParserElementCategory
elementCategory x
| Set.member x elementTypesSpecial = ElementCategorySpecial
| Set.member x elementTypesFormatting = ElementCategoryFormatting
| otherwise = ElementCategoryOrdinary
elementIsSpecial :: DOMType -> Bool
elementIsSpecial x = elementCategory x == ElementCategorySpecial
elementInSpecificScope :: Parser s -> Bool -> Set DOMType -> DOMType -> ST s Bool
elementInSpecificScope p include types target =
f <$> elementStackTypes p
where
f :: [DOMType] -> Bool
f [] = False
f (x:xs)
| x == target = True
| include == True && Set.member x types == True = False
| include == False && Set.member x types == False = False
| otherwise = f xs
elementScopes :: Set DOMType
elementScopes = Set.unions
[ Set.fromList $ domTypesHTML
[ "applet", "caption", "html", "table", "td", "th"
, "marquee", "object", "template" ]
, Set.fromList $ domTypesMathML
[ "mi", "mo", "mn", "ms", "mtext", "annotation-xml" ]
, Set.fromList $ domTypesSVG
[ "foreignObject", "desc", "title" ]
]
elementInScope :: Parser s -> DOMType -> ST s Bool
elementInScope p = elementInSpecificScope p True elementScopes
elementInListScope :: Parser s -> DOMType -> ST s Bool
elementInListScope p =
elementInSpecificScope p True $
Set.union elementScopes $ Set.fromList $
domTypesHTML [ "ol", "ul" ]
elementInButtonScope :: Parser s -> DOMType -> ST s Bool
elementInButtonScope p =
elementInSpecificScope p True $
Set.union elementScopes $ Set.fromList [ domMakeTypeHTML "button" ]
elementInTableScope :: Parser s -> DOMType -> ST s Bool
elementInTableScope p =
elementInSpecificScope p True $
Set.fromList $ domTypesHTML [ "html", "table", "template" ]
elementInSelectScope :: Parser s -> DOMType -> ST s Bool
elementInSelectScope p =
elementInSpecificScope p False $
Set.fromList $ domTypesHTML [ "optgroup", "option" ]
newID :: Parser s -> DOMNode -> ST s DOMID
newID p x = do
(d, i) <- flip domNewID x <$> getDOM p
setDOM p d
pure i
getNode :: Parser s -> DOMID -> ST s (Maybe DOMNode)
getNode p @ Parser {..} x = flip domGetNode x <$> getDOM p
nodeElementName :: Parser s -> DOMID -> ST s BS
nodeElementName p @ Parser {..} x = do
d <- getDOM p
pure $ case domGetNode d x of
Just a -> domNodeElementName a
Nothing -> bsEmpty
lastNodeID :: Parser s -> ST s (Maybe DOMID)
lastNodeID p @ Parser {..} = listToMaybe . reverse <$> elementStack p
currentNodeID :: Parser s -> ST s (Maybe DOMID)
currentNodeID p @ Parser {..} = listToMaybe <$> elementStack p
currentNode :: Parser s -> ST s (Maybe DOMNode)
currentNode p = currentNodeID p >>= maybe (pure Nothing) (getNode p)
currentNodeHasType :: Parser s -> DOMType -> ST s Bool
currentNodeHasType p x =
currentNode p >>= pure . \case
Just a -> domNodeType a == x
Nothing -> False
currentNodeHasHTMLType :: Parser s -> BS -> ST s Bool
currentNodeHasHTMLType p = currentNodeHasType p . domMakeTypeHTML
currentNodeHasTypeIn :: Parser s -> [DOMType] -> ST s Bool
currentNodeHasTypeIn p x =
currentNode p >>= pure . \case
Just a -> domNodeType a `elem` x
Nothing -> False
currentNodeHasHTMLTypeIn :: Parser s -> [BS] -> ST s Bool
currentNodeHasHTMLTypeIn p = currentNodeHasTypeIn p . domTypesHTML
adjustedCurrentNodeID :: Parser s -> ST s (Maybe DOMID)
adjustedCurrentNodeID p @ Parser {..} = do
f <- rref parserFragmentMode
n <- elementStackSize p
if f && n == 1
then rref parserContextElement
else currentNodeID p
adjustedCurrentNode :: Parser s -> ST s (Maybe DOMNode)
adjustedCurrentNode p =
adjustedCurrentNodeID p >>= maybe (pure Nothing) (getNode p)
isMathMLElementNamed :: DOMNode -> BS -> Bool
isMathMLElementNamed x n = domNodeIsMathML x && domElementName x == n
isMathMLIntegrationPoint :: DOMNode -> Bool
isMathMLIntegrationPoint x
| domNodeIsElement x =
domNodeIsMathML x && Set.member (domElementName x) s
| otherwise =
False
where
s = Set.fromList [ "mi", "mo", "mn", "ms", "mtext" ]
isHtmlIntgrationPoint :: DOMNode -> Bool
isHtmlIntgrationPoint x
| domNodeIsElement x = s || m
| otherwise = False
where
s = domNodeIsSVG x
&& Set.member (domElementName x) s0
m = domNodeIsMathML x
&& domElementName x == "annotation-xml"
&& case domElementFindAttr x "encoding" of
Just (DOMAttr n v s) ->
Set.member (bsLower v) s1
_otherwise -> False
s0 = Set.fromList [ "foreignObject", "desc", "title" ]
s1 = Set.fromList [ "text/html", "application/xhtml+xml" ]
getDOM :: Parser s -> ST s DOM
getDOM Parser {..} = rref parserDOM
setDOM :: Parser s -> DOM -> ST s ()
setDOM Parser {..} = wref parserDOM
modifyDOM :: Parser s -> (DOM -> DOM) -> ST s ()
modifyDOM p @ Parser {..} = uref parserDOM
setMode :: Parser s -> ParserMode -> ST s ()
setMode Parser {..} = wref parserInsertionMode
saveMode :: Parser s -> ST s ()
saveMode Parser {..} = rref parserInsertionMode >>= wref parserOriginalMode
restoreMode :: Parser s -> ST s ()
restoreMode Parser {..} = do
rref parserOriginalMode >>= wref parserInsertionMode
wref parserOriginalMode ModeInitial
setHeadID :: Parser s -> Maybe DOMID -> ST s ()
setHeadID Parser {..} = wref parserHeadElement
getHeadID :: Parser s -> ST s (Maybe DOMID)
getHeadID Parser {..} = rref parserHeadElement
getHeadElement :: Parser s -> ST s (Maybe DOMNode)
getHeadElement p = getHeadID p >>= maybe (pure Nothing) (getNode p)
saveHead :: Parser s -> ST s ()
saveHead p = currentNodeID p >>= setHeadID p
setFormID :: Parser s -> Maybe DOMID -> ST s ()
setFormID Parser {..} = wref parserFormElement
getFormID :: Parser s -> ST s (Maybe DOMID)
getFormID Parser {..} = rref parserFormElement
getFormElement :: Parser s -> ST s (Maybe DOMNode)
getFormElement p = getFormID p >>= maybe (pure Nothing) (getNode p)
getFormType :: Parser s -> ST s (Maybe DOMType)
getFormType p @ Parser {..} =
getFormElement p >>= pure . maybe Nothing (Just . domNodeType)
saveForm :: Parser s -> ST s ()
saveForm p = currentNodeID p >>= setFormID p
formNotNull :: Parser s -> ST s Bool
formNotNull p = isJust <$> getFormID p
selfClosingInit :: Parser s -> Token -> ST s ()
selfClosingInit p @ Parser {..} t =
wref parserSelfClosingFlag $
case t of
TStart {..} -> tStartClosed
_otherwise -> False
selfClosingAcknowledge :: Parser s -> ST s ()
selfClosingAcknowledge Parser {..} = wref parserSelfClosingFlag False
selfClosingFlag :: Parser s -> ST s Bool
selfClosingFlag Parser {..} = rref parserSelfClosingFlag
fosterParenting :: Parser s -> ST s Bool
fosterParenting Parser {..} = rref parserFosterParenting
fosterParentingSet :: Parser s -> ST s ()
fosterParentingSet Parser {..} = wref parserFosterParenting True
fosterParentingClear :: Parser s -> ST s ()
fosterParentingClear Parser {..} = wref parserFosterParenting False
frameSetNotOK :: Parser s -> ST s ()
frameSetNotOK Parser {..} = wref parserFrameSetOK False
iframeSrcDoc :: Parser s -> ST s Bool
iframeSrcDoc Parser {..} = rref parserIFrameSrcDoc
parserSetDone :: Parser s -> ST s ()
parserSetDone Parser {..} = wref parserDone True
activeFormatList :: Parser s -> ST s [ParserFormatItem]
activeFormatList Parser {..} = rref parserActiveFormatList
activeFormatNames :: Parser s -> ST s [BS]
activeFormatNames p = do
d <- getDOM p
map (f d) <$> activeFormatList p
where f d ParserFormatMarker = "marker"
f d (ParserFormatElement i t) =
domElementName $ fromJust $ domGetNode d i
activeFormatAddMarker :: Parser s -> ST s ()
activeFormatAddMarker Parser {..} =
uref parserActiveFormatList (ParserFormatMarker:)
activeFormatAddElement :: Parser s -> Token -> DOMID -> ST s ()
activeFormatAddElement p @ Parser {..} t x = do
d <- getDOM p
a <- activeFormatList p
let match (ParserFormatElement y _) = domMatch d x y
b = takeWhile (not . formatItemIsMarker) a
n = (foldr (\i z -> z + if match i then 1 else 0) 0 b) :: Int
a' = if n < 3 then a else removeFirst match a
e' = ParserFormatElement x t : a'
wref parserActiveFormatList e'
activeFormatAddCurrentNode :: Parser s -> Token -> ST s ()
activeFormatAddCurrentNode p @ Parser {..} t =
whenJustM (currentNodeID p) $ activeFormatAddElement p t
activeFormatAny :: Parser s -> (DOMNode -> Bool) -> ST s Bool
activeFormatAny p @ Parser {..} f = do
d <- getDOM p
a <- activeFormatList p
pure $
( any f
. domMapID d
. map (\(ParserFormatElement x _) -> x)
. takeWhile (not . formatItemIsMarker)
) a
activeFormatContains :: Parser s -> DOMID -> ST s Bool
activeFormatContains p x = any (formatItemHasID x) <$> activeFormatList p
activeFormatFindTag :: Parser s -> BS -> ST s (Maybe ParserFormatItem)
activeFormatFindTag p @ Parser {..} x = do
d <- getDOM p
a <- activeFormatList p
pure $
( find (formatItemHasTag d x)
. takeWhile (not . formatItemIsMarker)
) a
activeFormatFindToken :: Parser s -> DOMID -> ST s (Maybe Token)
activeFormatFindToken p @ Parser {..} x =
activeFormatList p >>= f
where
f [] = pure Nothing
f ((ParserFormatMarker):xs) = f xs
f ((ParserFormatElement i t):xs)
| x == i = pure $ Just t
| otherwise = f xs
activeFormatReconstruct :: Parser s -> ST s ()
activeFormatReconstruct p = do
e <- elementStack p
a <- activeFormatList p
case a of
[] -> pure ()
(x:xs)
| isOpen e x -> pure ()
| otherwise -> do
let b = reverse . takeWhile (not . isOpen e) $ a
a' = drop (length b) a
reopen p b a'
isOpen :: [DOMID] -> ParserFormatItem -> Bool
isOpen x = \case
ParserFormatMarker -> True
ParserFormatElement i _ -> i `elem` x
reopen :: Parser s -> [ParserFormatItem] -> [ParserFormatItem] -> ST s ()
reopen p @ Parser {..} b a =
case b of
[] ->
wref parserActiveFormatList a
((ParserFormatMarker):xs) ->
reopen p xs a
((ParserFormatElement _ t):xs) -> do
insertHtmlElement p t
i <- fromJust <$> currentNodeID p
reopen p xs $ ParserFormatElement i t : a
activeFormatClear :: Parser s -> ST s ()
activeFormatClear p =
activeFormatModify p $ drop 1 . dropWhile (not . formatItemIsMarker)
activeFormatRemove :: Parser s -> DOMID -> ST s ()
activeFormatRemove p x =
activeFormatModify p $ filter $ not . formatItemHasID x
activeFormatReplace :: Parser s -> DOMID -> DOMID -> ST s ()
activeFormatReplace p x y =
activeFormatModify p $ map f
where
f z@(ParserFormatMarker) = z
f z@(ParserFormatElement i t)
| i == x = ParserFormatElement y t
| otherwise = z
activeFormatModify :: Parser s -> ([ParserFormatItem] -> [ParserFormatItem]) -> ST s ()
activeFormatModify Parser {..} = uref parserActiveFormatList
activeFormatSucc :: Parser s -> DOMID -> ST s (Maybe DOMID)
activeFormatSucc p x =
f <$> activeFormatList p
where
f a = case findSucc (formatItemHasID x) a of
Just (ParserFormatElement i _) -> Just i
_otherwise -> Nothing
activeFormatInsertElement :: Parser s -> DOMID -> Token -> Maybe DOMID -> ST s ()
activeFormatInsertElement p x t y =
case y of
Just a -> activeFormatModify p $ insertBefore (formatItemHasID a) e
Nothing -> activeFormatModify p (<>[e])
where
e = ParserFormatElement x t
formatItemIsMarker :: ParserFormatItem -> Bool
formatItemIsMarker ParserFormatMarker = True
formatItemIsMarker (ParserFormatElement _ _) = False
formatItemHasID :: DOMID -> ParserFormatItem -> Bool
formatItemHasID x ParserFormatMarker = False
formatItemHasID x (ParserFormatElement i _) = i == x
formatItemHasTag :: DOM -> BS -> ParserFormatItem -> Bool
formatItemHasTag d n ParserFormatMarker = False
formatItemHasTag d n (ParserFormatElement i _) =
case domGetNode d i of
Just x -> domNodeElementName x == n
Nothing -> False
templateModeCurrent :: Parser s -> ST s (Maybe ParserMode)
templateModeCurrent p @ Parser {..} = listToMaybe <$> rref parserTemplateMode
templateModePush :: Parser s -> ParserMode -> ST s ()
templateModePush p @ Parser {..} x = uref parserTemplateMode (x:)
templateModePop :: Parser s -> ST s ()
templateModePop p @ Parser {..} =
rref parserTemplateMode >>= \case
(x:xs) -> wref parserTemplateMode xs
[] -> parseError p Nothing "attempt to pop empty template mode stack"
templateModeCount :: Parser s -> ST s Int
templateModeCount p @ Parser {..} = length <$> rref parserTemplateMode
appropriateInsertionLocation :: Parser s -> Maybe DOMID -> ST s DOMPos
appropriateInsertionLocation p @ Parser {..} override = do
target <- case override of
Just a -> pure a
Nothing -> maybe domRoot id <$> currentNodeID p
getNode p target >>= \case
Nothing ->
pure $ DOMPos domRoot Nothing
Just n -> do
f <- fosterParenting p
adjusted <-
if f && domNodeElementName n `elem`
[ "table", "tbody", "tfoot", "thead", "tr" ]
then do
lastTemplate <- elementStackFind p $ \x ->
elementDetailsType x == domMakeTypeHTML "template"
lastTable <- elementStackFind p $ \x ->
elementDetailsType x == domMakeTypeHTML "table"
let Just (ElementDetails i1 x1 n1 _) = lastTemplate
Just (ElementDetails i2 x2 n2 _) = lastTable
if | isJust lastTemplate && (isNothing lastTable || (i1 < i2)) ->
pure $ DOMPos (domTemplateContents n1) Nothing
| isNothing lastTable -> do
j <- fromJust <$> lastNodeID p
pure $ DOMPos j Nothing
| domNodeParent n2 /= domNull ->
pure $ DOMPos (domNodeParent n2) $ Just x2
| otherwise -> do
prev <- fromJust <$> elementStackSucc p x2
pure $ DOMPos prev Nothing
else
pure $ DOMPos target Nothing
getNode p (domPosParent adjusted) >>= \case
Just DOMTemplate{..} ->
pure $ DOMPos domTemplateContents Nothing
_ ->
pure adjusted
insertionLocation :: Parser s -> ST s DOMPos
insertionLocation p = appropriateInsertionLocation p Nothing
createElementForToken :: Parser s -> Token -> HTMLNamespace -> ST s DOMID
createElementForToken p t s
| tStartName t == "template" = do
i <- newID p $ domDefaultFragment
j <- newID p $ domDefaultTemplate
{ domTemplateNamespace = s
, domTemplateContents = i
}
modifyDOM p $ domSetParent i j
pure j
| otherwise = do
i <- newID p $ domDefaultElement
{ domElementName = tStartName t
, domElementAttributes = Seq.fromList $ map f (tStartAttr t)
, domElementNamespace = s
}
pure i
where
f (TAttr n v s) = DOMAttr n v s
insertForeignElement :: Parser s -> HTMLNamespace -> Token -> ST s ()
insertForeignElement p n =
withStartToken $ \t -> do
i <- createElementForToken p t n
x <- insertionLocation p
modifyDOM p $ domInsert x i
elementStackPush p i
insertHtmlElement :: Parser s -> Token -> ST s ()
insertHtmlElement p = insertForeignElement p HTMLNamespaceHTML
insertMathMLElement :: Parser s -> Token -> ST s ()
insertMathMLElement p = insertForeignElement p HTMLNamespaceMathML
insertSvgElement :: Parser s -> Token -> ST s ()
insertSvgElement p = insertForeignElement p HTMLNamespaceSVG
insertHtmlElementNamed :: Parser s -> BS -> ST s ()
insertHtmlElementNamed p x = insertHtmlElement p $ TStart x False []
adjustAttrMathML :: Token -> Token
adjustAttrMathML t =
case t of
TStart {} -> t { tStartAttr = map f $ tStartAttr t }
_otherwise -> t
where
f (TAttr n v s) = TAttr (g n) v s
g x = if x == "definitionurl" then "definitionUrl" else x
adjustAttrSVG :: Token -> Token
adjustAttrSVG t =
t { tStartAttr = map f $ tStartAttr t }
where
f t@(TAttr n v s) =
case Map.lookup n svgAttributeMap of
Just n' -> TAttr n' v s
Nothing -> t
adjustAttrForeign :: Token -> Token
adjustAttrForeign t =
t { tStartAttr = map f $ tStartAttr t }
where
f t@(TAttr n v s) =
case Map.lookup n foreignAttributeMap of
Just (n', s') -> TAttr n' v s'
Nothing -> t
adjustElemSVG :: Token -> Token
adjustElemSVG t =
case Map.lookup (tStartName t) svgElementMap of
Just x -> t { tStartName = x }
Nothing -> t
svgAttributeMap :: Map BS BS
svgAttributeMap = Map.fromList
[ ("attributename", "attributeName")
, ("attributetype", "attributeType")
, ("basefrequency", "baseFrequency")
, ("baseprofile", "baseProfile")
, ("calcmode", "calcMode")
, ("clippathunits", "clipPathUnits")
, ("diffuseconstant", "diffuseConstant")
, ("edgemode", "edgeMode")
, ("filterunits", "filterUnits")
, ("glyphref", "glyphRef")
, ("gradienttransform", "gradientTransform")
, ("gradientunits", "gradientUnits")
, ("kernelmatrix", "kernelMatrix")
, ("kernelunitlength", "kernelUnitLength")
, ("keypoints", "keyPoints")
, ("keysplines", "keySplines")
, ("keytimes", "keyTimes")
, ("lengthadjust", "lengthAdjust")
, ("limitingconeangle", "limitingConeAngle")
, ("markerheight", "markerHeight")
, ("markerunits", "markerUnits")
, ("markerwidth", "markerWidth")
, ("maskcontentunits", "maskContentUnits")
, ("maskunits", "maskUnits")
, ("numoctaves", "numOctaves")
, ("pathlength", "pathLength")
, ("patterncontentunits", "patternContentUnits")
, ("patterntransform", "patternTransform")
, ("patternunits", "patternUnits")
, ("pointsatx", "pointsAtX")
, ("pointsaty", "pointsAtY")
, ("pointsatz", "pointsAtZ")
, ("preservealpha", "preserveAlpha")
, ("preserveaspectratio", "preserveAspectRatio")
, ("primitiveunits", "primitiveUnits")
, ("refx", "refX")
, ("refy", "refY")
, ("repeatcount", "repeatCount")
, ("repeatdur", "repeatDur")
, ("requiredextensions", "requiredExtensions")
, ("requiredfeatures", "requiredFeatures")
, ("specularconstant", "specularConstant")
, ("specularexponent", "specularExponent")
, ("spreadmethod", "spreadMethod")
, ("startoffset", "startOffset")
, ("stddeviation", "stdDeviation")
, ("stitchtiles", "stitchTiles")
, ("surfacescale", "surfaceScale")
, ("systemlanguage", "systemLanguage")
, ("tablevalues", "tableValues")
, ("targetx", "targetX")
, ("targety", "targetY")
, ("textlength", "textLength")
, ("viewbox", "viewBox")
, ("viewtarget", "viewTarget")
, ("xchannelselector", "xChannelSelector")
, ("ychannelselector", "yChannelSelector")
, ("zoomandpan", "zoomAndPan")
]
svgElementMap :: Map BS BS
svgElementMap = Map.fromList
[ ("altglyph", "altGlyph")
, ("altglyphdef", "altGlyphDef")
, ("altglyphitem", "altGlyphItem")
, ("animatecolor", "animateColor")
, ("animatemotion", "animateMotion")
, ("animatetransform", "animateTransform")
, ("clippath", "clipPath")
, ("feblend", "feBlend")
, ("fecolormatrix", "feColorMatrix")
, ("fecomponenttransfer", "feComponentTransfer")
, ("fecomposite", "feComposite")
, ("feconvolvematrix", "feConvolveMatrix")
, ("fediffuselighting", "feDiffuseLighting")
, ("fedisplacementmap", "feDisplacementMap")
, ("fedistantlight", "feDistantLight")
, ("fedropshadow", "feDropShadow")
, ("feflood", "feFlood")
, ("fefunca", "feFuncA")
, ("fefuncb", "feFuncB")
, ("fefuncg", "feFuncG")
, ("fefuncr", "feFuncR")
, ("fegaussianblur", "feGaussianBlur")
, ("feimage", "feImage")
, ("femerge", "feMerge")
, ("femergenode", "feMergeNode")
, ("femorphology", "feMorphology")
, ("feoffset", "feOffset")
, ("fepointlight", "fePointLight")
, ("fespecularlighting", "feSpecularLighting")
, ("fespotlight", "feSpotLight")
, ("fetile", "feTile")
, ("feturbulence", "feTurbulence")
, ("foreignobject", "foreignObject")
, ("glyphref", "glyphRef")
, ("lineargradient", "linearGradient")
, ("radialgradient", "radialGradient")
, ("textpath", "textPath")
]
foreignAttributeMap :: Map BS (BS, HTMLAttrNamespace)
foreignAttributeMap = Map.fromList
[ ("xlink:actuate", ("actuate", HTMLAttrNamespaceXLink))
, ("xlink:arcrole", ("arcrole", HTMLAttrNamespaceXLink))
, ("xlink:href", ("href", HTMLAttrNamespaceXLink))
, ("xlink:role", ("role", HTMLAttrNamespaceXLink))
, ("xlink:show", ("show", HTMLAttrNamespaceXLink))
, ("xlink:title", ("title", HTMLAttrNamespaceXLink))
, ("xlink:type", ("type", HTMLAttrNamespaceXLink))
, ("xml:lang", ("lang", HTMLAttrNamespaceXML))
, ("xml:space", ("space", HTMLAttrNamespaceXML))
, ("xmlns", ("xmlns", HTMLAttrNamespaceXMLNS))
, ("xmlns:xlink", ("xlink", HTMLAttrNamespaceXMLNS))
]
insertNode :: Parser s -> DOMPos -> DOMID -> ST s ()
insertNode p @ Parser {..} i x = modifyDOM p $ domInsert i x
insertNewNode :: Parser s -> DOMPos -> DOMNode -> ST s DOMID
insertNewNode p @ Parser {..} i x = do
d <- getDOM p
let (d', j) = domInsertNew i x d
setDOM p d'
pure j
insertDocumentNode :: Parser s -> DOMID -> ST s ()
insertDocumentNode p @ Parser {..} = insertNode p domRootPos
insertNewDocumentNode :: Parser s -> DOMNode -> ST s ()
insertNewDocumentNode p @ Parser {..} = void . insertNewNode p domRootPos
commentMake :: Parser s -> Token -> ST s DOMNode
commentMake p @ Parser {..} t =
pure $ domDefaultComment { domCommentData = tCommentData t }
doctypeMake :: Parser s -> Token -> ST s DOMNode
doctypeMake p @ Parser {..} TDoctype {..} =
pure $ domDefaultDoctype
{ domDoctypeName = tDoctypeName
, domDoctypePublicID = tDoctypePublic
, domDoctypeSystemID = tDoctypeSystem
}
insertComment :: Parser s -> Token -> ST s ()
insertComment p @ Parser {..} t =
insertionLocation p >>= \x ->
commentMake p t >>= void . insertNewNode p x
insertDocComment :: Parser s -> Token -> ST s ()
insertDocComment p @ Parser {..} t =
commentMake p t >>= void . insertNewNode p domRootPos
insertChar :: Parser s -> Token -> ST s ()
insertChar p @ Parser {..} =
withCharToken $ \w -> do
pos <- insertionLocation p
let i = domPosParent pos
when (i /= domRoot) $ do
d <- getDOM p
case domLastChild d i of
Nothing -> do
j <- insertNewNode p pos domDefaultText
textMapAppend p j w
Just x ->
case domGetNode d x of
Just n@DOMText{..} ->
textMapAppend p domTextID w
Just n -> do
j <- insertNewNode p pos domDefaultText
textMapAppend p j w
Nothing ->
parseError p Nothing $ "insert char bad id: " <> bcPack (show x)
textMapAppend :: Parser s -> DOMID -> Word8 -> ST s ()
textMapAppend Parser {..} i w = do
m <- rref parserTextMap
case IntMap.lookup i m of
Just b ->
bufferAppend w b
Nothing -> do
b <- bufferNew
bufferAppend w b
wref parserTextMap $ IntMap.insert i b m
textMapLookup :: Parser s -> DOMID -> ST s BS
textMapLookup Parser {..} i = do
m <- rref parserTextMap
case IntMap.lookup i m of
Just b -> bufferPack b
Nothing -> pure bsEmpty
textMapDOM :: Parser s -> ST s DOM
textMapDOM p @ Parser {..} = do
DOM{..} <- getDOM p
m <- rref parserTextMap >>= mapM bufferPack
let f x = IntMap.findWithDefault bsEmpty x m
a = flip IntMap.mapWithKey domNodes $ \i n ->
case n of
DOMText{} -> n { domTextData = f i }
_otherwise -> n
pure $ DOM a domNextID
withStartToken :: (Token -> ST s ()) -> Token -> ST s ()
withStartToken f = \case
t@TStart {} -> f t
_otherwise -> pure ()
withCharToken :: (Word8 -> ST s ()) -> Token -> ST s ()
withCharToken f = \case
TChar w -> f w
_otherwise -> pure ()
parserLexerUpdate :: Parser s -> (Lexer s -> ST s ()) -> ST s ()
parserLexerUpdate Parser {..} f = rref parserLexer >>= f
parserSkipNextLF :: Parser s -> ST s ()
parserSkipNextLF p = parserLexerUpdate p lexerSkipNextLF
parserSetRCDATA :: Parser s -> ST s ()
parserSetRCDATA p = parserLexerUpdate p lexerSetRCDATA
parserSetRAWTEXT :: Parser s -> ST s ()
parserSetRAWTEXT p = parserLexerUpdate p lexerSetRAWTEXT
parserSetPLAINTEXT :: Parser s -> ST s ()
parserSetPLAINTEXT p = parserLexerUpdate p lexerSetPLAINTEXT
parserSetScriptData :: Parser s -> ST s ()
parserSetScriptData p = parserLexerUpdate p lexerSetScriptData
insertElementRCDATA :: Parser s -> Token -> ST s ()
insertElementRCDATA p t = do
insertHtmlElement p t
parserSetRCDATA p
saveMode p
setMode p ModeText
insertElementRAWTEXT :: Parser s -> Token -> ST s ()
insertElementRAWTEXT p t = do
insertHtmlElement p t
parserSetRAWTEXT p
saveMode p
setMode p ModeText
generateImpliedEndTags :: Parser s -> ST s ()
generateImpliedEndTags p = generateImpliedEndTagsExcept p bsEmpty
generateImpliedEndTagsExcept :: Parser s -> BS -> ST s ()
generateImpliedEndTagsExcept p x =
elementStackPopWhile p $ elementNameIn $
filter (/=x) [ "dd", "dt", "li", "menuitem", "optgroup",
"option", "p", "rb", "rp", "rt", "rtc" ]
generateImpliedEndTagsThoroughly :: Parser s -> ST s ()
generateImpliedEndTagsThoroughly p =
elementStackPopWhile p $ elementNameIn
[ "caption", "colgroup", "dd", "dt", "li", "optgroup",
"option", "p", "rb", "rp", "rt", "rtc",
"tbody", "td", "tfoot", "th", "thead", "tr" ]
resetInsertionMode :: Parser s -> ST s ()
resetInsertionMode p @ Parser {..} =
elementStackNodes p >>= f
where
f [] = pure ()
f (x:xs) = do
x' <- node
case (domNodeElementName x', lastNode) of
("select", _) -> g (x':xs)
("td", False) -> setMode p ModeInCell
("th", False) -> setMode p ModeInCell
("tr", _) -> setMode p ModeInRow
("tbody", _) -> setMode p ModeInTableBody
("thead", _) -> setMode p ModeInTableBody
("tfoot", _) -> setMode p ModeInTableBody
("caption", _) -> setMode p ModeInCaption
("colgroup", _) -> setMode p ModeInColumnGroup
("table", _) -> setMode p ModeInTable
("head", False) -> setMode p ModeInHead
("body", _) -> setMode p ModeInBody
("frameset", _) -> setMode p ModeInFrameset
("template", _) -> templateModeCurrent p >>= \case
Just m -> setMode p m
Nothing -> pure ()
("html", _) -> getHeadID p >>= \case
Nothing -> setMode p ModeBeforeHead
Just _ -> setMode p ModeAfterHead
(_, True) -> setMode p ModeInBody
(_, False) -> f xs
where
lastNode = length xs == 0
node = do
a <- rref parserFragmentMode
c <- rref parserContextElement
n <- getNode p $ fromJust c
pure $
if lastNode && a && isJust c
then fromJust $ n
else x
g (x:[]) =
setMode p ModeInSelect
g (x:y:ys) =
case domNodeElementName y of
"template" -> setMode p ModeInSelect
"table" -> setMode p ModeInSelectInTable
_otherwise -> g (y:ys)
closeElementP :: Parser s -> ST s ()
closeElementP p = do
let t = domMakeTypeHTML "p"
generateImpliedEndTagsExcept p "p"
unlessM (currentNodeHasType p t) $
parseError p Nothing "current node not p when closing p element"
elementStackPopUntilType p t
data ParserAdoptionAgency s = ParserAdoptionAgency
{ aaSubject :: BS
, aaOuterLoopCount :: Int
, aaInnerLoopCount :: Int
, aaNode :: DOMID
, aaLastNode :: DOMID
, aaNextNode :: DOMID
, aaFormattingElement :: DOMID
, aaCommonAncestor :: DOMID
, aaFurthestBlock :: DOMID
, aaBookmark :: (Maybe DOMID)
, aaAnyOtherEndTag :: ST s ()
}
defaultAA :: ST s (ParserAdoptionAgency s)
defaultAA =
pure $ ParserAdoptionAgency
{ aaSubject = bsEmpty
, aaOuterLoopCount = 0
, aaInnerLoopCount = 0
, aaNode = domNull
, aaLastNode = domNull
, aaNextNode = domNull
, aaFormattingElement = domNull
, aaCommonAncestor = domNull
, aaFurthestBlock = domNull
, aaBookmark = Nothing
, aaAnyOtherEndTag = pure ()
}
modifyAA :: Parser s -> (ParserAdoptionAgency s -> ParserAdoptionAgency s) -> ST s ()
modifyAA Parser {..} = uref parserAdoptionAgency
getAA :: Parser s -> ST s (ParserAdoptionAgency s)
getAA Parser {..} = rref parserAdoptionAgency
getsAA :: Parser s -> (ParserAdoptionAgency s -> a) -> ST s a
getsAA p f = f <$> getAA p
adoptionAgencyRun :: Parser s -> BS -> ST s () -> ST s ()
adoptionAgencyRun p @ Parser {..} subject anyOther = do
a <- currentNodeHasType p $ domMakeTypeHTML subject
b <- currentNodeID p >>= \case
Just i -> notM $ activeFormatContains p i
Nothing -> pure True
unless (a && b) $ do
aa <- defaultAA
modifyAA p $ const aa
{ aaSubject = subject
, aaAnyOtherEndTag = anyOther
}
adoptionAgencyOuterLoop p
adoptionAgencyOuterLoop :: Parser s -> ST s ()
adoptionAgencyOuterLoop p = do
i <- getsAA p aaOuterLoopCount
when (i < 8) $ do
modifyAA p $ \a -> a { aaOuterLoopCount = aaOuterLoopCount a + 1 }
liftA aaSubject (getAA p) >>= activeFormatFindTag p >>= \case
Nothing -> do
doAnyOtherEndTag <- getsAA p aaAnyOtherEndTag
doAnyOtherEndTag
Just (ParserFormatElement fe t) -> do
modifyAA p $ \a -> a { aaFormattingElement = fe }
x <- fromJust <$> getNode p fe
let name = domElementName x
(elementStackAny p ((==) fe . domNodeID)) >>= \case
False -> do
parseError p Nothing $
"element stack missing " <> name
<> "(ID:" <> bcPack (show fe) <> ") during adoption"
activeFormatRemove p fe
True ->
(elementInScope p $ domNodeType x) >>= \case
False ->
parseError p Nothing $
"element " <> name <> " not in scope during adoption"
True -> do
unlessM (maybe False (==fe) <$> currentNodeID p) $
parseError p Nothing $ "element " <> name
<> " is not the current ID during adoption"
d <- getDOM p
f <- pure $ find $ elementIsSpecial
. domNodeType . fromJust . domGetNode d
liftA (f . reverse . takeWhile (/=fe)) (elementStack p) >>= \case
Nothing -> do
elementStackPopUntilID p fe
activeFormatRemove p fe
Just fb -> do
ca <- fromJust <$> elementStackSucc p fe
bm <- activeFormatSucc p fe
modifyAA p $ \a -> a
{ aaNode = fb
, aaLastNode = fb
, aaCommonAncestor = ca
, aaFurthestBlock = fb
, aaBookmark = bm
}
adoptionAgencyInnerLoop p
adoptionAgencyInnerLoop :: Parser s -> ST s ()
adoptionAgencyInnerLoop p = do
modifyAA p $ \a -> a { aaInnerLoopCount = aaInnerLoopCount a + 1 }
n <- getsAA p aaNode
m <- getsAA p aaNextNode
node <- maybe m id <$> elementStackSucc p n
f <- getsAA p aaFormattingElement
if node == f
then adoptionAgencyPostLoop p
else do
ic <- getsAA p aaInnerLoopCount
ac <- activeFormatContains p node
when (ic > 3 && ac) $ activeFormatRemove p node
unlessM (activeFormatContains p node) $ do
m <- fromJust <$> elementStackSucc p node
modifyAA p $ \a -> a { aaNextNode = m }
elementStackRemove p node
adoptionAgencyInnerLoop p
t <- fromJust <$> activeFormatFindToken p node
e <- createElementForToken p t HTMLNamespaceHTML
c <- getsAA p aaCommonAncestor
modifyDOM p $ domAppend c e
activeFormatReplace p node e
elementStackReplace p node e
modifyAA p $ \a -> a { aaNode = e }
x <- getsAA p aaLastNode
b <- getsAA p aaFurthestBlock
when (x == b) $ do
bm <- activeFormatSucc p e
modifyAA p $ \a -> a { aaBookmark = bm }
modifyDOM p $ domMove x e
modifyAA p $ \a -> a { aaLastNode = e }
adoptionAgencyInnerLoop p
adoptionAgencyPostLoop :: Parser s -> ST s ()
adoptionAgencyPostLoop p = do
c <- getsAA p aaCommonAncestor
n <- getsAA p aaLastNode
i <- appropriateInsertionLocation p $ Just c
modifyDOM p $ domMove n $ domPosParent i
f <- getsAA p aaFormattingElement
t <- fromJust <$> activeFormatFindToken p f
e <- createElementForToken p t HTMLNamespaceHTML
b <- getsAA p aaFurthestBlock
modifyDOM p $ domMoveChildren b e
modifyDOM p $ domAppend b e
activeFormatRemove p f
getsAA p aaBookmark >>= activeFormatInsertElement p e t
elementStackRemove p f
elementStackInsertBefore p b e
adoptionAgencyOuterLoop p
pendingTableCharInit :: Parser s -> ST s ()
pendingTableCharInit Parser {..} = wref parserTableChars []
pendingTableCharAppend :: Parser s -> Token -> ST s ()
pendingTableCharAppend Parser {..} t = uref parserTableChars (<>[t])
pendingTableChars :: Parser s -> ST s [Token]
pendingTableChars Parser {..} = rref parserTableChars
doctypeTokenCheck :: Parser s -> Token -> ST s ()
doctypeTokenCheck parser@Parser {..} t@(TDoctype n q p s) =
when (n /= "html"
|| p /= Nothing
|| s /= Nothing && s /= Just "about:legacy-compat") $
parseError parser (Just t) "doctype error"
tokenQuirks :: Token -> Bool
tokenQuirks (TDoctype n True p s) = True
tokenQuirks (TDoctype n False p s) =
or
[ n /= "html"
, idMatch p "-//W3O//DTD W3 HTML Strict 3.0//EN//"
, idMatch p "-/W3C/DTD HTML 4.0 Transitional/EN"
, idMatch p "HTML"
, idMatch s "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"
, anyPrefix p publicIdPrefix
, and
[ s == Nothing
, anyPrefix p
[ "-//W3C//DTD HTML 4.01 Frameset//"
, "-//W3C//DTD HTML 4.01 Transitional//"
]
]
]
publicIdPrefix :: [BS]
publicIdPrefix =
[ "+//Silmaril//dtd html Pro v0r11 19970101//"
, "-//AS//DTD HTML 3.0 asWedit + extensions//"
, "-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//"
, "-//IETF//DTD HTML 2.0 Level 1//"
, "-//IETF//DTD HTML 2.0 Level 2//"
, "-//IETF//DTD HTML 2.0 Strict Level 1//"
, "-//IETF//DTD HTML 2.0 Strict Level 2//"
, "-//IETF//DTD HTML 2.0 Strict//"
, "-//IETF//DTD HTML 2.0//"
, "-//IETF//DTD HTML 2.1E//"
, "-//IETF//DTD HTML 3.0//"
, "-//IETF//DTD HTML 3.2 Final//"
, "-//IETF//DTD HTML 3.2//"
, "-//IETF//DTD HTML 3//"
, "-//IETF//DTD HTML Level 0//"
, "-//IETF//DTD HTML Level 1//"
, "-//IETF//DTD HTML Level 2//"
, "-//IETF//DTD HTML Level 3//"
, "-//IETF//DTD HTML Strict Level 0//"
, "-//IETF//DTD HTML Strict Level 1//"
, "-//IETF//DTD HTML Strict Level 2//"
, "-//IETF//DTD HTML Strict Level 3//"
, "-//IETF//DTD HTML Strict//"
, "-//IETF//DTD HTML//"
, "-//Metrius//DTD Metrius Presentational//"
, "-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//"
, "-//Microsoft//DTD Internet Explorer 2.0 HTML//"
, "-//Microsoft//DTD Internet Explorer 2.0 Tables//"
, "-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//"
, "-//Microsoft//DTD Internet Explorer 3.0 HTML//"
, "-//Microsoft//DTD Internet Explorer 3.0 Tables//"
, "-//Netscape Comm. Corp.//DTD HTML//"
, "-//Netscape Comm. Corp.//DTD Strict HTML//"
, "-//O'Reilly and Associates//DTD HTML 2.0//"
, "-//O'Reilly and Associates//DTD HTML Extended 1.0//"
, "-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//"
, "-//SQ//DTD HTML 2.0 HoTMetaL + extensions//"
, "-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::extensions to HTML 4.0//"
, "-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::extensions to HTML 4.0//"
, "-//Spyglass//DTD HTML 2.0 Extended//"
, "-//Sun Microsystems Corp.//DTD HotJava HTML//"
, "-//Sun Microsystems Corp.//DTD HotJava Strict HTML//"
, "-//W3C//DTD HTML 3 1995-03-24//"
, "-//W3C//DTD HTML 3.2 Draft//"
, "-//W3C//DTD HTML 3.2 Final//"
, "-//W3C//DTD HTML 3.2//"
, "-//W3C//DTD HTML 3.2S Draft//"
, "-//W3C//DTD HTML 4.0 Frameset//"
, "-//W3C//DTD HTML 4.0 Transitional//"
, "-//W3C//DTD HTML Experimental 19960712//"
, "-//W3C//DTD HTML Experimental 970421//"
, "-//W3C//DTD W3 HTML//"
, "-//W3O//DTD W3 HTML 3.0//"
, "-//WebTechs//DTD Mozilla HTML 2.0//"
, "-//WebTechs//DTD Mozilla HTML//"
]
tokenLimitedQuirks :: Token -> Bool
tokenLimitedQuirks TDoctype {..} =
or
[ anyPrefix tDoctypePublic
[ "-//W3C//DTD XHTML 1.0 Frameset//"
, "-//W3C//DTD XHTML 1.0 Transitional//"
]
, and
[ isJust tDoctypeSystem
, anyPrefix tDoctypePublic
[ "-//W3C//DTD HTML 4.01 Frameset//"
, "-//W3C//DTD HTML 4.01 Transitional//"
]
]
]
idMatch :: Maybe BS -> BS -> Bool
idMatch (Just x) y = bsLower x == bsLower y
idMatch Nothing y = False
anyPrefix :: Maybe BS -> [BS] -> Bool
anyPrefix (Just x) ys = any (\y -> y `bsPrefixCI` x) ys
anyPrefix Nothing ys = False
doModeInitial :: Parser s -> Token -> ST s ()
doModeInitial p @ Parser {..} t =
case t of
TChar {..} | chrWhitespace tCharData -> do
pure ()
TComment {} -> do
insertDocComment p t
TDoctype {} -> do
doctypeTokenCheck p t
doctypeMake p t >>= insertNewDocumentNode p
iframe <- iframeSrcDoc p
when (not iframe && tokenQuirks t) $ do
modifyDOM p $ domQuirksSet DOMQuirksMode
when (not iframe && tokenLimitedQuirks t) $ do
modifyDOM p $ domQuirksSet DOMQuirksLimited
setMode p ModeBeforeHtml
_otherwise -> do
whenM (notM $ iframeSrcDoc p) $ do
parseError p (Just t) "initial unexpected token"
modifyDOM p $ domQuirksSet DOMQuirksMode
setMode p ModeBeforeHtml
reprocess p t
doModeBeforeHtml :: Parser s -> Token -> ST s ()
doModeBeforeHtml p @ Parser {..} t =
case t of
TDoctype {} ->
parseError p (Just t) "before html doctype"
TComment {} ->
insertDocComment p t
TChar {..} | chrWhitespace tCharData ->
pure ()
TStart { tStartName = "html" } -> do
insertHtmlElement p t
setMode p ModeBeforeHead
TEnd { tEndName = x }
| elem x [ "head", "body", "html", "br" ] -> do
insertHtmlElementNamed p "html"
setMode p ModeBeforeHead
reprocess p t
TEnd {} ->
parseError p (Just t) "before html end tag"
_otherwise -> do
insertHtmlElementNamed p "html"
setMode p ModeBeforeHead
reprocess p t
doModeBeforeHead :: Parser s -> Token -> ST s ()
doModeBeforeHead p @ Parser {..} t =
case t of
TChar {..} | chrWhitespace tCharData ->
pure ()
TComment {} ->
insertComment p t
TDoctype {} ->
parseError p (Just t) "before head doctype"
TStart { tStartName = "html" } ->
doModeInBody p t
TStart { tStartName = "head" } -> do
insertHtmlElement p t
saveHead p
setMode p ModeInHead
TEnd { tEndName = x }
| elem x [ "head", "body", "html", "br" ] -> do
insertHtmlElementNamed p "head"
saveHead p
setMode p ModeInHead
reprocess p t
TEnd {} ->
parseError p (Just t) "before head end tag"
_otherwise -> do
insertHtmlElementNamed p "head"
saveHead p
setMode p ModeInHead
reprocess p t
doModeInHead :: Parser s -> Token -> ST s ()
doModeInHead p @ Parser {..} t =
case t of
TChar {..} | chrWhitespace tCharData ->
insertChar p t
TComment {} ->
insertComment p t
TDoctype {} ->
warn "doctype"
TStart { tStartName = "html" } ->
doModeInBody p t
TStart { tStartName = x }
| elem x [ "base", "basefont", "bgsound", "link" ] -> do
insertHtmlElement p t
elementStackPop p
selfClosingAcknowledge p
TStart { tStartName = "meta" } -> do
insertHtmlElement p t
elementStackPop p
selfClosingAcknowledge p
TStart { tStartName = "title" } ->
insertElementRCDATA p t
TStart { tStartName = "noframes" } ->
insertElementRAWTEXT p t
TStart { tStartName = "style" } ->
insertElementRAWTEXT p t
TStart { tStartName = "noscript" } -> do
insertHtmlElement p t
setMode p ModeInHeadNoscript
TStart { tStartName = "script" } -> do
insertHtmlElement p t
parserSetScriptData p
saveMode p
setMode p ModeText
TEnd { tEndName = "head" } -> do
elementStackPop p
setMode p ModeAfterHead
TEnd { tEndName = x }
| elem x [ "body", "html", "br" ] -> do
elementStackPop p
setMode p ModeAfterHead
reprocess p t
TStart { tStartName = "template" } -> do
insertHtmlElement p t
activeFormatAddMarker p
frameSetNotOK p
setMode p ModeInTemplate
templateModePush p ModeInTemplate
TEnd { tEndName = x@"template" } -> do
let a = domMakeTypeHTML x
elementStackHasTemplate p >>= \case
False ->
warn "template start tag missing"
True -> do
generateImpliedEndTagsThoroughly p
unlessM (currentNodeHasType p a) $
parseError p Nothing "template not current node"
elementStackPopUntilType p a
activeFormatClear p
templateModePop p
resetInsertionMode p
TStart { tStartName = "head" } ->
warn "head"
TEnd {} ->
warn "unexpected end tag"
_otherwise -> do
elementStackPop p
setMode p ModeAfterHead
reprocess p t
where
warn x = parseError p (Just t) $ "in head " <> x
doModeInHeadNoscript :: Parser s -> Token -> ST s ()
doModeInHeadNoscript p @ Parser {..} t =
case t of
TDoctype {} ->
warn "doctype"
TStart { tStartName = "html" } ->
doModeInBody p t
TEnd { tEndName = "noscript" } -> do
elementStackPop p
setMode p ModeInHead
TChar {..} | chrWhitespace tCharData ->
doModeInHead p t
TComment {} ->
doModeInHead p t
TStart { tStartName = x }
| elem x [ "basefont", "bgsound", "link",
"meta", "noframes", "style" ] ->
doModeInHead p t
TEnd { tEndName = "br" } -> do
warn "br"
elementStackPop p
setMode p ModeInHead
reprocess p t
TStart { tStartName = "head" } ->
warn "head"
TStart { tStartName = "noscript" } ->
warn "noscript"
TEnd {} ->
warn "end tag"
_otherwise -> do
warn "bad token"
elementStackPop p
setMode p ModeInHead
reprocess p t
where
warn x = parseError p (Just t) $ "in head noscript " <> x
doModeAfterHead :: Parser s -> Token -> ST s ()
doModeAfterHead p @ Parser {..} t =
case t of
TChar {..} | chrWhitespace tCharData ->
insertChar p t
TComment {} ->
insertComment p t
TDoctype {} ->
parseError p (Just t) "after head doctype"
TStart { tStartName = "html" } ->
doModeInBody p t
TStart { tStartName = "body" } -> do
insertHtmlElement p t
frameSetNotOK p
setMode p ModeInBody
TStart { tStartName = "frameset" } -> do
insertHtmlElement p t
setMode p ModeInFrameset
TStart { tStartName = x }
| elem x [ "base", "basefont", "bgsound", "link", "meta", "noframes",
"script", "style", "template", "title", "head" ] -> do
parseError p (Just t) "AfterHead bad start tag"
Just h <- getHeadID p
elementStackPush p h
doModeInHead p t
elementStackRemove p h
TEnd { tEndName = "template" } ->
doModeInHead p t
TEnd { tEndName = x }
| elem x [ "body", "html", "br" ] -> do
insertHtmlElementNamed p "body"
setMode p ModeInBody
reprocess p t
TStart { tStartName = "head" } ->
parseError p (Just t) "after head head"
TEnd {} ->
parseError p (Just t) "after head end tag"
_otherwise -> do
insertHtmlElementNamed p "body"
setMode p ModeInBody
reprocess p t
doModeInBody :: Parser s -> Token -> ST s ()
doModeInBody p @ Parser {..} t =
case t of
TChar {..} | chrWhitespace tCharData -> do
activeFormatReconstruct p
insertChar p t
TChar {} -> do
activeFormatReconstruct p
insertChar p t
frameSetNotOK p
TComment {} ->
insertComment p t
TDoctype {} ->
warn "doctype"
TStart { tStartName = x@"html" } -> do
warn x
unlessM (elementStackHasTemplate p) $ do
Just i <- lastNodeID p
modifyDOM p $ domAttrMerge i $ Seq.fromList $
map (\(TAttr n v s) -> DOMAttr n v s) $ tStartAttr t
TStart { tStartName = x }
| elem x ["base", "basefont", "bgsound", "link", "meta",
"noframes", "script", "style", "template", "title"] ->
doModeInHead p t
TEnd { tEndName = "template" } ->
doModeInHead p t
TStart { tStartName = x@"body" } -> do
warn x
unlessM (notM (elementStackHasBody p)
||^ liftA (==1) (elementStackSize p)
||^ (elementStackHasTemplate p)) $ do
frameSetNotOK p
Just i <- listToMaybe . drop 1 . reverse <$> elementStack p
modifyDOM p $ domAttrMerge i $ Seq.fromList $
map (\(TAttr n v s) -> DOMAttr n v s) $ tStartAttr t
TStart { tStartName = x@"frameset" } -> do
warn x
unlessM (liftA (==1) (elementStackSize p)
||^ notM (elementStackHasBody p)
||^ notM (rref parserFrameSetOK)) $ do
Just n <- listToMaybe . drop 1 . reverse <$> elementStackNodes p
modifyDOM p $ domRemoveChild (domNodeParent n) $ domNodeID n
elementStackPopWhile p $ \n ->
domNodeType n /= domMakeTypeHTML "html"
insertHtmlElement p t
setMode p ModeInFrameset
TEOF -> do
n <- templateModeCount p
if n > 0
then doModeInTemplate p t
else do
whenM (elementStackAny p $ elementNameNotIn
["dd", "dt", "li", "menuitem", "optgroup",
"option", "p", "rb", "rp", "rt", "rtc",
"tbody", "td", "tfoot", "th", "thead", "tr",
"body", "html"]) $
warn "bad element on stack"
parserSetDone p
TEnd { tEndName = x@"body" } -> do
let a = domMakeTypeHTML x
elementInScope p a >>= \case
False ->
warn "no body element in scope"
True -> do
whenM (elementStackAny p $ elementNameNotIn
["dd", "dt", "li", "menuitem", "optgroup",
"option", "p", "rb", "rp", "rt", "rtc",
"tbody", "td", "tfoot", "th", "thead", "tr",
"body", "html"]) $
warn "bad element on stack"
setMode p ModeAfterBody
TEnd { tEndName = x@"html" } -> do
let a = domMakeTypeHTML x
elementInScope p a >>= \case
False ->
warn "no body element in scope"
True -> do
whenM (elementStackAny p $ elementNameNotIn
["dd", "dt", "li", "menuitem", "optgroup",
"option", "p", "rb", "rp", "rt", "rtc",
"tbody", "td", "tfoot", "th", "thead", "tr",
"body", "html"]) $
warn "bad element on stack"
setMode p ModeAfterBody
reprocess p t
TStart { tStartName = x } | elem x
["address", "article", "aside", "blockquote",
"center", "details", "dialog", "dir", "div", "dl",
"fieldset", "figcaption", "figure", "footer", "header",
"hgroup", "main", "nav", "ol", "p", "section",
"summary", "ul"] -> do
closeP
insertHtmlElement p t
TStart { tStartName = "menu" } -> do
closeP
popMenuitem
insertHtmlElement p t
TStart { tStartName = x }
| elem x ["h1", "h2", "h3", "h4", "h5", "h6"] -> do
closeP
whenM (currentNodeHasTypeIn p $ domTypesHTML
["h1", "h2", "h3", "h4", "h5", "h6"]) $ do
warn "bad header tag on stack"
elementStackPop p
insertHtmlElement p t
TStart { tStartName = x } | elem x ["pre", "listing"] -> do
closeP
insertHtmlElement p t
parserSkipNextLF p
frameSetNotOK p
TStart { tStartName = "form" } -> do
(formNotNull p &&^ elementStackMissingTemplate p) >>= \case
True ->
warn "form without template"
False -> do
closeP
insertHtmlElement p t
whenM (elementStackMissingTemplate p) $ saveForm p
TStart { tStartName = x@"li" } -> do
let a = domMakeTypeHTML x
s = Set.fromList $ domTypesHTML [ "address", "div", "p" ]
f [] = pure ()
f (y:ys)
| y == a = do
generateImpliedEndTagsExcept p x
unlessM (currentNodeHasType p a) $
warn "current node is not li"
elementStackPopUntilType p a
| elementIsSpecial y && Set.notMember y s = pure ()
| otherwise = f ys
frameSetNotOK p
elementStackTypes p >>= f
closeP
insertHtmlElement p t
TStart { tStartName = x } | elem x ["dd", "dt"] -> do
let dd = domMakeTypeHTML "dd"
dt = domMakeTypeHTML "dt"
s = Set.fromList $ domTypesHTML [ "address", "div", "p" ]
f [] = pure ()
f (y:ys)
| y == dd || y == dt = do
generateImpliedEndTagsExcept p $ domTypeName y
unlessM (currentNodeHasType p y) $
warn "current node is not dd or dt"
elementStackPopUntilType p y
| elementIsSpecial y && Set.notMember y s = pure ()
| otherwise = f ys
frameSetNotOK p
elementStackTypes p >>= f
closeP
insertHtmlElement p t
TStart { tStartName = "plaintext" } -> do
closeP
insertHtmlElement p t
parserSetPLAINTEXT p
TStart { tStartName = x@"button" } -> do
let a = domMakeTypeHTML x
whenM (elementInScope p a) $ do
warn "button element in scope"
generateImpliedEndTags p
elementStackPopUntilType p a
activeFormatReconstruct p
insertHtmlElement p t
frameSetNotOK p
TEnd { tEndName = x }
| elem x ["address", "article", "aside", "blockquote", "button",
"center", "details", "dialog", "dir", "div", "dl",
"fieldset", "figcaption", "figure", "footer", "header",
"hgroup", "listing", "main", "menu", "nav", "ol",
"pre", "section", "summary", "ul"] -> do
let a = domMakeTypeHTML x
elementInScope p a >>= \case
False ->
warn "element not in scope"
True -> do
generateImpliedEndTags p
unlessM (currentNodeHasType p a) $
warn "current node wrong type"
elementStackPopUntilType p a
TEnd { tEndName = x@"form" } ->
elementStackHasTemplate p >>= \case
False -> do
getFormID p >>= \case
Nothing ->
warn "form not defined"
Just n -> do
a <- fromJust <$> getFormType p
setFormID p Nothing
elementInScope p a >>= \case
False ->
warn "form not in scope"
True -> do
generateImpliedEndTags p
unlessM (liftA (==(Just n)) (currentNodeID p)) $
warn "current node is wrong node"
elementStackRemove p n
True -> do
let a = domMakeTypeHTML x
elementInScope p a >>= \case
False ->
warn "form not in scope"
True -> do
generateImpliedEndTags p
unlessM (currentNodeHasType p a) $
warn "current node is not a form"
elementStackPopUntilType p a
TEnd { tEndName = x@"p" } -> do
let a = domMakeTypeHTML x
unlessM (elementInButtonScope p a) $ do
warn "no p in button scope"
insertHtmlElementNamed p x
closeElementP p
TEnd { tEndName = x@"li" } -> do
let a = domMakeTypeHTML x
elementInListScope p a >>= \case
False ->
warn "no li in list scope"
True -> do
generateImpliedEndTagsExcept p x
unlessM (currentNodeHasType p a) $
warn "current node not an li element"
elementStackPopUntilType p a
TEnd { tEndName = x } | elem x ["dd", "dt"] -> do
let a = domMakeTypeHTML x
elementInListScope p a >>= \case
False ->
warn "no dd or dt in list scope"
True -> do
generateImpliedEndTagsExcept p x
unlessM (currentNodeHasType p a) $
warn "current not is not a dd or dt"
elementStackPopUntilType p a
TEnd { tEndName = x }
| elem x ["h1", "h2", "h3", "h4", "h5", "h6"] -> do
let h = domTypesHTML ["h1", "h2", "h3", "h4", "h5", "h6"]
anyM (elementInScope p) h >>= \case
False ->
warn "header element not in scope"
True -> do
generateImpliedEndTags p
unlessM (currentNodeHasType p $ domMakeTypeHTML x) $
warn "current node not a header type"
elementStackPopUntilTypeIn p h
TEnd { tEndName = "sarcasm" } ->
doAnyOtherEndTag
TStart { tStartName = x@"a" } -> do
let a = domMakeTypeHTML x
activeFormatFindTag p x >>= \case
Nothing -> pure ()
Just (ParserFormatElement i _) -> do
warn "active format already has anchor"
runAA x
elementStackRemove p i
activeFormatRemove p i
activeFormatReconstruct p
insertHtmlElement p t
activeFormatAddCurrentNode p t
TStart { tStartName = x } | elem x
["b", "big", "code", "em", "font", "i", "s",
"small", "strike", "strong", "tt", "u"] -> do
activeFormatReconstruct p
insertHtmlElement p t
activeFormatAddCurrentNode p t
TStart { tStartName = x@"nobr" } -> do
let a = domMakeTypeHTML x
activeFormatReconstruct p
whenM (elementInScope p a) $ do
warn "nobr tag when nobr element already in scope"
runAA x
activeFormatReconstruct p
insertHtmlElement p t
activeFormatAddCurrentNode p t
TEnd { tEndName = x } | elem x
["a", "b", "big", "code", "em", "font", "i", "nobr",
"s", "small", "strike", "strong", "tt", "u"] ->
runAA x
TStart { tStartName = x } | elem x
["applet", "marquee", "object"] -> do
activeFormatReconstruct p
insertHtmlElement p t
activeFormatAddMarker p
frameSetNotOK p
TEnd { tEndName = x } | elem x
["applet", "marquee", "object"] -> do
let a = domMakeTypeHTML x
elementInScope p a >>= \case
False ->
warn "element scope missing"
True -> do
generateImpliedEndTags p
unlessM (currentNodeHasType p a) $
warn "current node is wring type"
elementStackPopUntilType p a
activeFormatClear p
TStart { tStartName = "table" } -> do
q <- domQuirksGet <$> getDOM p
when (q /= DOMQuirksMode) closeP
insertHtmlElement p t
frameSetNotOK p
setMode p ModeInTable
TEnd { tEndName = "br" } -> do
warn "br end tag"
activeFormatReconstruct p
insertHtmlElement p $ TStart "br" False []
elementStackPop p
selfClosingAcknowledge p
frameSetNotOK p
TStart { tStartName = x } | elem x
["area", "br", "embed", "img", "keygen", "wbr"] -> do
activeFormatReconstruct p
insertHtmlElement p t
elementStackPop p
selfClosingAcknowledge p
frameSetNotOK p
TStart { tStartName = "input" } -> do
activeFormatReconstruct p
insertHtmlElement p t
elementStackPop p
selfClosingAcknowledge p
case tokenGetAttrVal "type" t of
Just v -> when (bsLower v /= "hidden") $ frameSetNotOK p
Nothing -> frameSetNotOK p
TStart { tStartName = x } | elem x
["param", "source", "track"] -> do
insertHtmlElement p t
elementStackPop p
selfClosingAcknowledge p
TStart { tStartName = "hr" } -> do
closeP
popMenuitem
insertHtmlElement p t
elementStackPop p
selfClosingAcknowledge p
frameSetNotOK p
TStart { tStartName = "image" } -> do
warn "image"
let t' = t { tStartName = "img" }
reprocess p t'
TStart { tStartName = "textarea" } -> do
insertHtmlElement p t
parserSkipNextLF p
parserSetRCDATA p
saveMode p
frameSetNotOK p
setMode p ModeText
TStart { tStartName = "xmp" } -> do
closeP
activeFormatReconstruct p
frameSetNotOK p
insertElementRAWTEXT p t
TStart { tStartName = "iframe" } -> do
frameSetNotOK p
insertElementRAWTEXT p t
TStart { tStartName = "noembed" } ->
insertElementRAWTEXT p t
TStart { tStartName = "select" } -> do
activeFormatReconstruct p
insertHtmlElement p t
frameSetNotOK p
s <- pure $ Set.fromList
[ ModeInTable, ModeInCaption, ModeInTableBody,
ModeInRow, ModeInCell ]
rref parserInsertionMode >>= \x -> setMode p $
if Set.member x s
then ModeInSelectInTable
else ModeInSelect
TStart { tStartName = x } | elem x
["optgroup", "option"] -> do
elementStackPopIf p $ elementName "option"
activeFormatReconstruct p
insertHtmlElement p t
TStart { tStartName = "menuitem" } -> do
popMenuitem
insertHtmlElement p t
TStart { tStartName = x } | elem x ["rb", "rtc"] -> do
let a = domMakeTypeHTML "ruby"
whenM (elementInScope p a) $ do
generateImpliedEndTags p
unlessM (currentNodeHasType p a) $
warn "ruby element not in scope"
insertHtmlElement p t
TStart { tStartName = x } | elem x ["rp", "rt"] -> do
let a = domMakeTypeHTML "ruby"
b = domMakeTypeHTML "rtc"
whenM (elementInScope p a) $ do
generateImpliedEndTagsExcept p "rtc"
unlessM (currentNodeHasType p a ||^ currentNodeHasType p b) $
warn "ruby or rtc element not in scope"
insertHtmlElement p t
TStart { tStartName = "math" } -> do
activeFormatReconstruct p
insertMathMLElement p . adjustAttrForeign . adjustAttrMathML $ t
when (tStartClosed t) $ do
elementStackPop p
selfClosingAcknowledge p
TStart { tStartName = "svg" } -> do
activeFormatReconstruct p
insertSvgElement p . adjustAttrForeign . adjustAttrSVG $ t
when (tStartClosed t) $ do
elementStackPop p
selfClosingAcknowledge p
TStart { tStartName = x } | elem x
["caption", "col", "colgroup", "frame", "head",
"tbody", "td", "tfoot", "th", "thead", "tr"] ->
warn "bad start token"
TStart {} -> do
activeFormatReconstruct p
insertHtmlElement p t
TEnd {} ->
doAnyOtherEndTag
where
closeP = do
let a = domMakeTypeHTML "p"
whenM (elementInButtonScope p a) $ closeElementP p
popMenuitem =
elementStackPopIf p $ elementName "menuitem"
runAA =
flip (adoptionAgencyRun p) doAnyOtherEndTag
doAnyOtherEndTag =
elementStackTypes p >>= f
where
n = tEndName t
a = domMakeTypeHTML n
f [] = pure ()
f (x:xs)
| x == a = do
generateImpliedEndTagsExcept p n
unlessM (currentNodeHasType p x) $
warn "current node has wrong type"
elementStackPop p
| elementIsSpecial x = do
warn "special element in stack"
| otherwise = f xs
warn x =
parseError p (Just t) $ "in body " <> x
doModeText :: Parser s -> Token -> ST s ()
doModeText p @ Parser {..} t =
case t of
TChar {} ->
insertChar p t
TEOF -> do
parseError p (Just t) "text eof"
elementStackPop p
restoreMode p
reprocess p t
TEnd { tEndName = "script" } -> do
elementStackPop p
restoreMode p
_otherwise -> do
elementStackPop p
restoreMode p
doModeInTable :: Parser s -> Token -> ST s ()
doModeInTable p @ Parser {..} t =
case t of
TChar {} -> do
a <- currentNodeHasTypeIn p $ domTypesHTML
["table", "tbody", "tfoot", "thead", "tr"]
if a
then do
pendingTableCharInit p
saveMode p
setMode p ModeInTableText
reprocess p t
else do
anythingElse
TComment {} ->
insertComment p t
TDoctype {} ->
warn "doctype"
TStart { tStartName = "caption" } -> do
clearToTableContext
activeFormatAddMarker p
insertHtmlElement p t
setMode p ModeInCaption
TStart { tStartName = "colgroup" } -> do
clearToTableContext
insertHtmlElement p t
setMode p ModeInColumnGroup
TStart { tStartName = x@"col" } -> do
clearToTableContext
insertHtmlElementNamed p x
setMode p ModeInColumnGroup
reprocess p t
TStart { tStartName = x } | elem x
["tbody", "tfoot", "thead"] -> do
clearToTableContext
insertHtmlElement p t
setMode p ModeInTableBody
TStart { tStartName = x } | elem x
["td", "th", "tr"] -> do
clearToTableContext
insertHtmlElementNamed p "tbody"
setMode p ModeInTableBody
reprocess p t
TStart { tStartName = x@"table" } -> do
warn "table start tag"
let a = domMakeTypeHTML x
unlessM (elementInTableScope p a) $ do
elementStackPopUntilType p a
resetInsertionMode p
reprocess p t
TEnd { tEndName = x@"table" } -> do
let a = domMakeTypeHTML x
elementInTableScope p a >>= \case
False ->
warn "no table in scope"
True -> do
elementStackPopUntilType p a
resetInsertionMode p
TStart { tStartName = x } | elem x
["body", "caption", "col", "colgroup", "html",
"tbody", "td", "tfoot", "th", "thead", "tr"] ->
warn "unexpected start tag"
TStart { tStartName = x } | elem x
["style", "script", "template"] ->
doModeInHead p t
TEnd { tEndName = "template" } ->
doModeInHead p t
TStart { tStartName = "input" } -> do
if case tokenGetAttr "type" t of
Nothing -> True
Just a -> bsLower (tAttrName a) /= "hidden"
then anythingElse
else do
warn "hidden input"
insertHtmlElement p t
elementStackPop p
selfClosingAcknowledge p
TStart { tStartName = "form" } -> do
warn "form start tag"
unlessM (elementStackHasTemplate p ||^ formNotNull p) $ do
insertHtmlElement p t
saveForm p
elementStackPop p
TEOF ->
doModeInBody p t
_otherwise ->
anythingElse
where
clearToTableContext =
elementStackPopWhile p $ \x -> not $ elem (domNodeType x) $
domTypesHTML ["table", "template", "html"]
anythingElse = do
warn "unexpected token"
fosterParentingSet p
doModeInBody p t
fosterParentingClear p
warn x =
parseError p (Just t) $ "in table " <> x
doModeInTableText :: Parser s -> Token -> ST s ()
doModeInTableText p @ Parser {..} t =
case t of
TChar {} ->
pendingTableCharAppend p t
_otherwise -> do
a <- pendingTableChars p
if any (not . chrWhitespace . tCharData) a
then do
warn "unexpected character"
fosterParentingSet p
mapM_ (doModeInBody p) a
fosterParentingClear p
else do
mapM_ (insertChar p) a
restoreMode p
reprocess p t
where
warn x =
parseError p (Just t) $ "in table text " <> x
doModeInCaption :: Parser s -> Token -> ST s ()
doModeInCaption p @ Parser {..} t =
case t of
TEnd { tEndName = x@"caption" } ->
processCaption
TStart { tStartName = x } | elem x
["caption", "col", "colgroup",
"tbody", "td", "tfoot", "th", "thead", "tr"] -> do
processCaption
reprocess p t
TEnd { tEndName = "table" } -> do
processCaption
reprocess p t
TEnd { tEndName = x } | elem x
["body", "col", "colgroup", "html",
"tbody", "td", "tfoot", "th", "thead", "tr"] ->
warn "unexpected end tag"
_otherwise ->
doModeInBody p t
where
processCaption = do
let a = domMakeTypeHTML "caption"
elementInTableScope p a >>= \case
False ->
warn "no caption in table scope"
True -> do
generateImpliedEndTags p
unlessM (currentNodeHasType p a) $
warn "current node is not a caption"
elementStackPopUntilType p a
activeFormatClear p
setMode p ModeInTable
warn x =
parseError p (Just t) $ "in caption " <> x
doModeInColumnGroup :: Parser s -> Token -> ST s ()
doModeInColumnGroup p @ Parser {..} t =
case t of
TChar {..} | chrWhitespace tCharData ->
insertChar p t
TComment {} ->
insertComment p t
TDoctype {} ->
warn "doctype"
TStart { tStartName = "html" } ->
doModeInBody p t
TStart { tStartName = "col" } -> do
insertHtmlElement p t
elementStackPop p
selfClosingAcknowledge p
TEnd { tEndName = x@"colgroup" } -> do
let a = domMakeTypeHTML x
currentNodeHasType p a >>= \case
False ->
warn "current node not colgroup end"
True -> do
elementStackPop p
setMode p ModeInTable
TEnd { tEndName = "col" } ->
warn "col end tag"
TStart { tStartName = "template" } ->
doModeInHead p t
TEnd { tEndName = "template" } ->
doModeInHead p t
TEOF ->
doModeInBody p t
_otherwise -> do
let a = domMakeTypeHTML "colgroup"
currentNodeHasType p a >>= \case
False ->
warn "current node not colgroup end"
True -> do
elementStackPop p
setMode p ModeInTable
reprocess p t
where
warn x =
parseError p (Just t) $ "in column group " <> x
doModeInTableBody :: Parser s -> Token -> ST s ()
doModeInTableBody p @ Parser {..} t =
case t of
TStart { tStartName = "tr" } -> do
clearToTableBodyContext
insertHtmlElement p t
setMode p ModeInRow
TStart { tStartName = x } | elem x ["th", "td"] -> do
warn "th or td missing tr"
clearToTableBodyContext
insertHtmlElementNamed p "tr"
setMode p ModeInRow
reprocess p t
TEnd { tEndName = x } | elem x
["tbody", "tfoot", "thead"] -> do
let a = domMakeTypeHTML x
elementInTableScope p a >>= \case
False ->
warn "element not in table scope"
True -> do
clearToTableBodyContext
elementStackPop p
setMode p ModeInTable
TStart { tStartName = x } | elem x
["caption", "col", "colgroup", "tbody", "tfoot", "thead"] ->
processElements
TEnd { tEndName = "table" } ->
processElements
TEnd { tEndName = x } | elem x
["body", "caption", "col", "colgroup", "html", "td", "th", "tr"] ->
warn "unexpected end tag"
_otherwise ->
doModeInTable p t
where
processElements = do
anyM (elementInTableScope p . domMakeTypeHTML)
["tbody", "tfoot", "thead"] >>= \case
False ->
warn "element not in table scope"
True -> do
clearToTableBodyContext
elementStackPop p
setMode p ModeInTable
reprocess p t
clearToTableBodyContext =
elementStackPopWhile p $ \x -> not $ elem (domNodeType x) $
domTypesHTML ["tbody", "tfoot", "thead", "template", "html"]
warn x =
parseError p (Just t) $ "in table body " <> x
doModeInRow :: Parser s -> Token -> ST s ()
doModeInRow p @ Parser {..} t =
case t of
TStart { tStartName = x } | elem x ["th", "td"] -> do
clearToTableRowContext
insertHtmlElement p t
setMode p ModeInCell
activeFormatAddMarker p
TEnd { tEndName = "tr" } ->
processTr
TStart { tStartName = x } | elem x
["caption", "col", "colgroup", "tbody", "tfoot", "thead", "tr"] -> do
processTr
reprocess p t
TEnd { tEndName = "table" } -> do
processTr
reprocess p t
TEnd { tEndName = x } | elem x
["tbody", "tfoot", "thead"] -> do
let a = domMakeTypeHTML x
b = domMakeTypeHTML "tr"
elementInTableScope p a >>= \case
False ->
warn "element not in table scope"
True ->
whenM (elementInTableScope p b) $ do
clearToTableRowContext
elementStackPop p
setMode p ModeInTableBody
reprocess p t
TEnd { tEndName = x } | elem x
["body", "caption", "col", "colgroup", "html", "td", "th"] ->
warn "unexpected end tag"
_otherwise ->
doModeInTable p t
where
processTr = do
let a = domMakeTypeHTML "tr"
elementInTableScope p a >>= \case
False ->
warn "element not in table scope"
True -> do
clearToTableRowContext
elementStackPop p
setMode p ModeInTableBody
clearToTableRowContext =
elementStackPopWhile p $ \x -> not $ elem (domNodeType x) $
domTypesHTML ["tr", "template", "html"]
warn x =
parseError p (Just t) $ "in row " <> x
doModeInCell :: Parser s -> Token -> ST s ()
doModeInCell p @ Parser {..} t =
case t of
TEnd { tEndName = x } | elem x ["td", "th"] -> do
let a = domMakeTypeHTML x
elementInTableScope p a >>= \case
False ->
warn "element not in table scope"
True -> do
generateImpliedEndTags p
unlessM (currentNodeHasType p a) $
warn $ "current node not " <> x
elementStackPopUntilType p a
activeFormatClear p
setMode p ModeInRow
TStart { tStartName = x } | elem x
["caption", "col", "colgroup",
"tbody", "td", "tfoot", "th", "thead", "tr"] -> do
anyM (elementInTableScope p . domMakeTypeHTML) ["td", "th"] >>= \case
False ->
warn "td or th not in table scope"
True -> do
closeCell
reprocess p t
TEnd { tEndName = x } | elem x
["body", "caption", "col", "colgroup", "html"] ->
warn "unexpected end tag"
TEnd { tEndName = x } | elem x
["table", "tbody", "tfoot", "thead", "tr"] -> do
let a = domMakeTypeHTML x
elementInTableScope p a >>= \case
False ->
warn "element not in table scope"
True -> do
closeCell
reprocess p t
_otherwise ->
doModeInBody p t
where
closeCell = do
let a = domTypesHTML ["td", "th"]
generateImpliedEndTags p
unlessM (currentNodeHasTypeIn p a) $
warn "current node is not td or th"
elementStackPopUntilTypeIn p a
activeFormatClear p
setMode p ModeInRow
warn x =
parseError p (Just t) $ "in cell " <> x
doModeInSelect :: Parser s -> Token -> ST s ()
doModeInSelect p @ Parser {..} t =
case t of
TChar {} ->
insertChar p t
TComment {} ->
insertComment p t
TDoctype {} ->
warn "doctype"
TStart { tStartName = "html" } ->
doModeInBody p t
TStart { tStartName = x@"option" } -> do
elementStackPopIf p $ elementName x
insertHtmlElement p t
TStart { tStartName = x@"optgroup" } -> do
elementStackPopIf p $ elementName "option"
elementStackPopIf p $ elementName x
insertHtmlElement p t
TEnd { tEndName = x@"optgroup" } -> do
let a = domMakeTypeHTML "option"
b = domMakeTypeHTML x
y <- take 2 <$> elementStackTypes p
when (y == [a,b]) $ elementStackPop p
currentNodeHasType p b >>= \case
False ->
warn $ "current node not " <> x
True ->
elementStackPop p
TEnd { tEndName = x@"option" } -> do
currentNodeHasType p (domMakeTypeHTML x) >>= \case
False ->
warn $ "current node not " <> x
True ->
elementStackPop p
TEnd { tEndName = x@"select" } -> do
let a = domMakeTypeHTML x
elementInSelectScope p a >>= \case
False ->
warn "no select in select scope"
True -> do
elementStackPopUntilType p a
resetInsertionMode p
TStart { tStartName = x@"select" } -> do
warn "unexpected start tag"
let a = domMakeTypeHTML x
whenM (elementInSelectScope p a) $ do
elementStackPopUntilType p a
resetInsertionMode p
TStart { tStartName = x } | elem x
["input", "keygen", "textarea"] -> do
warn "unexpected start tag"
let a = domMakeTypeHTML x
whenM (elementInSelectScope p a) $ do
elementStackPopUntilType p a
resetInsertionMode p
reprocess p t
TStart { tStartName = x } | elem x
["script", "template"] ->
doModeInHead p t
TEnd { tEndName = "template" } ->
doModeInHead p t
TEOF ->
doModeInBody p t
_otherwise ->
warn "unexpected token"
where
warn x =
parseError p (Just t) $ "in select " <> x
doModeInSelectInTable :: Parser s -> Token -> ST s ()
doModeInSelectInTable p @ Parser {..} t =
case t of
TStart { tStartName = x } | elem x
["caption", "table", "tbody", "tfoot",
"thead", "tr", "td", "th"] -> do
warn "unexpected start tag"
elementStackPopUntilType p $ domMakeTypeHTML "select"
resetInsertionMode p
reprocess p t
TEnd { tEndName = x } | elem x
["caption", "table", "tbody", "tfoot",
"thead", "tr", "td", "th"] -> do
warn "unexpected end tag"
whenM (elementInTableScope p $ domMakeTypeHTML x) $ do
elementStackPopUntilType p $ domMakeTypeHTML "select"
resetInsertionMode p
reprocess p t
_otherwise ->
doModeInSelect p t
where
warn x =
parseError p (Just t) $ "in select in table " <> x
doModeInTemplate :: Parser s -> Token -> ST s ()
doModeInTemplate p @ Parser {..} t =
case t of
TChar {} ->
doModeInBody p t
TComment {} ->
doModeInBody p t
TDoctype {} ->
doModeInBody p t
TStart { tStartName = x } | elem x
["base", "basefont", "bgsound", "link", "meta",
"noframes", "script", "style", "template", "title"] ->
doModeInHead p t
TEnd { tEndName = "template" } ->
doModeInHead p t
TStart { tStartName = x } | elem x
["caption", "col", "tbody", "tfoot", "thead"] -> do
templateModePop p
templateModePush p ModeInTable
setMode p ModeInTable
reprocess p t
TStart { tStartName = "col" } -> do
templateModePop p
templateModePush p ModeInColumnGroup
setMode p ModeInColumnGroup
reprocess p t
TStart { tStartName = "tr" } -> do
templateModePop p
templateModePush p ModeInTableBody
setMode p ModeInTableBody
reprocess p t
TStart { tStartName = x } | elem x ["td", "th"] -> do
templateModePop p
templateModePush p ModeInRow
setMode p ModeInRow
reprocess p t
TStart {} -> do
templateModePop p
templateModePush p ModeInBody
setMode p ModeInBody
reprocess p t
TEnd {} ->
warn "unexpected end tag"
TEOF ->
elementStackMissingTemplate p >>= \case
True ->
parserSetDone p
False -> do
warn "template on stack"
activeFormatClear p
templateModePop p
resetInsertionMode p
reprocess p t
where
warn x =
parseError p (Just t) $ "in template " <> x
doModeAfterBody :: Parser s -> Token -> ST s ()
doModeAfterBody p @ Parser {..} t =
case t of
TChar {..} | chrWhitespace tCharData ->
doModeInBody p t
TComment {} -> do
x <- domPos . fromJust <$> lastNodeID p
commentMake p t >>= void . insertNewNode p x
TDoctype {} ->
warn "doctype"
TStart { tStartName = "html" } ->
doModeInBody p t
TEnd { tEndName = "html" } ->
rref parserFragmentMode >>= \case
True ->
warn "html end tag"
False ->
setMode p ModeAfterAfterBody
TEOF ->
parserSetDone p
_otherwise -> do
warn "unexpected token"
setMode p ModeInBody
reprocess p t
where
warn x =
parseError p (Just t) $ "after body " <> x
doModeInFrameset :: Parser s -> Token -> ST s ()
doModeInFrameset p @ Parser {..} t =
case t of
TChar {..} | chrWhitespace tCharData ->
insertChar p t
TComment {} ->
insertComment p t
TDoctype {} ->
warn "doctype"
TStart { tStartName = "html" } ->
doModeInBody p t
TStart { tStartName = "frameset" } ->
insertHtmlElement p t
TEnd { tEndName = "frameset" } -> do
currentNodeHasHTMLType p "html" >>= \case
True ->
warn "current node is html"
False -> do
elementStackPop p
whenM (notM (rref parserFragmentMode) &&^
notM (currentNodeHasHTMLType p "frameset")) $
setMode p ModeAfterFrameset
TStart { tStartName = "frame" } -> do
insertHtmlElement p t
elementStackPop p
selfClosingAcknowledge p
TStart { tStartName = "noframes" } ->
doModeInHead p t
TEOF -> do
unlessM (currentNodeHasHTMLType p "html") $
warn "current node is not html"
parserSetDone p
_ ->
warn "unexpected token"
where
warn x =
parseError p (Just t) $ "in frameset " <> x
doModeAfterFrameset :: Parser s -> Token -> ST s ()
doModeAfterFrameset p @ Parser {..} t =
case t of
TChar {..} | chrWhitespace tCharData ->
insertChar p t
TComment {} ->
insertComment p t
TDoctype {} ->
warn "doctype"
TStart { tStartName = "html" } ->
doModeInBody p t
TEnd { tEndName = "html" } ->
setMode p ModeAfterAfterFrameset
TStart { tStartName = "noframes" } ->
doModeInHead p t
TEOF ->
parserSetDone p
_ ->
warn "unexpected token"
where
warn x =
parseError p (Just t) $ "after frameset " <> x
doModeAfterAfterBody :: Parser s -> Token -> ST s ()
doModeAfterAfterBody p @ Parser {..} t =
case t of
TComment {} ->
insertDocComment p t
TDoctype {} ->
doModeInBody p t
TChar {..} | chrWhitespace tCharData ->
doModeInBody p t
TStart { tStartName = "html" } ->
doModeInBody p t
TEOF ->
parserSetDone p
_otherwise -> do
warn "unexpected token"
setMode p ModeInBody
reprocess p t
where
warn x =
parseError p (Just t) $ "after after body " <> x
doModeAfterAfterFrameset :: Parser s -> Token -> ST s ()
doModeAfterAfterFrameset p @ Parser {..} t =
case t of
TComment {} ->
insertDocComment p t
TDoctype {} ->
doModeInBody p t
TChar {..} | chrWhitespace tCharData ->
doModeInBody p t
TStart { tStartName = "html" } ->
doModeInBody p t
TEOF ->
parserSetDone p
TStart { tStartName = "noframes" } ->
doModeInHead p t
_otherwise -> do
warn "unexpected token"
where
warn x =
parseError p (Just t) $ "after after frameset " <> x
doForeignContent :: Parser s -> Token -> ST s ()
doForeignContent p @ Parser {..} t =
case t of
TChar {..} | chrWhitespace tCharData ->
insertChar p t
TChar {} -> do
insertChar p t
frameSetNotOK p
TComment {} ->
insertComment p t
TDoctype {} ->
warn "doctype"
TStart { tStartName = x } | elem x
["b", "big", "blockquote", "body", "br", "center",
"code", "dd", "div", "dl", "dt", "em", "embed",
"h1", "h2", "h3", "h4", "h5", "h6", "head",
"hr", "i", "img", "li", "listing", "menu",
"meta", "nobr", "ol", "p", "pre", "ruby", "s",
"small", "span", "strong", "strike", "sub",
"sup", "table", "tt", "u", "ul", "var"]
|| x == "font" &&
any (flip tokenHasAttr t) ["color","face","size"] -> do
warn "unexpected start tag"
rref parserFragmentMode >>= \case
True ->
anyOtherStartTag
False -> do
elementStackPop p
elementStackPopWhile p $ \n ->
not (isMathMLIntegrationPoint n
|| isHtmlIntgrationPoint n
|| domNodeIsHTML n)
reprocess p t
TStart {} ->
anyOtherStartTag
TEnd {} -> do
let s = "script"
a = domMakeTypeSVG s
n = tEndName t
svg <- maybe False ((==) a . domNodeType) <$> currentNode p
if n == s && svg
then doScriptEndTag
else do
node <- fromJust <$> currentNode p
let h = bsLower . domNodeElementName
nodeName = h node
when (nodeName /= n) $
warn $
"bad end tag in foreign content ("
<> nodeName <> " /= " <> bcPack (show n) <> ")"
let f (x:[]) = pure ()
f (x:y:ys)
| h x == n =
elementStackPopUntilID p $ domNodeID node
| domNodeIsHTML y =
doHtmlContent p t
| otherwise =
f (y:ys)
elementStackNodes p >>= f
where
anyOtherStartTag = do
(t', n) <- adjustedCurrentNode p >>= \case
Just a
| domNodeIsMathML a ->
pure ( adjustAttrMathML t
, domNodeElementNamespace a
)
| domNodeIsSVG a ->
pure ( adjustElemSVG $ adjustAttrSVG t
, domNodeElementNamespace a
)
Nothing ->
pure (t, HTMLNamespaceHTML)
insertForeignElement p n $ adjustAttrForeign t'
when (tStartClosed t) $ do
svg <- maybe False domNodeIsSVG <$> currentNode p
if tStartName t == "script" && svg
then do
selfClosingAcknowledge p
doScriptEndTag
else do
elementStackPop p
selfClosingAcknowledge p
doScriptEndTag = do
elementStackPop p
warn x =
parseError p (Just t) $ "foreign content " <> x