{-# LANGUAGE OverloadedStrings #-}

{-|
Description:    Token processing rules within the @\<head\>@ section.

Copyright:      (c) 2020 Sam May
License:        MPL-2.0
Maintainer:     ag.eitilt@gmail.com

Stability:      stable
Portability:    portable
-}
module Web.Mangrove.Parse.Tree.InHead
    ( treeInHead
    ) where


import qualified Control.Applicative as A
import qualified Control.Monad as N
import qualified Control.Monad.Trans.State as N.S

import qualified Data.Bifunctor as F.B
import qualified Data.ByteString.Short as BS.SH
import qualified Data.HashMap.Strict as M
import qualified Data.Text as T
import qualified Data.Text.Encoding as T

import Web.Mangrove.Parse.Common.Error
import Web.Mangrove.Parse.Tokenize.Common
import Web.Mangrove.Parse.Tree.Common
import Web.Mangrove.Parse.Tree.InText
import Web.Mangrove.Parse.Tree.Patch
import Web.Willow.Common.Encoding
import Web.Willow.Common.Encoding.Character
import Web.Willow.Common.Encoding.Labels
import Web.Willow.Common.Encoding.Sniffer
import Web.Willow.Common.Parser
import Web.Willow.Common.Parser.Switch

import {-# SOURCE #-} Web.Mangrove.Parse.Tree.InBody

import Control.Applicative ( (<|>) )


-- | __HTML:__
--      @[the "in head" insertion mode]
--      (https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inhead)@
-- 
-- The parsing instructions corresponding to the 'InHead' section of the state
-- machine.
treeInHead :: TreeBuilder TreeOutput
treeInHead :: TreeBuilder TreeOutput
treeInHead = StateT TreeParserState (Parser [TreeInput]) TreeInput
forall (m :: * -> *) stream token.
MonadParser m stream token =>
m token
next StateT TreeParserState (Parser [TreeInput]) TreeInput
-> (TreeInput -> TreeBuilder TreeOutput) -> TreeBuilder TreeOutput
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= [SwitchCase
   TreeInput (StateT TreeParserState (Parser [TreeInput])) TreeOutput]
-> TreeInput -> TreeBuilder TreeOutput
forall (m :: * -> *) test out.
Alternative m =>
[SwitchCase test m out] -> test -> m out
switch
    [ (TreeInput -> Bool)
-> (TreeInput -> TreeBuilder TreeOutput)
-> SwitchCase
     TreeInput (StateT TreeParserState (Parser [TreeInput])) TreeOutput
forall test (m :: * -> *) out.
(test -> Bool) -> (test -> m out) -> SwitchCase test m out
If TreeInput -> Bool
isWhitespace TreeInput -> TreeBuilder TreeOutput
insertCharacter
    , (TreeInput -> Bool)
-> (TreeInput -> TreeBuilder TreeOutput)
-> SwitchCase
     TreeInput (StateT TreeParserState (Parser [TreeInput])) TreeOutput
forall test (m :: * -> *) out.
(test -> Bool) -> (test -> m out) -> SwitchCase test m out
If TreeInput -> Bool
isComment TreeInput -> TreeBuilder TreeOutput
insertComment
    , (TreeInput -> Bool)
-> (TreeInput -> TreeBuilder TreeOutput)
-> SwitchCase
     TreeInput (StateT TreeParserState (Parser [TreeInput])) TreeOutput
forall test (m :: * -> *) out.
(test -> Bool) -> (test -> m out) -> SwitchCase test m out
If TreeInput -> Bool
isDoctype ((TreeInput -> TreeBuilder TreeOutput)
 -> SwitchCase
      TreeInput (StateT TreeParserState (Parser [TreeInput])) TreeOutput)
-> (TreeInput -> TreeBuilder TreeOutput)
-> SwitchCase
     TreeInput (StateT TreeParserState (Parser [TreeInput])) TreeOutput
forall a b. (a -> b) -> a -> b
$ \TreeInput
t' ->
        [ParseError] -> TreeInput -> TreeBuilder TreeOutput
packTreeErrors [DocumentTypeParams -> ParseError
UnexpectedDoctype (DocumentTypeParams -> ParseError)
-> DocumentTypeParams -> ParseError
forall a b. (a -> b) -> a -> b
$ TreeInput -> DocumentTypeParams
tokenDocumentType TreeInput
t'] TreeInput
t'
    , (TreeInput -> Bool)
-> (TreeInput -> TreeBuilder TreeOutput)
-> SwitchCase
     TreeInput (StateT TreeParserState (Parser [TreeInput])) TreeOutput
forall test (m :: * -> *) out.
(test -> Bool) -> (test -> m out) -> SwitchCase test m out
If ([String] -> TreeInput -> Bool
isStartTag [String
"html"]) ((TreeInput -> TreeBuilder TreeOutput)
 -> SwitchCase
      TreeInput (StateT TreeParserState (Parser [TreeInput])) TreeOutput)
-> (TreeInput -> TreeBuilder TreeOutput)
-> SwitchCase
     TreeInput (StateT TreeParserState (Parser [TreeInput])) TreeOutput
forall a b. (a -> b) -> a -> b
$ \TreeInput
t' -> do
        TreeInput -> StateT TreeParserState (Parser [TreeInput]) ()
forall (m :: * -> *) stream token.
MonadParser m stream token =>
token -> m ()
push TreeInput
t'
        TreeBuilder TreeOutput
treeInBody
    , (TreeInput -> Bool)
-> (TreeInput -> TreeBuilder TreeOutput)
-> SwitchCase
     TreeInput (StateT TreeParserState (Parser [TreeInput])) TreeOutput
forall test (m :: * -> *) out.
(test -> Bool) -> (test -> m out) -> SwitchCase test m out
If ([String] -> TreeInput -> Bool
isStartTag [String
"base", String
"basefont", String
"bgsound", String
"link"]) TreeInput -> TreeBuilder TreeOutput
insertNullElement
    , (TreeInput -> Bool)
-> (TreeInput -> TreeBuilder TreeOutput)
-> SwitchCase
     TreeInput (StateT TreeParserState (Parser [TreeInput])) TreeOutput
forall test (m :: * -> *) out.
(test -> Bool) -> (test -> m out) -> SwitchCase test m out
If ([String] -> TreeInput -> Bool
isStartTag [String
"meta"]) ((TreeInput -> TreeBuilder TreeOutput)
 -> SwitchCase
      TreeInput (StateT TreeParserState (Parser [TreeInput])) TreeOutput)
-> (TreeInput -> TreeBuilder TreeOutput)
-> SwitchCase
     TreeInput (StateT TreeParserState (Parser [TreeInput])) TreeOutput
forall a b. (a -> b) -> a -> b
$ \TreeInput
t' -> do
        TreeOutput
insert <- TreeInput -> TreeBuilder TreeOutput
insertNullElement TreeInput
t'
        Maybe TreeOutput
change' <- TreeBuilder TreeOutput
-> StateT TreeParserState (Parser [TreeInput]) (Maybe TreeOutput)
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
A.optional (TreeBuilder TreeOutput
 -> StateT TreeParserState (Parser [TreeInput]) (Maybe TreeOutput))
-> TreeBuilder TreeOutput
-> StateT TreeParserState (Parser [TreeInput]) (Maybe TreeOutput)
forall a b. (a -> b) -> a -> b
$ TreeInput -> TreeBuilder TreeOutput
changeEncoding TreeInput
t'
        TreeOutput -> TreeBuilder TreeOutput
forall (m :: * -> *) a. Monad m => a -> m a
return (TreeOutput -> TreeBuilder TreeOutput)
-> TreeOutput -> TreeBuilder TreeOutput
forall a b. (a -> b) -> a -> b
$ case Maybe TreeOutput
change' of
            Just TreeOutput
change -> TreeOutput
insert TreeOutput -> TreeOutput -> TreeOutput
|++| TreeOutput
change
            Maybe TreeOutput
Nothing -> TreeOutput
insert
    , (TreeInput -> Bool)
-> (TreeInput -> TreeBuilder TreeOutput)
-> SwitchCase
     TreeInput (StateT TreeParserState (Parser [TreeInput])) TreeOutput
forall test (m :: * -> *) out.
(test -> Bool) -> (test -> m out) -> SwitchCase test m out
If ([String] -> TreeInput -> Bool
isStartTag [String
"title"]) TreeInput -> TreeBuilder TreeOutput
genericRCDataElement
    , (TreeInput -> Bool)
-> (TreeInput -> TreeBuilder TreeOutput)
-> SwitchCase
     TreeInput (StateT TreeParserState (Parser [TreeInput])) TreeOutput
forall test (m :: * -> *) out.
(test -> Bool) -> (test -> m out) -> SwitchCase test m out
If ([String] -> TreeInput -> Bool
isStartTag [String
"noscript"]) ((TreeInput -> TreeBuilder TreeOutput)
 -> SwitchCase
      TreeInput (StateT TreeParserState (Parser [TreeInput])) TreeOutput)
-> (TreeInput -> TreeBuilder TreeOutput)
-> SwitchCase
     TreeInput (StateT TreeParserState (Parser [TreeInput])) TreeOutput
forall a b. (a -> b) -> a -> b
$ \TreeInput
t' -> do
        TreeParserState
state <- StateT TreeParserState (Parser [TreeInput]) TreeParserState
forall (m :: * -> *) s. Monad m => StateT s m s
N.S.get
        if TreeParserState -> Bool
scriptingEnabled TreeParserState
state
            then TreeInput -> TreeBuilder TreeOutput
genericRawTextElement TreeInput
t'
            else do
                InsertionMode -> StateT TreeParserState (Parser [TreeInput]) ()
switchMode InsertionMode
InHeadNoscript
                TreeInput -> TreeBuilder TreeOutput
insertElement TreeInput
t'
    , (TreeInput -> Bool)
-> (TreeInput -> TreeBuilder TreeOutput)
-> SwitchCase
     TreeInput (StateT TreeParserState (Parser [TreeInput])) TreeOutput
forall test (m :: * -> *) out.
(test -> Bool) -> (test -> m out) -> SwitchCase test m out
If ([String] -> TreeInput -> Bool
isStartTag [String
"noframes", String
"style"]) TreeInput -> TreeBuilder TreeOutput
genericRawTextElement
    , (TreeInput -> Bool)
-> (TreeInput -> TreeBuilder TreeOutput)
-> SwitchCase
     TreeInput (StateT TreeParserState (Parser [TreeInput])) TreeOutput
forall test (m :: * -> *) out.
(test -> Bool) -> (test -> m out) -> SwitchCase test m out
If ([String] -> TreeInput -> Bool
isStartTag [String
"script"]) ((TreeInput -> TreeBuilder TreeOutput)
 -> SwitchCase
      TreeInput (StateT TreeParserState (Parser [TreeInput])) TreeOutput)
-> (TreeInput -> TreeBuilder TreeOutput)
-> SwitchCase
     TreeInput (StateT TreeParserState (Parser [TreeInput])) TreeOutput
forall a b. (a -> b) -> a -> b
$ \TreeInput
t' -> do
        (TreeParserState -> TreeParserState)
-> StateT TreeParserState (Parser [TreeInput]) ()
forall (m :: * -> *) s. Monad m => (s -> s) -> StateT s m ()
N.S.modify ((TreeParserState -> TreeParserState)
 -> StateT TreeParserState (Parser [TreeInput]) ())
-> (TreeParserState -> TreeParserState)
-> StateT TreeParserState (Parser [TreeInput]) ()
forall a b. (a -> b) -> a -> b
$ \TreeParserState
state -> TreeParserState
state
            { originalInsertionMode :: Maybe InsertionMode
originalInsertionMode = InsertionMode -> Maybe InsertionMode
forall a. a -> Maybe a
Just (InsertionMode -> Maybe InsertionMode)
-> InsertionMode -> Maybe InsertionMode
forall a b. (a -> b) -> a -> b
$ TreeParserState -> InsertionMode
insertionMode TreeParserState
state
            }
        InsertionMode -> StateT TreeParserState (Parser [TreeInput]) ()
switchMode InsertionMode
InText
        TreeOutput
insert <- TreeInput -> TreeBuilder TreeOutput
insertElement (TreeInput -> TreeBuilder TreeOutput)
-> ((TokenParserState -> TokenParserState) -> TreeInput)
-> (TokenParserState -> TokenParserState)
-> TreeBuilder TreeOutput
forall b c a. (b -> c) -> (a -> b) -> a -> c
. TreeInput -> (TokenParserState -> TokenParserState) -> TreeInput
mapTokenState TreeInput
t' ((TokenParserState -> TokenParserState) -> TreeBuilder TreeOutput)
-> (TokenParserState -> TokenParserState) -> TreeBuilder TreeOutput
forall a b. (a -> b) -> a -> b
$ \TokenParserState
state -> TokenParserState
state
            { currentState :: CurrentTokenizerState
currentState = CurrentTokenizerState
ScriptDataState
            }
        TreeOutput -> TreeBuilder TreeOutput
forall (m :: * -> *) a. Monad m => a -> m a
return TreeOutput
insert
    , (TreeInput -> Bool)
-> (TreeInput -> TreeBuilder TreeOutput)
-> SwitchCase
     TreeInput (StateT TreeParserState (Parser [TreeInput])) TreeOutput
forall test (m :: * -> *) out.
(test -> Bool) -> (test -> m out) -> SwitchCase test m out
If ([String] -> TreeInput -> Bool
isEndTag [String
"head"]) ((TreeInput -> TreeBuilder TreeOutput)
 -> SwitchCase
      TreeInput (StateT TreeParserState (Parser [TreeInput])) TreeOutput)
-> (TreeInput -> TreeBuilder TreeOutput)
-> SwitchCase
     TreeInput (StateT TreeParserState (Parser [TreeInput])) TreeOutput
forall a b. (a -> b) -> a -> b
$ \TreeInput
t' -> do
        InsertionMode -> StateT TreeParserState (Parser [TreeInput]) ()
switchMode InsertionMode
AfterHead
        TreeInput -> [Patch] -> TreeBuilder TreeOutput
packTree TreeInput
t' [Patch]
softCloseCurrentNode_
    , (TreeInput -> Bool)
-> (TreeInput -> TreeBuilder TreeOutput)
-> SwitchCase
     TreeInput (StateT TreeParserState (Parser [TreeInput])) TreeOutput
forall test (m :: * -> *) out.
(test -> Bool) -> (test -> m out) -> SwitchCase test m out
If ([String] -> TreeInput -> Bool
isEndTag [String
"body", String
"html", String
"br"]) TreeInput -> TreeBuilder TreeOutput
anythingElse
    , (TreeInput -> Bool)
-> (TreeInput -> TreeBuilder TreeOutput)
-> SwitchCase
     TreeInput (StateT TreeParserState (Parser [TreeInput])) TreeOutput
forall test (m :: * -> *) out.
(test -> Bool) -> (test -> m out) -> SwitchCase test m out
If ([String] -> TreeInput -> Bool
isStartTag [String
"template"]) ((TreeInput -> TreeBuilder TreeOutput)
 -> SwitchCase
      TreeInput (StateT TreeParserState (Parser [TreeInput])) TreeOutput)
-> (TreeInput -> TreeBuilder TreeOutput)
-> SwitchCase
     TreeInput (StateT TreeParserState (Parser [TreeInput])) TreeOutput
forall a b. (a -> b) -> a -> b
$ \TreeInput
t' -> do
        TreeOutput
insert <- TreeInput -> TreeBuilder TreeOutput
insertElement TreeInput
t'
        StateT TreeParserState (Parser [TreeInput]) ()
insertFormattingMarker
        StateT TreeParserState (Parser [TreeInput]) ()
setFramesetNotOk
        InsertionMode -> StateT TreeParserState (Parser [TreeInput]) ()
switchMode InsertionMode
InTemplate
        InsertionMode -> StateT TreeParserState (Parser [TreeInput]) ()
pushTemplateMode InsertionMode
InTemplate
        TreeOutput -> TreeBuilder TreeOutput
forall (m :: * -> *) a. Monad m => a -> m a
return TreeOutput
insert
    , (TreeInput -> Bool)
-> (TreeInput -> TreeBuilder TreeOutput)
-> SwitchCase
     TreeInput (StateT TreeParserState (Parser [TreeInput])) TreeOutput
forall test (m :: * -> *) out.
(test -> Bool) -> (test -> m out) -> SwitchCase test m out
If ([String] -> TreeInput -> Bool
isEndTag [String
"template"]) ((TreeInput -> TreeBuilder TreeOutput)
 -> SwitchCase
      TreeInput (StateT TreeParserState (Parser [TreeInput])) TreeOutput)
-> (TreeInput -> TreeBuilder TreeOutput)
-> SwitchCase
     TreeInput (StateT TreeParserState (Parser [TreeInput])) TreeOutput
forall a b. (a -> b) -> a -> b
$ \TreeInput
t' -> do
        [Patch]
generate <- [ElementName] -> TreeBuilder [Patch]
generateEndTags [ElementName]
thoroughlyImpliedEndTags
        Maybe ElementParams
current <- TreeBuilder (Maybe ElementParams)
currentNode
        let errF :: [Patch] -> [Patch]
errF = case ElementName -> ElementParams -> Bool
nodeIsElement ElementName
"template" (ElementParams -> Bool) -> Maybe ElementParams -> Maybe Bool
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Maybe ElementParams
current of
                Just Bool
True -> [Patch] -> [Patch]
forall a. a -> a
id
                Maybe Bool
_ -> ParseError -> [Patch] -> [Patch]
consTreeError_ ParseError
UnexpectedElementWithImpliedEndTag
        [Patch]
clear <- ElementName -> TreeBuilder [Patch]
closeElement ElementName
"template"
        StateT TreeParserState (Parser [TreeInput]) ()
clearFormattingElements
        StateT TreeParserState (Parser [TreeInput]) ()
popTemplateMode
        StateT TreeParserState (Parser [TreeInput]) ()
resetInsertionMode
        TreeInput -> [Patch] -> TreeBuilder TreeOutput
packTree TreeInput
t' ([Patch] -> TreeBuilder TreeOutput)
-> [Patch] -> TreeBuilder TreeOutput
forall a b. (a -> b) -> a -> b
$ [Patch] -> [Patch]
errF [Patch]
generate [Patch] -> [Patch] -> [Patch]
forall a. [a] -> [a] -> [a]
++ [Patch]
clear
    , (TreeInput -> Bool)
-> (TreeInput -> TreeBuilder TreeOutput)
-> SwitchCase
     TreeInput (StateT TreeParserState (Parser [TreeInput])) TreeOutput
forall test (m :: * -> *) out.
(test -> Bool) -> (test -> m out) -> SwitchCase test m out
If ([String] -> TreeInput -> Bool
isStartTag [String
"head"]) ((TreeInput -> TreeBuilder TreeOutput)
 -> SwitchCase
      TreeInput (StateT TreeParserState (Parser [TreeInput])) TreeOutput)
-> (TreeInput -> TreeBuilder TreeOutput)
-> SwitchCase
     TreeInput (StateT TreeParserState (Parser [TreeInput])) TreeOutput
forall a b. (a -> b) -> a -> b
$ [ParseError] -> TreeInput -> TreeBuilder TreeOutput
packTreeErrors [ParseError
NestedSingletonElement]
    , (TreeInput -> Bool)
-> (TreeInput -> TreeBuilder TreeOutput)
-> SwitchCase
     TreeInput (StateT TreeParserState (Parser [TreeInput])) TreeOutput
forall test (m :: * -> *) out.
(test -> Bool) -> (test -> m out) -> SwitchCase test m out
If TreeInput -> Bool
isAnyEndTag ((TreeInput -> TreeBuilder TreeOutput)
 -> SwitchCase
      TreeInput (StateT TreeParserState (Parser [TreeInput])) TreeOutput)
-> (TreeInput -> TreeBuilder TreeOutput)
-> SwitchCase
     TreeInput (StateT TreeParserState (Parser [TreeInput])) TreeOutput
forall a b. (a -> b) -> a -> b
$ \TreeInput
t' ->
        [ParseError] -> TreeInput -> TreeBuilder TreeOutput
packTreeErrors [ElementParams -> ParseError
UnmatchedEndTag (ElementParams -> ParseError) -> ElementParams -> ParseError
forall a b. (a -> b) -> a -> b
$ TreeInput -> ElementParams
tokenElement TreeInput
t'] TreeInput
t'
    , (TreeInput -> TreeBuilder TreeOutput)
-> SwitchCase
     TreeInput (StateT TreeParserState (Parser [TreeInput])) TreeOutput
forall test (m :: * -> *) out.
(test -> m out) -> SwitchCase test m out
Else TreeInput -> TreeBuilder TreeOutput
anythingElse
    ]
  where anythingElse :: TreeInput -> TreeBuilder TreeOutput
anythingElse TreeInput
t' = do
            TreeInput -> StateT TreeParserState (Parser [TreeInput]) ()
forall (m :: * -> *) stream token.
MonadParser m stream token =>
token -> m ()
push TreeInput
t'
            InsertionMode -> StateT TreeParserState (Parser [TreeInput]) ()
switchMode InsertionMode
AfterHead
            [Patch] -> TreeBuilder TreeOutput
packTree_ [Patch]
softCloseCurrentNode_
        changeEncoding :: TreeInput -> TreeBuilder TreeOutput
changeEncoding TreeInput
t' = do
            let d :: TagParams
d = TreeInput -> TagParams
tokenTag TreeInput
t'
                state :: Maybe DecoderState
state = TreeInput -> TokenizerOutputState
tokenState TreeInput
t' TokenizerOutputState
-> ((TokenizerState, ByteString) -> Maybe DecoderState)
-> Maybe DecoderState
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= TokenizerState -> Maybe DecoderState
decoderState (TokenizerState -> Maybe DecoderState)
-> ((TokenizerState, ByteString) -> TokenizerState)
-> (TokenizerState, ByteString)
-> Maybe DecoderState
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (TokenizerState, ByteString) -> TokenizerState
forall a b. (a, b) -> a
fst
            Encoding
enc' <- StateT TreeParserState (Parser [TreeInput]) Encoding
-> (Encoding
    -> StateT TreeParserState (Parser [TreeInput]) Encoding)
-> Maybe Encoding
-> StateT TreeParserState (Parser [TreeInput]) Encoding
forall b a. b -> (a -> b) -> Maybe a -> b
maybe StateT TreeParserState (Parser [TreeInput]) Encoding
forall (f :: * -> *) a. Alternative f => f a
A.empty Encoding -> StateT TreeParserState (Parser [TreeInput]) Encoding
forall (m :: * -> *) a. Monad m => a -> m a
return (Maybe Encoding
 -> StateT TreeParserState (Parser [TreeInput]) Encoding)
-> Maybe Encoding
-> StateT TreeParserState (Parser [TreeInput]) Encoding
forall a b. (a -> b) -> a -> b
$ do
                Encoding
e <- TagParams -> Maybe Encoding
changeEncodingCharset TagParams
d Maybe Encoding -> Maybe Encoding -> Maybe Encoding
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> TagParams -> Maybe Encoding
changeEncodingContentType TagParams
d
                Encoding -> Maybe Encoding
forall (m :: * -> *) a. Monad m => a -> m a
return (Encoding -> Maybe Encoding) -> Encoding -> Maybe Encoding
forall a b. (a -> b) -> a -> b
$ case Encoding
e of
                    Encoding
Utf16be -> Encoding
Utf8
                    Encoding
Utf16le -> Encoding
Utf8
                    Encoding
UserDefined -> Encoding
Windows1252
                    Encoding
enc -> Encoding
enc
            case Maybe DecoderState
state of
                    Just DecoderState
s -> case DecoderState -> Encoding
decoderEncoding DecoderState
s of
                        Encoding
Utf16be -> TreeInput -> DecoderState -> TreeBuilder TreeOutput
putDecoderState TreeInput
t' (DecoderState -> TreeBuilder TreeOutput)
-> DecoderState -> TreeBuilder TreeOutput
forall a b. (a -> b) -> a -> b
$ Encoding -> DecoderState -> DecoderState
setEncodingCertain Encoding
Utf16be DecoderState
s
                        Encoding
Utf16le -> TreeInput -> DecoderState -> TreeBuilder TreeOutput
putDecoderState TreeInput
t' (DecoderState -> TreeBuilder TreeOutput)
-> DecoderState -> TreeBuilder TreeOutput
forall a b. (a -> b) -> a -> b
$ Encoding -> DecoderState -> DecoderState
setEncodingCertain Encoding
Utf16be DecoderState
s
                        Encoding
enc | Encoding
enc Encoding -> Encoding -> Bool
forall a. Eq a => a -> a -> Bool
== Encoding
enc' -> TreeInput -> DecoderState -> TreeBuilder TreeOutput
putDecoderState TreeInput
t' (DecoderState -> TreeBuilder TreeOutput)
-> DecoderState -> TreeBuilder TreeOutput
forall a b. (a -> b) -> a -> b
$ Encoding -> DecoderState -> DecoderState
setEncodingCertain Encoding
enc' DecoderState
s
                        Encoding
_ -> case DecoderState -> Confidence
decoderConfidence DecoderState
s of
                            Tentative Encoding
_ ReparseData
rec | Bool -> Bool
not (Bool -> Bool) -> Bool -> Bool
forall a b. (a -> b) -> a -> b
$ ReparseData -> Encoding -> Bool
encodingEquivalent ReparseData
rec Encoding
enc' -> do
                                TreeOutput
_ <- TreeInput -> DecoderState -> TreeBuilder TreeOutput
putDecoderState TreeInput
t' (DecoderState -> TreeBuilder TreeOutput)
-> DecoderState -> TreeBuilder TreeOutput
forall a b. (a -> b) -> a -> b
$ Encoding -> DecoderState
initialDecoderState Encoding
enc'
                                ByteString -> TreeBuilder TreeOutput
restartParsing (ByteString -> TreeBuilder TreeOutput)
-> ByteString -> TreeBuilder TreeOutput
forall a b. (a -> b) -> a -> b
$ ReparseData -> ByteString
streamStart ReparseData
rec
                            Confidence
_ -> TreeInput -> DecoderState -> TreeBuilder TreeOutput
putDecoderState TreeInput
t' (DecoderState -> TreeBuilder TreeOutput)
-> DecoderState -> TreeBuilder TreeOutput
forall a b. (a -> b) -> a -> b
$ Encoding -> DecoderState
initialDecoderState Encoding
enc'
                    Maybe DecoderState
Nothing -> TreeInput -> DecoderState -> TreeBuilder TreeOutput
putDecoderState TreeInput
t' (DecoderState -> TreeBuilder TreeOutput)
-> DecoderState -> TreeBuilder TreeOutput
forall a b. (a -> b) -> a -> b
$ Encoding -> DecoderState
initialDecoderState Encoding
enc'
        changeEncodingCharset :: TagParams -> Maybe Encoding
changeEncodingCharset TagParams
d = do
            ElementName
charset <- ElementName -> HashMap ElementName ElementName -> Maybe ElementName
forall k v. (Eq k, Hashable k) => k -> HashMap k v -> Maybe v
M.lookup ElementName
"charset" (HashMap ElementName ElementName -> Maybe ElementName)
-> HashMap ElementName ElementName -> Maybe ElementName
forall a b. (a -> b) -> a -> b
$ TagParams -> HashMap ElementName ElementName
tagAttributes TagParams
d
            ElementName -> Maybe Encoding
lookupEncoding ElementName
charset
        changeEncodingContentType :: TagParams -> Maybe Encoding
changeEncodingContentType TagParams
d = do
            ElementName
httpEquiv <- ElementName -> HashMap ElementName ElementName -> Maybe ElementName
forall k v. (Eq k, Hashable k) => k -> HashMap k v -> Maybe v
M.lookup ElementName
"http-equiv" (HashMap ElementName ElementName -> Maybe ElementName)
-> HashMap ElementName ElementName -> Maybe ElementName
forall a b. (a -> b) -> a -> b
$ TagParams -> HashMap ElementName ElementName
tagAttributes TagParams
d
            Bool -> Maybe () -> Maybe ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
N.unless ((Char -> Char) -> ElementName -> ElementName
T.map Char -> Char
toAsciiLower ElementName
httpEquiv ElementName -> ElementName -> Bool
forall a. Eq a => a -> a -> Bool
== ElementName
"content-type") Maybe ()
forall (f :: * -> *) a. Alternative f => f a
A.empty
            ElementName
content <- ElementName -> HashMap ElementName ElementName -> Maybe ElementName
forall k v. (Eq k, Hashable k) => k -> HashMap k v -> Maybe v
M.lookup ElementName
"content" (HashMap ElementName ElementName -> Maybe ElementName)
-> HashMap ElementName ElementName -> Maybe ElementName
forall a b. (a -> b) -> a -> b
$ TagParams -> HashMap ElementName ElementName
tagAttributes TagParams
d
            ByteString -> Maybe Encoding
extractEncoding (ByteString -> Maybe Encoding) -> ByteString -> Maybe Encoding
forall a b. (a -> b) -> a -> b
$ ElementName -> ByteString
T.encodeUtf8 ElementName
content
        encodingEquivalent :: ReparseData -> Encoding -> Bool
encodingEquivalent ReparseData
rec Encoding
enc = ([ElementName] -> [ElementName] -> Bool)
-> ([ElementName], [ElementName]) -> Bool
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry [ElementName] -> [ElementName] -> Bool
forall a. Eq a => a -> a -> Bool
(==) (([ElementName], [ElementName]) -> Bool)
-> (HashMap ShortByteString Char -> ([ElementName], [ElementName]))
-> HashMap ShortByteString Char
-> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
            ([ShortByteString] -> [ElementName])
-> (String -> [ElementName])
-> ([ShortByteString], String)
-> ([ElementName], [ElementName])
forall (p :: * -> * -> *) a b c d.
Bifunctor p =>
(a -> b) -> (c -> d) -> p a c -> p b d
F.B.bimap ((ShortByteString -> ElementName)
-> [ShortByteString] -> [ElementName]
forall a b. (a -> b) -> [a] -> [b]
map ((ShortByteString -> ElementName)
 -> [ShortByteString] -> [ElementName])
-> (ShortByteString -> ElementName)
-> [ShortByteString]
-> [ElementName]
forall a b. (a -> b) -> a -> b
$ Encoding -> ShortByteString -> ElementName
parseChar Encoding
enc) ((Char -> ElementName) -> String -> [ElementName]
forall a b. (a -> b) -> [a] -> [b]
map Char -> ElementName
T.singleton) (([ShortByteString], String) -> ([ElementName], [ElementName]))
-> (HashMap ShortByteString Char -> ([ShortByteString], String))
-> HashMap ShortByteString Char
-> ([ElementName], [ElementName])
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [(ShortByteString, Char)] -> ([ShortByteString], String)
forall a b. [(a, b)] -> ([a], [b])
unzip ([(ShortByteString, Char)] -> ([ShortByteString], String))
-> (HashMap ShortByteString Char -> [(ShortByteString, Char)])
-> HashMap ShortByteString Char
-> ([ShortByteString], String)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. HashMap ShortByteString Char -> [(ShortByteString, Char)]
forall k v. HashMap k v -> [(k, v)]
M.toList (HashMap ShortByteString Char -> Bool)
-> HashMap ShortByteString Char -> Bool
forall a b. (a -> b) -> a -> b
$ ReparseData -> HashMap ShortByteString Char
parsedChars ReparseData
rec
        parseChar :: Encoding -> ShortByteString -> ElementName
parseChar Encoding
enc = (ElementName, DecoderState) -> ElementName
forall a b. (a, b) -> a
fst ((ElementName, DecoderState) -> ElementName)
-> (ShortByteString -> (ElementName, DecoderState))
-> ShortByteString
-> ElementName
forall b c a. (b -> c) -> (a -> b) -> a -> c
. DecoderState -> ByteString -> (ElementName, DecoderState)
decode' (Encoding -> DecoderState
initialDecoderState Encoding
enc) (ByteString -> (ElementName, DecoderState))
-> (ShortByteString -> ByteString)
-> ShortByteString
-> (ElementName, DecoderState)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShortByteString -> ByteString
BS.SH.fromShort
        putDecoderState :: TreeInput -> DecoderState -> TreeBuilder TreeOutput
putDecoderState TreeInput
t' DecoderState
decState =
            (TreeInput -> [Patch] -> TreeBuilder TreeOutput)
-> [Patch] -> TreeInput -> TreeBuilder TreeOutput
forall a b c. (a -> b -> c) -> b -> a -> c
flip TreeInput -> [Patch] -> TreeBuilder TreeOutput
packTree [] (TreeInput -> TreeBuilder TreeOutput)
-> ((TokenizerState -> TokenizerState) -> TreeInput)
-> (TokenizerState -> TokenizerState)
-> TreeBuilder TreeOutput
forall b c a. (b -> c) -> (a -> b) -> a -> c
. TreeInput
-> (TokenizerOutputState -> TokenizerOutputState) -> TreeInput
mapTokenState' TreeInput
t' ((TokenizerOutputState -> TokenizerOutputState) -> TreeInput)
-> ((TokenizerState -> TokenizerState)
    -> TokenizerOutputState -> TokenizerOutputState)
-> (TokenizerState -> TokenizerState)
-> TreeInput
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((TokenizerState, ByteString) -> (TokenizerState, ByteString))
-> TokenizerOutputState -> TokenizerOutputState
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (((TokenizerState, ByteString) -> (TokenizerState, ByteString))
 -> TokenizerOutputState -> TokenizerOutputState)
-> ((TokenizerState -> TokenizerState)
    -> (TokenizerState, ByteString) -> (TokenizerState, ByteString))
-> (TokenizerState -> TokenizerState)
-> TokenizerOutputState
-> TokenizerOutputState
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (TokenizerState -> TokenizerState)
-> (TokenizerState, ByteString) -> (TokenizerState, ByteString)
forall (p :: * -> * -> *) a b c.
Bifunctor p =>
(a -> b) -> p a c -> p b c
F.B.first ((TokenizerState -> TokenizerState) -> TreeBuilder TreeOutput)
-> (TokenizerState -> TokenizerState) -> TreeBuilder TreeOutput
forall a b. (a -> b) -> a -> b
$ \TokenizerState
state -> TokenizerState
state
                { decoderState_ :: Either (Either SnifferEnvironment Encoding) (Maybe DecoderState)
decoderState_ = Maybe DecoderState
-> Either (Either SnifferEnvironment Encoding) (Maybe DecoderState)
forall a b. b -> Either a b
Right (Maybe DecoderState
 -> Either
      (Either SnifferEnvironment Encoding) (Maybe DecoderState))
-> Maybe DecoderState
-> Either (Either SnifferEnvironment Encoding) (Maybe DecoderState)
forall a b. (a -> b) -> a -> b
$ DecoderState -> Maybe DecoderState
forall a. a -> Maybe a
Just DecoderState
decState
                }