module Web.Mangrove.Parse.Tokenize.Common
(
Tokenizer
, TokenizerState ( .. )
, TokenParserState ( .. )
, decoderState
, decoderDefaultState
, CurrentTokenizerState ( .. )
, defaultTokenizerState
, TokenizerOutput ( .. )
, mapErrs
, continueState
, endState
, finalStateList
, Wrapped
, WrappedOutput
, WrappedOutputs
, TokenizerInput ( .. )
, DecoderOutputState
, decodedRemainder
, setRemainder
, Token ( .. )
, DoctypeParams ( .. )
, emptyDoctypeParams
, TagParams ( .. )
, emptyTagParams
, BasicAttribute
, tokenizer
, if_
, ifChar
, else_
, elseChar
, tokenizers
, ifs_
, ifsChar
, elses_
, elsesChar
, ifPush_
, ifPushChar
, elsePush_
, elsePushChar
, packToken
, packState
, emit
, emit'
, consEmit
, consTokenError
, consTokenErrors
, consTokenErrorsList
, consOut
, consOuts
, appropriateEndTag
, changeState
, chunk'
) where
import qualified Control.Applicative as A
import qualified Control.Monad.Trans.State as N.S
import qualified Data.ByteString as BS
import qualified Data.ByteString.Short as BS.SH
import qualified Data.Either as E
import qualified Data.HashMap.Strict as M
import qualified Data.Maybe as Y
import qualified Data.Text as T
import Web.Willow.DOM
import Web.Mangrove.Parse.Common.Error
import Web.Willow.Common.Encoding hiding ( setRemainder )
import Web.Willow.Common.Encoding.Sniffer
import Web.Willow.Common.Parser
import Web.Willow.Common.Parser.Switch
import qualified Web.Willow.Common.Encoding as Willow
import Control.Applicative ( (<|>) )
-- | The units of output produced by the tokenizer.
--
-- NOTE(review): the fourth alternative appeared in the source as a bare
-- @T.Text@ with no constructor name, which is not valid Haskell.  It has been
-- restored as 'Comment', matching the comment token of the HTML tokenization
-- algorithm -- confirm against the consumers of this type.
data Token
    = Doctype DoctypeParams
        -- ^ A doctype, together with the parameters collected for it.
    | StartTag TagParams
        -- ^ An opening tag, together with the parameters collected for it.
    | EndTag TagParams
        -- ^ A closing tag; it carries the same payload type as 'StartTag'.
    | Comment T.Text
        -- ^ A comment, carrying its text.
    | Character Char
        -- ^ A single character.
    | EndOfStream
        -- ^ Marker carrying no payload; presumably signals that the input
        -- has been exhausted -- TODO confirm against the emitting code.
  deriving ( Eq, Show, Read )
-- | The data carried by a 'Doctype' token.  All three textual fields are
-- optional; see 'emptyDoctypeParams' for the all-absent starting value.
data DoctypeParams = DoctypeParams
    { doctypeName :: Maybe T.Text
        -- ^ The name of the doctype, if one was given.
    , doctypePublicId :: Maybe T.Text
        -- ^ The public identifier, if one was given.
    , doctypeSystemId :: Maybe T.Text
        -- ^ The system identifier, if one was given.
    , doctypeQuirks :: Bool
        -- ^ Presumably the \"force-quirks\" flag of the HTML tokenization
        -- algorithm -- TODO confirm against the code which sets it.
    }
  deriving ( Eq, Show, Read )
-- | A 'DoctypeParams' with every optional field absent and the quirks flag
-- unset; intended as the base value for record updates.
emptyDoctypeParams :: DoctypeParams
emptyDoctypeParams = DoctypeParams
    { doctypeName = Nothing
    , doctypePublicId = Nothing
    , doctypeSystemId = Nothing
    , doctypeQuirks = False
    }
-- | The data carried by a 'StartTag' or 'EndTag' token.
data TagParams = TagParams
    { tagName :: ElementName
        -- ^ The name of the element the tag refers to.
    , tagIsSelfClosing :: Bool
        -- ^ Whether the tag was flagged as self-closing.
    , tagAttributes :: M.HashMap T.Text T.Text
        -- ^ The attributes of the tag; presumably keyed by attribute name
        -- with the attribute value as the mapped entry -- TODO confirm.
    }
  deriving ( Eq, Show, Read )
-- | A 'TagParams' with an empty name, no attributes, and the self-closing
-- flag unset; intended as the base value for record updates.
emptyTagParams :: TagParams
emptyTagParams = TagParams
    { tagName = T.empty
    , tagIsSelfClosing = False
    , tagAttributes = M.empty
    }
-- | The parser type used throughout the tokenization stage: a 'StateParser'
-- carrying a 'TokenParserState' over an input stream of 'TokenizerInput'.
type Tokenizer = StateParser TokenParserState [TokenizerInput]
-- | The complete state of the tokenization stage: the state of the token
-- parser proper, plus whatever is known about the preceding decoding stage.
data TokenizerState = TokenizerState
    { tokenParserState :: TokenParserState
        -- ^ The state threaded through the 'Tokenizer' parsers.
    , decoderState_ :: Either (Either SnifferEnvironment Encoding) (Maybe DecoderState)
        -- ^ Either the means to initialize a decoder ('Left': a
        -- 'SnifferEnvironment' or a fixed 'Encoding'), or an already
        -- established -- possibly absent -- decoder state ('Right').  See
        -- 'decoderState' and 'decoderDefaultState' for the two ways of
        -- reading this field.
    }
  deriving ( Eq, Show, Read )
-- | The state carried by the 'Tokenizer' parsers themselves.
data TokenParserState = TokenParserState
    { prevStartTag :: Maybe ElementName
        -- ^ The name 'appropriateEndTag' compares against; presumably that
        -- of the most recently emitted start tag -- TODO confirm against the
        -- code which sets it.
    , currentState :: CurrentTokenizerState
        -- ^ The state the tokenizer is in; updated via 'changeState'.
    , currentNodeNamespace :: Maybe Namespace
        -- ^ Presumably the namespace of the node currently being built by
        -- the following stage -- TODO confirm.
    , atEndOfStream :: Bool
        -- ^ Consulted by 'tokenizer' and 'tokenizers': their recovery output
        -- is only considered when this flag is set.
    }
  deriving ( Eq, Show, Read )
-- | The states in which the tokenizer may be resumed.  The constructor
-- order is significant, as it determines the derived 'Ord', 'Bounded', and
-- 'Enum' instances.
data CurrentTokenizerState
    = DataState
    | RCDataState
    | RawTextState
    | PlainTextState
    | ScriptDataState
    | ScriptDataEscapedState
    | ScriptDataDoubleEscapedState
    | CDataState
  deriving ( Eq, Ord, Bounded, Enum, Show, Read )
-- | The initial state of the tokenization stage: the decoder is to be
-- initialized for 'Utf8' (without sniffing), and the token parser starts in
-- the 'DataState' with no previous start tag, no current node namespace, and
-- the end-of-stream flag unset.
defaultTokenizerState :: TokenizerState
defaultTokenizerState = TokenizerState
    { decoderState_ = Left $ Right Utf8
    , tokenParserState = TokenParserState
        { prevStartTag = Nothing
        , currentState = DataState
        , currentNodeNamespace = Nothing
        , atEndOfStream = False
        }
    }
-- | Retrieve the established decoder state, if any.  Yields 'Nothing' both
-- when no state is stored and when the decoder has yet to be initialized
-- (i.e. the underlying field holds a 'Left').
decoderState :: TokenizerState -> Maybe DecoderState
decoderState = E.fromRight Nothing . decoderState_
-- | Retrieve the decoder state, initializing it if that has not happened
-- yet.  An already established state (even an absent one) is returned
-- unchanged; otherwise a fresh state is built either by sniffing the given
-- binary stream in the stored 'SnifferEnvironment', or directly from the
-- stored 'Encoding'.
decoderDefaultState :: TokenizerState -> BS.ByteString -> Maybe DecoderState
decoderDefaultState state stream = case decoderState_ state of
    Right dState -> dState
    Left initialize ->
        Just $ either (flip sniffDecoderState stream) initialDecoderState initialize
-- | Test whether the given name is exactly the one stored in 'prevStartTag'.
-- Always 'False' when no previous start tag has been recorded.
appropriateEndTag :: String -> Tokenizer Bool
appropriateEndTag testName = do
    prevName <- prevStartTag <$> N.S.get
    return $ prevName == Just (T.pack testName)
-- | Replace the 'currentState' of the parser state, leaving every other
-- field as it was.
changeState :: CurrentTokenizerState -> Tokenizer ()
changeState newState = do
    state <- N.S.get
    N.S.put $ state
        { currentState = newState
        }
-- | The decoder data optionally attached to a tokenizer output: when
-- present, a (possibly absent) 'DecoderState' paired with a
-- 'BS.ByteString' -- presumably the binary input not yet consumed; TODO
-- confirm against the code which produces it.
type DecoderOutputState = Maybe (Maybe DecoderState, BS.ByteString)
-- | Read the next input and dispatch on it through the given cases,
-- producing a single output.
--
-- The first argument is an optional recovery output.  When it is given and
-- the dispatch fails, the recovery output is returned instead -- with
-- 'endState' as its decoder state -- provided that the 'atEndOfStream' flag
-- is set and the 'end' parser succeeds (presumably: no input remains).  In
-- every other case the failure is propagated.
tokenizer
    :: Maybe ([ParseError], out)
    -> [SwitchCase TokenizerInput Tokenizer (WrappedOutput out)]
    -> Tokenizer (TokenizerOutput out)
tokenizer Nothing cases = next >>= switch' cases
tokenizer (Just (errs, out)) cases = (next >>= switch' cases) <|> do
    eos <- atEndOfStream <$> N.S.get
    if eos
        then end *> return recovery
        else A.empty
  where recovery = TokenizerOutput
            { tokenizedErrs = errs
            , tokenizedOut = out
            , tokenizedState = endState
            }
-- | Dispatch on a single input through the given cases and unwrap the
-- result.  When the selected case flags its output with 'True', an output
-- without a decoder state of its own falls back to the one carried by the
-- input ('decodedState'); when flagged with 'False', the output is passed
-- through untouched.
switch'
    :: [SwitchCase TokenizerInput Tokenizer (WrappedOutput out)]
    -> TokenizerInput
    -> Tokenizer (TokenizerOutput out)
switch' cases t' = mapState' <$> switch cases t'
  where -- '<|>' on 'Maybe' keeps an existing state and only fills a gap.
        mapState' (True, out) = mapState (<|> decodedState t') out
        mapState' (False, out) = out
-- | Read the next input and dispatch on it through the given cases,
-- producing a list of outputs.
--
-- The first argument is an optional list of recovery outputs.  When it is
-- given and the dispatch fails, the recovery outputs are returned instead,
-- provided that the 'atEndOfStream' flag is set and the 'end' parser
-- succeeds (presumably: no input remains).  In that case only the last
-- output carries 'endState'; the decoder state of every earlier one is left
-- empty (see 'finalStateList').  In every other case the failure is
-- propagated.
tokenizers
    :: Maybe [([ParseError], out)]
    -> [SwitchCase TokenizerInput Tokenizer (WrappedOutputs out)]
    -> Tokenizer [TokenizerOutput out]
tokenizers Nothing cases = next >>= switches' cases
tokenizers (Just recovery) cases = (next >>= switches' cases) <|> do
    eos <- atEndOfStream <$> N.S.get
    if eos
        then end *> return recovery'
        else A.empty
  where recovery' = finalStateList endState $ map toOutput recovery
        -- The state is a placeholder; 'finalStateList' overwrites it.
        toOutput (errs, out) = TokenizerOutput errs out Nothing
-- | Dispatch on a single input through the given cases and unwrap the
-- resulting list.  Only the final output is ever adjusted: when the
-- selected case flags its result with 'True', a final output without a
-- decoder state of its own falls back to the one carried by the input
-- ('decodedState').  All earlier outputs are passed through untouched.
switches'
    :: [SwitchCase TokenizerInput Tokenizer (WrappedOutputs out)]
    -> TokenizerInput
    -> Tokenizer [TokenizerOutput out]
switches' cases t' = repack <$> switch cases t'
  where repack (_, []) = []
        -- '<|>' on 'Maybe' keeps an existing state and only fills a gap.
        repack (True, [out']) = [mapState (<|> decodedState t') out']
        repack (False, [out']) = [out']
        repack (useState, t:ts) = t : repack (useState, ts)
-- | Apply a function to the parse errors attached to an output, leaving
-- its other fields unchanged.
mapErrs :: ([ParseError] -> [ParseError]) -> TokenizerOutput out -> TokenizerOutput out
mapErrs f out = out { tokenizedErrs = f $ tokenizedErrs out }
-- | Apply a function to the decoder state attached to an output, leaving
-- its other fields unchanged.
mapState
    :: (DecoderOutputState -> DecoderOutputState)
    -> TokenizerOutput out
    -> TokenizerOutput out
mapState f out = out { tokenizedState = f $ tokenizedState out }
-- | Discard the decoder state attached to an output, replacing it with
-- 'Nothing' regardless of its previous value.
continueState :: TokenizerOutput out -> TokenizerOutput out
continueState = mapState $ const Nothing
-- | Attach the given decoder state to the /last/ output of a list, and clear
-- the state (via 'continueState') of every output before it.  An empty list
-- is returned unchanged.
finalStateList :: DecoderOutputState -> [TokenizerOutput out] -> [TokenizerOutput out]
finalStateList _ [] = []
finalStateList state [t'] = [t' { tokenizedState = state }]
finalStateList state (t':ts') = continueState t' : finalStateList state ts'
-- | Wrap a payload and its parse errors into a 'TokenizerOutput' that carries
-- no decoder state ('tokenizedState' is 'Nothing').
packToken :: ([ParseError], out) -> Tokenizer (TokenizerOutput out)
packToken = flip packState Nothing
-- | Wrap a payload and its parse errors into a 'TokenizerOutput' together
-- with the given decoder state.  No input is consumed and the parser state
-- is not modified; the value is simply 'return'ed.
packState :: ([ParseError], out) -> DecoderOutputState -> Tokenizer (TokenizerOutput out)
packState (errs, out) dState = return $ TokenizerOutput
    { tokenizedErrs = errs
    , tokenizedOut = out
    , tokenizedState = dState
    }
-- | Package a token (and the errors raised while building it) as tokenizer
-- output, performing the per-token bookkeeping:
--
-- * 'StartTag': the tag name is stored in 'prevStartTag' of the parser state
--   before the token is packed.
-- * 'EndTag': 'EndTagWithAttributes' is reported if the tag has any
--   attributes and 'EndTagWithTrailingSolidus' if it is flagged
--   self-closing; these are placed in front of the token's own errors.
-- * Any other token is packed unchanged.
emit :: ([ParseError], Token) -> Tokenizer (TokenizerOutput Token)
emit t'@(_, StartTag d) = do
    state <- N.S.get
    N.S.put $ state
        { prevStartTag = Just $ tagName d
        }
    packToken t'
emit t'@(_, EndTag d) = consTokenErrors errs <$> packToken t'
  where
    -- Errors specific to end tags, in reporting order.
    errs = Y.catMaybes [attrError, closeError]
    attrError
        | null (tagAttributes d) = Nothing
        | otherwise = Just EndTagWithAttributes
    closeError
        | tagIsSelfClosing d = Just EndTagWithTrailingSolidus
        | otherwise = Nothing
emit t' = packToken t'
-- | As 'emit', but with the single output wrapped in a one-element list.
emit' :: ([ParseError], Token) -> Tokenizer [TokenizerOutput Token]
emit' = fmap (: []) . emit
-- | 'emit' the given token, then run the following tokenizer and prepend the
-- emitted output to its results.  The token is emitted /before/ the second
-- tokenizer runs, so any state change made by 'emit' is visible to it.
consEmit
    :: ([ParseError], Token)
    -> Tokenizer [TokenizerOutput Token]
    -> Tokenizer [TokenizerOutput Token]
consEmit tok p = do
    t <- emit tok
    ts <- p
    return $ t : ts
-- | One element of the tokenizer's input stream: a single character together
-- with a list of parse errors and a decoder state attached to it.
data TokenizerInput = TokenizerInput
    { decodedErrs :: [ParseError]
        -- ^ Parse errors associated with this character.
    , decodedOut :: Char
        -- ^ The character itself.
    , decodedState :: DecoderOutputState
        -- ^ Decoder state associated with this character, if any.
    }
  deriving ( Eq, Show, Read )
-- | The 'BS.ByteString' half of an input's 'decodedState', or 'Nothing' if
-- the input carries no decoder state.
decodedRemainder :: TokenizerInput -> Maybe BS.ByteString
decodedRemainder = fmap snd . decodedState
-- | Store the given bytes as the remainder of the tokenizer's decoder state.
--
-- The decoder state is first obtained through 'decoderDefaultState' (called
-- with an empty 'BS.ByteString'), the remainder is then set on it with
-- "Web.Willow.Common.Encoding"'s @setRemainder@, and the result replaces
-- 'decoderState_' as a 'Right' value.  If 'decoderDefaultState' yields
-- 'Nothing', @Right Nothing@ is stored.
setRemainder :: BS.SH.ShortByteString -> TokenizerState -> TokenizerState
setRemainder bs state = state
    { decoderState_ = Right $ Willow.setRemainder bs <$> decoderDefaultState state BS.empty
    }
-- | A value produced by the tokenizer, together with the parse errors raised
-- while producing it and an optional decoder state.
data TokenizerOutput out = TokenizerOutput
    { tokenizedErrs :: [ParseError]
        -- ^ Parse errors associated with this output.
    , tokenizedOut :: out
        -- ^ The produced value.
    , tokenizedState :: DecoderOutputState
        -- ^ Decoder state attached to this output; 'Nothing' when none is
        -- recorded (see 'continueState').
    }
  deriving ( Eq, Show, Read )
-- | Maps over the payload ('tokenizedOut') only; the errors and the decoder
-- state are carried over unchanged.
instance Functor TokenizerOutput where
    fmap f out = out { tokenizedOut = f $ tokenizedOut out }
-- | A result tagged with a flag.  The 'if_' \/ 'else_' family of switch cases
-- tags its result with 'True'; the 'ifPush_' \/ 'elsePush_' family, which
-- pushes the tested input back onto the stream, tags it with 'False'.
type Wrapped out = (Bool, out)
-- | A single tokenizer output tagged as described for 'Wrapped'.
type WrappedOutput out = Wrapped (TokenizerOutput out)
-- | A list of tokenizer outputs tagged as described for 'Wrapped'.
type WrappedOutputs out = Wrapped [TokenizerOutput out]
-- | Switch case selected when the predicate holds for the input character.
-- The input is consumed: the given tokenizer is run, the errors attached to
-- the input ('decodedErrs') are placed in front of the errors of its output,
-- and the result is tagged with 'True'.
if_
    :: (Char -> Bool)
    -> Tokenizer (TokenizerOutput out)
    -> SwitchCase TokenizerInput Tokenizer (WrappedOutput out)
if_ f t = If (f . decodedOut) $ \c' ->
    (,) True . mapErrs (decodedErrs c' ++) <$> t
-- | List-producing counterpart of 'if_': selected when the predicate holds
-- for the input character.  The given tokenizer is run, the errors attached
-- to the input are merged into its outputs with 'consTokenErrorsList', and
-- the result is tagged with 'True'.
ifs_
    :: (Char -> Bool)
    -> Tokenizer [TokenizerOutput Token]
    -> SwitchCase TokenizerInput Tokenizer (WrappedOutputs Token)
ifs_ f t = If (f . decodedOut) $ \c' ->
    (,) True . consTokenErrorsList (decodedErrs c') <$> t
-- | As 'if_', but the tokenizer to run is built from the matched character
-- ('decodedOut' of the input).  The errors attached to the input are placed
-- in front of the errors of the output, and the result is tagged with 'True'.
ifChar
    :: (Char -> Bool)
    -> (Char -> Tokenizer (TokenizerOutput out))
    -> SwitchCase TokenizerInput Tokenizer (WrappedOutput out)
ifChar f t = If (f . decodedOut) $ \c' ->
    (,) True . mapErrs (decodedErrs c' ++) <$> t (decodedOut c')
-- | As 'ifs_', but the tokenizer to run is built from the matched character
-- ('decodedOut' of the input).  The errors attached to the input are merged
-- into the outputs with 'consTokenErrorsList', and the result is tagged with
-- 'True'.
ifsChar
    :: (Char -> Bool)
    -> (Char -> Tokenizer [TokenizerOutput Token])
    -> SwitchCase TokenizerInput Tokenizer (WrappedOutputs Token)
ifsChar f t = If (f . decodedOut) $ \c' ->
    (,) True . consTokenErrorsList (decodedErrs c') <$> t (decodedOut c')
-- | Switch case selected when the predicate holds for the input character,
-- /without/ consuming it: the whole input (errors and state included) is
-- 'push'ed back onto the stream before the given tokenizer runs, and the
-- result is tagged with 'False'.
ifPush_
    :: (Char -> Bool)
    -> Tokenizer out
    -> SwitchCase TokenizerInput Tokenizer (Wrapped out)
ifPush_ f t = If (f . decodedOut) $ \c' ->
    push c' *> fmap ((,) False) t
-- | As 'ifPush_', but the tokenizer to run is built from the matched
-- character.  The input is 'push'ed back onto the stream first, and the
-- result is tagged with 'False'.
ifPushChar
    :: (Char -> Bool)
    -> (Char -> Tokenizer out)
    -> SwitchCase TokenizerInput Tokenizer (Wrapped out)
ifPushChar f t = If (f . decodedOut) $ \c' ->
    push c' *> fmap ((,) False) (t $ decodedOut c')
-- | Fallback switch case (no predicate).  The input is consumed: the given
-- tokenizer is run, the errors attached to the input ('decodedErrs') are
-- placed in front of the errors of its output, and the result is tagged with
-- 'True'.
else_
    :: Tokenizer (TokenizerOutput out)
    -> SwitchCase TokenizerInput Tokenizer (WrappedOutput out)
else_ t = Else $ \c' ->
    (,) True . mapErrs (decodedErrs c' ++) <$> t
-- | List-producing counterpart of 'else_': fallback switch case that runs
-- the given tokenizer, merges the errors attached to the input into its
-- outputs with 'consTokenErrorsList', and tags the result with 'True'.
elses_
    :: Tokenizer [TokenizerOutput Token]
    -> SwitchCase TokenizerInput Tokenizer (WrappedOutputs Token)
elses_ t = Else $ \c' ->
    (,) True . consTokenErrorsList (decodedErrs c') <$> t
-- | As 'else_', but the tokenizer to run is built from the input character
-- ('decodedOut').  The errors attached to the input are placed in front of
-- the errors of the output, and the result is tagged with 'True'.
elseChar
    :: (Char -> Tokenizer (TokenizerOutput out))
    -> SwitchCase TokenizerInput Tokenizer (WrappedOutput out)
elseChar t = Else $ \c' ->
    (,) True . mapErrs (decodedErrs c' ++) <$> t (decodedOut c')
-- | As 'elses_', but the tokenizer to run is built from the input character
-- ('decodedOut').  The errors attached to the input are merged into the
-- outputs with 'consTokenErrorsList', and the result is tagged with 'True'.
elsesChar
    :: (Char -> Tokenizer [TokenizerOutput Token])
    -> SwitchCase TokenizerInput Tokenizer (WrappedOutputs Token)
elsesChar t = Else $ \c' ->
    (,) True . consTokenErrorsList (decodedErrs c') <$> t (decodedOut c')
-- | Fallback switch case that does /not/ consume the input: the whole input
-- is 'push'ed back onto the stream before the given tokenizer runs, and the
-- result is tagged with 'False'.
elsePush_
    :: Tokenizer out
    -> SwitchCase TokenizerInput Tokenizer (Wrapped out)
elsePush_ t = Else $ \c' ->
    push c' *> fmap ((,) False) t
-- | As 'elsePush_', but the tokenizer to run is built from the input
-- character.  The input is 'push'ed back onto the stream first, and the
-- result is tagged with 'False'.
elsePushChar
    :: (Char -> Tokenizer out)
    -> SwitchCase TokenizerInput Tokenizer (Wrapped out)
elsePushChar t = Else $ \c' ->
    push c' *> fmap ((,) False) (t $ decodedOut c')
-- | Prepend a single parse error to the error list of a tokenizer output,
-- using 'mapErrs' to reach that list.
consTokenError :: ParseError -> TokenizerOutput out -> TokenizerOutput out
consTokenError err = mapErrs (err :)
-- | Prepend a list of parse errors, in order, to the error list of a
-- tokenizer output, using 'mapErrs' to reach that list.
consTokenErrors :: [ParseError] -> TokenizerOutput out -> TokenizerOutput out
consTokenErrors errs = mapErrs (errs ++)
-- | Attach parse errors to the head of a list of token outputs.
--
-- * No errors and no outputs: the result is the empty list.
-- * Errors but no outputs: a single 'EndOfStream' output is created to carry
--   the errors, with 'Nothing' as its decoder state.
-- * Otherwise the errors are prepended (via 'consTokenErrors') to the first
--   output only; the remaining outputs are left untouched.
consTokenErrorsList :: [ParseError] -> [TokenizerOutput Token] -> [TokenizerOutput Token]
consTokenErrorsList [] [] = []
consTokenErrorsList errs [] = [TokenizerOutput errs EndOfStream Nothing]
consTokenErrorsList errs (t:ts) = consTokenErrors errs t : ts
-- | Prepend a single value to the list carried as the payload of a tokenizer
-- output, through the 'Functor' instance of 'TokenizerOutput'.
consOut :: out -> TokenizerOutput [out] -> TokenizerOutput [out]
consOut x = fmap (x :)
-- | Prepend a list of values, in order, to the list carried as the payload of
-- a tokenizer output, through the 'Functor' instance of 'TokenizerOutput'.
consOuts :: [out] -> TokenizerOutput [out] -> TokenizerOutput [out]
consOuts xs = fmap (xs ++)
-- | Read a chunk of input and check it against an expected string after
-- transforming each character.
--
-- Requests, via 'nextChunk', as many inputs as @test@ has characters, then
-- passes the chunk through 'satisfying' with a predicate that holds only when
-- applying @f@ to every decoded character ('decodedOut') yields exactly
-- @test@.
-- NOTE(review): presumably 'satisfying' fails the parser when the predicate
-- does not hold, and @f@ is typically a case-folding function -- confirm
-- against the callers.
chunk' :: (Char -> Char) -> String -> Tokenizer [TokenizerInput]
chunk' f test = nextChunk (fromIntegral $ length test) >>=
    satisfying (\str -> map (f . decodedOut) str == test)
-- | A decoder output state which is present ('Just') but holds no decoder
-- state ('Nothing') and an empty remainder of undecoded bytes.
-- NOTE(review): the name suggests this marks the end of the input stream --
-- confirm against the users of 'DecoderOutputState'.
endState :: DecoderOutputState
endState = Just (Nothing, BS.empty)