{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE LambdaCase #-}
{-# OPTIONS_GHC -O2 #-}
module Text.HTML.Parser
(
parseTokens
, parseTokensLazy
, token
, Token(..)
, TagName, AttrName, AttrValue
, Attr(..)
, renderTokens
, renderToken
, renderAttrs
, renderAttr
, canonicalizeTokens
) where
import Data.Char hiding (isSpace)
import Data.List (unfoldr)
import GHC.Generics
import Control.Applicative
import Data.Monoid
import Control.Monad (guard)
import Control.DeepSeq
import Data.Attoparsec.Text
import qualified Data.Attoparsec.Text.Lazy as AL
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import Data.Text.Lazy.Builder (Builder)
import qualified Data.Text.Lazy.Builder as B
import Prelude hiding (take, takeWhile)
import Text.HTML.Parser.Entities (entities)
import qualified Data.Trie as Trie
type TagName = Text
type AttrName = Text
type AttrValue = Text
data Token
= TagOpen !TagName [Attr]
| TagSelfClose !TagName [Attr]
| TagClose !TagName
| ContentText !Text
| ContentChar !Char
| !Builder
| Doctype !Text
deriving (Int -> Token -> ShowS
[Token] -> ShowS
Token -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Token] -> ShowS
$cshowList :: [Token] -> ShowS
show :: Token -> String
$cshow :: Token -> String
showsPrec :: Int -> Token -> ShowS
$cshowsPrec :: Int -> Token -> ShowS
Show, Eq Token
Token -> Token -> Bool
Token -> Token -> Ordering
Token -> Token -> Token
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Token -> Token -> Token
$cmin :: Token -> Token -> Token
max :: Token -> Token -> Token
$cmax :: Token -> Token -> Token
>= :: Token -> Token -> Bool
$c>= :: Token -> Token -> Bool
> :: Token -> Token -> Bool
$c> :: Token -> Token -> Bool
<= :: Token -> Token -> Bool
$c<= :: Token -> Token -> Bool
< :: Token -> Token -> Bool
$c< :: Token -> Token -> Bool
compare :: Token -> Token -> Ordering
$ccompare :: Token -> Token -> Ordering
Ord, Token -> Token -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Token -> Token -> Bool
$c/= :: Token -> Token -> Bool
== :: Token -> Token -> Bool
$c== :: Token -> Token -> Bool
Eq, forall x. Rep Token x -> Token
forall x. Token -> Rep Token x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep Token x -> Token
$cfrom :: forall x. Token -> Rep Token x
Generic)
endOfFileToken :: Token
endOfFileToken :: Token
endOfFileToken = Text -> Token
ContentText Text
""
data Attr = Attr !AttrName !AttrValue
deriving (Int -> Attr -> ShowS
[Attr] -> ShowS
Attr -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Attr] -> ShowS
$cshowList :: [Attr] -> ShowS
show :: Attr -> String
$cshow :: Attr -> String
showsPrec :: Int -> Attr -> ShowS
$cshowsPrec :: Int -> Attr -> ShowS
Show, Attr -> Attr -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Attr -> Attr -> Bool
$c/= :: Attr -> Attr -> Bool
== :: Attr -> Attr -> Bool
$c== :: Attr -> Attr -> Bool
Eq, Eq Attr
Attr -> Attr -> Bool
Attr -> Attr -> Ordering
Attr -> Attr -> Attr
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Attr -> Attr -> Attr
$cmin :: Attr -> Attr -> Attr
max :: Attr -> Attr -> Attr
$cmax :: Attr -> Attr -> Attr
>= :: Attr -> Attr -> Bool
$c>= :: Attr -> Attr -> Bool
> :: Attr -> Attr -> Bool
$c> :: Attr -> Attr -> Bool
<= :: Attr -> Attr -> Bool
$c<= :: Attr -> Attr -> Bool
< :: Attr -> Attr -> Bool
$c< :: Attr -> Attr -> Bool
compare :: Attr -> Attr -> Ordering
$ccompare :: Attr -> Attr -> Ordering
Ord)
instance NFData Token where
rnf :: Token -> ()
rnf (Comment Builder
b) = forall a. NFData a => a -> ()
rnf forall a b. (a -> b) -> a -> b
$ Builder -> Text
B.toLazyText Builder
b
rnf Token
_ = ()
token :: Parser Token
token :: Parser Token
token = Parser Token
dataState
dataState :: Parser Token
dataState :: Parser Token
dataState = do
Text
content <- (Char -> Bool) -> Parser Text
takeWhile (\Char
c -> Char
c forall a. Eq a => a -> a -> Bool
/= Char
'<' Bool -> Bool -> Bool
&& Char
c forall a. Eq a => a -> a -> Bool
/= Char
'&')
if Bool -> Bool
not forall a b. (a -> b) -> a -> b
$ Text -> Bool
T.null Text
content
then forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Text -> Token
ContentText Text
content
else forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
[ Char -> Parser Char
char Char
'<' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Token
tagOpen
, forall i a. Parser i a -> Parser i a
try forall a b. (a -> b) -> a -> b
$ Char -> Parser Char
char Char
'&' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Token
charRef
, Char -> Token
ContentChar Char
'&' forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Char -> Parser Char
char Char
'&'
]
charRef :: Parser Token
charRef :: Parser Token
charRef = Trie Char Token -> Parser Token
go Trie Char Token
entityTrie
where
go :: Trie.Trie Char Token -> Parser Token
go :: Trie Char Token -> Parser Token
go Trie Char Token
trie = do
Char
c <- Parser Char
anyChar
case Char
c of
Char
';' -> forall b a. b -> (a -> b) -> Maybe a -> b
maybe forall (f :: * -> *) a. Alternative f => f a
empty forall (m :: * -> *) a. Monad m => a -> m a
return (forall k v. Trie k v -> Maybe v
Trie.terminal Trie Char Token
trie)
Char
_ -> Trie Char Token -> Parser Token
go (forall k v. Ord k => k -> Trie k v -> Trie k v
Trie.step Char
c Trie Char Token
trie)
entityTrie :: Trie.Trie Char Token
entityTrie :: Trie Char Token
entityTrie = forall k v. Ord k => [([k], v)] -> Trie k v
Trie.fromList
[ (Text -> String
T.unpack Text
name, Text -> Token
ContentText Text
expansion)
| (Text
name, Text
expansion) <- [(Text, Text)]
entities
]
tagOpen :: Parser Token
tagOpen :: Parser Token
tagOpen =
(Char -> Parser Char
char Char
'!' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Token
markupDeclOpen)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Parser Char
char Char
'/' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Token
endTagOpen)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Parser Char
char Char
'?' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Builder -> Parser Token
bogusComment forall a. Monoid a => a
mempty)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Token
tagNameOpen
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Token
other
where
other :: Parser Token
other = do
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Char -> Token
ContentChar Char
'<'
endTagOpen :: Parser Token
endTagOpen :: Parser Token
endTagOpen = Parser Token
tagNameClose
isWhitespace :: Char -> Bool
isWhitespace :: Char -> Bool
isWhitespace Char
'\x09' = Bool
True
isWhitespace Char
'\x0a' = Bool
True
isWhitespace Char
'\x0c' = Bool
True
isWhitespace Char
'\x0d' = Bool
True
isWhitespace Char
' ' = Bool
True
isWhitespace Char
_ = Bool
False
orC :: (Char -> Bool) -> (Char -> Bool) -> Char -> Bool
orC :: (Char -> Bool) -> (Char -> Bool) -> Char -> Bool
orC Char -> Bool
f Char -> Bool
g Char
c = Char -> Bool
f Char
c Bool -> Bool -> Bool
|| Char -> Bool
g Char
c
{-# INLINE orC #-}
isC :: Char -> Char -> Bool
isC :: Char -> Char -> Bool
isC = forall a. Eq a => a -> a -> Bool
(==)
{-# INLINE isC #-}
tagNameOpen :: Parser Token
tagNameOpen :: Parser Token
tagNameOpen = do
Text
tag <- Parser Text
tagName'
forall a. a -> a
id forall a b. (a -> b) -> a -> b
$ ((Char -> Bool) -> Parser Char
satisfy Char -> Bool
isWhitespace forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Text -> [Attr] -> Parser Token
beforeAttrName Text
tag [])
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Parser Char
char Char
'/' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Text -> [Attr] -> Parser Token
selfClosingStartTag Text
tag [])
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Parser Char
char Char
'>' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> [Attr] -> Token
TagOpen Text
tag []))
tagNameClose :: Parser Token
tagNameClose :: Parser Token
tagNameClose = do
Text
tag <- Parser Text
tagName'
Char -> Parser Char
char Char
'>' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> Token
TagClose Text
tag)
tagName' :: Parser Text
tagName' :: Parser Text
tagName' = do
Char
c <- Parser Char
peekChar'
forall (f :: * -> *). Alternative f => Bool -> f ()
guard forall a b. (a -> b) -> a -> b
$ Char -> Bool
isAsciiUpper Char
c Bool -> Bool -> Bool
|| Char -> Bool
isAsciiLower Char
c
(Char -> Bool) -> Parser Text
takeWhile forall a b. (a -> b) -> a -> b
$ Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool
isWhitespace (Char -> Bool) -> (Char -> Bool) -> Char -> Bool
`orC` Char -> Char -> Bool
isC Char
'/' (Char -> Bool) -> (Char -> Bool) -> Char -> Bool
`orC` Char -> Char -> Bool
isC Char
'<' (Char -> Bool) -> (Char -> Bool) -> Char -> Bool
`orC` Char -> Char -> Bool
isC Char
'>')
selfClosingStartTag :: TagName -> [Attr] -> Parser Token
selfClosingStartTag :: Text -> [Attr] -> Parser Token
selfClosingStartTag Text
tag [Attr]
attrs = do
(Char -> Parser Char
char Char
'>' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> [Attr] -> Token
TagSelfClose Text
tag [Attr]
attrs))
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (forall t. Chunk t => Parser t ()
endOfInput forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return Token
endOfFileToken)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Text -> [Attr] -> Parser Token
beforeAttrName Text
tag [Attr]
attrs
beforeAttrName :: TagName -> [Attr] -> Parser Token
beforeAttrName :: Text -> [Attr] -> Parser Token
beforeAttrName Text
tag [Attr]
attrs = do
(Char -> Bool) -> Parser Text ()
skipWhile Char -> Bool
isWhitespace
forall a. a -> a
id forall a b. (a -> b) -> a -> b
$ (Char -> Parser Char
char Char
'/' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Text -> [Attr] -> Parser Token
selfClosingStartTag Text
tag [Attr]
attrs)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Parser Char
char Char
'>' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> [Attr] -> Token
TagOpen Text
tag [Attr]
attrs))
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Text -> [Attr] -> Parser Token
attrName Text
tag [Attr]
attrs
attrName :: TagName -> [Attr] -> Parser Token
attrName :: Text -> [Attr] -> Parser Token
attrName Text
tag [Attr]
attrs = do
Text
name <- (Char -> Bool) -> Parser Text
takeWhile forall a b. (a -> b) -> a -> b
$ Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool
isWhitespace (Char -> Bool) -> (Char -> Bool) -> Char -> Bool
`orC` Char -> Char -> Bool
isC Char
'/' (Char -> Bool) -> (Char -> Bool) -> Char -> Bool
`orC` Char -> Char -> Bool
isC Char
'=' (Char -> Bool) -> (Char -> Bool) -> Char -> Bool
`orC` Char -> Char -> Bool
isC Char
'>')
forall a. a -> a
id forall a b. (a -> b) -> a -> b
$ (forall t. Chunk t => Parser t ()
endOfInput forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Text -> [Attr] -> Text -> Parser Token
afterAttrName Text
tag [Attr]
attrs Text
name)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Parser Char
char Char
'=' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Text -> [Attr] -> Text -> Parser Token
beforeAttrValue Text
tag [Attr]
attrs Text
name)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall i a. Parser i a -> Parser i a
try (do Maybe Char
mc <- Parser (Maybe Char)
peekChar
case Maybe Char
mc of
Just Char
c | Char -> Bool
notNameChar Char
c -> Text -> [Attr] -> Text -> Parser Token
afterAttrName Text
tag [Attr]
attrs Text
name
Maybe Char
_ -> forall (f :: * -> *) a. Alternative f => f a
empty)
where notNameChar :: Char -> Bool
notNameChar = Char -> Bool
isWhitespace (Char -> Bool) -> (Char -> Bool) -> Char -> Bool
`orC` Char -> Char -> Bool
isC Char
'/' (Char -> Bool) -> (Char -> Bool) -> Char -> Bool
`orC` Char -> Char -> Bool
isC Char
'>'
afterAttrName :: TagName -> [Attr] -> AttrName -> Parser Token
afterAttrName :: Text -> [Attr] -> Text -> Parser Token
afterAttrName Text
tag [Attr]
attrs Text
name = do
(Char -> Bool) -> Parser Text ()
skipWhile Char -> Bool
isWhitespace
forall a. a -> a
id forall a b. (a -> b) -> a -> b
$ (Char -> Parser Char
char Char
'/' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Text -> [Attr] -> Parser Token
selfClosingStartTag Text
tag [Attr]
attrs)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Parser Char
char Char
'=' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Text -> [Attr] -> Text -> Parser Token
beforeAttrValue Text
tag [Attr]
attrs Text
name)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Parser Char
char Char
'>' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> [Attr] -> Token
TagOpen Text
tag (Text -> Text -> Attr
Attr Text
name Text
T.empty forall a. a -> [a] -> [a]
: [Attr]
attrs)))
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (forall t. Chunk t => Parser t ()
endOfInput forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return Token
endOfFileToken)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Text -> [Attr] -> Parser Token
attrName Text
tag (Text -> Text -> Attr
Attr Text
name Text
T.empty forall a. a -> [a] -> [a]
: [Attr]
attrs)
beforeAttrValue :: TagName -> [Attr] -> AttrName -> Parser Token
beforeAttrValue :: Text -> [Attr] -> Text -> Parser Token
beforeAttrValue Text
tag [Attr]
attrs Text
name = do
(Char -> Bool) -> Parser Text ()
skipWhile Char -> Bool
isWhitespace
forall a. a -> a
id forall a b. (a -> b) -> a -> b
$ (Char -> Parser Char
char Char
'"' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Text -> [Attr] -> Text -> Parser Token
attrValueDQuoted Text
tag [Attr]
attrs Text
name)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Parser Char
char Char
'\'' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Text -> [Attr] -> Text -> Parser Token
attrValueSQuoted Text
tag [Attr]
attrs Text
name)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Parser Char
char Char
'>' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> [Attr] -> Token
TagOpen Text
tag (Text -> Text -> Attr
Attr Text
name Text
T.empty forall a. a -> [a] -> [a]
: [Attr]
attrs)))
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Text -> [Attr] -> Text -> Parser Token
attrValueUnquoted Text
tag [Attr]
attrs Text
name
attrValueDQuoted :: TagName -> [Attr] -> AttrName -> Parser Token
attrValueDQuoted :: Text -> [Attr] -> Text -> Parser Token
attrValueDQuoted Text
tag [Attr]
attrs Text
name = do
Text
value <- (Char -> Bool) -> Parser Text
takeWhile (forall a. Eq a => a -> a -> Bool
/= Char
'"')
Char
_ <- Char -> Parser Char
char Char
'"'
Text -> [Attr] -> Text -> Text -> Parser Token
afterAttrValueQuoted Text
tag [Attr]
attrs Text
name Text
value
attrValueSQuoted :: TagName -> [Attr] -> AttrName -> Parser Token
attrValueSQuoted :: Text -> [Attr] -> Text -> Parser Token
attrValueSQuoted Text
tag [Attr]
attrs Text
name = do
Text
value <- (Char -> Bool) -> Parser Text
takeWhile (forall a. Eq a => a -> a -> Bool
/= Char
'\'')
Char
_ <- Char -> Parser Char
char Char
'\''
Text -> [Attr] -> Text -> Text -> Parser Token
afterAttrValueQuoted Text
tag [Attr]
attrs Text
name Text
value
attrValueUnquoted :: TagName -> [Attr] -> AttrName -> Parser Token
attrValueUnquoted :: Text -> [Attr] -> Text -> Parser Token
attrValueUnquoted Text
tag [Attr]
attrs Text
name = do
Text
value <- (Char -> Bool) -> Parser Text
takeTill forall a b. (a -> b) -> a -> b
$ Char -> Bool
isWhitespace (Char -> Bool) -> (Char -> Bool) -> Char -> Bool
`orC` Char -> Char -> Bool
isC Char
'>'
forall a. a -> a
id forall a b. (a -> b) -> a -> b
$ ((Char -> Bool) -> Parser Char
satisfy Char -> Bool
isWhitespace forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Text -> [Attr] -> Parser Token
beforeAttrName Text
tag [Attr]
attrs)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Parser Char
char Char
'>' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> [Attr] -> Token
TagOpen Text
tag (Text -> Text -> Attr
Attr Text
name Text
value forall a. a -> [a] -> [a]
: [Attr]
attrs)))
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (forall t. Chunk t => Parser t ()
endOfInput forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return Token
endOfFileToken)
afterAttrValueQuoted :: TagName -> [Attr] -> AttrName -> AttrValue -> Parser Token
afterAttrValueQuoted :: Text -> [Attr] -> Text -> Text -> Parser Token
afterAttrValueQuoted Text
tag [Attr]
attrs Text
name Text
value =
((Char -> Bool) -> Parser Char
satisfy Char -> Bool
isWhitespace forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Text -> [Attr] -> Parser Token
beforeAttrName Text
tag [Attr]
attrs')
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Parser Char
char Char
'/' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Text -> [Attr] -> Parser Token
selfClosingStartTag Text
tag [Attr]
attrs')
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Parser Char
char Char
'>' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> [Attr] -> Token
TagOpen Text
tag [Attr]
attrs'))
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (forall t. Chunk t => Parser t ()
endOfInput forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return Token
endOfFileToken)
where attrs' :: [Attr]
attrs' = Text -> Text -> Attr
Attr Text
name Text
value forall a. a -> [a] -> [a]
: [Attr]
attrs
bogusComment :: Builder -> Parser Token
Builder
content = do
(Char -> Parser Char
char Char
'>' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return (Builder -> Token
Comment Builder
content))
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (forall t. Chunk t => Parser t ()
endOfInput forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return (Builder -> Token
Comment Builder
content))
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Parser Char
char Char
'\x00' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Builder -> Parser Token
bogusComment (Builder
content forall a. Semigroup a => a -> a -> a
<> Builder
"\xfffd"))
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Parser Char
anyChar forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \Char
c -> Builder -> Parser Token
bogusComment (Builder
content forall a. Semigroup a => a -> a -> a
<> Char -> Builder
B.singleton Char
c))
markupDeclOpen :: Parser Token
markupDeclOpen :: Parser Token
markupDeclOpen =
forall i a. Parser i a -> Parser i a
try Parser Token
comment_
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall i a. Parser i a -> Parser i a
try Parser Token
docType
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Builder -> Parser Token
bogusComment forall a. Monoid a => a
mempty
where
comment_ :: Parser Token
comment_ = Char -> Parser Char
char Char
'-' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Char -> Parser Char
char Char
'-' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Token
commentStart
docType :: Parser Token
docType = do
Text
s <- Int -> Parser Text
take Int
7
forall (f :: * -> *). Alternative f => Bool -> f ()
guard forall a b. (a -> b) -> a -> b
$ Text -> Text
T.toLower Text
s forall a. Eq a => a -> a -> Bool
== Text
"doctype"
Parser Token
doctype
commentStart :: Parser Token
= do
(Char -> Parser Char
char Char
'-' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Token
commentStartDash)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Parser Char
char Char
'>' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return (Builder -> Token
Comment forall a. Monoid a => a
mempty))
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Builder -> Parser Token
comment forall a. Monoid a => a
mempty
commentStartDash :: Parser Token
=
(Char -> Parser Char
char Char
'-' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Builder -> Parser Token
commentEnd forall a. Monoid a => a
mempty)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Parser Char
char Char
'>' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return (Builder -> Token
Comment forall a. Monoid a => a
mempty))
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (forall t. Chunk t => Parser t ()
endOfInput forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return (Builder -> Token
Comment forall a. Monoid a => a
mempty))
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Builder -> Parser Token
comment (Char -> Builder
B.singleton Char
'-'))
comment :: Builder -> Parser Token
Builder
content0 = do
Builder
content <- Text -> Builder
B.fromText forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser Text
takeWhile (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Char -> Bool
isC Char
'-' (Char -> Bool) -> (Char -> Bool) -> Char -> Bool
`orC` Char -> Char -> Bool
isC Char
'\x00' (Char -> Bool) -> (Char -> Bool) -> Char -> Bool
`orC` Char -> Char -> Bool
isC Char
'<'))
forall a. a -> a
id forall a b. (a -> b) -> a -> b
$ (Char -> Parser Char
char Char
'<' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Builder -> Parser Token
commentLessThan (Builder
content0 forall a. Semigroup a => a -> a -> a
<> Builder
content forall a. Semigroup a => a -> a -> a
<> Builder
"<"))
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Parser Char
char Char
'-' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Builder -> Parser Token
commentEndDash (Builder
content0 forall a. Semigroup a => a -> a -> a
<> Builder
content))
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Parser Char
char Char
'\x00' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Builder -> Parser Token
comment (Builder
content0 forall a. Semigroup a => a -> a -> a
<> Builder
content forall a. Semigroup a => a -> a -> a
<> Char -> Builder
B.singleton Char
'\xfffd'))
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (forall t. Chunk t => Parser t ()
endOfInput forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return (Builder -> Token
Comment forall a b. (a -> b) -> a -> b
$ Builder
content0 forall a. Semigroup a => a -> a -> a
<> Builder
content))
commentLessThan :: Builder -> Parser Token
Builder
content =
(Char -> Parser Char
char Char
'!' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Builder -> Parser Token
commentLessThanBang (Builder
content forall a. Semigroup a => a -> a -> a
<> Builder
"!"))
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Parser Char
char Char
'<' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Builder -> Parser Token
commentLessThan (Builder
content forall a. Semigroup a => a -> a -> a
<> Builder
"<"))
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Builder -> Parser Token
comment Builder
content
commentLessThanBang :: Builder -> Parser Token
Builder
content =
(Char -> Parser Char
char Char
'-' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Builder -> Parser Token
commentLessThanBangDash Builder
content)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Builder -> Parser Token
comment Builder
content
commentLessThanBangDash :: Builder -> Parser Token
Builder
content =
(Char -> Parser Char
char Char
'-' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Builder -> Parser Token
commentLessThanBangDashDash Builder
content)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Builder -> Parser Token
commentEndDash Builder
content
commentLessThanBangDashDash :: Builder -> Parser Token
Builder
content =
(Char -> Parser Char
char Char
'>' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Builder -> Parser Token
comment Builder
content)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (forall t. Chunk t => Parser t ()
endOfInput forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Builder -> Parser Token
comment Builder
content)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Builder -> Parser Token
commentEnd Builder
content
commentEndDash :: Builder -> Parser Token
Builder
content = do
(Char -> Parser Char
char Char
'-' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Builder -> Parser Token
commentEnd Builder
content)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (forall t. Chunk t => Parser t ()
endOfInput forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return (Builder -> Token
Comment Builder
content))
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Builder -> Parser Token
comment (Builder
content forall a. Semigroup a => a -> a -> a
<> Builder
"-"))
commentEnd :: Builder -> Parser Token
Builder
content = do
(Char -> Parser Char
char Char
'>' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return (Builder -> Token
Comment Builder
content))
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Parser Char
char Char
'!' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Builder -> Parser Token
commentEndBang Builder
content)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Parser Char
char Char
'-' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Builder -> Parser Token
commentEnd (Builder
content forall a. Semigroup a => a -> a -> a
<> Builder
"-"))
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (forall t. Chunk t => Parser t ()
endOfInput forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return (Builder -> Token
Comment Builder
content))
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Builder -> Parser Token
comment (Builder
content forall a. Semigroup a => a -> a -> a
<> Builder
"--"))
commentEndBang :: Builder -> Parser Token
Builder
content = do
(Char -> Parser Char
char Char
'-' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Builder -> Parser Token
commentEndDash (Builder
content forall a. Semigroup a => a -> a -> a
<> Builder
"--!"))
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Parser Char
char Char
'>' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return (Builder -> Token
Comment Builder
content))
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (forall t. Chunk t => Parser t ()
endOfInput forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return (Builder -> Token
Comment Builder
content))
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Builder -> Parser Token
comment (Builder
content forall a. Semigroup a => a -> a -> a
<> Builder
"--!"))
doctype :: Parser Token
doctype :: Parser Token
doctype = do
Text
content <- (Char -> Bool) -> Parser Text
takeTill (forall a. Eq a => a -> a -> Bool
==Char
'>')
Char
_ <- Char -> Parser Char
char Char
'>'
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Text -> Token
Doctype Text
content
parseTokens :: Text -> [Token]
parseTokens :: Text -> [Token]
parseTokens = forall b a. (b -> Maybe (a, b)) -> b -> [a]
unfoldr Text -> Maybe (Token, Text)
f
where
f :: Text -> Maybe (Token, Text)
f :: Text -> Maybe (Token, Text)
f Text
t
| Text -> Bool
T.null Text
t = forall a. Maybe a
Nothing
| Bool
otherwise =
case forall a. Parser a -> Text -> Result a
parse Parser Token
token Text
t of
Done Text
rest Token
tok -> forall a. a -> Maybe a
Just (Token
tok, Text
rest)
Partial Text -> IResult Text Token
cont ->
case Text -> IResult Text Token
cont forall a. Monoid a => a
mempty of
Done Text
rest Token
tok -> forall a. a -> Maybe a
Just (Token
tok, Text
rest)
IResult Text Token
_ -> forall a. Maybe a
Nothing
IResult Text Token
_ -> forall a. Maybe a
Nothing
parseTokensLazy :: TL.Text -> [Token]
parseTokensLazy :: Text -> [Token]
parseTokensLazy = forall b a. (b -> Maybe (a, b)) -> b -> [a]
unfoldr Text -> Maybe (Token, Text)
f
where
f :: TL.Text -> Maybe (Token, TL.Text)
f :: Text -> Maybe (Token, Text)
f Text
t
| Text -> Bool
TL.null Text
t = forall a. Maybe a
Nothing
| Bool
otherwise =
case forall a. Parser a -> Text -> Result a
AL.parse Parser Token
token Text
t of
AL.Done Text
rest Token
tok -> forall a. a -> Maybe a
Just (Token
tok, Text
rest)
Result Token
_ -> forall a. Maybe a
Nothing
renderTokens :: [Token] -> TL.Text
renderTokens :: [Token] -> Text
renderTokens = forall a. Monoid a => [a] -> a
mconcat forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Token -> Text
renderToken
renderToken :: Token -> TL.Text
renderToken :: Token -> Text
renderToken = Text -> Text
TL.fromStrict forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Monoid a => [a] -> a
mconcat forall b c a. (b -> c) -> (a -> b) -> a -> c
. \case
(TagOpen Text
n []) -> [Text
"<", Text
n, Text
">"]
(TagOpen Text
n [Attr]
attrs) -> [Text
"<", Text
n, Text
" ", [Attr] -> Text
renderAttrs [Attr]
attrs, Text
">"]
(TagSelfClose Text
n [Attr]
attrs) -> [Text
"<", Text
n, Text
" ", [Attr] -> Text
renderAttrs [Attr]
attrs, Text
" />"]
(TagClose Text
n) -> [Text
"</", Text
n, Text
">"]
(ContentChar Char
c) -> [Char -> Text
T.singleton Char
c]
(ContentText Text
t) -> [Text
t]
(Comment Builder
builder) -> [Text
"<!--", Text -> Text
TL.toStrict forall a b. (a -> b) -> a -> b
$ Builder -> Text
B.toLazyText Builder
builder, Text
"-->"]
(Doctype Text
t) -> [Text
"<!DOCTYPE", Text
t, Text
">"]
renderAttrs :: [Attr] -> Text
renderAttrs :: [Attr] -> Text
renderAttrs = [Text] -> Text
T.unwords forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Attr -> Text
renderAttr forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. [a] -> [a]
reverse
renderAttr :: Attr -> Text
renderAttr :: Attr -> Text
renderAttr (Attr Text
k Text
v) = forall a. Monoid a => [a] -> a
mconcat [Text
k, Text
"=\"", Text
v, Text
"\""]
canonicalizeTokens :: [Token] -> [Token]
canonicalizeTokens :: [Token] -> [Token]
canonicalizeTokens = forall a. (a -> Bool) -> [a] -> [a]
filter (forall a. Eq a => a -> a -> Bool
/= Text -> Token
ContentText Text
"") forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Token] -> [Token]
meldTextTokens
meldTextTokens :: [Token] -> [Token]
meldTextTokens :: [Token] -> [Token]
meldTextTokens = [Token] -> [Token]
concatTexts forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Token -> Token
charToText
where
charToText :: Token -> Token
charToText (ContentChar Char
c) = Text -> Token
ContentText (Char -> Text
T.singleton Char
c)
charToText Token
t = Token
t
concatTexts :: [Token] -> [Token]
concatTexts = \case
(ContentText Text
t : ContentText Text
t' : [Token]
ts) -> [Token] -> [Token]
concatTexts forall a b. (a -> b) -> a -> b
$ Text -> Token
ContentText (Text
t forall a. Semigroup a => a -> a -> a
<> Text
t') forall a. a -> [a] -> [a]
: [Token]
ts
(Token
t : [Token]
ts) -> Token
t forall a. a -> [a] -> [a]
: [Token] -> [Token]
concatTexts [Token]
ts
[] -> []