{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ViewPatterns #-}
module Text.HTML.Tree
(
tokensToForest
, ParseTokenForestError(..), PStack(..)
, nonClosing
, tokensFromForest
, tokensFromTree
) where
import Data.Monoid
import Data.Text (Text)
import Data.Tree
import Prelude
import Text.HTML.Parser
tokensToForest :: [Token] -> Either ParseTokenForestError (Forest Token)
tokensToForest :: [Token] -> Either ParseTokenForestError (Forest Token)
tokensToForest = PStack -> [Token] -> Either ParseTokenForestError (Forest Token)
f (Forest Token -> [(Token, Forest Token)] -> PStack
PStack [] [])
where
f :: PStack -> [Token] -> Either ParseTokenForestError (Forest Token)
f (PStack Forest Token
ss []) [] = forall a b. b -> Either a b
Right (forall a. [a] -> [a]
reverse Forest Token
ss)
f PStack
pstack [] = forall a b. a -> Either a b
Left forall a b. (a -> b) -> a -> b
$ PStack -> Maybe Token -> ParseTokenForestError
ParseTokenForestErrorBracketMismatch PStack
pstack forall a. Maybe a
Nothing
f PStack
pstack (Token
t : [Token]
ts) = case Token
t of
TagOpen TagName
n [Attr]
_ -> if TagName
n forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [TagName]
nonClosing
then PStack -> [Token] -> Either ParseTokenForestError (Forest Token)
f (Token -> PStack -> PStack
pushFlatSibling Token
t PStack
pstack) [Token]
ts
else PStack -> [Token] -> Either ParseTokenForestError (Forest Token)
f (Token -> PStack -> PStack
pushParent Token
t PStack
pstack) [Token]
ts
TagSelfClose {} -> PStack -> [Token] -> Either ParseTokenForestError (Forest Token)
f (Token -> PStack -> PStack
pushFlatSibling Token
t PStack
pstack) [Token]
ts
TagClose TagName
n -> (PStack -> [Token] -> Either ParseTokenForestError (Forest Token)
`f` [Token]
ts) forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< TagName -> PStack -> Either ParseTokenForestError PStack
popParent TagName
n PStack
pstack
ContentChar Char
_ -> PStack -> [Token] -> Either ParseTokenForestError (Forest Token)
f (Token -> PStack -> PStack
pushFlatSibling Token
t PStack
pstack) [Token]
ts
ContentText TagName
_ -> PStack -> [Token] -> Either ParseTokenForestError (Forest Token)
f (Token -> PStack -> PStack
pushFlatSibling Token
t PStack
pstack) [Token]
ts
Comment Builder
_ -> PStack -> [Token] -> Either ParseTokenForestError (Forest Token)
f (Token -> PStack -> PStack
pushFlatSibling Token
t PStack
pstack) [Token]
ts
Doctype TagName
_ -> PStack -> [Token] -> Either ParseTokenForestError (Forest Token)
f (Token -> PStack -> PStack
pushFlatSibling Token
t PStack
pstack) [Token]
ts
nonClosing :: [Text]
nonClosing :: [TagName]
nonClosing = [TagName
"br", TagName
"hr", TagName
"img", TagName
"meta", TagName
"area", TagName
"base", TagName
"col", TagName
"embed", TagName
"input", TagName
"link", TagName
"param", TagName
"source", TagName
"track", TagName
"wbr"]
data ParseTokenForestError =
ParseTokenForestErrorBracketMismatch PStack (Maybe Token)
deriving (ParseTokenForestError -> ParseTokenForestError -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: ParseTokenForestError -> ParseTokenForestError -> Bool
$c/= :: ParseTokenForestError -> ParseTokenForestError -> Bool
== :: ParseTokenForestError -> ParseTokenForestError -> Bool
$c== :: ParseTokenForestError -> ParseTokenForestError -> Bool
Eq, Int -> ParseTokenForestError -> ShowS
[ParseTokenForestError] -> ShowS
ParseTokenForestError -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [ParseTokenForestError] -> ShowS
$cshowList :: [ParseTokenForestError] -> ShowS
show :: ParseTokenForestError -> String
$cshow :: ParseTokenForestError -> String
showsPrec :: Int -> ParseTokenForestError -> ShowS
$cshowsPrec :: Int -> ParseTokenForestError -> ShowS
Show)
data PStack = PStack
{ PStack -> Forest Token
_pstackToplevelSiblings :: Forest Token
, PStack -> [(Token, Forest Token)]
_pstackParents :: [(Token, Forest Token)]
}
deriving (PStack -> PStack -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: PStack -> PStack -> Bool
$c/= :: PStack -> PStack -> Bool
== :: PStack -> PStack -> Bool
$c== :: PStack -> PStack -> Bool
Eq, Int -> PStack -> ShowS
[PStack] -> ShowS
PStack -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [PStack] -> ShowS
$cshowList :: [PStack] -> ShowS
show :: PStack -> String
$cshow :: PStack -> String
showsPrec :: Int -> PStack -> ShowS
$cshowsPrec :: Int -> PStack -> ShowS
Show)
pushParent :: Token -> PStack -> PStack
pushParent :: Token -> PStack -> PStack
pushParent Token
t (PStack Forest Token
ss [(Token, Forest Token)]
ps) = Forest Token -> [(Token, Forest Token)] -> PStack
PStack [] ((Token
t, Forest Token
ss) forall a. a -> [a] -> [a]
: [(Token, Forest Token)]
ps)
popParent :: TagName -> PStack -> Either ParseTokenForestError PStack
popParent :: TagName -> PStack -> Either ParseTokenForestError PStack
popParent TagName
n (PStack Forest Token
ss ((p :: Token
p@(TagOpen TagName
n' [Attr]
_), Forest Token
ss') : [(Token, Forest Token)]
ps))
| TagName
n forall a. Eq a => a -> a -> Bool
== TagName
n' = forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ Forest Token -> [(Token, Forest Token)] -> PStack
PStack (forall a. a -> [Tree a] -> Tree a
Node Token
p (forall a. [a] -> [a]
reverse Forest Token
ss) forall a. a -> [a] -> [a]
: Forest Token
ss') [(Token, Forest Token)]
ps
popParent TagName
n PStack
pstack
= forall a b. a -> Either a b
Left forall a b. (a -> b) -> a -> b
$ PStack -> Maybe Token -> ParseTokenForestError
ParseTokenForestErrorBracketMismatch PStack
pstack (forall a. a -> Maybe a
Just forall a b. (a -> b) -> a -> b
$ TagName -> Token
TagClose TagName
n)
pushFlatSibling :: Token -> PStack -> PStack
pushFlatSibling :: Token -> PStack -> PStack
pushFlatSibling Token
t (PStack Forest Token
ss [(Token, Forest Token)]
ps) = Forest Token -> [(Token, Forest Token)] -> PStack
PStack (forall a. a -> [Tree a] -> Tree a
Node Token
t [] forall a. a -> [a] -> [a]
: Forest Token
ss) [(Token, Forest Token)]
ps
tokensFromForest :: Forest Token -> [Token]
tokensFromForest :: Forest Token -> [Token]
tokensFromForest = forall a. Monoid a => [a] -> a
mconcat forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Tree Token -> [Token]
tokensFromTree
tokensFromTree :: Tree Token -> [Token]
tokensFromTree :: Tree Token -> [Token]
tokensFromTree (Node o :: Token
o@(TagOpen TagName
n [Attr]
_) Forest Token
ts) | TagName
n forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` [TagName]
nonClosing
= [Token
o] forall a. Semigroup a => a -> a -> a
<> Forest Token -> [Token]
tokensFromForest Forest Token
ts forall a. Semigroup a => a -> a -> a
<> [TagName -> Token
TagClose TagName
n]
tokensFromTree (Node Token
t [])
= [Token
t]
tokensFromTree Tree Token
_
= forall a. HasCallStack => String -> a
error String
"renderTokenTree: leaf node with children."