module Text.HTML.Parser.Util
(
toToken
, toTokenDefault
, isTagOpen
, isTagClose
, isTagSelfClose
, isContentText
, isContentChar
, isComment
, isDoctype
, isTagOpenName
, isTagCloseName
, fromContentText
, maybeContentText
, fromAttrib
, maybeAttrib
, innerText
, toHeadContentText
, between
, dropHeader
, allContentText
, sections
, section
, partitions
, (~==)
, (~/=)
) where
import qualified Data.Attoparsec.Text as A
import qualified Data.List.NonEmpty as NE
import qualified Data.Text as T
import Data.Either (fromRight)
import Data.List (groupBy, tails)
import Data.Maybe (fromMaybe, mapMaybe)
import Data.Text (Text)
import Text.HTML.Parser (Attr (Attr), Token (Comment, ContentChar, ContentText, Doctype, TagClose, TagOpen, TagSelfClose), token)
toToken :: Text -> Token
toToken :: AttrName -> Token
toToken = Token -> AttrName -> Token
toTokenDefault (AttrName -> Token
Doctype AttrName
"Could not parse string into token.")
toTokenDefault :: Token -> Text -> Token
toTokenDefault :: Token -> AttrName -> Token
toTokenDefault Token
d = Token -> Either [Char] Token -> Token
forall b a. b -> Either a b -> b
fromRight Token
d (Either [Char] Token -> Token)
-> (AttrName -> Either [Char] Token) -> AttrName -> Token
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Parser Token -> AttrName -> Either [Char] Token
forall a. Parser a -> AttrName -> Either [Char] a
A.parseOnly Parser Token
token
sections :: (a -> Bool) -> [a] -> [[a]]
sections :: forall a. (a -> Bool) -> [a] -> [[a]]
sections a -> Bool
p = ([a] -> Bool) -> [[a]] -> [[a]]
forall a. (a -> Bool) -> [a] -> [a]
filter (a -> Bool
p (a -> Bool) -> ([a] -> a) -> [a] -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [a] -> a
forall a. HasCallStack => [a] -> a
head) ([[a]] -> [[a]]) -> ([a] -> [[a]]) -> [a] -> [[a]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [[a]] -> [[a]]
forall a. HasCallStack => [a] -> [a]
init ([[a]] -> [[a]]) -> ([a] -> [[a]]) -> [a] -> [[a]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [a] -> [[a]]
forall a. [a] -> [[a]]
tails
section :: (a -> Bool) -> [a] -> [a]
section :: forall a. (a -> Bool) -> [a] -> [a]
section a -> Bool
f = \case
[] -> []
[a]
xs -> [a] -> (NonEmpty [a] -> [a]) -> Maybe (NonEmpty [a]) -> [a]
forall b a. b -> (a -> b) -> Maybe a -> b
maybe [] NonEmpty [a] -> [a]
forall a. NonEmpty a -> a
NE.head ([[a]] -> Maybe (NonEmpty [a])
forall a. [a] -> Maybe (NonEmpty a)
NE.nonEmpty ((a -> Bool) -> [a] -> [[a]]
forall a. (a -> Bool) -> [a] -> [[a]]
sections a -> Bool
f [a]
xs))
partitions :: (a -> Bool) -> [a] -> [[a]]
partitions :: forall a. (a -> Bool) -> [a] -> [[a]]
partitions a -> Bool
p = (a -> a -> Bool) -> [a] -> [[a]]
forall a. (a -> a -> Bool) -> [a] -> [[a]]
groupBy ((a -> Bool) -> a -> a -> Bool
forall a b. a -> b -> a
const a -> Bool
notp) ([a] -> [[a]]) -> ([a] -> [a]) -> [a] -> [[a]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (a -> Bool) -> [a] -> [a]
forall a. (a -> Bool) -> [a] -> [a]
dropWhile a -> Bool
notp
where notp :: a -> Bool
notp = Bool -> Bool
not (Bool -> Bool) -> (a -> Bool) -> a -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. a -> Bool
p
toHeadContentText :: [Token] -> Text
toHeadContentText :: [Token] -> AttrName
toHeadContentText = AttrName
-> (NonEmpty AttrName -> AttrName)
-> Maybe (NonEmpty AttrName)
-> AttrName
forall b a. b -> (a -> b) -> Maybe a -> b
maybe AttrName
"" NonEmpty AttrName -> AttrName
forall a. NonEmpty a -> a
NE.head (Maybe (NonEmpty AttrName) -> AttrName)
-> ([Token] -> Maybe (NonEmpty AttrName)) -> [Token] -> AttrName
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [AttrName] -> Maybe (NonEmpty AttrName)
forall a. [a] -> Maybe (NonEmpty a)
NE.nonEmpty ([AttrName] -> Maybe (NonEmpty AttrName))
-> ([Token] -> [AttrName]) -> [Token] -> Maybe (NonEmpty AttrName)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Token] -> [AttrName]
allContentText
between :: Token -> Token -> [Token] -> [Token]
between :: Token -> Token -> [Token] -> [Token]
between Token
start Token
end = (Token -> Bool) -> [Token] -> [Token]
forall a. (a -> Bool) -> [a] -> [a]
takeWhile (Token -> Token -> Bool
~/= Token
end )
([Token] -> [Token]) -> ([Token] -> [Token]) -> [Token] -> [Token]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> [Token] -> [Token]
forall a. Int -> [a] -> [a]
drop Int
1
([Token] -> [Token]) -> ([Token] -> [Token]) -> [Token] -> [Token]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Token -> Bool) -> [Token] -> [Token]
forall a. (a -> Bool) -> [a] -> [a]
dropWhile (Token -> Token -> Bool
~/= Token
start)
dropHeader :: [Attr] -> [Token] -> [Token]
[Attr]
attr = Int -> [Token] -> [Token]
forall a. Int -> [a] -> [a]
drop Int
1
([Token] -> [Token]) -> ([Token] -> [Token]) -> [Token] -> [Token]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Token -> Bool) -> [Token] -> [Token]
forall a. (a -> Bool) -> [a] -> [a]
dropWhile (Token -> Token -> Bool
~/= AttrName -> Token
TagClose AttrName
"header" )
([Token] -> [Token]) -> ([Token] -> [Token]) -> [Token] -> [Token]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Token -> Bool) -> [Token] -> [Token]
forall a. (a -> Bool) -> [a] -> [a]
dropWhile (Token -> Token -> Bool
~/= AttrName -> [Attr] -> Token
TagOpen AttrName
"header" [Attr]
attr)
allContentText :: [Token] -> [Text]
allContentText :: [Token] -> [AttrName]
allContentText = (Token -> Maybe AttrName) -> [Token] -> [AttrName]
forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe Token -> Maybe AttrName
maybeContentText
isTagOpen :: Token -> Bool
isTagOpen :: Token -> Bool
isTagOpen = \case
TagOpen{} -> Bool
True
Token
_ -> Bool
False
isTagClose :: Token -> Bool
isTagClose :: Token -> Bool
isTagClose = \case
TagClose{} -> Bool
True
Token
_ -> Bool
False
isContentText :: Token -> Bool
isContentText :: Token -> Bool
isContentText = \case
ContentText{} -> Bool
True
Token
_ -> Bool
False
maybeContentText :: Token -> Maybe Text
maybeContentText :: Token -> Maybe AttrName
maybeContentText = \case
ContentText AttrName
t -> AttrName -> Maybe AttrName
forall a. a -> Maybe a
Just AttrName
t
Token
_ -> Maybe AttrName
forall a. Maybe a
Nothing
fromContentText :: Token -> Text
fromContentText :: Token -> AttrName
fromContentText = \case
ContentText AttrName
t -> AttrName
t
Token
t -> [Char] -> AttrName
forall a. HasCallStack => [Char] -> a
error ([Char] -> AttrName) -> [Char] -> AttrName
forall a b. (a -> b) -> a -> b
$ [Char]
"(" [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Token -> [Char]
forall a. Show a => a -> [Char]
show Token
t [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
") is not a ContentText"
innerText :: [Token] -> Text
innerText :: [Token] -> AttrName
innerText = [AttrName] -> AttrName
forall a. Monoid a => [a] -> a
mconcat ([AttrName] -> AttrName)
-> ([Token] -> [AttrName]) -> [Token] -> AttrName
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Token -> Maybe AttrName) -> [Token] -> [AttrName]
forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe Token -> Maybe AttrName
maybeContentText
isTagSelfClose :: Token -> Bool
isTagSelfClose :: Token -> Bool
isTagSelfClose = \case
TagSelfClose{} -> Bool
True
Token
_ -> Bool
False
isContentChar :: Token -> Bool
isContentChar :: Token -> Bool
isContentChar = \case
ContentChar{} -> Bool
True
Token
_ -> Bool
False
isComment :: Token -> Bool
= \case
Comment{} -> Bool
True
Token
_ -> Bool
False
isDoctype :: Token -> Bool
isDoctype :: Token -> Bool
isDoctype = \case
Doctype{} -> Bool
True
Token
_ -> Bool
False
isTagOpenName :: Text -> Token -> Bool
isTagOpenName :: AttrName -> Token -> Bool
isTagOpenName AttrName
name (TagOpen AttrName
n [Attr]
_) = AttrName
n AttrName -> AttrName -> Bool
forall a. Eq a => a -> a -> Bool
== AttrName
name
isTagOpenName AttrName
_ Token
_ = Bool
False
isTagCloseName :: Text -> Token -> Bool
isTagCloseName :: AttrName -> Token -> Bool
isTagCloseName AttrName
name (TagClose AttrName
n) = AttrName
n AttrName -> AttrName -> Bool
forall a. Eq a => a -> a -> Bool
== AttrName
name
isTagCloseName AttrName
_ Token
_ = Bool
False
fromAttrib :: Attr -> Token -> Attr
fromAttrib :: Attr -> Token -> Attr
fromAttrib Attr
att Token
tag = Attr -> Maybe Attr -> Attr
forall a. a -> Maybe a -> a
fromMaybe (AttrName -> AttrName -> Attr
Attr AttrName
"" AttrName
"") (Maybe Attr -> Attr) -> Maybe Attr -> Attr
forall a b. (a -> b) -> a -> b
$ Attr -> Token -> Maybe Attr
maybeAttrib Attr
att Token
tag
maybeAttrib :: Attr -> Token -> Maybe Attr
maybeAttrib :: Attr -> Token -> Maybe Attr
maybeAttrib Attr
att (TagOpen AttrName
_ [Attr]
atts)
| Attr
att Attr -> [Attr] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Attr]
atts = Attr -> Maybe Attr
forall a. a -> Maybe a
Just Attr
att
| Bool
otherwise = Maybe Attr
forall a. Maybe a
Nothing
maybeAttrib Attr
_ Token
t = [Char] -> Maybe Attr
forall a. HasCallStack => [Char] -> a
error ([Char]
"(" [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Token -> [Char]
forall a. Show a => a -> [Char]
show Token
t [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
") is not a TagOpen")
infixl 9 ~==
(~==) :: Token -> Token -> Bool
~== :: Token -> Token -> Bool
(~==) = Token -> Token -> Bool
f
where
f :: Token -> Token -> Bool
f (ContentText AttrName
y) (ContentText AttrName
x) = AttrName -> Bool
T.null AttrName
x Bool -> Bool -> Bool
|| AttrName
x AttrName -> AttrName -> Bool
forall a. Eq a => a -> a -> Bool
== AttrName
y
f (TagClose AttrName
y) (TagClose AttrName
x) = AttrName -> Bool
T.null AttrName
x Bool -> Bool -> Bool
|| AttrName
x AttrName -> AttrName -> Bool
forall a. Eq a => a -> a -> Bool
== AttrName
y
f (Comment Builder
x) (Comment Builder
y) = Builder
x Builder -> Builder -> Bool
forall a. Eq a => a -> a -> Bool
== Builder
forall a. Monoid a => a
mempty Bool -> Bool -> Bool
|| Builder
x Builder -> Builder -> Bool
forall a. Eq a => a -> a -> Bool
== Builder
y
f (TagOpen AttrName
y [Attr]
ys) (TagOpen AttrName
x [Attr]
xs) = (AttrName -> Bool
T.null AttrName
x Bool -> Bool -> Bool
|| AttrName
x AttrName -> AttrName -> Bool
forall a. Eq a => a -> a -> Bool
== AttrName
y) Bool -> Bool -> Bool
&& (Attr -> Bool) -> [Attr] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Attr -> Bool
g [Attr]
xs
where
g :: Attr -> Bool
g :: Attr -> Bool
g nv :: Attr
nv@(Attr AttrName
name AttrName
val)
| AttrName -> Bool
T.null AttrName
name = AttrName
val AttrName -> [AttrName] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` (Attr -> AttrName) -> [Attr] -> [AttrName]
forall a b. (a -> b) -> [a] -> [b]
map (\(Attr AttrName
o AttrName
_) -> AttrName
o) [Attr]
ys
| AttrName -> Bool
T.null AttrName
val = AttrName
name AttrName -> [AttrName] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` (Attr -> AttrName) -> [Attr] -> [AttrName]
forall a b. (a -> b) -> [a] -> [b]
map (\(Attr AttrName
_ AttrName
t) -> AttrName
t) [Attr]
ys
| Bool
otherwise = Attr
nv Attr -> [Attr] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Attr]
ys
f Token
_ Token
_ = Bool
False
infixl 9 ~/=
(~/=) :: Token -> Token -> Bool
~/= :: Token -> Token -> Bool
(~/=) Token
a Token
b = Bool -> Bool
not (Token
a Token -> Token -> Bool
~== Token
b)