{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE MultiWayIf #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Zenacy.HTML.Internal.Oper
( htmlNodeIsElem
, htmlNodeIsText
, htmlNodeContent
, htmlNodeContentSet
, htmlNodeShow
, htmlNodeFind
, htmlNodeCount
, htmlNodeCountM
, htmlTextSpace
, htmlTextAppend
, htmlTextPrepend
, htmlAttrHasName
, htmlAttrRename
, htmlElemAttr
, htmlElemAttrCount
, htmlElemAttrFind
, htmlElemAttrFindName
, htmlElemAttrApply
, htmlElemAttrFilter
, htmlElemAttrMap
, htmlElemHasAttr
, htmlElemHasAttrName
, htmlElemHasAttrVal
, htmlElemHasAttrValInfix
, htmlElemAddAttr
, htmlElemSetAttr
, htmlElemGetAttr
, htmlElemAttrRemove
, htmlElemRemoveAllAttr
, htmlElemAttrRename
, htmlElemID
, htmlElemIDSet
, htmlElemHasID
, htmlElemFindID
, htmlElemClass
, htmlElemClassSet
, htmlElemClasses
, htmlElemClassesSet
, htmlElemClassesAdd
, htmlElemClassesRemove
, htmlElemClassesContains
, htmlElemStyle
, htmlElemStyles
, htmlElemStyleParseURL
, htmlElemContent
, htmlElemContentSet
, htmlElemHasContent
, htmlElemNodeFirst
, htmlElemNodeLast
, htmlElemNodeCount
, htmlElemName
, htmlElemHasName
, htmlElemRename
, htmlElemFindElem
, htmlElemFindElemNamed
, htmlElemHasElem
, htmlElemHasElemNamed
, htmlElemContentApply
, htmlElemContentMap
, htmlElemContentFilter
, htmlElemSearch
, htmlElemText
, htmlDocHtml
, htmlDocBody
, htmlDocHead
, htmlDocTitle
, htmlMapElem
, htmlMapElemM
, htmlElemCollapse
, htmlElemCollapseM
) where
import Zenacy.HTML.Internal.Core
import Zenacy.HTML.Internal.HTML
import Control.Monad
( (>=>)
)
import Control.Monad.Extra as X
( whenJust
, whenJustM
, concatMapM
, ifM
)
import Data.Char
( isSpace
)
import Data.Functor.Identity
( runIdentity
)
import Data.List
( find
)
import Data.List.Extra
( firstJust
)
import Data.Map
( Map
)
import qualified Data.Map as Map
( empty
, fromList
, lookup
)
import Data.Maybe
( listToMaybe
, isJust
)
import Data.Monoid
( (<>)
)
import Data.Set
( Set
)
import qualified Data.Set as Set
( delete
, empty
, fromList
, insert
, member
, notMember
, toList
, union
, unions
)
import Data.Text
( Text
)
import qualified Data.Text as T
( all
, append
, breakOn
, concat
, drop
, dropAround
, empty
, isInfixOf
, isPrefixOf
, null
, split
, splitOn
, strip
, words
, unwords
)
import Data.Tuple.Extra
( first
, second
)
htmlNodeIsElem :: HTMLNode -> Bool
htmlNodeIsElem HTMLElement {} = True
htmlNodeIsElem _ = False
htmlNodeIsText :: HTMLNode -> Bool
htmlNodeIsText HTMLText {} = True
htmlNodeIsText _ = False
htmlNodeContent :: HTMLNode -> [HTMLNode]
htmlNodeContent (HTMLDocument _ c) = c
htmlNodeContent (HTMLFragment _ c) = c
htmlNodeContent (HTMLElement _ _ _ c) = c
htmlNodeContent _ = []
htmlNodeContentSet :: [HTMLNode] -> HTMLNode -> HTMLNode
htmlNodeContentSet x (HTMLDocument n c) = HTMLDocument n x
htmlNodeContentSet x (HTMLFragment n c) = HTMLFragment n x
htmlNodeContentSet x (HTMLElement n s a c) = HTMLElement n s a x
htmlNodeContentSet x y = y
htmlNodeShow :: HTMLNode -> String
htmlNodeShow = show . htmlNodeContentSet []
htmlNodeFind :: (HTMLNode -> Bool) -> HTMLNode -> Maybe HTMLNode
htmlNodeFind p x = find p $ htmlNodeContent x
htmlNodeCount :: (HTMLNode -> Bool) -> HTMLNode -> Int
htmlNodeCount f = runIdentity . htmlNodeCountM (pure . f)
htmlNodeCountM :: Monad m => (HTMLNode -> m Bool) -> HTMLNode -> m Int
htmlNodeCountM f x = do
n <- sum <$> mapM (htmlNodeCountM f) (htmlNodeContent x)
ifM (f x) (pure $ 1 + n) (pure n)
htmlTextSpace :: HTMLNode -> Bool
htmlTextSpace (HTMLText x) = T.all isSpace x
htmlTextSpace _ = False
htmlTextAppend :: Text -> HTMLNode -> HTMLNode
htmlTextAppend a (HTMLText x) = HTMLText $ T.append x a
htmlTextAppend a x = x
htmlTextPrepend :: Text -> HTMLNode -> HTMLNode
htmlTextPrepend a (HTMLText x) = HTMLText $ T.append a x
htmlTextPrepend a x = x
htmlAttrHasName :: Text -> HTMLAttr -> Bool
htmlAttrHasName x a = x == htmlAttrName a
htmlAttrRename :: Text -> HTMLAttr -> HTMLAttr
htmlAttrRename x (HTMLAttr n v s) = HTMLAttr x v s
htmlElemAttr :: HTMLNode -> [HTMLAttr]
htmlElemAttr (HTMLElement _ _ a _) = a
htmlElemAttr _ = []
htmlElemAttrCount :: HTMLNode -> Int
htmlElemAttrCount = length . htmlElemAttr
htmlElemAttrFind :: (HTMLAttr -> Bool) -> HTMLNode -> Maybe HTMLAttr
htmlElemAttrFind f (HTMLElement _ _ a _) = find f a
htmlElemAttrFind _ _ = Nothing
htmlElemAttrFindName :: Text -> HTMLNode -> Maybe HTMLAttr
htmlElemAttrFindName x = htmlElemAttrFind $ htmlAttrHasName x
htmlElemAttrApply :: ([HTMLAttr] -> [HTMLAttr]) -> HTMLNode -> HTMLNode
htmlElemAttrApply f (HTMLElement n s a c) = HTMLElement n s (f a) c
htmlElemAttrApply _ x = x
htmlElemAttrFilter :: (HTMLAttr -> Bool) -> HTMLNode -> HTMLNode
htmlElemAttrFilter f = htmlElemAttrApply $ filter f
htmlElemAttrMap :: (HTMLAttr -> HTMLAttr) -> HTMLNode -> HTMLNode
htmlElemAttrMap f = htmlElemAttrApply $ map f
htmlElemHasAttr :: HTMLNode -> Bool
htmlElemHasAttr x = htmlElemAttrCount x > 0
htmlElemHasAttrName :: Text -> HTMLNode -> Bool
htmlElemHasAttrName x = isJust . htmlElemAttrFindName x
htmlElemHasAttrVal :: Text -> Text -> HTMLNode -> Bool
htmlElemHasAttrVal x y z =
maybe False (\a -> y == htmlAttrVal a) $ htmlElemAttrFindName x z
htmlElemHasAttrValInfix :: Text -> Text -> HTMLNode -> Bool
htmlElemHasAttrValInfix x y z =
maybe False (\a -> y `T.isInfixOf` htmlAttrVal a) $ htmlElemAttrFindName x z
htmlElemAddAttr :: HTMLAttr -> HTMLNode -> HTMLNode
htmlElemAddAttr x (HTMLElement n s a c) = HTMLElement n s (a <> [x]) c
htmlElemAddAttr x y = y
htmlElemSetAttr :: Text -> Text -> HTMLNode -> HTMLNode
htmlElemSetAttr x v n =
if htmlElemHasAttrName x n
then htmlElemAttrMap f n
else htmlElemAddAttr (htmlAttr x v) n
where
f a@(HTMLAttr an av as) =
if an == x then (HTMLAttr an v as) else a
htmlElemGetAttr :: Text -> HTMLNode -> Maybe Text
htmlElemGetAttr x n = htmlAttrVal <$> htmlElemAttrFindName x n
htmlElemAttrRemove :: Text -> HTMLNode -> HTMLNode
htmlElemAttrRemove x (HTMLElement n s a c) = HTMLElement n s a' c
where a' = filter (\y -> htmlAttrName y /= x) a
htmlElemAttrRemove x y = y
htmlElemRemoveAllAttr :: HTMLNode -> HTMLNode
htmlElemRemoveAllAttr (HTMLElement n s a c) = HTMLElement n s [] c
htmlElemRemoveAllAttr x = x
htmlElemAttrRename :: Text -> Text -> HTMLNode -> HTMLNode
htmlElemAttrRename old new = htmlElemAttrMap rename
where
rename x =
if htmlAttrHasName old x
then htmlAttrRename new x
else x
htmlElemID :: HTMLNode -> Maybe Text
htmlElemID = htmlElemGetAttr "id"
htmlElemIDSet :: Text -> HTMLNode -> HTMLNode
htmlElemIDSet = htmlElemSetAttr "id"
htmlElemHasID :: Text -> HTMLNode -> Bool
htmlElemHasID x y = htmlElemID y == Just x
htmlElemFindID :: Text -> HTMLNode -> Maybe HTMLNode
htmlElemFindID x = htmlElemSearch $ htmlElemHasID x
htmlElemClass :: HTMLNode -> Maybe Text
htmlElemClass = htmlElemGetAttr "class"
htmlElemClassSet :: Text -> HTMLNode -> HTMLNode
htmlElemClassSet = htmlElemSetAttr "class"
htmlElemClasses :: HTMLNode -> Set Text
htmlElemClasses = maybe Set.empty (Set.fromList . T.words) . htmlElemClass
htmlElemClassesSet :: Set Text -> HTMLNode -> HTMLNode
htmlElemClassesSet s = htmlElemClassSet (T.unwords $ Set.toList s)
htmlElemClassesAdd :: Text -> HTMLNode -> HTMLNode
htmlElemClassesAdd c x =
htmlElemClassesSet (Set.insert c $ htmlElemClasses x) x
htmlElemClassesRemove :: Text -> HTMLNode -> HTMLNode
htmlElemClassesRemove c x =
htmlElemClassesSet (Set.delete c $ htmlElemClasses x) x
htmlElemClassesContains :: Text -> HTMLNode -> Bool
htmlElemClassesContains c = Set.member c . htmlElemClasses
htmlElemStyle :: HTMLNode -> Maybe Text
htmlElemStyle = htmlElemGetAttr "style"
htmlElemStyles :: HTMLNode -> Map Text Text
htmlElemStyles =
maybe Map.empty parse . htmlElemStyle
where
parse =
( Map.fromList
. map
( first T.strip
. second T.strip
. second (T.drop 1)
. T.breakOn ":"
)
. filter (not . T.null)
. map T.strip
. T.splitOn ";"
)
htmlElemStyleParseURL :: Text -> Maybe Text
htmlElemStyleParseURL x
| "url" `T.isPrefixOf` x =
( T.strip
. T.dropAround (=='\'')
. T.dropAround (=='\"')
. T.strip
) <$> textExtract "(" ")" x
| otherwise = Nothing
htmlElemContent :: HTMLNode -> [HTMLNode]
htmlElemContent (HTMLElement _ _ _ c) = c
htmlElemContent _ = []
htmlElemContentSet :: [HTMLNode] -> HTMLNode -> HTMLNode
htmlElemContentSet x (HTMLElement n s a c) = HTMLElement n s a x
htmlElemContentSet x y = y
htmlElemHasContent :: HTMLNode -> Bool
htmlElemHasContent (HTMLElement _ _ _ []) = False
htmlElemHasContent (HTMLElement _ _ _ (x:xs)) = True
htmlElemHasContent _ = False
htmlElemNodeFirst :: HTMLNode -> Maybe HTMLNode
htmlElemNodeFirst = listToMaybe . htmlElemContent
htmlElemNodeLast :: HTMLNode -> Maybe HTMLNode
htmlElemNodeLast = listToMaybe . reverse . htmlElemContent
htmlElemNodeCount :: HTMLNode -> Int
htmlElemNodeCount = length . htmlElemContent
htmlElemName :: HTMLNode -> Text
htmlElemName (HTMLElement n _ _ _) = n
htmlElemName _ = T.empty
htmlElemHasName :: Text -> HTMLNode -> Bool
htmlElemHasName x y = htmlElemName y == x
htmlElemRename :: Text -> HTMLNode -> HTMLNode
htmlElemRename n (HTMLElement _ s a c) = HTMLElement n s a c
htmlElemRename n x = x
htmlElemFindElem :: (HTMLNode -> Bool) -> HTMLNode -> Maybe HTMLNode
htmlElemFindElem p (HTMLElement _ _ _ c) = find p c
htmlElemFindElem _ _ = Nothing
htmlElemFindElemNamed :: Text -> HTMLNode -> Maybe HTMLNode
htmlElemFindElemNamed x = htmlElemFindElem $ htmlElemHasName x
htmlElemHasElem :: (HTMLNode -> Bool) -> HTMLNode -> Bool
htmlElemHasElem p = isJust . htmlElemFindElem p
htmlElemHasElemNamed :: Text -> HTMLNode -> Bool
htmlElemHasElemNamed x = isJust . htmlElemFindElemNamed x
htmlElemContentApply :: ([HTMLNode] -> [HTMLNode]) -> HTMLNode -> HTMLNode
htmlElemContentApply f (HTMLElement n s a c) = HTMLElement n s a $ f c
htmlElemContentApply _ x = x
htmlElemContentMap :: (HTMLNode -> HTMLNode) -> HTMLNode -> HTMLNode
htmlElemContentMap f = htmlElemContentApply $ map f
htmlElemContentFilter :: (HTMLNode -> Bool) -> HTMLNode -> HTMLNode
htmlElemContentFilter f = htmlElemContentApply $ filter f
htmlElemSearch :: (HTMLNode -> Bool) -> HTMLNode -> Maybe HTMLNode
htmlElemSearch f x = case x of
HTMLElement _ _ _ c ->
if f x then Just x else firstJust (htmlElemSearch f) c
_otherwise -> Nothing
htmlElemText :: HTMLNode -> Maybe Text
htmlElemText (HTMLElement n s a c) =
case filter htmlNodeIsText c of
a@(x:xs) -> Just . T.concat . map htmlTextData $ a
[] -> Nothing
htmlElemText _ = Nothing
htmlDocHtml :: HTMLNode -> Maybe HTMLNode
htmlDocHtml = htmlNodeFind $ htmlElemHasName "html"
htmlDocBody :: HTMLNode -> Maybe HTMLNode
htmlDocBody = htmlDocHtml >=> htmlElemFindElemNamed "body"
htmlDocHead :: HTMLNode -> Maybe HTMLNode
htmlDocHead = htmlDocHtml >=> htmlElemFindElemNamed "head"
htmlDocTitle :: HTMLNode -> Maybe Text
htmlDocTitle = htmlDocHead
>=> htmlElemFindElemNamed "title"
>=> htmlElemText
htmlMapElem :: (HTMLNode -> HTMLNode) -> HTMLNode -> HTMLNode
htmlMapElem f = runIdentity . htmlMapElemM (pure . f)
htmlMapElemM :: Monad m => (HTMLNode -> m HTMLNode) -> HTMLNode -> m HTMLNode
htmlMapElemM f x =
case x of
HTMLElement {} -> do
a <- f x
case a of
HTMLElement n s a c ->
HTMLElement n s a <$> mapM (htmlMapElemM f) c
_otherwise ->
pure a
_otherwise ->
pure x
htmlElemCollapse :: (HTMLNode -> Bool) -> HTMLNode -> [HTMLNode]
htmlElemCollapse f = runIdentity . htmlElemCollapseM (pure . f)
htmlElemCollapseM :: Monad m => (HTMLNode -> m Bool) -> HTMLNode -> m [HTMLNode]
htmlElemCollapseM f x@(HTMLElement n s a c) = do
c' <- concatMapM (htmlElemCollapseM f) c
ifM (f x) (pure c') $ pure [ HTMLElement n s a c' ]
htmlElemCollapseM _ x = pure [x]