{-# LANGUAGE OverloadedStrings #-}

-- |Scraping (innerHTML/innerText) and modification (node removal) functions.
module Text.XML.Scraping where

import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import Text.XML as X
import Text.XML.Cursor
import Data.String
import qualified Text.HTML.DOM as H
import qualified Data.Map as M
import Data.List
import Data.Maybe
import System.Environment (getArgs)
import Text.Blaze.Html as Bl
import Text.Blaze.Html.Renderer.Text
import Data.Text.Lazy  (fromStrict,toStrict, unpack)

import Text.XML.Selector
import Text.XML.Selector.Types

-- * InnerHTML / InnerText

-- |Get ''innerHTML'' from a list of cursors.
innerHtml :: [Cursor] -> TL.Text
innerHtml cs = renderNodes $ map node $ concatMap child cs

-- |Get ''innerText'' from a list of cursors.
innerText :: [Cursor] -> T.Text
innerText cs = T.concat $ map (innerTextN . node) cs
-- |''toHTML'' of a list of nodes.
renderNodes :: [Node] -> TL.Text
renderNodes ns = TL.concat $ map (renderHtml . Bl.toHtml) ns

-- |''toHTML'' of a list of cursors.
toHtml :: [Cursor] -> TL.Text
toHtml cs = renderNodes $ map node cs

-- |''innerText'' of a single node.
innerTextN :: Node -> T.Text
innerTextN (NodeElement (Element _ _ cs)) = T.concat $ map innerTextN cs
innerTextN (NodeContent txt) = txt
innerTextN _ = ""

-- * Attirbutes
-- |Tag name of element node. Return Nothing if the node is not an element.
ename :: Node -> Maybe T.Text
ename (NodeElement (Element n _ _)) = Just $ nameLocalName n
ename _ = Nothing

-- |Return an element id. If node is not an element or does not have an id, return Nothing.
eid :: Node -> Maybe T.Text
eid (NodeElement (Element _ as _)) = M.lookup "id" as
eid _ = Nothing

-- |Return element classes. If node is not an element or does not have a class, return an empty list.
eclass :: Node -> [T.Text]
eclass (NodeElement (Element _ as _)) = maybe [] T.words $ M.lookup "class" as
eclass _ = []

-- | Search a meta with a specified name under a cursor, and get a ''content'' field. 
getMeta :: T.Text -> Cursor -> [T.Text] 
getMeta n cursor = concat $ cursor $// element "meta" &| attributeIs "name" n &.// attribute "content"


-- * Removing Nodes
-- |Remove descendant nodes that satisfie predicate (''Destructive'').
remove :: (Node->Bool)->Node->Node
remove f (NodeElement (Element a b cs)) = NodeElement (Element a b (map (remove f) (filter (not . f) cs)))
remove _ n = n

-- |Similar to 'remove', but with a depth.
removeDepth :: (Node->Bool)->Int->Node->Node
removeDepth _ (-1) n = n
removeDepth f d (NodeElement (Element a b cs)) = NodeElement (Element a b (map (removeDepth f (d-1)) (filter (not . f) cs)))
removeDepth _ _ n = n

-- |Remove elements with specified tags.
removeTags :: [T.Text] -> [Node] -> [Node]
removeTags ts ns = map (remove (\n -> ename n `elem` map Just ts)) ns

-- |Remove descendant nodes that match a query string.
removeQuery :: String -> [Node] -> [Node]
removeQuery q ns = map (remove (queryMatchNode q)) ns

-- |Remove descendant nodes that match any of query strings.
removeQueries :: [String] -> [Node] -> [Node]
removeQueries qs ns = map (remove f) ns
  where
    f :: Node -> Bool
    f n = any (flip queryMatchNode n) qs

-- |See if the node contains any descendant (and self) node that satisfies predicate.
-- To return false, this function needs to traverse all descendant elements....
nodeHaving :: (Node->Bool)->Node->Bool
nodeHaving f n@(NodeElement (Element _ _ cs)) = f n || any (nodeHaving f) cs
nodeHaving _ _ = False


-- |Remove descendant nodes that match the condition (similar to 'remove')
rmElem :: String -> String -> [String] -> [Node] -> [Node]
rmElem tag id kl ns = map (remove f) ns
  where
    f :: Node -> Bool
    f (NodeElement e) = selectorMatch (JQSelector Descendant (g tag) (g id) kl []) e
    f _ = False
    g :: String -> Maybe String
    g "" = Nothing
    g s = Just s


--
-- Conversion to trees
--
{-
toTree :: Node -> Tr.Tree String
toTree (TextNode t) = Tr.Node ("\""++T.unpack t++"\"") []
toTree (Comment t) = Tr.Node ("<-- "++T.unpack t++" -->") []
toTree (Element t as c) = Tr.Node str (map toTree c)
	where
		str = T.unpack t ++ g as
		g as | null as = ""
					| otherwise = ": [" ++ intercalate "," (map f as) ++ "]"
		f (k,v) = T.unpack k ++ "=\"" ++ T.unpack v ++ "\""


showTree :: [Node] -> IO ()
showTree ns = do
	mapM_ (putStr . Tr.drawTree . toTree) ns
-}
--
-- Helper functions for XmlHtml data
--
{-
(!!!) :: Node -> Int -> Node
(Element _ _ c) !!! i = c !! i
_ !!! _ = error "No child elements"
-}