{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE CPP #-}
{- |
This module uses HXT to transverse an HTML document using CSS selectors.

The most important function here is 'findBySelector', it takes a CSS query and
a string containing the HTML to look into,
and it returns a list of the HTML fragments that matched the given query.

Only a subset of the CSS spec is currently supported:

 * By tag name: /table td a/

 * By class names: /.container .content/

 * By Id: /#oneId/

 * By attribute: /[hasIt]/, /[exact=match]/, /[contains*=text]/, /[starts^=with]/, /[ends$=with]/

 * Union: /a, span, p/

 * Immediate children: /div > p/

 * Get jiggy with it: /div[data-attr=yeah] > .mon, .foo.bar div, #oneThing/

-}

module Yesod.Test.TransversingCSS (
  findBySelector,
  findAttributeBySelector,
  HtmlLBS,
  Query,
  -- * For HXT hackers
  -- | These functions expose some low level details that you can blissfully ignore.
  parseQuery,
  runQuery,
  Selector(..),
  SelectorGroup(..)

  )
where

import Yesod.Test.CssQuery
import qualified Data.Text as T
import qualified Control.Applicative
import Text.XML
import Text.XML.Cursor
import qualified Data.ByteString.Lazy as L
import qualified Text.HTML.DOM as HD
import Text.Blaze.Html (toHtml)
import Text.Blaze.Html.Renderer.String (renderHtml)

type Query = T.Text
type HtmlLBS = L.ByteString

-- | Perform a css 'Query' on 'Html'. Returns Either
--
-- * Left: Query parse error.
--
-- * Right: List of matching Html fragments.
findBySelector :: HtmlLBS -> Query -> Either String [String]
findBySelector :: HtmlLBS -> Text -> Either String [String]
findBySelector HtmlLBS
html Text
query =
  forall a b. (a -> b) -> [a] -> [b]
map (Html -> String
renderHtml forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. ToMarkup a => a -> Html
toHtml forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall node. Cursor node -> node
node) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
Control.Applicative.<$> HtmlLBS -> Text -> Either String [Cursor Node]
findCursorsBySelector HtmlLBS
html Text
query

-- | Perform a css 'Query' on 'Html'. Returns Either
--
-- * Left: Query parse error.
--
-- * Right: List of matching Cursors
findCursorsBySelector :: HtmlLBS -> Query -> Either String [Cursor]
findCursorsBySelector :: HtmlLBS -> Text -> Either String [Cursor Node]
findCursorsBySelector HtmlLBS
html Text
query =
  Cursor Node -> [[SelectorGroup]] -> [Cursor Node]
runQuery (Document -> Cursor Node
fromDocument forall a b. (a -> b) -> a -> b
$ HtmlLBS -> Document
HD.parseLBS HtmlLBS
html)
       forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
Control.Applicative.<$> Text -> Either String [[SelectorGroup]]
parseQuery Text
query

-- | Perform a css 'Query' on 'Html'. Returns Either
--
-- * Left: Query parse error.
--
-- * Right: List of matching Cursors
--
-- @since 1.5.7
findAttributeBySelector :: HtmlLBS -> Query -> T.Text -> Either String [[T.Text]]
findAttributeBySelector :: HtmlLBS -> Text -> Text -> Either String [[Text]]
findAttributeBySelector HtmlLBS
html Text
query Text
attr =
  forall a b. (a -> b) -> [a] -> [b]
map (Text -> Cursor Node -> [Text]
laxAttribute Text
attr) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
Control.Applicative.<$> HtmlLBS -> Text -> Either String [Cursor Node]
findCursorsBySelector HtmlLBS
html Text
query


-- Run a compiled query on Html, returning a list of matching Html fragments.
runQuery :: Cursor -> [[SelectorGroup]] -> [Cursor]
runQuery :: Cursor Node -> [[SelectorGroup]] -> [Cursor Node]
runQuery Cursor Node
html [[SelectorGroup]]
query = forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap (Cursor Node -> [SelectorGroup] -> [Cursor Node]
runGroup Cursor Node
html) [[SelectorGroup]]
query

runGroup :: Cursor -> [SelectorGroup] -> [Cursor]
runGroup :: Cursor Node -> [SelectorGroup] -> [Cursor Node]
runGroup Cursor Node
c [] = [Cursor Node
c]
runGroup Cursor Node
c (DirectChildren [Selector]
s:[SelectorGroup]
gs) = forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap (forall a b c. (a -> b -> c) -> b -> a -> c
flip Cursor Node -> [SelectorGroup] -> [Cursor Node]
runGroup [SelectorGroup]
gs) forall a b. (a -> b) -> a -> b
$ Cursor Node
c forall node a. Cursor node -> (Cursor node -> [a]) -> [a]
$/ [Selector] -> Cursor Node -> [Cursor Node]
selectors [Selector]
s
runGroup Cursor Node
c (DeepChildren [Selector]
s:[SelectorGroup]
gs) = forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap (forall a b c. (a -> b -> c) -> b -> a -> c
flip Cursor Node -> [SelectorGroup] -> [Cursor Node]
runGroup [SelectorGroup]
gs) forall a b. (a -> b) -> a -> b
$ Cursor Node
c forall node a. Cursor node -> (Cursor node -> [a]) -> [a]
$// [Selector] -> Cursor Node -> [Cursor Node]
selectors [Selector]
s

selectors :: [Selector] -> Cursor -> [Cursor]
selectors :: [Selector] -> Cursor Node -> [Cursor Node]
selectors [Selector]
ss Cursor Node
c
    | forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all (Cursor Node -> Selector -> Bool
selector Cursor Node
c) [Selector]
ss = [Cursor Node
c]
    | Bool
otherwise = []

selector :: Cursor -> Selector -> Bool
selector :: Cursor Node -> Selector -> Bool
selector Cursor Node
c (ById Text
x) = Bool -> Bool
not forall a b. (a -> b) -> a -> b
$ forall (t :: * -> *) a. Foldable t => t a -> Bool
null forall a b. (a -> b) -> a -> b
$ Name -> Text -> Cursor Node -> [Cursor Node]
attributeIs Name
"id" Text
x Cursor Node
c
selector Cursor Node
c (ByClass Text
x) =
    case Name -> Cursor Node -> [Text]
attribute Name
"class" Cursor Node
c of
        Text
t:[Text]
_ -> Text
x forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` Text -> [Text]
T.words Text
t
        [] -> Bool
False
selector Cursor Node
c (ByTagName Text
t) = Bool -> Bool
not forall a b. (a -> b) -> a -> b
$ forall (t :: * -> *) a. Foldable t => t a -> Bool
null forall a b. (a -> b) -> a -> b
$ Name -> Cursor Node -> [Cursor Node]
element (Text -> Maybe Text -> Maybe Text -> Name
Name Text
t forall a. Maybe a
Nothing forall a. Maybe a
Nothing) Cursor Node
c
selector Cursor Node
c (ByAttrExists Text
t) = Bool -> Bool
not forall a b. (a -> b) -> a -> b
$ forall (t :: * -> *) a. Foldable t => t a -> Bool
null forall a b. (a -> b) -> a -> b
$ Name -> Cursor Node -> [Cursor Node]
hasAttribute (Text -> Maybe Text -> Maybe Text -> Name
Name Text
t forall a. Maybe a
Nothing forall a. Maybe a
Nothing) Cursor Node
c
selector Cursor Node
c (ByAttrEquals Text
t Text
v) = Bool -> Bool
not forall a b. (a -> b) -> a -> b
$ forall (t :: * -> *) a. Foldable t => t a -> Bool
null forall a b. (a -> b) -> a -> b
$ Name -> Text -> Cursor Node -> [Cursor Node]
attributeIs (Text -> Maybe Text -> Maybe Text -> Name
Name Text
t forall a. Maybe a
Nothing forall a. Maybe a
Nothing) Text
v Cursor Node
c
selector Cursor Node
c (ByAttrContains Text
n Text
v) =
    case Name -> Cursor Node -> [Text]
attribute (Text -> Maybe Text -> Maybe Text -> Name
Name Text
n forall a. Maybe a
Nothing forall a. Maybe a
Nothing) Cursor Node
c of
        Text
t:[Text]
_ -> Text
v Text -> Text -> Bool
`T.isInfixOf` Text
t
        [] -> Bool
False
selector Cursor Node
c (ByAttrStarts Text
n Text
v) =
    case Name -> Cursor Node -> [Text]
attribute (Text -> Maybe Text -> Maybe Text -> Name
Name Text
n forall a. Maybe a
Nothing forall a. Maybe a
Nothing) Cursor Node
c of
        Text
t:[Text]
_ -> Text
v Text -> Text -> Bool
`T.isPrefixOf` Text
t
        [] -> Bool
False
selector Cursor Node
c (ByAttrEnds Text
n Text
v) =
    case Name -> Cursor Node -> [Text]
attribute (Text -> Maybe Text -> Maybe Text -> Name
Name Text
n forall a. Maybe a
Nothing forall a. Maybe a
Nothing) Cursor Node
c of
        Text
t:[Text]
_ -> Text
v Text -> Text -> Bool
`T.isSuffixOf` Text
t
        [] -> Bool
False