module Text.HTML.Scalpel.Internal.Scrape (
Scraper (..)
, attr
, attrs
, html
, htmls
, text
, texts
, chroot
, chroots
) where
import Text.HTML.Scalpel.Internal.Select
import Text.HTML.Scalpel.Internal.Select.Types
import Control.Applicative
import Control.Monad
import Data.Maybe
import qualified Text.HTML.TagSoup as TagSoup
import qualified Text.StringLike as TagSoup
newtype Scraper str a = MkScraper {
scrape :: [TagSoup.Tag str] -> Maybe a
}
instance Functor (Scraper str) where
fmap f (MkScraper a) = MkScraper $ fmap (fmap f) a
instance Applicative (Scraper str) where
pure = MkScraper . const . Just
(MkScraper f) <*> (MkScraper a) = MkScraper applied
where applied tags | (Just aVal) <- a tags = ($ aVal) <$> f tags
| otherwise = Nothing
instance Alternative (Scraper str) where
empty = MkScraper $ const Nothing
(MkScraper a) <|> (MkScraper b) = MkScraper choice
where choice tags | (Just aVal) <- a tags = Just aVal
| otherwise = b tags
instance Monad (Scraper str) where
return = pure
(MkScraper a) >>= f = MkScraper combined
where combined tags | (Just aVal) <- a tags = let (MkScraper b) = f aVal
in b tags
| otherwise = Nothing
instance MonadPlus (Scraper str) where
mzero = empty
mplus = (<|>)
chroot :: (TagSoup.StringLike str, Selectable s)
=> s -> Scraper str a -> Scraper str a
chroot selector (MkScraper inner) = MkScraper
$ join . (inner <$>)
. listToMaybe . select selector
chroots :: (TagSoup.StringLike str, Selectable s)
=> s -> Scraper str a -> Scraper str [a]
chroots selector (MkScraper inner) = MkScraper
$ return . mapMaybe inner . select selector
text :: (TagSoup.StringLike str, Selectable s) => s -> Scraper str str
text s = MkScraper $ withHead tagsToText . select s
texts :: (TagSoup.StringLike str, Selectable s) => s -> Scraper str [str]
texts s = MkScraper $ withAll tagsToText . select s
html :: (TagSoup.StringLike str, Selectable s) => s -> Scraper str str
html s = MkScraper $ withHead tagsToHTML . select s
htmls :: (TagSoup.StringLike str, Selectable s) => s -> Scraper str [str]
htmls s = MkScraper $ withAll tagsToHTML . select s
attr :: (Show str, TagSoup.StringLike str, Selectable s)
=> String -> s -> Scraper str str
attr name s = MkScraper
$ join . withHead (tagsToAttr $ TagSoup.castString name) . select s
attrs :: (Show str, TagSoup.StringLike str, Selectable s)
=> String -> s -> Scraper str [str]
attrs name s = MkScraper
$ fmap catMaybes . withAll (tagsToAttr nameStr) . select s
where nameStr = TagSoup.castString name
withHead :: (a -> b) -> [a] -> Maybe b
withHead _ [] = Nothing
withHead f (x:_) = Just $ f x
withAll :: (a -> b) -> [a] -> Maybe [b]
withAll _ [] = Nothing
withAll f xs = Just $ map f xs
tagsToText :: TagSoup.StringLike str => [TagSoup.Tag str] -> str
tagsToText = TagSoup.innerText
tagsToHTML :: TagSoup.StringLike str => [TagSoup.Tag str] -> str
tagsToHTML = TagSoup.renderTags
tagsToAttr :: (Show str, TagSoup.StringLike str)
=> str -> [TagSoup.Tag str] -> Maybe str
tagsToAttr attr tags = do
tag <- listToMaybe tags
guard $ TagSoup.isTagOpen tag
return $ TagSoup.fromAttrib attr tag