module Text.HTML.Scalpel.Internal.Scrape (
Scraper
, scrape
, attr
, attrs
, html
, htmls
, innerHTML
, innerHTMLs
, text
, texts
, chroot
, chroots
, position
) where
import Text.HTML.Scalpel.Internal.Select
import Text.HTML.Scalpel.Internal.Select.Types
import Control.Applicative
import Control.Monad
import Data.Maybe
import qualified Control.Monad.Fail as Fail
import qualified Data.Vector as Vector
import qualified Text.HTML.TagSoup as TagSoup
import qualified Text.StringLike as TagSoup
newtype Scraper str a = MkScraper {
scrapeTagSpec :: TagSpec str -> Maybe a
}
instance Functor (Scraper str) where
fmap f (MkScraper a) = MkScraper $ fmap (fmap f) a
instance Applicative (Scraper str) where
pure = MkScraper . const . Just
(MkScraper f) <*> (MkScraper a) = MkScraper applied
where applied tags | (Just aVal) <- a tags = ($ aVal) <$> f tags
| otherwise = Nothing
instance Alternative (Scraper str) where
empty = MkScraper $ const Nothing
(MkScraper a) <|> (MkScraper b) = MkScraper choice
where choice tags | (Just aVal) <- a tags = Just aVal
| otherwise = b tags
instance Monad (Scraper str) where
fail = Fail.fail
return = pure
(MkScraper a) >>= f = MkScraper combined
where combined tags | (Just aVal) <- a tags = let (MkScraper b) = f aVal
in b tags
| otherwise = Nothing
instance MonadPlus (Scraper str) where
mzero = empty
mplus = (<|>)
instance Fail.MonadFail (Scraper str) where
fail _ = mzero
scrape :: (Ord str, TagSoup.StringLike str)
=> Scraper str a -> [TagSoup.Tag str] -> Maybe a
scrape s = scrapeTagSpec s . tagsToSpec . TagSoup.canonicalizeTags
chroot :: (Ord str, TagSoup.StringLike str)
=> Selector -> Scraper str a -> Scraper str a
chroot selector inner = do
maybeResult <- listToMaybe <$> chroots selector inner
guard (isJust maybeResult)
return $ fromJust maybeResult
chroots :: (Ord str, TagSoup.StringLike str)
=> Selector -> Scraper str a -> Scraper str [a]
chroots selector (MkScraper inner) = MkScraper
$ return . mapMaybe inner . select selector
text :: (Ord str, TagSoup.StringLike str) => Selector -> Scraper str str
text s = MkScraper $ withHead tagsToText . select s
texts :: (Ord str, TagSoup.StringLike str)
=> Selector -> Scraper str [str]
texts s = MkScraper $ withAll tagsToText . select s
html :: (Ord str, TagSoup.StringLike str) => Selector -> Scraper str str
html s = MkScraper $ withHead tagsToHTML . select s
htmls :: (Ord str, TagSoup.StringLike str)
=> Selector -> Scraper str [str]
htmls s = MkScraper $ withAll tagsToHTML . select s
innerHTML :: (Ord str, TagSoup.StringLike str)
=> Selector -> Scraper str str
innerHTML s = MkScraper $ withHead tagsToInnerHTML . select s
innerHTMLs :: (Ord str, TagSoup.StringLike str)
=> Selector -> Scraper str [str]
innerHTMLs s = MkScraper $ withAll tagsToInnerHTML . select s
attr :: (Ord str, Show str, TagSoup.StringLike str)
=> String -> Selector -> Scraper str str
attr name s = MkScraper
$ join . withHead (tagsToAttr $ TagSoup.castString name) . select s
attrs :: (Ord str, Show str, TagSoup.StringLike str)
=> String -> Selector -> Scraper str [str]
attrs name s = MkScraper
$ fmap catMaybes . withAll (tagsToAttr nameStr) . select s
where nameStr = TagSoup.castString name
position :: (Ord str, TagSoup.StringLike str) => Scraper str Int
position = MkScraper $ Just . tagsToPosition
withHead :: (a -> b) -> [a] -> Maybe b
withHead _ [] = Nothing
withHead f (x:_) = Just $ f x
withAll :: (a -> b) -> [a] -> Maybe [b]
withAll f xs = Just $ map f xs
foldSpec :: TagSoup.StringLike str
=> (TagSoup.Tag str -> str -> str) -> TagSpec str -> str
foldSpec f = Vector.foldr' (f . infoTag) TagSoup.empty . (\(a, _, _) -> a)
tagsToText :: TagSoup.StringLike str => TagSpec str -> str
tagsToText = foldSpec f
where
f (TagSoup.TagText str) s = str `TagSoup.append` s
f _ s = s
tagsToHTML :: TagSoup.StringLike str => TagSpec str -> str
tagsToHTML = foldSpec (\tag s -> TagSoup.renderTags [tag] `TagSoup.append` s)
tagsToInnerHTML :: TagSoup.StringLike str => TagSpec str -> str
tagsToInnerHTML (tags, tree, ctx)
| len < 2 = TagSoup.empty
| otherwise = tagsToHTML (Vector.slice 1 (len 2) tags, tree, ctx)
where len = Vector.length tags
tagsToAttr :: (Show str, TagSoup.StringLike str)
=> str -> TagSpec str -> Maybe str
tagsToAttr attr (tags, _, _) = do
guard $ 0 < Vector.length tags
let tag = infoTag $ tags Vector.! 0
guard $ TagSoup.isTagOpen tag
return $ TagSoup.fromAttrib attr tag
tagsToPosition :: TagSpec str -> Int
tagsToPosition (_, _, ctx) = ctxPosition ctx