module ScrapeReddit where

import Network.HTTP.Client (Manager)
import RIO
import qualified RIO.Text as Text
import RIO.Time (UTCTime, defaultTimeLocale, iso8601DateFormat, parseTimeM)
import Text.HTML.Scalpel
  ( Config (..),
    Scraper,
    anySelector,
    attr,
    chroot,
    chroots,
    defaultDecoder,
    hasClass,
    scrapeURLWithConfig,
    text,
    (@:),
  )

data Link = Link
  { Link -> Text
title :: Text,
    Link -> String
href :: String,
    Link -> Maybe Int
currentScore :: Maybe Int,
    Link -> Maybe Int
comments :: Maybe Int,
    Link -> Maybe UTCTime
date :: Maybe UTCTime
  }
  deriving (Link -> Link -> Bool
(Link -> Link -> Bool) -> (Link -> Link -> Bool) -> Eq Link
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Link -> Link -> Bool
$c/= :: Link -> Link -> Bool
== :: Link -> Link -> Bool
$c== :: Link -> Link -> Bool
Eq, Int -> Link -> ShowS
[Link] -> ShowS
Link -> String
(Int -> Link -> ShowS)
-> (Link -> String) -> ([Link] -> ShowS) -> Show Link
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Link] -> ShowS
$cshowList :: [Link] -> ShowS
show :: Link -> String
$cshow :: Link -> String
showsPrec :: Int -> Link -> ShowS
$cshowsPrec :: Int -> Link -> ShowS
Show, (forall x. Link -> Rep Link x)
-> (forall x. Rep Link x -> Link) -> Generic Link
forall x. Rep Link x -> Link
forall x. Link -> Rep Link x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep Link x -> Link
$cfrom :: forall x. Link -> Rep Link x
Generic)

scrapeSubReddit :: Manager -> String -> IO (Maybe [Link])
scrapeSubReddit :: Manager -> String -> IO (Maybe [Link])
scrapeSubReddit Manager
manager String
subReddit =
  Config Text -> String -> Scraper Text [Link] -> IO (Maybe [Link])
forall str a.
StringLike str =>
Config str -> String -> Scraper str a -> IO (Maybe a)
scrapeURLWithConfig
    (Config :: forall str. Decoder str -> Maybe Manager -> Config str
Config {decoder :: Decoder Text
decoder = Decoder Text
forall str. StringLike str => Decoder str
defaultDecoder, manager :: Maybe Manager
manager = Manager -> Maybe Manager
forall a. a -> Maybe a
Just Manager
manager})
    ([String] -> String
forall a. Monoid a => [a] -> a
mconcat [String
"https://old.reddit.com/r/", String
subReddit, String
"/"])
    Scraper Text [Link]
links

links :: Scraper Text [Link]
links :: Scraper Text [Link]
links = Selector -> ScraperT Text Identity Link -> Scraper Text [Link]
forall str (m :: * -> *) a.
(StringLike str, Monad m) =>
Selector -> ScraperT str m a -> ScraperT str m [a]
chroots (TagName
"div" TagName -> [AttributePredicate] -> Selector
@: [String -> AttributePredicate
hasClass String
"thing", String -> AttributePredicate
hasClass String
"link"]) ScraperT Text Identity Link
link'

link' :: Scraper Text Link
link' :: ScraperT Text Identity Link
link' = do
  Text
title <- Selector -> ScraperT Text Identity Text
forall str (m :: * -> *).
(StringLike str, Monad m) =>
Selector -> ScraperT str m str
text (Selector -> ScraperT Text Identity Text)
-> Selector -> ScraperT Text Identity Text
forall a b. (a -> b) -> a -> b
$ TagName
"a" TagName -> [AttributePredicate] -> Selector
@: [String -> AttributePredicate
hasClass String
"title"]
  String
href <- Text -> String
Text.unpack (Text -> String)
-> ScraperT Text Identity Text -> ScraperT Text Identity String
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> String -> Selector -> ScraperT Text Identity Text
forall str (m :: * -> *).
(Show str, StringLike str, Monad m) =>
String -> Selector -> ScraperT str m str
attr String
"href" (TagName
"a" TagName -> [AttributePredicate] -> Selector
@: [String -> AttributePredicate
hasClass String
"title"])
  Maybe Int
currentScore <- (Text -> String
Text.unpack (Text -> String) -> (String -> Maybe Int) -> Text -> Maybe Int
forall k (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> String -> Maybe Int
forall a. Read a => String -> Maybe a
readMaybe) (Text -> Maybe Int)
-> ScraperT Text Identity Text
-> ScraperT Text Identity (Maybe Int)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> String -> Selector -> ScraperT Text Identity Text
forall str (m :: * -> *).
(Show str, StringLike str, Monad m) =>
String -> Selector -> ScraperT str m str
attr String
"title" (TagName
"div" TagName -> [AttributePredicate] -> Selector
@: [String -> AttributePredicate
hasClass String
"score"])
  Maybe Int
comments <- Selector
-> ScraperT Text Identity (Maybe Int)
-> ScraperT Text Identity (Maybe Int)
forall str (m :: * -> *) a.
(StringLike str, Monad m) =>
Selector -> ScraperT str m a -> ScraperT str m a
chroot (TagName
"a" TagName -> [AttributePredicate] -> Selector
@: [String -> AttributePredicate
hasClass String
"comments"]) ScraperT Text Identity (Maybe Int)
commentNumber
  Maybe UTCTime
date <- Selector
-> ScraperT Text Identity (Maybe UTCTime)
-> ScraperT Text Identity (Maybe UTCTime)
forall str (m :: * -> *) a.
(StringLike str, Monad m) =>
Selector -> ScraperT str m a -> ScraperT str m a
chroot (TagName
"time" TagName -> [AttributePredicate] -> Selector
@: [String -> AttributePredicate
hasClass String
"live-timestamp"]) ScraperT Text Identity (Maybe UTCTime)
dateTimeFromTime
  pure $ Link :: Text -> String -> Maybe Int -> Maybe Int -> Maybe UTCTime -> Link
Link {Text
title :: Text
$sel:title:Link :: Text
title, String
href :: String
$sel:href:Link :: String
href, Maybe Int
currentScore :: Maybe Int
$sel:currentScore:Link :: Maybe Int
currentScore, Maybe Int
comments :: Maybe Int
$sel:comments:Link :: Maybe Int
comments, Maybe UTCTime
date :: Maybe UTCTime
$sel:date:Link :: Maybe UTCTime
date}

commentNumber :: Scraper Text (Maybe Int)
commentNumber :: ScraperT Text Identity (Maybe Int)
commentNumber = do
  String
commentLinkText <- Text -> String
Text.unpack (Text -> String)
-> ScraperT Text Identity Text -> ScraperT Text Identity String
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Selector -> ScraperT Text Identity Text
forall str (m :: * -> *).
(StringLike str, Monad m) =>
Selector -> ScraperT str m str
text Selector
anySelector
  case String -> [String]
words String
commentLinkText of
    [String
numberText, String
_comments] -> Maybe Int -> ScraperT Text Identity (Maybe Int)
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Maybe Int -> ScraperT Text Identity (Maybe Int))
-> Maybe Int -> ScraperT Text Identity (Maybe Int)
forall a b. (a -> b) -> a -> b
$ String -> Maybe Int
forall a. Read a => String -> Maybe a
readMaybe String
numberText
    [String]
_anythingElse -> Maybe Int -> ScraperT Text Identity (Maybe Int)
forall (f :: * -> *) a. Applicative f => a -> f a
pure Maybe Int
forall a. Maybe a
Nothing

dateTimeFromTime :: Scraper Text (Maybe UTCTime)
dateTimeFromTime :: ScraperT Text Identity (Maybe UTCTime)
dateTimeFromTime = do
  String
timeString <- Text -> String
Text.unpack (Text -> String)
-> ScraperT Text Identity Text -> ScraperT Text Identity String
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> String -> Selector -> ScraperT Text Identity Text
forall str (m :: * -> *).
(Show str, StringLike str, Monad m) =>
String -> Selector -> ScraperT str m str
attr String
"datetime" Selector
anySelector
  pure $ Bool -> TimeLocale -> String -> String -> Maybe UTCTime
forall (m :: * -> *) t.
(MonadFail m, ParseTime t) =>
Bool -> TimeLocale -> String -> String -> m t
parseTimeM Bool
True TimeLocale
defaultTimeLocale String
dateFormat String
timeString

dateFormat :: String
dateFormat :: String
dateFormat = Maybe String -> String
iso8601DateFormat (Maybe String -> String) -> Maybe String -> String
forall a b. (a -> b) -> a -> b
$ String -> Maybe String
forall a. a -> Maybe a
Just String
"%H:%M:%S+00:00"