{-# LANGUAGE OverloadedStrings #-}
module MBug.Scrape (parseBugs) where
import Control.Applicative (Alternative, empty)
import Control.Monad (guard)
import qualified Data.ByteString.Lazy as BL
import Data.Maybe.Extended (maybeToAlternative)
import Data.Text (Text)
import qualified Data.Text.Compat as T
import Data.Text.Encoding (decodeUtf8)
import MBug.Data.Bug (Bug (..))
import Text.HTML.Scalpel.Core.Extended
( Scraper
, chroots
, guardPos
, innerHTMLs
, scrape
, text
, (@:)
, (@=)
)
import Text.HTML.TagSoup (parseTags)
import Text.Read (readMaybe)
bugNumber :: (Alternative m) => Text -> m Int
bugNumber txt =
case T.uncons txt of
Nothing -> empty
Just (c, bug) -> guard (c == '#')
*> maybeToAlternative (readMaybe $ T.unpack bug)
bugSeverity :: (Alternative m) => Text -> m Text
bugSeverity txt =
let (prefix, suffix) = T.splitAt 10 txt
in case T.unsnoc suffix of
Nothing -> empty
Just (severity, c) -> guard (prefix == "Severity: " && c == ';')
*> pure severity
bugsScraper :: Scraper BL.ByteString [Bug]
bugsScraper =
let decode = decodeUtf8 . BL.toStrict
in chroots ("div" @: [ "class" @= "shortbugstatus" ]) $ do
(bug:package:subject:_) <- map decode <$> innerHTMLs "a"
(severity:_) <- chroots "span" $ do guardPos (== 3)
decode <$> text "span"
Bug <$> bugNumber bug
<*> bugSeverity severity
<*> pure package
<*> pure subject
parseBugs :: BL.ByteString -> Maybe [Bug]
parseBugs input = scrape bugsScraper (parseTags input)