{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE NumericUnderscores #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}

module LinkCheck
  ( linkCheck,
    runLinkCheck,
  )
where

import Control.Concurrent
import Control.Concurrent.STM (stateTVar)
import Control.Monad
import Control.Monad.IO.Class
import Control.Monad.Logger
import Control.Retry
import qualified Data.ByteString as SB
import qualified Data.ByteString.Lazy as LB
import Data.Cache.LRU (LRU, newLRU)
import qualified Data.Cache.LRU as LRU
import Data.IntMap (IntMap)
import qualified Data.IntMap.Strict as IM
import Data.Map (Map)
import qualified Data.Map as M
import Data.Maybe
import Data.Set (Set)
import qualified Data.Set as S
import Data.String
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import Data.Tuple
import Data.Version
import LinkCheck.OptParse
import Network.HTTP.Client as HTTP
import Network.HTTP.Client.Internal as HTTP (toHttpException)
import Network.HTTP.Client.TLS as HTTP
import Network.HTTP.Types as HTTP
import Network.URI
import Paths_linkcheck
import System.Exit
import Text.HTML.TagSoup
import Text.Printf
import UnliftIO

linkCheck :: IO ()
linkCheck :: IO ()
linkCheck = IO Settings
getSettings IO Settings -> (Settings -> IO ()) -> IO ()
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Settings -> IO ()
runLinkCheck

runLinkCheck :: Settings -> IO ()
runLinkCheck :: Settings -> IO ()
runLinkCheck Settings {Bool
Maybe Int
Maybe Word
URI
LogLevel
setCacheSize :: Settings -> Maybe Word
setMaxDepth :: Settings -> Maybe Word
setCheckFragments :: Settings -> Bool
setExternal :: Settings -> Bool
setFetchers :: Settings -> Maybe Int
setLogLevel :: Settings -> LogLevel
setUri :: Settings -> URI
setCacheSize :: Maybe Word
setMaxDepth :: Maybe Word
setCheckFragments :: Bool
setExternal :: Bool
setFetchers :: Maybe Int
setLogLevel :: LogLevel
setUri :: URI
..} = do
  let managerSets :: ManagerSettings
managerSets =
        ManagerSettings
HTTP.tlsManagerSettings
          { managerModifyRequest :: Request -> IO Request
managerModifyRequest = \Request
request -> do
              let headers :: [(HeaderName, ByteString)]
headers =
                    ( HeaderName
"User-Agent",
                      Text -> ByteString
TE.encodeUtf8 (Text -> ByteString) -> Text -> ByteString
forall a b. (a -> b) -> a -> b
$ String -> Text
T.pack (String -> Text) -> String -> Text
forall a b. (a -> b) -> a -> b
$ String
"linkcheck-" String -> String -> String
forall a. Semigroup a => a -> a -> a
<> Version -> String
showVersion Version
version
                    ) (HeaderName, ByteString)
-> [(HeaderName, ByteString)] -> [(HeaderName, ByteString)]
forall a. a -> [a] -> [a]
:
                    Request -> [(HeaderName, ByteString)]
requestHeaders Request
request
              Request -> IO Request
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Request -> IO Request) -> Request -> IO Request
forall a b. (a -> b) -> a -> b
$ Request
request {requestHeaders :: [(HeaderName, ByteString)]
requestHeaders = [(HeaderName, ByteString)]
headers}
          }
  Manager
man <- IO Manager -> IO Manager
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO Manager -> IO Manager) -> IO Manager -> IO Manager
forall a b. (a -> b) -> a -> b
$ ManagerSettings -> IO Manager
HTTP.newManager ManagerSettings
managerSets
  TQueue QueueURI
queue <- IO (TQueue QueueURI)
forall (m :: * -> *) a. MonadIO m => m (TQueue a)
newTQueueIO
  TVar (Set URI)
seen <- Set URI -> IO (TVar (Set URI))
forall (m :: * -> *) a. MonadIO m => a -> m (TVar a)
newTVarIO Set URI
forall a. Set a
S.empty
  Maybe (TVar (LRU URI [ByteString]))
mCache <-
    if Bool
setCheckFragments
      then
        (TVar (LRU URI [ByteString])
 -> Maybe (TVar (LRU URI [ByteString])))
-> IO (TVar (LRU URI [ByteString]))
-> IO (Maybe (TVar (LRU URI [ByteString])))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap TVar (LRU URI [ByteString]) -> Maybe (TVar (LRU URI [ByteString]))
forall a. a -> Maybe a
Just (IO (TVar (LRU URI [ByteString]))
 -> IO (Maybe (TVar (LRU URI [ByteString]))))
-> IO (TVar (LRU URI [ByteString]))
-> IO (Maybe (TVar (LRU URI [ByteString])))
forall a b. (a -> b) -> a -> b
$
          LRU URI [ByteString] -> IO (TVar (LRU URI [ByteString]))
forall (m :: * -> *) a. MonadIO m => a -> m (TVar a)
newTVarIO (LRU URI [ByteString] -> IO (TVar (LRU URI [ByteString])))
-> LRU URI [ByteString] -> IO (TVar (LRU URI [ByteString]))
forall a b. (a -> b) -> a -> b
$
            Maybe Integer -> LRU URI [ByteString]
forall key val. Ord key => Maybe Integer -> LRU key val
newLRU (Maybe Integer -> LRU URI [ByteString])
-> Maybe Integer -> LRU URI [ByteString]
forall a b. (a -> b) -> a -> b
$ Word -> Integer
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word -> Integer) -> Maybe Word -> Maybe Integer
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Maybe Word
setCacheSize
      else Maybe (TVar (LRU URI [ByteString]))
-> IO (Maybe (TVar (LRU URI [ByteString])))
forall (f :: * -> *) a. Applicative f => a -> f a
pure Maybe (TVar (LRU URI [ByteString]))
forall a. Maybe a
Nothing -- no need to cache anything if we don't check fragments anyway.
  TVar (Map URI Result)
results <- Map URI Result -> IO (TVar (Map URI Result))
forall (m :: * -> *) a. MonadIO m => a -> m (TVar a)
newTVarIO Map URI Result
forall k a. Map k a
M.empty
  Int
fetchers <- case Maybe Int
setFetchers of
    Maybe Int
Nothing -> IO Int
getNumCapabilities
    Just Int
f -> Int -> IO Int
forall (f :: * -> *) a. Applicative f => a -> f a
pure Int
f
  let indexes :: [Int]
indexes = [Int
0 .. Int
fetchers Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1]
  TVar (IntMap Bool)
fetcherStati <- IntMap Bool -> IO (TVar (IntMap Bool))
forall (m :: * -> *) a. MonadIO m => a -> m (TVar a)
newTVarIO (IntMap Bool -> IO (TVar (IntMap Bool)))
-> IntMap Bool -> IO (TVar (IntMap Bool))
forall a b. (a -> b) -> a -> b
$ [(Int, Bool)] -> IntMap Bool
forall a. [(Int, a)] -> IntMap a
IM.fromList ([(Int, Bool)] -> IntMap Bool) -> [(Int, Bool)] -> IntMap Bool
forall a b. (a -> b) -> a -> b
$ [Int] -> [Bool] -> [(Int, Bool)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Int]
indexes (Bool -> [Bool]
forall a. a -> [a]
repeat Bool
True)
  STM () -> IO ()
forall (m :: * -> *) a. MonadIO m => STM a -> m a
atomically (STM () -> IO ()) -> STM () -> IO ()
forall a b. (a -> b) -> a -> b
$ TQueue QueueURI -> QueueURI -> STM ()
forall a. TQueue a -> a -> STM ()
writeTQueue TQueue QueueURI
queue QueueURI :: URI -> Word -> [URI] -> QueueURI
QueueURI {queueURI :: URI
queueURI = URI
setUri, queueURIDepth :: Word
queueURIDepth = Word
0, queueURITrace :: [URI]
queueURITrace = []}
  LoggingT IO () -> IO ()
forall (m :: * -> *) a. MonadIO m => LoggingT m a -> m a
runStderrLoggingT (LoggingT IO () -> IO ()) -> LoggingT IO () -> IO ()
forall a b. (a -> b) -> a -> b
$
    (Text -> LogLevel -> Bool) -> LoggingT IO () -> LoggingT IO ()
forall (m :: * -> *) a.
(Text -> LogLevel -> Bool) -> LoggingT m a -> LoggingT m a
filterLogger (\Text
_ LogLevel
ll -> LogLevel
ll LogLevel -> LogLevel -> Bool
forall a. Ord a => a -> a -> Bool
>= LogLevel
setLogLevel) (LoggingT IO () -> LoggingT IO ())
-> LoggingT IO () -> LoggingT IO ()
forall a b. (a -> b) -> a -> b
$ do
      Text -> LoggingT IO ()
forall (m :: * -> *). MonadLogger m => Text -> m ()
logInfoN (Text -> LoggingT IO ()) -> Text -> LoggingT IO ()
forall a b. (a -> b) -> a -> b
$
        String -> Text
T.pack (String -> Text) -> String -> Text
forall a b. (a -> b) -> a -> b
$
          [String] -> String
unwords
            [ String
"Running with",
              Int -> String
forall a. Show a => a -> String
show Int
fetchers,
              String
"fetchers"
            ]
      [Int] -> (Int -> LoggingT IO ()) -> LoggingT IO ()
forall (m :: * -> *) (f :: * -> *) a b.
(MonadUnliftIO m, Foldable f) =>
f a -> (a -> m b) -> m ()
forConcurrently_ [Int]
indexes ((Int -> LoggingT IO ()) -> LoggingT IO ())
-> (Int -> LoggingT IO ()) -> LoggingT IO ()
forall a b. (a -> b) -> a -> b
$ \Int
ix ->
        WorkerSettings -> LoggingT IO ()
worker
          WorkerSettings :: Bool
-> Bool
-> Maybe Word
-> URI
-> Manager
-> TQueue QueueURI
-> TVar (Set URI)
-> Maybe (TVar (LRU URI [ByteString]))
-> TVar (Map URI Result)
-> TVar (IntMap Bool)
-> Int
-> Int
-> WorkerSettings
WorkerSettings
            { workerSetExternal :: Bool
workerSetExternal = Bool
setExternal,
              workerSetCheckFragments :: Bool
workerSetCheckFragments = Bool
setCheckFragments,
              workerSetMaxDepth :: Maybe Word
workerSetMaxDepth = Maybe Word
setMaxDepth,
              workerSetRoot :: URI
workerSetRoot = URI
setUri,
              workerSetHTTPManager :: Manager
workerSetHTTPManager = Manager
man,
              workerSetURIQueue :: TQueue QueueURI
workerSetURIQueue = TQueue QueueURI
queue,
              workerSetSeenSet :: TVar (Set URI)
workerSetSeenSet = TVar (Set URI)
seen,
              workerSetCache :: Maybe (TVar (LRU URI [ByteString]))
workerSetCache = Maybe (TVar (LRU URI [ByteString]))
mCache,
              workerSetResultsMap :: TVar (Map URI Result)
workerSetResultsMap = TVar (Map URI Result)
results,
              workerSetStatusMap :: TVar (IntMap Bool)
workerSetStatusMap = TVar (IntMap Bool)
fetcherStati,
              workerSetTotalFetchers :: Int
workerSetTotalFetchers = Int
fetchers,
              workerSetWorkerIndex :: Int
workerSetWorkerIndex = Int
ix
            }
  [(URI, Result)]
resultsList <- Map URI Result -> [(URI, Result)]
forall k a. Map k a -> [(k, a)]
M.toList (Map URI Result -> [(URI, Result)])
-> IO (Map URI Result) -> IO [(URI, Result)]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> TVar (Map URI Result) -> IO (Map URI Result)
forall (m :: * -> *) a. MonadIO m => TVar a -> m a
readTVarIO TVar (Map URI Result)
results
  Bool -> IO () -> IO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless ([(URI, Result)] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [(URI, Result)]
resultsList) (IO () -> IO ()) -> IO () -> IO ()
forall a b. (a -> b) -> a -> b
$
    String -> IO ()
forall a. String -> IO a
die (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$
      [String] -> String
unlines ([String] -> String) -> [String] -> String
forall a b. (a -> b) -> a -> b
$
        ((URI, Result) -> String) -> [(URI, Result)] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map
          ( \(URI
uri, Result
result) ->
              [String] -> String
unwords
                [ URI -> String
forall a. Show a => a -> String
show URI
uri,
                  Result -> String
prettyResult Result
result
                ]
          )
          [(URI, Result)]
resultsList

data Result = Result
  { Result -> ResultReason
resultReason :: !ResultReason,
    Result -> [URI]
resultTrace :: ![URI]
  }
  deriving (Int -> Result -> String -> String
[Result] -> String -> String
Result -> String
(Int -> Result -> String -> String)
-> (Result -> String)
-> ([Result] -> String -> String)
-> Show Result
forall a.
(Int -> a -> String -> String)
-> (a -> String) -> ([a] -> String -> String) -> Show a
showList :: [Result] -> String -> String
$cshowList :: [Result] -> String -> String
show :: Result -> String
$cshow :: Result -> String
showsPrec :: Int -> Result -> String -> String
$cshowsPrec :: Int -> Result -> String -> String
Show)

data ResultReason
  = ResultReasonException HttpException
  | ResultReasonStatus HTTP.Status
  | ResultReasonFragmentMissing String
  deriving (Int -> ResultReason -> String -> String
[ResultReason] -> String -> String
ResultReason -> String
(Int -> ResultReason -> String -> String)
-> (ResultReason -> String)
-> ([ResultReason] -> String -> String)
-> Show ResultReason
forall a.
(Int -> a -> String -> String)
-> (a -> String) -> ([a] -> String -> String) -> Show a
showList :: [ResultReason] -> String -> String
$cshowList :: [ResultReason] -> String -> String
show :: ResultReason -> String
$cshow :: ResultReason -> String
showsPrec :: Int -> ResultReason -> String -> String
$cshowsPrec :: Int -> ResultReason -> String -> String
Show)

prettyResult :: Result -> String
prettyResult :: Result -> String
prettyResult Result {[URI]
ResultReason
resultTrace :: [URI]
resultReason :: ResultReason
resultTrace :: Result -> [URI]
resultReason :: Result -> ResultReason
..} = do
  [String] -> String
unlines
    ( [String] -> String
unwords [String
"Reason:", ResultReason -> String
prettyResultReason ResultReason
resultReason] String -> [String] -> [String]
forall a. a -> [a] -> [a]
:
      String
"Trace:" String -> [String] -> [String]
forall a. a -> [a] -> [a]
:
      (URI -> String) -> [URI] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map URI -> String
forall a. Show a => a -> String
show [URI]
resultTrace
    )

prettyResultReason :: ResultReason -> String
prettyResultReason :: ResultReason -> String
prettyResultReason = \case
  ResultReasonException HttpException
e -> HttpException -> String
forall e. Exception e => e -> String
displayException HttpException
e
  ResultReasonStatus Status
status -> Status -> String
forall a. Show a => a -> String
show Status
status
  ResultReasonFragmentMissing String
f -> String
"Fragment name or id not found: #" String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
f

data WorkerSettings = WorkerSettings
  { WorkerSettings -> Bool
workerSetExternal :: !Bool,
    WorkerSettings -> Bool
workerSetCheckFragments :: !Bool,
    WorkerSettings -> Maybe Word
workerSetMaxDepth :: !(Maybe Word),
    WorkerSettings -> URI
workerSetRoot :: !URI,
    WorkerSettings -> Manager
workerSetHTTPManager :: !HTTP.Manager,
    WorkerSettings -> TQueue QueueURI
workerSetURIQueue :: !(TQueue QueueURI),
    WorkerSettings -> TVar (Set URI)
workerSetSeenSet :: !(TVar (Set URI)),
    WorkerSettings -> Maybe (TVar (LRU URI [ByteString]))
workerSetCache :: !(Maybe (TVar (LRU URI [SB.ByteString]))),
    WorkerSettings -> TVar (Map URI Result)
workerSetResultsMap :: !(TVar (Map URI Result)),
    WorkerSettings -> TVar (IntMap Bool)
workerSetStatusMap :: !(TVar (IntMap Bool)),
    WorkerSettings -> Int
workerSetTotalFetchers :: !Int,
    WorkerSettings -> Int
workerSetWorkerIndex :: !Int
  }

data QueueURI = QueueURI
  { QueueURI -> URI
queueURI :: !URI,
    QueueURI -> Word
queueURIDepth :: !Word,
    QueueURI -> [URI]
queueURITrace :: ![URI]
  }

worker ::
  WorkerSettings ->
  LoggingT IO ()
worker :: WorkerSettings -> LoggingT IO ()
worker WorkerSettings {Bool
Int
Maybe Word
Maybe (TVar (LRU URI [ByteString]))
TVar (Map URI Result)
TVar (IntMap Bool)
TVar (Set URI)
URI
Manager
TQueue QueueURI
workerSetWorkerIndex :: Int
workerSetTotalFetchers :: Int
workerSetStatusMap :: TVar (IntMap Bool)
workerSetResultsMap :: TVar (Map URI Result)
workerSetCache :: Maybe (TVar (LRU URI [ByteString]))
workerSetSeenSet :: TVar (Set URI)
workerSetURIQueue :: TQueue QueueURI
workerSetHTTPManager :: Manager
workerSetRoot :: URI
workerSetMaxDepth :: Maybe Word
workerSetCheckFragments :: Bool
workerSetExternal :: Bool
workerSetWorkerIndex :: WorkerSettings -> Int
workerSetTotalFetchers :: WorkerSettings -> Int
workerSetStatusMap :: WorkerSettings -> TVar (IntMap Bool)
workerSetResultsMap :: WorkerSettings -> TVar (Map URI Result)
workerSetCache :: WorkerSettings -> Maybe (TVar (LRU URI [ByteString]))
workerSetSeenSet :: WorkerSettings -> TVar (Set URI)
workerSetURIQueue :: WorkerSettings -> TQueue QueueURI
workerSetHTTPManager :: WorkerSettings -> Manager
workerSetRoot :: WorkerSettings -> URI
workerSetMaxDepth :: WorkerSettings -> Maybe Word
workerSetCheckFragments :: WorkerSettings -> Bool
workerSetExternal :: WorkerSettings -> Bool
..} = Text -> LoggingT IO () -> LoggingT IO ()
forall (m :: * -> *) a. Text -> LoggingT m a -> LoggingT m a
addFetcherNameToLog Text
fetcherName (LoggingT IO () -> LoggingT IO ())
-> LoggingT IO () -> LoggingT IO ()
forall a b. (a -> b) -> a -> b
$ Bool -> LoggingT IO ()
go Bool
True
  where
    fetcherName :: Text
fetcherName = case Int
workerSetTotalFetchers of
      Int
1 -> Text
"fetcher"
      Int
_ ->
        let digits :: Int
            digits :: Int
digits = Double -> Int
forall a b. (RealFrac a, Integral b) => a -> b
ceiling (Double -> Double -> Double
forall a. Floating a => a -> a -> a
logBase Double
10 (Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
workerSetTotalFetchers) :: Double)
            formatStr :: String
formatStr = String
"%0" String -> String -> String
forall a. Semigroup a => a -> a -> a
<> Int -> String
forall a. Show a => a -> String
show Int
digits String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
"d"
         in String -> Text
T.pack (String -> Text) -> String -> Text
forall a b. (a -> b) -> a -> b
$ String
"fetcher-" String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String -> Int -> String
forall r. PrintfType r => String -> r
printf String
formatStr Int
workerSetWorkerIndex
    setStatus :: Bool -> m ()
setStatus Bool
b = STM () -> m ()
forall (m :: * -> *) a. MonadIO m => STM a -> m a
atomically (STM () -> m ()) -> STM () -> m ()
forall a b. (a -> b) -> a -> b
$ TVar (IntMap Bool) -> (IntMap Bool -> IntMap Bool) -> STM ()
forall a. TVar a -> (a -> a) -> STM ()
modifyTVar' TVar (IntMap Bool)
workerSetStatusMap ((IntMap Bool -> IntMap Bool) -> STM ())
-> (IntMap Bool -> IntMap Bool) -> STM ()
forall a b. (a -> b) -> a -> b
$ Int -> Bool -> IntMap Bool -> IntMap Bool
forall a. Int -> a -> IntMap a -> IntMap a
IM.insert Int
workerSetWorkerIndex Bool
b
    setBusy :: LoggingT IO ()
setBusy = Bool -> LoggingT IO ()
forall (m :: * -> *). MonadIO m => Bool -> m ()
setStatus Bool
True
    setIdle :: LoggingT IO ()
setIdle = Bool -> LoggingT IO ()
forall (m :: * -> *). MonadIO m => Bool -> m ()
setStatus Bool
False
    allDone :: MonadIO m => m Bool
    allDone :: m Bool
allDone = (Bool -> Bool) -> IntMap Bool -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Bool -> Bool
not (IntMap Bool -> Bool) -> m (IntMap Bool) -> m Bool
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> TVar (IntMap Bool) -> m (IntMap Bool)
forall (m :: * -> *) a. MonadIO m => TVar a -> m a
readTVarIO TVar (IntMap Bool)
workerSetStatusMap
    go :: Bool -> LoggingT IO ()
go Bool
busy = do
      Maybe QueueURI
mv <- STM (Maybe QueueURI) -> LoggingT IO (Maybe QueueURI)
forall (m :: * -> *) a. MonadIO m => STM a -> m a
atomically (STM (Maybe QueueURI) -> LoggingT IO (Maybe QueueURI))
-> STM (Maybe QueueURI) -> LoggingT IO (Maybe QueueURI)
forall a b. (a -> b) -> a -> b
$ TQueue QueueURI -> STM (Maybe QueueURI)
forall a. TQueue a -> STM (Maybe a)
tryReadTQueue TQueue QueueURI
workerSetURIQueue
      -- Get an item off the queue
      case Maybe QueueURI
mv of
        -- No items on the queue
        Maybe QueueURI
Nothing -> do
          -- Set this worker as idle
          Text -> LoggingT IO ()
forall (m :: * -> *). MonadLogger m => Text -> m ()
logDebugN (Text -> LoggingT IO ()) -> Text -> LoggingT IO ()
forall a b. (a -> b) -> a -> b
$
            String -> Text
T.pack (String -> Text) -> String -> Text
forall a b. (a -> b) -> a -> b
$
              [String] -> String
unwords
                [ String
"Worker is idle:",
                  Int -> String
forall a. Show a => a -> String
show Int
workerSetWorkerIndex
                ]
          Bool -> LoggingT IO () -> LoggingT IO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when Bool
busy LoggingT IO ()
setIdle
          -- If all workers are idle, we are done.
          Bool
ad <- LoggingT IO Bool
forall (m :: * -> *). MonadIO m => m Bool
allDone
          Bool -> LoggingT IO () -> LoggingT IO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless Bool
ad (LoggingT IO () -> LoggingT IO ())
-> LoggingT IO () -> LoggingT IO ()
forall a b. (a -> b) -> a -> b
$ do
            IO () -> LoggingT IO ()
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> LoggingT IO ()) -> IO () -> LoggingT IO ()
forall a b. (a -> b) -> a -> b
$ Int -> IO ()
threadDelay Int
10000 -- 10 ms
            Bool -> LoggingT IO ()
go Bool
False
        -- An item on the queue
        Just QueueURI {[URI]
Word
URI
queueURITrace :: [URI]
queueURIDepth :: Word
queueURI :: URI
queueURITrace :: QueueURI -> [URI]
queueURIDepth :: QueueURI -> Word
queueURI :: QueueURI -> URI
..} -> do
          -- Set this worker as busy
          Text -> LoggingT IO ()
forall (m :: * -> *). MonadLogger m => Text -> m ()
logDebugN (Text -> LoggingT IO ()) -> Text -> LoggingT IO ()
forall a b. (a -> b) -> a -> b
$
            String -> Text
T.pack (String -> Text) -> String -> Text
forall a b. (a -> b) -> a -> b
$
              [String] -> String
unwords
                [ String
"Worker is busy:",
                  Int -> String
forall a. Show a => a -> String
show Int
workerSetWorkerIndex
                ]
          Bool -> LoggingT IO () -> LoggingT IO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless Bool
busy LoggingT IO ()
setBusy
          -- Check if the uri has been seen already
          Bool
alreadySeen <- STM Bool -> LoggingT IO Bool
forall (m :: * -> *) a. MonadIO m => STM a -> m a
atomically (STM Bool -> LoggingT IO Bool) -> STM Bool -> LoggingT IO Bool
forall a b. (a -> b) -> a -> b
$ URI -> Set URI -> Bool
forall a. Ord a => a -> Set a -> Bool
S.member URI
queueURI (Set URI -> Bool) -> STM (Set URI) -> STM Bool
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> TVar (Set URI) -> STM (Set URI)
forall a. TVar a -> STM a
readTVar TVar (Set URI)
workerSetSeenSet
          if Bool
alreadySeen
            then do
              -- We've already seen it, don't do anything.
              Text -> LoggingT IO ()
forall (m :: * -> *). MonadLogger m => Text -> m ()
logDebugN (Text -> LoggingT IO ()) -> Text -> LoggingT IO ()
forall a b. (a -> b) -> a -> b
$
                String -> Text
T.pack (String -> Text) -> String -> Text
forall a b. (a -> b) -> a -> b
$
                  [String] -> String
unwords
                    [ String
"Not fetching again:",
                      URI -> String
forall a. Show a => a -> String
show URI
queueURI
                    ]
            else do
              -- We haven't seen it yet. Mark it as seen.
              STM () -> LoggingT IO ()
forall (m :: * -> *) a. MonadIO m => STM a -> m a
atomically (STM () -> LoggingT IO ()) -> STM () -> LoggingT IO ()
forall a b. (a -> b) -> a -> b
$ TVar (Set URI) -> (Set URI -> Set URI) -> STM ()
forall a. TVar a -> (a -> a) -> STM ()
modifyTVar' TVar (Set URI)
workerSetSeenSet ((Set URI -> Set URI) -> STM ()) -> (Set URI -> Set URI) -> STM ()
forall a b. (a -> b) -> a -> b
$ URI -> Set URI -> Set URI
forall a. Ord a => a -> Set a -> Set a
S.insert URI
queueURI

              -- Helper function for inserting a result.
              -- We'll need this in both the cached and uncached branches below
              -- so we'll already define it here.
              let insertResult :: ResultReason -> m ()
insertResult ResultReason
reason =
                    STM () -> m ()
forall (m :: * -> *) a. MonadIO m => STM a -> m a
atomically (STM () -> m ()) -> STM () -> m ()
forall a b. (a -> b) -> a -> b
$
                      TVar (Map URI Result)
-> (Map URI Result -> Map URI Result) -> STM ()
forall a. TVar a -> (a -> a) -> STM ()
modifyTVar' TVar (Map URI Result)
workerSetResultsMap ((Map URI Result -> Map URI Result) -> STM ())
-> (Map URI Result -> Map URI Result) -> STM ()
forall a b. (a -> b) -> a -> b
$
                        URI -> Result -> Map URI Result -> Map URI Result
forall k a. Ord k => k -> a -> Map k a -> Map k a
M.insert
                          URI
queueURI
                          Result :: ResultReason -> [URI] -> Result
Result
                            { resultReason :: ResultReason
resultReason = ResultReason
reason,
                              resultTrace :: [URI]
resultTrace = [URI]
queueURITrace
                            }

              -- Check if the response is cached
              let cacheURI :: URI
cacheURI = URI
queueURI {uriFragment :: String
uriFragment = String
""}
              Maybe [ByteString]
mCachedResult <- case Maybe (TVar (LRU URI [ByteString]))
workerSetCache of
                Maybe (TVar (LRU URI [ByteString]))
Nothing -> Maybe [ByteString] -> LoggingT IO (Maybe [ByteString])
forall (f :: * -> *) a. Applicative f => a -> f a
pure Maybe [ByteString]
forall a. Maybe a
Nothing -- Can't be cached if there is no cache
                Just TVar (LRU URI [ByteString])
cache -> STM (Maybe [ByteString]) -> LoggingT IO (Maybe [ByteString])
forall (m :: * -> *) a. MonadIO m => STM a -> m a
atomically (STM (Maybe [ByteString]) -> LoggingT IO (Maybe [ByteString]))
-> STM (Maybe [ByteString]) -> LoggingT IO (Maybe [ByteString])
forall a b. (a -> b) -> a -> b
$ TVar (LRU URI [ByteString])
-> (LRU URI [ByteString]
    -> (Maybe [ByteString], LRU URI [ByteString]))
-> STM (Maybe [ByteString])
forall s a. TVar s -> (s -> (a, s)) -> STM a
stateTVar TVar (LRU URI [ByteString])
cache ((LRU URI [ByteString]
  -> (Maybe [ByteString], LRU URI [ByteString]))
 -> STM (Maybe [ByteString]))
-> (LRU URI [ByteString]
    -> (Maybe [ByteString], LRU URI [ByteString]))
-> STM (Maybe [ByteString])
forall a b. (a -> b) -> a -> b
$ (LRU URI [ByteString], Maybe [ByteString])
-> (Maybe [ByteString], LRU URI [ByteString])
forall a b. (a, b) -> (b, a)
swap ((LRU URI [ByteString], Maybe [ByteString])
 -> (Maybe [ByteString], LRU URI [ByteString]))
-> (LRU URI [ByteString]
    -> (LRU URI [ByteString], Maybe [ByteString]))
-> LRU URI [ByteString]
-> (Maybe [ByteString], LRU URI [ByteString])
forall b c a. (b -> c) -> (a -> b) -> a -> c
. URI
-> LRU URI [ByteString]
-> (LRU URI [ByteString], Maybe [ByteString])
forall key val.
Ord key =>
key -> LRU key val -> (LRU key val, Maybe val)
LRU.lookup URI
cacheURI

              -- Check if we have the fragments cached
              Maybe [ByteString]
mResp <- case Maybe [ByteString]
mCachedResult of
                -- Found in cache
                Just [ByteString]
cachedResponse -> do
                  Text -> LoggingT IO ()
forall (m :: * -> *). MonadLogger m => Text -> m ()
logInfoN (Text -> LoggingT IO ()) -> Text -> LoggingT IO ()
forall a b. (a -> b) -> a -> b
$
                    String -> Text
T.pack (String -> Text) -> String -> Text
forall a b. (a -> b) -> a -> b
$
                      [String] -> String
unwords
                        [ String
"Not fetching because the page is already cached:",
                          URI -> String
forall a. Show a => a -> String
show URI
queueURI
                        ]
                  Maybe [ByteString] -> LoggingT IO (Maybe [ByteString])
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Maybe [ByteString] -> LoggingT IO (Maybe [ByteString]))
-> Maybe [ByteString] -> LoggingT IO (Maybe [ByteString])
forall a b. (a -> b) -> a -> b
$ [ByteString] -> Maybe [ByteString]
forall a. a -> Maybe a
Just [ByteString]
cachedResponse
                -- Not found in cache
                Maybe [ByteString]
Nothing -> do
                  -- Create a request
                  case URI -> Maybe Request
forall (m :: * -> *). MonadThrow m => URI -> m Request
requestFromURI URI
queueURI of
                    -- Making the request failed
                    Maybe Request
Nothing -> do
                      Text -> LoggingT IO ()
forall (m :: * -> *). MonadLogger m => Text -> m ()
logErrorN (Text -> LoggingT IO ()) -> Text -> LoggingT IO ()
forall a b. (a -> b) -> a -> b
$
                        String -> Text
T.pack (String -> Text) -> String -> Text
forall a b. (a -> b) -> a -> b
$
                          [String] -> String
unwords
                            [ String
"Unable to construct a request from this uri:",
                              URI -> String
forall a. Show a => a -> String
show URI
queueURI
                            ]
                      Maybe [ByteString] -> LoggingT IO (Maybe [ByteString])
forall (f :: * -> *) a. Applicative f => a -> f a
pure Maybe [ByteString]
forall a. Maybe a
Nothing
                    -- Making the request succeeded
                    Just Request
req -> do
                      -- Do the actual fetch
                      let fetchingLog :: [String]
fetchingLog = case Maybe Word
workerSetMaxDepth of
                            Maybe Word
Nothing ->
                              [ String
"Fetching: ",
                                URI -> String
forall a. Show a => a -> String
show URI
queueURI
                              ]
                            Just Word
md ->
                              [ String
"Depth ",
                                Word -> String
forall a. Show a => a -> String
show Word
queueURIDepth,
                                String
"/",
                                Word -> String
forall a. Show a => a -> String
show Word
md,
                                String
"; Fetching: ",
                                URI -> String
forall a. Show a => a -> String
show URI
queueURI
                              ]
                      Text -> LoggingT IO ()
forall (m :: * -> *). MonadLogger m => Text -> m ()
logInfoN (Text -> LoggingT IO ()) -> Text -> LoggingT IO ()
forall a b. (a -> b) -> a -> b
$ String -> Text
T.pack (String -> Text) -> String -> Text
forall a b. (a -> b) -> a -> b
$ [String] -> String
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [String]
fetchingLog
                      Either HttpException (Response ByteString)
errOrResp <- IO (Either HttpException (Response ByteString))
-> LoggingT IO (Either HttpException (Response ByteString))
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (Either HttpException (Response ByteString))
 -> LoggingT IO (Either HttpException (Response ByteString)))
-> IO (Either HttpException (Response ByteString))
-> LoggingT IO (Either HttpException (Response ByteString))
forall a b. (a -> b) -> a -> b
$ Request
-> IO (Response ByteString)
-> IO (Either HttpException (Response ByteString))
forall a.
Request
-> IO (Response a) -> IO (Either HttpException (Response a))
retryHTTP Request
req (IO (Response ByteString)
 -> IO (Either HttpException (Response ByteString)))
-> IO (Response ByteString)
-> IO (Either HttpException (Response ByteString))
forall a b. (a -> b) -> a -> b
$ Request -> Manager -> IO (Response ByteString)
httpLbs Request
req Manager
workerSetHTTPManager
                      case Either HttpException (Response ByteString)
errOrResp of
                        -- Something went wrong during the fetch.
                        Left HttpException
err -> do
                          Text -> LoggingT IO ()
forall (m :: * -> *). MonadLogger m => Text -> m ()
logDebugN (Text -> LoggingT IO ()) -> Text -> LoggingT IO ()
forall a b. (a -> b) -> a -> b
$
                            String -> Text
T.pack (String -> Text) -> String -> Text
forall a b. (a -> b) -> a -> b
$
                              [String] -> String
unwords
                                [ String
"Got exception for",
                                  URI -> String
forall a. Show a => a -> String
show URI
queueURI String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
":",
                                  HttpException -> String
forall a. Show a => a -> String
show HttpException
err
                                ]
                          ResultReason -> LoggingT IO ()
forall (m :: * -> *). MonadIO m => ResultReason -> m ()
insertResult (ResultReason -> LoggingT IO ()) -> ResultReason -> LoggingT IO ()
forall a b. (a -> b) -> a -> b
$ HttpException -> ResultReason
ResultReasonException HttpException
err
                          Maybe [ByteString] -> LoggingT IO (Maybe [ByteString])
forall (f :: * -> *) a. Applicative f => a -> f a
pure Maybe [ByteString]
forall a. Maybe a
Nothing

                        -- Got a response
                        Right Response ByteString
resp -> do
                          let status :: Status
status = Response ByteString -> Status
forall body. Response body -> Status
responseStatus Response ByteString
resp
                          let sci :: Int
sci = Status -> Int
HTTP.statusCode Status
status
                          Text -> LoggingT IO ()
forall (m :: * -> *). MonadLogger m => Text -> m ()
logDebugN (Text -> LoggingT IO ()) -> Text -> LoggingT IO ()
forall a b. (a -> b) -> a -> b
$
                            String -> Text
T.pack (String -> Text) -> String -> Text
forall a b. (a -> b) -> a -> b
$
                              [String] -> String
unwords
                                [ String
"Got response for",
                                  URI -> String
forall a. Show a => a -> String
show URI
queueURI String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
": ",
                                  Int -> String
forall a. Show a => a -> String
show Int
sci
                                ]
                          -- If the status code is not in the 2XX range, add it to the results
                          Bool -> LoggingT IO () -> LoggingT IO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless (Int
200 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
sci Bool -> Bool -> Bool
&& Int
sci Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
300) (LoggingT IO () -> LoggingT IO ())
-> LoggingT IO () -> LoggingT IO ()
forall a b. (a -> b) -> a -> b
$ ResultReason -> LoggingT IO ()
forall (m :: * -> *). MonadIO m => ResultReason -> m ()
insertResult (ResultReason -> LoggingT IO ()) -> ResultReason -> LoggingT IO ()
forall a b. (a -> b) -> a -> b
$ Status -> ResultReason
ResultReasonStatus Status
status

                          -- Read the entire response and parse tags
                          let body :: ByteString
body = ByteString -> ByteString
LB.toStrict (ByteString -> ByteString) -> ByteString -> ByteString
forall a b. (a -> b) -> a -> b
$ Response ByteString -> ByteString
forall body. Response body -> body
responseBody Response ByteString
resp
                          let tags :: [Tag ByteString]
tags = ParseOptions ByteString -> ByteString -> [Tag ByteString]
forall str. StringLike str => ParseOptions str -> str -> [Tag str]
parseTagsOptions ParseOptions ByteString
forall str. StringLike str => ParseOptions str
parseOptionsFast ByteString
body

                          -- Only recurse into the page if we're not deep enough already
                          let shouldRecurseByDepth :: Bool
shouldRecurseByDepth = case Maybe Word
workerSetMaxDepth of
                                Maybe Word
Nothing -> Bool
True
                                Just Word
md -> Word
queueURIDepth Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
< Word
md

                          -- Only recurse into the page if the page has the same root.
                          let shouldRecurseByAuthority :: Bool
shouldRecurseByAuthority = URI -> Maybe URIAuth
uriAuthority URI
queueURI Maybe URIAuth -> Maybe URIAuth -> Bool
forall a. Eq a => a -> a -> Bool
== URI -> Maybe URIAuth
uriAuthority URI
workerSetRoot

                          let shouldRecurse :: Bool
shouldRecurse = Bool
shouldRecurseByDepth Bool -> Bool -> Bool
&& Bool
shouldRecurseByAuthority

                          Bool -> LoggingT IO () -> LoggingT IO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when Bool
shouldRecurse (LoggingT IO () -> LoggingT IO ())
-> LoggingT IO () -> LoggingT IO ()
forall a b. (a -> b) -> a -> b
$ do
                            let removeFragment :: URI -> URI
removeFragment URI
u = URI
u {uriFragment :: String
uriFragment = String
""}
                            let uris :: [URI]
uris =
                                  (if Bool
workerSetCheckFragments then [URI] -> [URI]
forall a. a -> a
id else (URI -> URI) -> [URI] -> [URI]
forall a b. (a -> b) -> [a] -> [b]
map URI -> URI
removeFragment) ([URI] -> [URI]) -> [URI] -> [URI]
forall a b. (a -> b) -> a -> b
$
                                    (String -> Maybe URI) -> [String] -> [URI]
forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe (URI -> String -> Maybe URI
parseURIRelativeTo URI
queueURI) ([String] -> [URI]) -> [String] -> [URI]
forall a b. (a -> b) -> a -> b
$
                                      (ByteString -> Maybe String) -> [ByteString] -> [String]
forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe ((Text -> String) -> Maybe Text -> Maybe String
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Text -> String
T.unpack (Maybe Text -> Maybe String)
-> (ByteString -> Maybe Text) -> ByteString -> Maybe String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Either UnicodeException Text -> Maybe Text
forall e a. Either e a -> Maybe a
rightToMaybe (Either UnicodeException Text -> Maybe Text)
-> (ByteString -> Either UnicodeException Text)
-> ByteString
-> Maybe Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Either UnicodeException Text
TE.decodeUtf8') ([ByteString] -> [String]) -> [ByteString] -> [String]
forall a b. (a -> b) -> a -> b
$
                                        (Tag ByteString -> Maybe ByteString)
-> [Tag ByteString] -> [ByteString]
forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe Tag ByteString -> Maybe ByteString
forall str. (Eq str, IsString str) => Tag str -> Maybe str
aTagHref [Tag ByteString]
tags

                            let predicate :: URI -> Bool
predicate =
                                  if Bool
workerSetExternal
                                    then Bool -> URI -> Bool
forall a b. a -> b -> a
const Bool
True
                                    else -- Filter out the ones that are not on the same host.
                                      (Maybe URIAuth -> Maybe URIAuth -> Bool
forall a. Eq a => a -> a -> Bool
== URI -> Maybe URIAuth
uriAuthority URI
workerSetRoot) (Maybe URIAuth -> Bool) -> (URI -> Maybe URIAuth) -> URI -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. URI -> Maybe URIAuth
uriAuthority
                            let urisToAddToQueue :: [QueueURI]
urisToAddToQueue =
                                  (URI -> QueueURI) -> [URI] -> [QueueURI]
forall a b. (a -> b) -> [a] -> [b]
map
                                    ( \URI
u ->
                                        QueueURI :: URI -> Word -> [URI] -> QueueURI
QueueURI
                                          { queueURI :: URI
queueURI = URI
u,
                                            queueURIDepth :: Word
queueURIDepth = Word -> Word
forall a. Enum a => a -> a
succ Word
queueURIDepth,
                                            queueURITrace :: [URI]
queueURITrace = URI
queueURI URI -> [URI] -> [URI]
forall a. a -> [a] -> [a]
: [URI]
queueURITrace
                                          }
                                    )
                                    ([URI] -> [QueueURI]) -> [URI] -> [QueueURI]
forall a b. (a -> b) -> a -> b
$ (URI -> Bool) -> [URI] -> [URI]
forall a. (a -> Bool) -> [a] -> [a]
filter URI -> Bool
predicate [URI]
uris
                            STM () -> LoggingT IO ()
forall (m :: * -> *) a. MonadIO m => STM a -> m a
atomically (STM () -> LoggingT IO ()) -> STM () -> LoggingT IO ()
forall a b. (a -> b) -> a -> b
$ (QueueURI -> STM ()) -> [QueueURI] -> STM ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ (TQueue QueueURI -> QueueURI -> STM ()
forall a. TQueue a -> a -> STM ()
writeTQueue TQueue QueueURI
workerSetURIQueue) [QueueURI]
urisToAddToQueue

                          -- Compute the fragments
                          let fragments :: [ByteString]
fragments = (Tag ByteString -> [ByteString])
-> [Tag ByteString] -> [ByteString]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap Tag ByteString -> [ByteString]
forall str. (Eq str, IsString str) => Tag str -> [str]
tagIdOrName [Tag ByteString]
tags

                          -- Insert the fragments into the cache.
                          Maybe (TVar (LRU URI [ByteString]))
-> (TVar (LRU URI [ByteString]) -> LoggingT IO ())
-> LoggingT IO ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ Maybe (TVar (LRU URI [ByteString]))
workerSetCache ((TVar (LRU URI [ByteString]) -> LoggingT IO ()) -> LoggingT IO ())
-> (TVar (LRU URI [ByteString]) -> LoggingT IO ())
-> LoggingT IO ()
forall a b. (a -> b) -> a -> b
$ \TVar (LRU URI [ByteString])
cache ->
                            STM () -> LoggingT IO ()
forall (m :: * -> *) a. MonadIO m => STM a -> m a
atomically (STM () -> LoggingT IO ()) -> STM () -> LoggingT IO ()
forall a b. (a -> b) -> a -> b
$ TVar (LRU URI [ByteString])
-> (LRU URI [ByteString] -> LRU URI [ByteString]) -> STM ()
forall a. TVar a -> (a -> a) -> STM ()
modifyTVar' TVar (LRU URI [ByteString])
cache ((LRU URI [ByteString] -> LRU URI [ByteString]) -> STM ())
-> (LRU URI [ByteString] -> LRU URI [ByteString]) -> STM ()
forall a b. (a -> b) -> a -> b
$ URI -> [ByteString] -> LRU URI [ByteString] -> LRU URI [ByteString]
forall key val. Ord key => key -> val -> LRU key val -> LRU key val
LRU.insert URI
cacheURI [ByteString]
fragments

                          Maybe [ByteString] -> LoggingT IO (Maybe [ByteString])
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Maybe [ByteString] -> LoggingT IO (Maybe [ByteString]))
-> Maybe [ByteString] -> LoggingT IO (Maybe [ByteString])
forall a b. (a -> b) -> a -> b
$ [ByteString] -> Maybe [ByteString]
forall a. a -> Maybe a
Just [ByteString]
fragments

              case Maybe [ByteString]
mResp of
                Maybe [ByteString]
Nothing -> () -> LoggingT IO ()
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
                Just [ByteString]
fragments -> do
                  -- Check that the fragments are in order.
                  Bool -> LoggingT IO () -> LoggingT IO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when Bool
workerSetCheckFragments (LoggingT IO () -> LoggingT IO ())
-> LoggingT IO () -> LoggingT IO ()
forall a b. (a -> b) -> a -> b
$ do
                    case URI -> String
uriFragment URI
queueURI of
                      String
"" -> () -> LoggingT IO ()
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
                      Char
'#' : String
f -> do
                        let fragmentLinkGood :: Bool
fragmentLinkGood = Text -> ByteString
TE.encodeUtf8 (String -> Text
T.pack String
f) ByteString -> [ByteString] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [ByteString]
fragments
                        Bool -> LoggingT IO () -> LoggingT IO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Bool -> Bool
not Bool
fragmentLinkGood) (LoggingT IO () -> LoggingT IO ())
-> LoggingT IO () -> LoggingT IO ()
forall a b. (a -> b) -> a -> b
$ ResultReason -> LoggingT IO ()
forall (m :: * -> *). MonadIO m => ResultReason -> m ()
insertResult (ResultReason -> LoggingT IO ()) -> ResultReason -> LoggingT IO ()
forall a b. (a -> b) -> a -> b
$ String -> ResultReason
ResultReasonFragmentMissing (URI -> String
uriFragment URI
queueURI)
                      String
_ -> () -> LoggingT IO ()
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
          Bool -> LoggingT IO ()
go Bool
True

addFetcherNameToLog :: Text -> LoggingT m a -> LoggingT m a
addFetcherNameToLog :: Text -> LoggingT m a -> LoggingT m a
addFetcherNameToLog Text
fetcherName = (Text -> Text) -> LoggingT m a -> LoggingT m a
forall (m :: * -> *) a.
(Text -> Text) -> LoggingT m a -> LoggingT m a
modLogSource ((Text -> Text) -> LoggingT m a -> LoggingT m a)
-> (Text -> Text) -> LoggingT m a -> LoggingT m a
forall a b. (a -> b) -> a -> b
$ \Text
source -> if Text
source Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"" then Text
fetcherName else Text
source

modLogSource :: (LogSource -> LogSource) -> LoggingT m a -> LoggingT m a
modLogSource :: (Text -> Text) -> LoggingT m a -> LoggingT m a
modLogSource Text -> Text
func (LoggingT (Loc -> Text -> LogLevel -> LogStr -> IO ()) -> m a
mFunc) = ((Loc -> Text -> LogLevel -> LogStr -> IO ()) -> m a)
-> LoggingT m a
forall (m :: * -> *) a.
((Loc -> Text -> LogLevel -> LogStr -> IO ()) -> m a)
-> LoggingT m a
LoggingT (((Loc -> Text -> LogLevel -> LogStr -> IO ()) -> m a)
 -> LoggingT m a)
-> ((Loc -> Text -> LogLevel -> LogStr -> IO ()) -> m a)
-> LoggingT m a
forall a b. (a -> b) -> a -> b
$ \Loc -> Text -> LogLevel -> LogStr -> IO ()
logFunc ->
  let newLogFunc :: Loc -> Text -> LogLevel -> LogStr -> IO ()
newLogFunc Loc
loc Text
source LogLevel
level LogStr
str =
        let source' :: Text
source' = Text -> Text
func Text
source
         in Loc -> Text -> LogLevel -> LogStr -> IO ()
logFunc Loc
loc Text
source' LogLevel
level LogStr
str
   in (Loc -> Text -> LogLevel -> LogStr -> IO ()) -> m a
mFunc Loc -> Text -> LogLevel -> LogStr -> IO ()
newLogFunc

parseURIRelativeTo :: URI -> String -> Maybe URI
parseURIRelativeTo :: URI -> String -> Maybe URI
parseURIRelativeTo URI
root String
s =
  [Maybe URI] -> Maybe URI
forall (t :: * -> *) (m :: * -> *) a.
(Foldable t, MonadPlus m) =>
t (m a) -> m a
msum
    [ (URI -> URI -> URI
`relativeTo` URI
root) (URI -> URI) -> Maybe URI -> Maybe URI
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> String -> Maybe URI
parseRelativeReference String
s,
      String -> Maybe URI
parseAbsoluteURI String
s
    ]

rightToMaybe :: Either e a -> Maybe a
rightToMaybe :: Either e a -> Maybe a
rightToMaybe = \case
  Left e
_ -> Maybe a
forall a. Maybe a
Nothing
  Right a
a -> a -> Maybe a
forall a. a -> Maybe a
Just a
a

aTagHref :: (Eq str, IsString str) => Tag str -> Maybe str
aTagHref :: Tag str -> Maybe str
aTagHref = \case
  TagOpen str
"a" [Attribute str]
as -> str -> [Attribute str] -> Maybe str
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup str
"href" [Attribute str]
as
  TagOpen str
"link" [Attribute str]
as -> str -> [Attribute str] -> Maybe str
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup str
"href" [Attribute str]
as
  TagOpen str
"img" [Attribute str]
as -> str -> [Attribute str] -> Maybe str
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup str
"src" [Attribute str]
as
  Tag str
_ -> Maybe str
forall a. Maybe a
Nothing

tagIdOrName :: (Eq str, IsString str) => Tag str -> [str]
tagIdOrName :: Tag str -> [str]
tagIdOrName = \case
  TagOpen str
_ [Attribute str]
as -> Maybe str -> [str]
forall a. Maybe a -> [a]
maybeToList (str -> [Attribute str] -> Maybe str
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup str
"id" [Attribute str]
as) [str] -> [str] -> [str]
forall a. [a] -> [a] -> [a]
++ Maybe str -> [str]
forall a. Maybe a -> [a]
maybeToList (str -> [Attribute str] -> Maybe str
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup str
"name" [Attribute str]
as)
  Tag str
_ -> []

retryHTTP ::
  -- | Just  for the error message
  Request ->
  IO (Response a) ->
  IO (Either HttpException (Response a))
retryHTTP :: Request
-> IO (Response a) -> IO (Either HttpException (Response a))
retryHTTP Request
req IO (Response a)
action =
  let policy :: RetryPolicyM IO
policy =
        [RetryPolicyM IO] -> RetryPolicyM IO
forall a. Monoid a => [a] -> a
mconcat
          [ Int -> RetryPolicyM IO
forall (m :: * -> *). Monad m => Int -> RetryPolicyM m
exponentialBackoff Int
100_000,
            Int -> RetryPolicy
limitRetries Int
3
          ]
   in RetryPolicyM IO
-> (RetryStatus -> Either HttpException (Response a) -> IO Bool)
-> (RetryStatus -> IO (Either HttpException (Response a)))
-> IO (Either HttpException (Response a))
forall (m :: * -> *) b.
MonadIO m =>
RetryPolicyM m
-> (RetryStatus -> b -> m Bool) -> (RetryStatus -> m b) -> m b
retrying
        RetryPolicyM IO
policy
        (\RetryStatus
_ Either HttpException (Response a)
e -> Bool -> IO Bool
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Either HttpException (Response a) -> Bool
forall b. Either HttpException b -> Bool
couldBeFlaky Either HttpException (Response a)
e))
        ( \RetryStatus
_ ->
            (Response a -> Either HttpException (Response a)
forall a b. b -> Either a b
Right (Response a -> Either HttpException (Response a))
-> IO (Response a) -> IO (Either HttpException (Response a))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> IO (Response a)
action)
              IO (Either HttpException (Response a))
-> [Handler IO (Either HttpException (Response a))]
-> IO (Either HttpException (Response a))
forall (m :: * -> *) a.
MonadUnliftIO m =>
m a -> [Handler m a] -> m a
`catches` [ (HttpException -> IO (Either HttpException (Response a)))
-> Handler IO (Either HttpException (Response a))
forall (m :: * -> *) a e. Exception e => (e -> m a) -> Handler m a
Handler ((HttpException -> IO (Either HttpException (Response a)))
 -> Handler IO (Either HttpException (Response a)))
-> (HttpException -> IO (Either HttpException (Response a)))
-> Handler IO (Either HttpException (Response a))
forall a b. (a -> b) -> a -> b
$ Either HttpException (Response a)
-> IO (Either HttpException (Response a))
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Either HttpException (Response a)
 -> IO (Either HttpException (Response a)))
-> (HttpException -> Either HttpException (Response a))
-> HttpException
-> IO (Either HttpException (Response a))
forall b c a. (b -> c) -> (a -> b) -> a -> c
. HttpException -> Either HttpException (Response a)
forall a b. a -> Either a b
Left,
                          (HttpExceptionContentWrapper
 -> IO (Either HttpException (Response a)))
-> Handler IO (Either HttpException (Response a))
forall (m :: * -> *) a e. Exception e => (e -> m a) -> Handler m a
Handler ((HttpExceptionContentWrapper
  -> IO (Either HttpException (Response a)))
 -> Handler IO (Either HttpException (Response a)))
-> (HttpExceptionContentWrapper
    -> IO (Either HttpException (Response a)))
-> Handler IO (Either HttpException (Response a))
forall a b. (a -> b) -> a -> b
$ Either HttpException (Response a)
-> IO (Either HttpException (Response a))
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Either HttpException (Response a)
 -> IO (Either HttpException (Response a)))
-> (HttpExceptionContentWrapper
    -> Either HttpException (Response a))
-> HttpExceptionContentWrapper
-> IO (Either HttpException (Response a))
forall b c a. (b -> c) -> (a -> b) -> a -> c
. HttpException -> Either HttpException (Response a)
forall a b. a -> Either a b
Left (HttpException -> Either HttpException (Response a))
-> (HttpExceptionContentWrapper -> HttpException)
-> HttpExceptionContentWrapper
-> Either HttpException (Response a)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Request -> HttpExceptionContentWrapper -> HttpException
toHttpException Request
req
                        ]
        )
  where
    couldBeFlaky :: Either HttpException b -> Bool
couldBeFlaky (Left HttpException
e) = case HttpException
e of
      HttpExceptionRequest Request
_ HttpExceptionContent
hec -> case HttpExceptionContent
hec of
        HttpExceptionContent
ResponseTimeout -> Bool
True
        HttpExceptionContent
ConnectionTimeout -> Bool
True
        ConnectionFailure SomeException
_ -> Bool
True
        HttpExceptionContent
NoResponseDataReceived -> Bool
True
        HttpExceptionContent
_ -> Bool
False
      InvalidUrlException String
_ String
_ -> Bool
False
    couldBeFlaky Either HttpException b
_ = Bool
False