{-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE ImpredicativeTypes #-} {-# LANGUAGE RankNTypes #-} -- | Craze is a small module for performing multiple similar HTTP GET requests -- in parallel. This is performed through the `raceGet` function, which will -- perform all the requests and pick the first successful response that passes -- a certain check, menaing that the parallel requests are essentially racing -- against each other. -- -- __What is the usefulness of this?__ -- -- If you are dealing with data source or API that is very unreliable (high -- latency, random failures) and there are no limitations or consequences on -- perfoming significantly more requests, then performing multiple requests -- (through direct connections, proxies, VPNs) may increase the chances of -- getting a successful response faster and more reliably. -- -- However, if using a different data source or transport is a possibility, it -- is potentially a better option that this approach. -- -- __Examples:__ -- -- Performing two parallel GET requests against https://chromabits.com and -- returning the status code of the first successful one: -- -- >>> :{ -- let racer = (Racer -- { racerProviders = -- [ return defaultProviderOptions -- , return defaultProviderOptions -- ] -- , racerHandler = return . respStatus -- , racerChecker = (200 ==) -- , racerDebug = False -- } :: Racer [(String, String)] ByteString Int) -- in (raceGet racer "https://chromabits.com" >>= print) -- :} -- Just 200 -- module Network.Craze ( -- * Types RacerHandler , RacerChecker , Racer(..) , ProviderOptions(..) -- * Functions , defaultRacer , defaultProviderOptions , raceGet -- * Providers , simple , delayed ) where import Control.Concurrent (threadDelay) import Control.Concurrent.Async import Control.Monad (when) import Data.ByteString (ByteString) import Data.Default.Class (Default, def) import Network.Curl -- | A `RacerHandler` is simply a function for transforming a response after it -- is received. The handler is only applied to successful requests before they -- are checked by the `RacerChecker`. -- -- This is primarily for extracting or parsing a `CurlResponse_` before doing -- any further work. The type returned by the handler will be used as the -- input of the checker and will be the return type of `raceGet`. type RacerHandler headerTy bodyTy a = CurlResponse_ headerTy bodyTy -> IO a -- | A function that computes whether or not a result is valid or not. -- Successful responses that do not pass the checker are discarded. -- -- This should help filtering out successful responses that do not, for some -- reason, have the expected result (e.g. Random content changes, Rate -- Limitting, etc). type RacerChecker a = a -> Bool -- | A function that returns the @ProviderOptions@ to use for making a request. type RacerProvider = IO ProviderOptions -- | Options for a specific provider. data ProviderOptions = ProviderOptions { -- | Options to pass down to Curl. poOptions :: [CurlOption] -- | Number of microseconds to delay the request by. , poDelay :: Maybe Int } instance Default ProviderOptions where def = ProviderOptions { poOptions = [] , poDelay = Nothing } -- | A record describing the rules for racing requests. data Racer headerTy bodyTy a = Racer { racerHandler :: RacerHandler headerTy bodyTy a , racerChecker :: RacerChecker a -- | On a `Racer`, each `RaceProvider` represents a separate client -- configuration. When performing a race, each provider will be used to spwan -- a client and perform a request. This allows one to control the number of -- requests performed and with which `CurlOption`s. , racerProviders :: [RacerProvider] -- | When set to `True`, debugging messages will be written to stdout. , racerDebug :: Bool } instance Default (Racer [(String,String)] ByteString ByteString) where def = Racer { racerHandler = return . respBody , racerChecker = const True , racerProviders = [] , racerDebug = False } -- | A `Racer` with some default values. -- -- __Note:__ The handler will extract the response body as a `ByteString` and -- ignore everything else, hence the type: -- -- @ -- Racer [(String, String)] ByteString ByteString -- @ -- -- If this is not the desired behavior, or if the response should be parsed or -- processed, you should use the `Racer` constructor directly and provide all -- fields. defaultRacer :: Racer [(String,String)] ByteString ByteString defaultRacer = def -- | A default set of options for a provider. defaultProviderOptions :: ProviderOptions defaultProviderOptions = def -- | A simple provider. It does not delay requests. simple :: [CurlOption] -> IO ProviderOptions simple xs = pure $ def { poOptions = xs } -- | A provider which will delay a request by the provided number of -- microseconds. delayed :: [CurlOption] -> Int -> IO ProviderOptions delayed xs d = pure $ def { poOptions = xs , poDelay = Just d } -- | Perform a GET request on the provided URL using all providers in -- parallel. -- -- Rough summary of the algorithm: -- -- - Start all requests -- - Wait for a request to finish. -- -- * If the request is successful, apply the handler on it. -- -- - If the result of the handler passes the checker, cancel all other -- requests, and return the result. -- - If the check fails, go back to waiting for another request to finish. -- -- * If the request fails, go back to waiting for another request to finish. -- raceGet :: (Eq a, CurlHeader ht, CurlBuffer bt) => Racer ht bt a -> URLString -> IO (Maybe a) raceGet r url = do asyncs <- mapM performGetAsync_ (racerProviders r) when (racerDebug r) $ do putStr "[racer] Created Asyncs: " print (map asyncThreadId asyncs) waitForOne asyncs (racerHandler r) (racerChecker r) (racerDebug r) where performGetAsync_ :: (CurlHeader ht, CurlBuffer bt) => RacerProvider -> IO (Async (CurlResponse_ ht bt)) performGetAsync_ = performGetAsync url waitForOne :: (Eq a) => [Async (CurlResponse_ ht bt)] -> RacerHandler ht bt a -> RacerChecker a -> Bool -> IO (Maybe a) waitForOne asyncs handler check debug = if null asyncs then pure Nothing else do winner <- waitAnyCatch asyncs case winner of (as, Right a) -> do result <- handler a if check result then do cancelAll (except as asyncs) when debug $ putStr "[racer] Winner: " >> print (asyncThreadId as) pure $ Just result else waitForOne (except as asyncs) handler check debug (as, Left _) -> waitForOne (except as asyncs) handler check debug cancelAll :: [Async a] -> IO () cancelAll = mapM_ (async . cancel) except :: (Eq a) => a -> [a] -> [a] except x = filter (x /=) performGetAsync :: (CurlHeader ht, CurlBuffer bt) => URLString -> RacerProvider -> IO (Async (CurlResponse_ ht bt)) performGetAsync url provider = async $ do options <- provider case poDelay options of Nothing -> pure () Just delay -> threadDelay delay curlGetResponse_ url (poOptions options)