{-# LANGUAGE OverloadedStrings, BangPatterns, RecordWildCards, ViewPatterns,
             DoAndIfThenElse, PatternGuards, ScopedTypeVariables,
             TupleSections #-}
{-# OPTIONS_GHC -fwarn-incomplete-patterns -fwarn-unused-imports #-}
-- | HTTP downloader tailored for web-crawler needs.
--
--  * Handles all possible http-client exceptions and returns
--    human readable error messages.
--
--  * Handles some web server bugs (returning @deflate@ data instead of @gzip@,
--    invalid @gzip@ encoding).
--
--  * Uses OpenSSL instead of @tls@ package (since @tls@ doesn't handle all sites and works slower than OpenSSL).
--
--  * Ignores invalid SSL sertificates.
--
--  * Receives data in 32k chunks internally to reduce memory fragmentation
--    on many parallel downloads.
--
--  * Download timeout.
--
--  * Total download size limit.
--
--  * Returns HTTP headers for subsequent redownloads
--    and handles @Not modified@ results.
--
--  * Can be used with external DNS resolver (hsdns-cache for example).
--
--  * Keep-alive connections pool (thanks to http-client).
--
--  Typical workflow in crawler:
--
--  @
--  withDnsCache $ \ c -> withDownloader $ \ d -> do
--  ... -- got URL from queue
--  ra <- resolveA c $ hostNameFromUrl url
--  case ra of
--      Left err -> ... -- uh oh, bad host
--      Right ha -> do
--          ... -- crawler politeness stuff (rate limits, queues)
--          dr <- download d url (Just ha) opts
--          case dr of
--              DROK dat redownloadOptions ->
--                  ... -- analyze data, save redownloadOpts for next download
--              DRRedirect .. -> ...
--              DRNotModified -> ...
--              DRError e -> ...
--  @
--
-- It's highly recommended to use
-- <http://hackage.haskell.org/package/concurrent-dns-cache>
-- (preferably with single resolver pointing to locally running BIND)
-- for DNS resolution since @getAddrInfo@ used in @http-client@ can be
-- buggy and ineffective when it needs to resolve many hosts per second for
-- a long time.
--
module Network.HTTP.Conduit.Downloader
    ( -- * Download operations
      urlGetContents, urlGetContentsPost
    , download, post, downloadG, rawDownload
    , DownloadResult(..), RawDownloadResult(..), DownloadOptions

      -- * Downloader
    , DownloaderSettings(..)
    , Downloader, withDownloader, withDownloaderSettings, newDownloader

      -- * Utils
    , postRequest
    ) where

import qualified Data.Text as T
import qualified Data.ByteString.Lazy.Char8 as BL
import qualified Data.ByteString.Char8 as B
import qualified Data.ByteString.Internal as B
import Control.Monad
import qualified Control.Exception as E
import Data.Default as C
import Data.String
import Data.Char
import Data.Maybe
import Data.List
import Foreign
import qualified Network.Socket as NS

import qualified OpenSSL as SSL
import qualified OpenSSL.Session as SSL
import qualified Network.HTTP.Types as N
import qualified Network.HTTP.Client as C
import qualified Network.HTTP.Client.Internal as C
import qualified Network.HTTP.Client.OpenSSL as C
import Codec.Compression.Zlib.Raw as Deflate
import Network.URI
import System.IO.Unsafe
import Data.Time.Format
import Data.Time.Clock
import Data.Time.Clock.POSIX
import System.Timeout

-- | Result of 'download' operation.
data DownloadResult
    = DROK       B.ByteString DownloadOptions
      -- ^ Successful download with data and options for next download.
    | DRRedirect String
      -- ^ Redirect URL
    | DRError    String
      -- ^ Error
    | DRNotModified
      -- ^ HTTP 304 Not Modified
    deriving (Int -> DownloadResult -> ShowS
[DownloadResult] -> ShowS
DownloadResult -> [Char]
(Int -> DownloadResult -> ShowS)
-> (DownloadResult -> [Char])
-> ([DownloadResult] -> ShowS)
-> Show DownloadResult
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> DownloadResult -> ShowS
showsPrec :: Int -> DownloadResult -> ShowS
$cshow :: DownloadResult -> [Char]
show :: DownloadResult -> [Char]
$cshowList :: [DownloadResult] -> ShowS
showList :: [DownloadResult] -> ShowS
Show, ReadPrec [DownloadResult]
ReadPrec DownloadResult
Int -> ReadS DownloadResult
ReadS [DownloadResult]
(Int -> ReadS DownloadResult)
-> ReadS [DownloadResult]
-> ReadPrec DownloadResult
-> ReadPrec [DownloadResult]
-> Read DownloadResult
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
$creadsPrec :: Int -> ReadS DownloadResult
readsPrec :: Int -> ReadS DownloadResult
$creadList :: ReadS [DownloadResult]
readList :: ReadS [DownloadResult]
$creadPrec :: ReadPrec DownloadResult
readPrec :: ReadPrec DownloadResult
$creadListPrec :: ReadPrec [DownloadResult]
readListPrec :: ReadPrec [DownloadResult]
Read, DownloadResult -> DownloadResult -> Bool
(DownloadResult -> DownloadResult -> Bool)
-> (DownloadResult -> DownloadResult -> Bool) -> Eq DownloadResult
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: DownloadResult -> DownloadResult -> Bool
== :: DownloadResult -> DownloadResult -> Bool
$c/= :: DownloadResult -> DownloadResult -> Bool
/= :: DownloadResult -> DownloadResult -> Bool
Eq)

-- | Result of 'rawDownload' operation.
data RawDownloadResult
    = RawDownloadResult
      { RawDownloadResult -> Status
rdrStatus :: N.Status
      , RawDownloadResult -> HttpVersion
rdrHttpVersion :: N.HttpVersion
      , RawDownloadResult -> ResponseHeaders
rdrHeaders :: N.ResponseHeaders
      , RawDownloadResult -> ByteString
rdrBody :: B.ByteString
      , RawDownloadResult -> CookieJar
rdrCookieJar :: C.CookieJar
      }
    deriving Int -> RawDownloadResult -> ShowS
[RawDownloadResult] -> ShowS
RawDownloadResult -> [Char]
(Int -> RawDownloadResult -> ShowS)
-> (RawDownloadResult -> [Char])
-> ([RawDownloadResult] -> ShowS)
-> Show RawDownloadResult
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> RawDownloadResult -> ShowS
showsPrec :: Int -> RawDownloadResult -> ShowS
$cshow :: RawDownloadResult -> [Char]
show :: RawDownloadResult -> [Char]
$cshowList :: [RawDownloadResult] -> ShowS
showList :: [RawDownloadResult] -> ShowS
Show

-- | @If-None-Match@ and/or @If-Modified-Since@ headers.
type DownloadOptions = [String]

-- | Settings used in downloader.
data DownloaderSettings
    = DownloaderSettings
      { DownloaderSettings -> ByteString
dsUserAgent :: B.ByteString
        -- ^ User agent string. Default: @\"Mozilla\/5.0 (compatible; HttpConduitDownloader\/1.0; +http:\/\/hackage.haskell.org\/package\/http-conduit-downloader)\"@.
        --
        -- Be a good crawler. Provide your User-Agent please.
      , DownloaderSettings -> Int
dsTimeout :: Int
        -- ^ Download timeout. Default: 30 seconds.
      , DownloaderSettings -> ManagerSettings
dsManagerSettings :: C.ManagerSettings
        -- ^ Conduit 'Manager' settings.
        -- Default: ManagerSettings with SSL certificate checks removed.
      , DownloaderSettings -> Int
dsMaxDownloadSize :: Int
        -- ^ Download size limit in bytes. Default: 10MB.
      }
-- http://wiki.apache.org/nutch/OptimizingCrawls
-- use 10 seconds as default timeout (too small).

instance Default DownloaderSettings where
    def :: DownloaderSettings
def =
        DownloaderSettings
        { dsUserAgent :: ByteString
dsUserAgent = ByteString
"Mozilla/5.0 (compatible; HttpConduitDownloader/1.0; +http://hackage.haskell.org/package/http-conduit-downloader)"
        , dsTimeout :: Int
dsTimeout = Int
30
        , dsManagerSettings :: ManagerSettings
dsManagerSettings =
            (IO SSLContext -> ManagerSettings
C.opensslManagerSettings (IO SSLContext -> ManagerSettings)
-> IO SSLContext -> ManagerSettings
forall a b. (a -> b) -> a -> b
$ SSLContext -> IO SSLContext
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return SSLContext
globalSSLContext)
            { C.managerProxyInsecure = C.proxyFromRequest
            , C.managerProxySecure = C.proxyFromRequest
            , C.managerMaxHeaderLength = Just $ C.MaxHeaderLength 65536
            }
        , dsMaxDownloadSize :: Int
dsMaxDownloadSize = Int
10Int -> Int -> Int
forall a. Num a => a -> a -> a
*Int
1024Int -> Int -> Int
forall a. Num a => a -> a -> a
*Int
1024
        }

-- tls package doesn't handle some sites:
-- https://github.com/vincenthz/hs-tls/issues/53
-- plus tls is about 2 times slower than HsOpenSSL
-- using OpenSSL instead

globalSSLContext :: SSL.SSLContext
globalSSLContext :: SSLContext
globalSSLContext = IO SSLContext -> SSLContext
forall a. IO a -> a
unsafePerformIO (IO SSLContext -> SSLContext) -> IO SSLContext -> SSLContext
forall a b. (a -> b) -> a -> b
$ do
    SSLContext
ctx <- IO SSLContext
SSL.context
--     SSL.contextSetCiphers ctx "DEFAULT"
--     SSL.contextSetVerificationMode ctx SSL.VerifyNone
--     SSL.contextAddOption ctx SSL.SSL_OP_NO_SSLv3
--     SSL.contextAddOption ctx SSL.SSL_OP_ALL
    SSLContext -> IO SSLContext
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return SSLContext
ctx
{-# NOINLINE globalSSLContext #-}

-- | Keeps http-client 'Manager' and 'DownloaderSettings'.
data Downloader
    = Downloader
      { Downloader -> Manager
manager :: C.Manager
      , Downloader -> DownloaderSettings
settings :: DownloaderSettings
      }

-- | Create a 'Downloader' with settings.
newDownloader :: DownloaderSettings -> IO Downloader
newDownloader :: DownloaderSettings -> IO Downloader
newDownloader DownloaderSettings
s = do
    IO () -> IO ()
forall a. IO a -> IO a
SSL.withOpenSSL (IO () -> IO ()) -> IO () -> IO ()
forall a b. (a -> b) -> a -> b
$ () -> IO ()
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return () -- init in case it wasn't initialized yet
    Manager
m <- ManagerSettings -> IO Manager
C.newManager (ManagerSettings -> IO Manager) -> ManagerSettings -> IO Manager
forall a b. (a -> b) -> a -> b
$ DownloaderSettings -> ManagerSettings
dsManagerSettings DownloaderSettings
s
    Downloader -> IO Downloader
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (Downloader -> IO Downloader) -> Downloader -> IO Downloader
forall a b. (a -> b) -> a -> b
$ Manager -> DownloaderSettings -> Downloader
Downloader Manager
m DownloaderSettings
s

-- | Create a new 'Downloader', use it in the provided function,
-- and then release it.
withDownloader :: (Downloader -> IO a) -> IO a
withDownloader :: forall a. (Downloader -> IO a) -> IO a
withDownloader = DownloaderSettings -> (Downloader -> IO a) -> IO a
forall a. DownloaderSettings -> (Downloader -> IO a) -> IO a
withDownloaderSettings DownloaderSettings
forall a. Default a => a
def

-- | Create a new 'Downloader' with provided settings,
-- use it in the provided function, and then release it.
withDownloaderSettings :: DownloaderSettings -> (Downloader -> IO a) -> IO a
withDownloaderSettings :: forall a. DownloaderSettings -> (Downloader -> IO a) -> IO a
withDownloaderSettings DownloaderSettings
s Downloader -> IO a
f = Downloader -> IO a
f (Downloader -> IO a) -> IO Downloader -> IO a
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< DownloaderSettings -> IO Downloader
newDownloader DownloaderSettings
s

parseUrl :: String -> Either E.SomeException C.Request
parseUrl :: [Char] -> Either SomeException Request
parseUrl = [Char] -> Either SomeException Request
forall (m :: * -> *). MonadThrow m => [Char] -> m Request
C.parseRequest ([Char] -> Either SomeException Request)
-> ShowS -> [Char] -> Either SomeException Request
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> ShowS
forall a. (a -> Bool) -> [a] -> [a]
takeWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'#')

-- | Perform download
download  ::    Downloader
             -> String -- ^ URL
             -> Maybe NS.HostAddress -- ^ Optional resolved 'HostAddress'
             -> DownloadOptions
             -> IO DownloadResult
download :: Downloader
-> [Char]
-> Maybe HostAddress
-> DownloadOptions
-> IO DownloadResult
download = (Request -> IO Request)
-> Downloader
-> [Char]
-> Maybe HostAddress
-> DownloadOptions
-> IO DownloadResult
downloadG Request -> IO Request
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return

-- | Perform HTTP POST.
post :: Downloader -> String -> Maybe NS.HostAddress -> B.ByteString
     -> IO DownloadResult
post :: Downloader
-> [Char] -> Maybe HostAddress -> ByteString -> IO DownloadResult
post Downloader
d [Char]
url Maybe HostAddress
ha ByteString
dat =
    (Request -> IO Request)
-> Downloader
-> [Char]
-> Maybe HostAddress
-> DownloadOptions
-> IO DownloadResult
downloadG (Request -> IO Request
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (Request -> IO Request)
-> (Request -> Request) -> Request -> IO Request
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Request -> Request
postRequest ByteString
dat) Downloader
d [Char]
url Maybe HostAddress
ha []

-- | Make HTTP POST request.
postRequest :: B.ByteString -> C.Request -> C.Request
postRequest :: ByteString -> Request -> Request
postRequest ByteString
dat Request
rq =
    Request
rq { C.method = N.methodPost
       , C.requestBody = C.RequestBodyBS dat }

-- | Generic version of 'download'
-- with ability to modify http-client 'Request'.
downloadG ::    (C.Request -> IO C.Request)
                -- ^ Function to modify 'Request'
                -- (e.g. sign or make 'postRequest')
             -> Downloader
             -> String -- ^ URL
             -> Maybe NS.HostAddress -- ^ Optional resolved 'HostAddress'
             -> DownloadOptions
             -> IO (DownloadResult)
downloadG :: (Request -> IO Request)
-> Downloader
-> [Char]
-> Maybe HostAddress
-> DownloadOptions
-> IO DownloadResult
downloadG Request -> IO Request
f Downloader
d [Char]
u Maybe HostAddress
h DownloadOptions
o = ((DownloadResult, Maybe RawDownloadResult) -> DownloadResult)
-> IO (DownloadResult, Maybe RawDownloadResult)
-> IO DownloadResult
forall a b. (a -> b) -> IO a -> IO b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (DownloadResult, Maybe RawDownloadResult) -> DownloadResult
forall a b. (a, b) -> a
fst (IO (DownloadResult, Maybe RawDownloadResult) -> IO DownloadResult)
-> IO (DownloadResult, Maybe RawDownloadResult)
-> IO DownloadResult
forall a b. (a -> b) -> a -> b
$ (Request -> IO Request)
-> Downloader
-> [Char]
-> Maybe HostAddress
-> DownloadOptions
-> IO (DownloadResult, Maybe RawDownloadResult)
rawDownload Request -> IO Request
f Downloader
d [Char]
u Maybe HostAddress
h DownloadOptions
o

-- | Even more generic version of 'download', which returns 'RawDownloadResult'.
-- 'RawDownloadResult' is optional since it can not be determined on timeouts
-- and connection errors.
rawDownload ::  (C.Request -> IO C.Request)
                -- ^ Function to modify 'Request'
                -- (e.g. sign or make 'postRequest')
             -> Downloader
             -> String -- ^ URL
             -> Maybe NS.HostAddress -- ^ Optional resolved 'HostAddress'
             -> DownloadOptions
             -> IO (DownloadResult, Maybe RawDownloadResult)
rawDownload :: (Request -> IO Request)
-> Downloader
-> [Char]
-> Maybe HostAddress
-> DownloadOptions
-> IO (DownloadResult, Maybe RawDownloadResult)
rawDownload Request -> IO Request
f (Downloader {Manager
DownloaderSettings
manager :: Downloader -> Manager
settings :: Downloader -> DownloaderSettings
manager :: Manager
settings :: DownloaderSettings
..}) [Char]
url Maybe HostAddress
hostAddress DownloadOptions
opts =
  case [Char] -> Either SomeException Request
parseUrl [Char]
url of
    Left SomeException
e ->
        (DownloadResult -> (DownloadResult, Maybe RawDownloadResult))
-> IO DownloadResult
-> IO (DownloadResult, Maybe RawDownloadResult)
forall a b. (a -> b) -> IO a -> IO b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (, Maybe RawDownloadResult
forall a. Maybe a
Nothing) (IO DownloadResult -> IO (DownloadResult, Maybe RawDownloadResult))
-> IO DownloadResult
-> IO (DownloadResult, Maybe RawDownloadResult)
forall a b. (a -> b) -> a -> b
$
        IO DownloadResult
-> (HttpException -> IO DownloadResult)
-> Maybe HttpException
-> IO DownloadResult
forall b a. b -> (a -> b) -> Maybe a -> b
maybe (DownloadResult -> IO DownloadResult
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (DownloadResult -> IO DownloadResult)
-> DownloadResult -> IO DownloadResult
forall a b. (a -> b) -> a -> b
$ [Char] -> DownloadResult
DRError ([Char] -> DownloadResult) -> [Char] -> DownloadResult
forall a b. (a -> b) -> a -> b
$ SomeException -> [Char]
forall a. Show a => a -> [Char]
show SomeException
e) ([Char] -> HttpException -> IO DownloadResult
forall (m :: * -> *).
Monad m =>
[Char] -> HttpException -> m DownloadResult
httpExceptionToDR [Char]
url)
              (SomeException -> Maybe HttpException
forall e. Exception e => SomeException -> Maybe e
E.fromException SomeException
e)
    Right Request
rq -> do
        let dl :: Request -> Bool -> IO (DownloadResult, Maybe RawDownloadResult)
dl Request
req Bool
firstTime = do
                UTCTime
t0 <- IO UTCTime
getCurrentTime
                (DownloadResult, Maybe RawDownloadResult)
r <- (HttpException -> IO (DownloadResult, Maybe RawDownloadResult))
-> IO (DownloadResult, Maybe RawDownloadResult)
-> IO (DownloadResult, Maybe RawDownloadResult)
forall e a. Exception e => (e -> IO a) -> IO a -> IO a
E.handle ((DownloadResult -> (DownloadResult, Maybe RawDownloadResult))
-> IO DownloadResult
-> IO (DownloadResult, Maybe RawDownloadResult)
forall a b. (a -> b) -> IO a -> IO b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (, Maybe RawDownloadResult
forall a. Maybe a
Nothing) (IO DownloadResult -> IO (DownloadResult, Maybe RawDownloadResult))
-> (HttpException -> IO DownloadResult)
-> HttpException
-> IO (DownloadResult, Maybe RawDownloadResult)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Char] -> HttpException -> IO DownloadResult
forall (m :: * -> *).
Monad m =>
[Char] -> HttpException -> m DownloadResult
httpExceptionToDR [Char]
url) (IO (DownloadResult, Maybe RawDownloadResult)
 -> IO (DownloadResult, Maybe RawDownloadResult))
-> IO (DownloadResult, Maybe RawDownloadResult)
-> IO (DownloadResult, Maybe RawDownloadResult)
forall a b. (a -> b) -> a -> b
$
                    Request
-> Manager
-> (Response BodyReader
    -> IO (DownloadResult, Maybe RawDownloadResult))
-> IO (DownloadResult, Maybe RawDownloadResult)
forall a.
Request -> Manager -> (Response BodyReader -> IO a) -> IO a
C.withResponse Request
req Manager
manager ((Response BodyReader
  -> IO (DownloadResult, Maybe RawDownloadResult))
 -> IO (DownloadResult, Maybe RawDownloadResult))
-> (Response BodyReader
    -> IO (DownloadResult, Maybe RawDownloadResult))
-> IO (DownloadResult, Maybe RawDownloadResult)
forall a b. (a -> b) -> a -> b
$ \ Response BodyReader
r -> do
                    let s :: Status
s = Response BodyReader -> Status
forall body. Response body -> Status
C.responseStatus Response BodyReader
r
                        h :: ResponseHeaders
h = Response BodyReader -> ResponseHeaders
forall body. Response body -> ResponseHeaders
C.responseHeaders Response BodyReader
r
                        rdr :: ByteString -> RawDownloadResult
rdr ByteString
d =
                            RawDownloadResult
                            { rdrStatus :: Status
rdrStatus = Status
s
                            , rdrHttpVersion :: HttpVersion
rdrHttpVersion = Response BodyReader -> HttpVersion
forall body. Response body -> HttpVersion
C.responseVersion Response BodyReader
r
                            , rdrHeaders :: ResponseHeaders
rdrHeaders = ResponseHeaders
h
                            , rdrBody :: ByteString
rdrBody = ByteString
d
                            , rdrCookieJar :: CookieJar
rdrCookieJar = Response BodyReader -> CookieJar
forall body. Response body -> CookieJar
C.responseCookieJar Response BodyReader
r
                            }
                        readLen :: ByteString -> Int
readLen = (Int -> Char -> Int) -> Int -> ByteString -> Int
forall a. (a -> Char -> a) -> a -> ByteString -> a
B.foldl' (\ Int
a Char
d -> Int
a Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
10 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Char -> Int
ord Char
d Int -> Int -> Int
forall a. Num a => a -> a -> a
- Char -> Int
ord Char
'0') Int
0
                    Maybe (Maybe ByteString)
mbb <- case HeaderName -> ResponseHeaders -> Maybe ByteString
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup HeaderName
"Content-Length" ResponseHeaders
h of
                        Just ByteString
l
                            | (Char -> Bool) -> ByteString -> Bool
B.all (\ Char
c -> Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'0' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'9') ByteString
l
                              Bool -> Bool -> Bool
&& Bool -> Bool
not (ByteString -> Bool
B.null ByteString
l)
                              Bool -> Bool -> Bool
&& ByteString -> Int
readLen ByteString
l Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> DownloaderSettings -> Int
dsMaxDownloadSize DownloaderSettings
settings
                            -> do
                               -- liftIO $ putStrLn "Content-Length too large"
                               Maybe (Maybe ByteString) -> IO (Maybe (Maybe ByteString))
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe (Maybe ByteString)
forall a. Maybe a
Nothing
                               -- no reason to download body
                        Maybe ByteString
_ -> do
                            UTCTime
t1 <- IO UTCTime
getCurrentTime
                            let timeSpentMicro :: POSIXTime
timeSpentMicro = UTCTime -> UTCTime -> POSIXTime
diffUTCTime UTCTime
t1 UTCTime
t0 POSIXTime -> POSIXTime -> POSIXTime
forall a. Num a => a -> a -> a
* POSIXTime
1000000
                                remainingTime :: Int
remainingTime =
                                    POSIXTime -> Int
forall b. Integral b => POSIXTime -> b
forall a b. (RealFrac a, Integral b) => a -> b
round (POSIXTime -> Int) -> POSIXTime -> Int
forall a b. (a -> b) -> a -> b
$ Int -> POSIXTime
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
to POSIXTime -> POSIXTime -> POSIXTime
forall a. Num a => a -> a -> a
- POSIXTime
timeSpentMicro
                            if Int
remainingTime Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0 then
                                Maybe (Maybe ByteString) -> IO (Maybe (Maybe ByteString))
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe (Maybe ByteString)
forall a. Maybe a
Nothing
                            else
                                Int -> IO (Maybe ByteString) -> IO (Maybe (Maybe ByteString))
forall a. Int -> IO a -> IO (Maybe a)
timeout Int
remainingTime
                                (IO (Maybe ByteString) -> IO (Maybe (Maybe ByteString)))
-> IO (Maybe ByteString) -> IO (Maybe (Maybe ByteString))
forall a b. (a -> b) -> a -> b
$ BodyReader -> Int -> IO (Maybe ByteString)
sinkByteString (BodyReader -> BodyReader
C.brRead (BodyReader -> BodyReader) -> BodyReader -> BodyReader
forall a b. (a -> b) -> a -> b
$ Response BodyReader -> BodyReader
forall body. Response body -> body
C.responseBody Response BodyReader
r)
                                    (DownloaderSettings -> Int
dsMaxDownloadSize DownloaderSettings
settings)
                    case Maybe (Maybe ByteString)
mbb of
                        Maybe (Maybe ByteString)
Nothing ->
                            (DownloadResult, Maybe RawDownloadResult)
-> IO (DownloadResult, Maybe RawDownloadResult)
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return ([Char] -> DownloadResult
DRError [Char]
"Timeout", RawDownloadResult -> Maybe RawDownloadResult
forall a. a -> Maybe a
Just (RawDownloadResult -> Maybe RawDownloadResult)
-> RawDownloadResult -> Maybe RawDownloadResult
forall a b. (a -> b) -> a -> b
$ ByteString -> RawDownloadResult
rdr ByteString
"")
                        Just (Just ByteString
b) -> do
                            let d :: ByteString
d = ResponseHeaders -> ByteString -> ByteString
forall {a}.
(Eq a, IsString a) =>
[(a, ByteString)] -> ByteString -> ByteString
tryDeflate ResponseHeaders
h ByteString
b
                            UTCTime
curTime <- IO UTCTime
getCurrentTime
                            (DownloadResult, Maybe RawDownloadResult)
-> IO (DownloadResult, Maybe RawDownloadResult)
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return
                                (UTCTime
-> [Char]
-> Status
-> ResponseHeaders
-> ByteString
-> DownloadResult
makeDownloadResultC UTCTime
curTime [Char]
url Status
s ResponseHeaders
h ByteString
d
                                , RawDownloadResult -> Maybe RawDownloadResult
forall a. a -> Maybe a
Just (RawDownloadResult -> Maybe RawDownloadResult)
-> RawDownloadResult -> Maybe RawDownloadResult
forall a b. (a -> b) -> a -> b
$ ByteString -> RawDownloadResult
rdr ByteString
d)
                        Just Maybe ByteString
Nothing ->
                            (DownloadResult, Maybe RawDownloadResult)
-> IO (DownloadResult, Maybe RawDownloadResult)
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return ([Char] -> DownloadResult
DRError [Char]
"Too much data", RawDownloadResult -> Maybe RawDownloadResult
forall a. a -> Maybe a
Just (RawDownloadResult -> Maybe RawDownloadResult)
-> RawDownloadResult -> Maybe RawDownloadResult
forall a b. (a -> b) -> a -> b
$ ByteString -> RawDownloadResult
rdr ByteString
"")
                case (DownloadResult, Maybe RawDownloadResult)
r of
                    (DRError [Char]
e, Maybe RawDownloadResult
_)
                        | [Char]
"ZlibException" [Char] -> [Char] -> Bool
forall a. Eq a => [a] -> [a] -> Bool
`isPrefixOf` [Char]
e Bool -> Bool -> Bool
&& Bool
firstTime ->
                            -- some sites return junk instead of gzip data.
                            -- retrying without compression
                            Request -> Bool -> IO (DownloadResult, Maybe RawDownloadResult)
dl (Request -> Request
disableCompression Request
req) Bool
False
                    (DownloadResult, Maybe RawDownloadResult)
_ ->
                        (DownloadResult, Maybe RawDownloadResult)
-> IO (DownloadResult, Maybe RawDownloadResult)
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (DownloadResult, Maybe RawDownloadResult)
r
            disableCompression :: Request -> Request
disableCompression Request
req =
                Request
req { C.requestHeaders =
                          ("Accept-Encoding", "") : C.requestHeaders req }
            rq1 :: Request
rq1 = Request
rq { C.requestHeaders =
                               [("Accept", "*/*")
                               ,("User-Agent", dsUserAgent settings)
                               ]
                               ++ map toHeader opts
                               ++ C.requestHeaders rq
                     , C.redirectCount = 0
                     , C.responseTimeout = C.responseTimeoutMicro to
                       -- it's only connection + headers timeout,
                       -- response body needs additional timeout
                     , C.hostAddress = hostAddress
                     , C.checkResponse = \ Request
_ Response BodyReader
_ -> () -> IO ()
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return ()
                     }
            to :: Int
to = DownloaderSettings -> Int
dsTimeout DownloaderSettings
settings Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
1000000
        Request
req <- Request -> IO Request
f Request
rq1
        Request -> Bool -> IO (DownloadResult, Maybe RawDownloadResult)
dl Request
req Bool
True
    where toHeader :: String -> N.Header
          toHeader :: [Char] -> Header
toHeader [Char]
h = let ([Char]
a,[Char]
b) = (Char -> Bool) -> [Char] -> ([Char], [Char])
forall a. (a -> Bool) -> [a] -> ([a], [a])
break (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
':') [Char]
h in
                       ([Char] -> HeaderName
forall a. IsString a => [Char] -> a
fromString [Char]
a, [Char] -> ByteString
forall a. IsString a => [Char] -> a
fromString (ShowS
forall a. HasCallStack => [a] -> [a]
tail [Char]
b))
          tryDeflate :: [(a, ByteString)] -> ByteString -> ByteString
tryDeflate [(a, ByteString)]
headers ByteString
b
              | Just ByteString
d <- a -> [(a, ByteString)] -> Maybe ByteString
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup a
"Content-Encoding" [(a, ByteString)]
headers
              , (Char -> Char) -> ByteString -> ByteString
B.map Char -> Char
toLower ByteString
d ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
== ByteString
"deflate"
                  = ByteString -> ByteString
BL.toStrict (ByteString -> ByteString) -> ByteString -> ByteString
forall a b. (a -> b) -> a -> b
$ ByteString -> ByteString
Deflate.decompress (ByteString -> ByteString) -> ByteString -> ByteString
forall a b. (a -> b) -> a -> b
$ ByteString -> ByteString
BL.fromStrict ByteString
b
              | Bool
otherwise = ByteString
b

httpExceptionToDR :: Monad m => String -> C.HttpException -> m DownloadResult
httpExceptionToDR :: forall (m :: * -> *).
Monad m =>
[Char] -> HttpException -> m DownloadResult
httpExceptionToDR [Char]
url HttpException
exn = DownloadResult -> m DownloadResult
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return (DownloadResult -> m DownloadResult)
-> DownloadResult -> m DownloadResult
forall a b. (a -> b) -> a -> b
$ case HttpException
exn of
    C.HttpExceptionRequest Request
_ HttpExceptionContent
ec -> [Char] -> HttpExceptionContent -> DownloadResult
httpExceptionContentToDR [Char]
url HttpExceptionContent
ec
    C.InvalidUrlException [Char]
_ [Char]
e
        | [Char]
e [Char] -> [Char] -> Bool
forall a. Eq a => a -> a -> Bool
== [Char]
"Invalid URL" -> [Char] -> DownloadResult
DRError [Char]
e
        | Bool
otherwise -> [Char] -> DownloadResult
DRError ([Char] -> DownloadResult) -> [Char] -> DownloadResult
forall a b. (a -> b) -> a -> b
$ [Char]
"Invalid URL: " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
e

httpExceptionContentToDR :: String -> C.HttpExceptionContent -> DownloadResult
httpExceptionContentToDR :: [Char] -> HttpExceptionContent -> DownloadResult
httpExceptionContentToDR [Char]
url HttpExceptionContent
ec = case HttpExceptionContent
ec of
    C.StatusCodeException Response ()
r ByteString
b ->
      UTCTime
-> [Char]
-> Status
-> ResponseHeaders
-> ByteString
-> DownloadResult
makeDownloadResultC (POSIXTime -> UTCTime
posixSecondsToUTCTime POSIXTime
0) [Char]
url
      (Response () -> Status
forall body. Response body -> Status
C.responseStatus Response ()
r) (Response () -> ResponseHeaders
forall body. Response body -> ResponseHeaders
C.responseHeaders Response ()
r) ByteString
b
    C.TooManyRedirects [Response ByteString]
_ -> [Char] -> DownloadResult
DRError [Char]
"Too many redirects"
    HttpExceptionContent
C.OverlongHeaders -> [Char] -> DownloadResult
DRError [Char]
"Overlong HTTP headers"
    HttpExceptionContent
C.ResponseTimeout -> [Char] -> DownloadResult
DRError [Char]
"Response timeout"
    HttpExceptionContent
C.ConnectionTimeout -> [Char] -> DownloadResult
DRError [Char]
"Connection timeout"
    C.ConnectionFailure SomeException
e -> [Char] -> DownloadResult
DRError ([Char] -> DownloadResult) -> [Char] -> DownloadResult
forall a b. (a -> b) -> a -> b
$ [Char]
"Connection failed: " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ SomeException -> [Char]
forall a. Show a => a -> [Char]
show SomeException
e
    C.InvalidStatusLine ByteString
l -> [Char] -> DownloadResult
DRError ([Char] -> DownloadResult) -> [Char] -> DownloadResult
forall a b. (a -> b) -> a -> b
$ [Char]
"Invalid HTTP status line:\n" [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ ByteString -> [Char]
B.unpack ByteString
l
    C.InvalidHeader ByteString
h -> [Char] -> DownloadResult
DRError ([Char] -> DownloadResult) -> [Char] -> DownloadResult
forall a b. (a -> b) -> a -> b
$ [Char]
"Invalid HTTP header:\n" [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ ByteString -> [Char]
B.unpack ByteString
h
    C.InvalidRequestHeader ByteString
h -> [Char] -> DownloadResult
DRError ([Char] -> DownloadResult) -> [Char] -> DownloadResult
forall a b. (a -> b) -> a -> b
$ [Char]
"Invalid HTTP request header:\n" [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ ByteString -> [Char]
B.unpack ByteString
h
    C.InternalException SomeException
e
        | Just (ConnectionAbruptlyTerminated
_ :: SSL.ConnectionAbruptlyTerminated) <- SomeException -> Maybe ConnectionAbruptlyTerminated
forall e. Exception e => SomeException -> Maybe e
E.fromException SomeException
e ->
            [Char] -> DownloadResult
DRError [Char]
"Connection abruptly terminated"
        | Just (SSL.ProtocolError [Char]
pe) <- SomeException -> Maybe ProtocolError
forall e. Exception e => SomeException -> Maybe e
E.fromException SomeException
e ->
            [Char] -> DownloadResult
DRError ([Char] -> DownloadResult) -> [Char] -> DownloadResult
forall a b. (a -> b) -> a -> b
$ [Char]
"SSL protocol error: " [Char] -> ShowS
forall a. Semigroup a => a -> a -> a
<> [Char]
pe
        | Bool
otherwise -> [Char] -> DownloadResult
DRError ([Char] -> DownloadResult) -> [Char] -> DownloadResult
forall a b. (a -> b) -> a -> b
$ SomeException -> [Char]
forall a. Show a => a -> [Char]
show SomeException
e
    C.ProxyConnectException ByteString
_ Int
_ Status
s ->
        [Char] -> DownloadResult
DRError ([Char] -> DownloadResult) -> [Char] -> DownloadResult
forall a b. (a -> b) -> a -> b
$ [Char]
"Proxy CONNECT failed: " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ Status -> [Char]
httpStatusString Status
s
    HttpExceptionContent
C.NoResponseDataReceived -> [Char] -> DownloadResult
DRError [Char]
"No response data received"
    HttpExceptionContent
C.TlsNotSupported -> [Char] -> DownloadResult
DRError [Char]
"TLS not supported"
    C.WrongRequestBodyStreamSize Word64
e Word64
a ->
        [Char] -> DownloadResult
DRError ([Char] -> DownloadResult) -> [Char] -> DownloadResult
forall a b. (a -> b) -> a -> b
$ [Char]
"The request body provided did not match the expected size "
        [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ Word64 -> Word64 -> [Char]
forall {a} {a}. (Show a, Show a) => a -> a -> [Char]
ea Word64
e Word64
a
    C.ResponseBodyTooShort Word64
e Word64
a -> [Char] -> DownloadResult
DRError ([Char] -> DownloadResult) -> [Char] -> DownloadResult
forall a b. (a -> b) -> a -> b
$ [Char]
"Response body too short " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ Word64 -> Word64 -> [Char]
forall {a} {a}. (Show a, Show a) => a -> a -> [Char]
ea Word64
e Word64
a
    HttpExceptionContent
C.InvalidChunkHeaders -> [Char] -> DownloadResult
DRError [Char]
"Invalid chunk headers"
    HttpExceptionContent
C.IncompleteHeaders -> [Char] -> DownloadResult
DRError [Char]
"Incomplete headers"
    C.InvalidDestinationHost ByteString
_ -> [Char] -> DownloadResult
DRError [Char]
"Invalid destination host"
    C.HttpZlibException ZlibException
e -> [Char] -> DownloadResult
DRError ([Char] -> DownloadResult) -> [Char] -> DownloadResult
forall a b. (a -> b) -> a -> b
$ ZlibException -> [Char]
forall a. Show a => a -> [Char]
show ZlibException
e
    C.InvalidProxyEnvironmentVariable Text
n Text
v ->
        [Char] -> DownloadResult
DRError ([Char] -> DownloadResult) -> [Char] -> DownloadResult
forall a b. (a -> b) -> a -> b
$ [Char]
"Invalid proxy environment variable "
        [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ Text -> [Char]
forall a. Show a => a -> [Char]
show Text
n [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
"=" [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ Text -> [Char]
forall a. Show a => a -> [Char]
show Text
v
    C.InvalidProxySettings Text
s -> [Char] -> DownloadResult
DRError ([Char] -> DownloadResult) -> [Char] -> DownloadResult
forall a b. (a -> b) -> a -> b
$ [Char]
"Invalid proxy settings:\n" [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ Text -> [Char]
T.unpack Text
s
    HttpExceptionContent
C.ConnectionClosed -> [Char] -> DownloadResult
DRError [Char]
"Connection closed"
    where ea :: a -> a -> [Char]
ea a
expected a
actual =
              [Char]
"(expected " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ a -> [Char]
forall a. Show a => a -> [Char]
show a
expected [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
" bytes, actual is "
              [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ a -> [Char]
forall a. Show a => a -> [Char]
show a
actual [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
" bytes)"

bufSize :: Int
bufSize :: Int
bufSize = Int
32 Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
1024 Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
overhead -- Copied from Data.ByteString.Lazy.
    where overhead :: Int
overhead = Int
2 Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int -> Int
forall a. Storable a => a -> Int
sizeOf (Int
forall a. HasCallStack => a
undefined :: Int)

newBuf :: IO B.ByteString
newBuf :: BodyReader
newBuf = do
    ForeignPtr Word8
fp <- Int -> IO (ForeignPtr Word8)
forall a. Int -> IO (ForeignPtr a)
B.mallocByteString Int
bufSize
    ByteString -> BodyReader
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (ByteString -> BodyReader) -> ByteString -> BodyReader
forall a b. (a -> b) -> a -> b
$ ForeignPtr Word8 -> Int -> Int -> ByteString
B.PS ForeignPtr Word8
fp Int
0 Int
0

addBs :: [B.ByteString] -> B.ByteString -> B.ByteString
      -> IO ([B.ByteString], B.ByteString)
addBs :: [ByteString]
-> ByteString -> ByteString -> IO ([ByteString], ByteString)
addBs [ByteString]
acc (B.PS ForeignPtr Word8
bfp Int
_ Int
bl) (B.PS ForeignPtr Word8
sfp Int
offs Int
sl) = do
    let cpSize :: Int
cpSize = Int -> Int -> Int
forall a. Ord a => a -> a -> a
min (Int
bufSize Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
bl) Int
sl
        bl' :: Int
bl' = Int
bl Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
cpSize
    ForeignPtr Word8 -> (Ptr Word8 -> IO ()) -> IO ()
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr Word8
bfp ((Ptr Word8 -> IO ()) -> IO ()) -> (Ptr Word8 -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \ Ptr Word8
dst -> ForeignPtr Word8 -> (Ptr Word8 -> IO ()) -> IO ()
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr Word8
sfp ((Ptr Word8 -> IO ()) -> IO ()) -> (Ptr Word8 -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \ Ptr Word8
src ->
        Ptr Word8 -> Ptr Word8 -> Int -> IO ()
B.memcpy (Ptr Word8
dst Ptr Word8 -> Int -> Ptr Word8
forall a b. Ptr a -> Int -> Ptr b
`plusPtr` Int
bl) (Ptr Word8
src Ptr Word8 -> Int -> Ptr Word8
forall a b. Ptr a -> Int -> Ptr b
`plusPtr` Int
offs) (Int -> Int
forall a. Enum a => Int -> a
toEnum Int
cpSize)
    if Int
bl' Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
bufSize then do
        ByteString
buf' <- BodyReader
newBuf
--        print ("filled", cpSize)
        [ByteString]
-> ByteString -> ByteString -> IO ([ByteString], ByteString)
addBs (ForeignPtr Word8 -> Int -> Int -> ByteString
B.PS ForeignPtr Word8
bfp Int
0 Int
bufSize ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
: [ByteString]
acc) ByteString
buf'
              (ForeignPtr Word8 -> Int -> Int -> ByteString
B.PS ForeignPtr Word8
sfp (Int
offs Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
cpSize) (Int
sl Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
cpSize))
    else do
--        print ("ok", cpSize, bl')
        ([ByteString], ByteString) -> IO ([ByteString], ByteString)
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return ([ByteString]
acc, ForeignPtr Word8 -> Int -> Int -> ByteString
B.PS ForeignPtr Word8
bfp Int
0 Int
bl')

-- | Sink data using 32k buffers to reduce memory fragmentation.
-- Returns 'Nothing' if downloaded too much data.
sinkByteString :: IO B.ByteString -> Int -> IO (Maybe B.ByteString)
sinkByteString :: BodyReader -> Int -> IO (Maybe ByteString)
sinkByteString BodyReader
readChunk Int
limit = do
    ByteString
buf <- BodyReader
newBuf
    Int -> [ByteString] -> ByteString -> IO (Maybe ByteString)
go Int
0 [] ByteString
buf
    where go :: Int -> [ByteString] -> ByteString -> IO (Maybe ByteString)
go Int
len [ByteString]
acc ByteString
buf = do
              ByteString
inp <- BodyReader
readChunk
              if ByteString -> Bool
B.null ByteString
inp then
                  Maybe ByteString -> IO (Maybe ByteString)
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (Maybe ByteString -> IO (Maybe ByteString))
-> Maybe ByteString -> IO (Maybe ByteString)
forall a b. (a -> b) -> a -> b
$ ByteString -> Maybe ByteString
forall a. a -> Maybe a
Just (ByteString -> Maybe ByteString) -> ByteString -> Maybe ByteString
forall a b. (a -> b) -> a -> b
$ [ByteString] -> ByteString
B.concat ([ByteString] -> ByteString) -> [ByteString] -> ByteString
forall a b. (a -> b) -> a -> b
$ [ByteString] -> [ByteString]
forall a. [a] -> [a]
reverse (ByteString
bufByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
:[ByteString]
acc)
              else do
                  ([ByteString]
acc', ByteString
buf') <- [ByteString]
-> ByteString -> ByteString -> IO ([ByteString], ByteString)
addBs [ByteString]
acc ByteString
buf ByteString
inp
                  let len' :: Int
len' = Int
len Int -> Int -> Int
forall a. Num a => a -> a -> a
+ ByteString -> Int
B.length ByteString
inp
                  if Int
len' Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
limit then
                      Maybe ByteString -> IO (Maybe ByteString)
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe ByteString
forall a. Maybe a
Nothing
                  else
                      Int -> [ByteString] -> ByteString -> IO (Maybe ByteString)
go Int
len' [ByteString]
acc' ByteString
buf'

makeDownloadResultC :: UTCTime -> String -> N.Status -> N.ResponseHeaders
                    -> B.ByteString -> DownloadResult
makeDownloadResultC :: UTCTime
-> [Char]
-> Status
-> ResponseHeaders
-> ByteString
-> DownloadResult
makeDownloadResultC UTCTime
curTime [Char]
url Status
c ResponseHeaders
headers ByteString
b = do
    if Status -> Int
N.statusCode Status
c Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
304 then
        DownloadResult
DRNotModified
    else if Status -> Int
N.statusCode Status
c Int -> [Int] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem`
          [ Int
300 -- Multiple choices
          , Int
301 -- Moved permanently
          , Int
302 -- Found
          , Int
303 -- See other
          , Int
307 -- Temporary redirect
          , Int
308 -- Permanent redirect
          ] then
        case HeaderName -> ResponseHeaders -> Maybe ByteString
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup HeaderName
"location" ResponseHeaders
headers of
            Just (ByteString -> [Char]
B.unpack -> [Char]
loc) ->
                [Char] -> DownloadResult
redirect ([Char] -> DownloadResult) -> [Char] -> DownloadResult
forall a b. (a -> b) -> a -> b
$
                    ShowS
relUri ((Char -> Bool) -> ShowS
forall a. (a -> Bool) -> [a] -> [a]
takeWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'#') ShowS -> ShowS
forall a b. (a -> b) -> a -> b
$ (Char -> Bool) -> ShowS
forall a. (a -> Bool) -> [a] -> [a]
dropWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
' ') [Char]
loc)
                    --  ^ Location can be relative and contain #fragment
            Maybe ByteString
_ ->
                [Char] -> DownloadResult
DRError ([Char] -> DownloadResult) -> [Char] -> DownloadResult
forall a b. (a -> b) -> a -> b
$ [Char]
"Redirect status, but no Location field\n"
                    [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ ByteString -> [Char]
B.unpack (Status -> ByteString
N.statusMessage Status
c) [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
"\n"
                    [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ DownloadOptions -> [Char]
unlines ((Header -> [Char]) -> ResponseHeaders -> DownloadOptions
forall a b. (a -> b) -> [a] -> [b]
map Header -> [Char]
forall a. Show a => a -> [Char]
show ResponseHeaders
headers)
    else if Status -> Int
N.statusCode Status
c Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
300 then
        [Char] -> DownloadResult
DRError ([Char] -> DownloadResult) -> [Char] -> DownloadResult
forall a b. (a -> b) -> a -> b
$ Status -> [Char]
httpStatusString Status
c
    else
        ByteString -> DownloadOptions -> DownloadResult
DROK ByteString
b (DownloadOptions -> ResponseHeaders -> DownloadOptions
forall {a}.
(Eq a, IsString a) =>
DownloadOptions -> [(a, ByteString)] -> DownloadOptions
redownloadOpts [] ResponseHeaders
headers)
    where redirect :: [Char] -> DownloadResult
redirect [Char]
r
--              | r == url = DRError $ "HTTP redirect to the same url?"
              | Bool
otherwise = [Char] -> DownloadResult
DRRedirect [Char]
r
          redownloadOpts :: DownloadOptions -> [(a, ByteString)] -> DownloadOptions
redownloadOpts DownloadOptions
acc [] = DownloadOptions -> DownloadOptions
forall a. [a] -> [a]
reverse DownloadOptions
acc
          redownloadOpts DownloadOptions
_ ((a
"Pragma", (Char -> Char) -> ByteString -> ByteString
B.map Char -> Char
toLower -> ByteString
tag) : [(a, ByteString)]
_)
              | ByteString
"no-cache" ByteString -> ByteString -> Bool
`B.isInfixOf` ByteString
tag = []
          redownloadOpts DownloadOptions
_ ((a
"Cache-Control", (Char -> Char) -> ByteString -> ByteString
B.map Char -> Char
toLower -> ByteString
tag) : [(a, ByteString)]
_)
              | (ByteString -> Bool) -> [ByteString] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any (ByteString -> ByteString -> Bool
`B.isInfixOf` ByteString
tag)
                [ByteString
"no-cache", ByteString
"no-store", ByteString
"must-revalidate", ByteString
"max-age=0"] = []
          redownloadOpts DownloadOptions
acc ((a
"Expires", ByteString
time):[(a, ByteString)]
xs)
              | [Char]
ts <- ByteString -> [Char]
B.unpack ByteString
time
              , Just UTCTime
t <- [Char] -> Maybe UTCTime
parseHttpTime [Char]
ts
              , UTCTime
t UTCTime -> UTCTime -> Bool
forall a. Ord a => a -> a -> Bool
> UTCTime
curTime =
                   DownloadOptions -> [(a, ByteString)] -> DownloadOptions
redownloadOpts DownloadOptions
acc [(a, ByteString)]
xs
              | Bool
otherwise = [] -- expires is non-valid or in the past
          redownloadOpts DownloadOptions
acc ((a
"ETag", ByteString
tag):[(a, ByteString)]
xs) =
              DownloadOptions -> [(a, ByteString)] -> DownloadOptions
redownloadOpts (([Char]
"If-None-Match: " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ ByteString -> [Char]
B.unpack ByteString
tag) [Char] -> DownloadOptions -> DownloadOptions
forall a. a -> [a] -> [a]
: DownloadOptions
acc) [(a, ByteString)]
xs
          redownloadOpts DownloadOptions
acc ((a
"Last-Modified", ByteString
time):[(a, ByteString)]
xs)
              | [Char]
ts <- ByteString -> [Char]
B.unpack ByteString
time
              , Just UTCTime
t <- [Char] -> Maybe UTCTime
parseHttpTime [Char]
ts
              , UTCTime
t UTCTime -> UTCTime -> Bool
forall a. Ord a => a -> a -> Bool
<= UTCTime
curTime = -- use only valid timestamps
              DownloadOptions -> [(a, ByteString)] -> DownloadOptions
redownloadOpts (([Char]
"If-Modified-Since: " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ ByteString -> [Char]
B.unpack ByteString
time) [Char] -> DownloadOptions -> DownloadOptions
forall a. a -> [a] -> [a]
: DownloadOptions
acc) [(a, ByteString)]
xs
          redownloadOpts DownloadOptions
acc ((a, ByteString)
_:[(a, ByteString)]
xs) = DownloadOptions -> [(a, ByteString)] -> DownloadOptions
redownloadOpts DownloadOptions
acc [(a, ByteString)]
xs
          fixNonAscii :: ShowS
fixNonAscii =
              (Char -> Bool) -> ShowS
escapeURIString
                  (\ Char
x -> Char -> Int
ord Char
x Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0x7f Bool -> Bool -> Bool
&& Char
x Char -> [Char] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` ([Char]
" []{}|\"" :: String)) ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
              ShowS
trimString
          relUri :: ShowS
relUri (ShowS
fixNonAscii -> [Char]
r) =
              [Char] -> Maybe [Char] -> [Char]
forall a. a -> Maybe a -> a
fromMaybe [Char]
r (Maybe [Char] -> [Char]) -> Maybe [Char] -> [Char]
forall a b. (a -> b) -> a -> b
$
              (URI -> [Char]) -> Maybe URI -> Maybe [Char]
forall a b. (a -> b) -> Maybe a -> Maybe b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ((ShowS -> ShowS
forall a b. (a -> b) -> a -> b
$ [Char]
"") (ShowS -> [Char]) -> (URI -> ShowS) -> URI -> [Char]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShowS -> URI -> ShowS
uriToString ShowS
forall a. a -> a
id) (Maybe URI -> Maybe [Char]) -> Maybe URI -> Maybe [Char]
forall a b. (a -> b) -> a -> b
$
              (URI -> URI -> URI) -> Maybe URI -> Maybe URI -> Maybe URI
forall (m :: * -> *) a1 a2 r.
Monad m =>
(a1 -> a2 -> r) -> m a1 -> m a2 -> m r
liftM2 URI -> URI -> URI
relativeTo
                  ([Char] -> Maybe URI
parseURIReference [Char]
r)
                  ([Char] -> Maybe URI
parseURI ([Char] -> Maybe URI) -> [Char] -> Maybe URI
forall a b. (a -> b) -> a -> b
$ ShowS
fixNonAscii [Char]
url)

httpStatusString :: N.Status -> [Char]
httpStatusString :: Status -> [Char]
httpStatusString Status
c =
    [Char]
"HTTP " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> [Char]
forall a. Show a => a -> [Char]
show (Status -> Int
N.statusCode Status
c) [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
" " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ ByteString -> [Char]
B.unpack (Status -> ByteString
N.statusMessage Status
c)

tryParseTime :: [String] -> String -> Maybe UTCTime
tryParseTime :: DownloadOptions -> [Char] -> Maybe UTCTime
tryParseTime DownloadOptions
formats [Char]
string =
    (Maybe UTCTime -> Maybe UTCTime -> Maybe UTCTime)
-> Maybe UTCTime -> [Maybe UTCTime] -> Maybe UTCTime
forall a b. (a -> b -> b) -> b -> [a] -> b
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr Maybe UTCTime -> Maybe UTCTime -> Maybe UTCTime
forall a. Maybe a -> Maybe a -> Maybe a
forall (m :: * -> *) a. MonadPlus m => m a -> m a -> m a
mplus Maybe UTCTime
forall a. Maybe a
Nothing ([Maybe UTCTime] -> Maybe UTCTime)
-> [Maybe UTCTime] -> Maybe UTCTime
forall a b. (a -> b) -> a -> b
$
    ([Char] -> Maybe UTCTime) -> DownloadOptions -> [Maybe UTCTime]
forall a b. (a -> b) -> [a] -> [b]
map (\ [Char]
fmt -> Bool -> TimeLocale -> [Char] -> [Char] -> Maybe UTCTime
forall (m :: * -> *) t.
(MonadFail m, ParseTime t) =>
Bool -> TimeLocale -> [Char] -> [Char] -> m t
parseTimeM Bool
True TimeLocale
defaultTimeLocale [Char]
fmt (ShowS
trimString [Char]
string))
        DownloadOptions
formats

trimString :: String -> String
trimString :: ShowS
trimString = ShowS
forall a. [a] -> [a]
reverse ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> ShowS
forall a. (a -> Bool) -> [a] -> [a]
dropWhile Char -> Bool
isSpace ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShowS
forall a. [a] -> [a]
reverse ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> ShowS
forall a. (a -> Bool) -> [a] -> [a]
dropWhile Char -> Bool
isSpace

parseHttpTime :: String -> Maybe UTCTime
parseHttpTime :: [Char] -> Maybe UTCTime
parseHttpTime =
    DownloadOptions -> [Char] -> Maybe UTCTime
tryParseTime
    [[Char]
"%a, %e %b %Y %k:%M:%S %Z" -- Sun, 06 Nov 1994 08:49:37 GMT
    ,[Char]
"%A, %e-%b-%y %k:%M:%S %Z" -- Sunday, 06-Nov-94 08:49:37 GMT
    ,[Char]
"%a %b %e %k:%M:%S %Y"     -- Sun Nov  6 08:49:37 1994
    ]

globalDownloader :: Downloader
globalDownloader :: Downloader
globalDownloader = IO Downloader -> Downloader
forall a. IO a -> a
unsafePerformIO (IO Downloader -> Downloader) -> IO Downloader -> Downloader
forall a b. (a -> b) -> a -> b
$ DownloaderSettings -> IO Downloader
newDownloader DownloaderSettings
forall a. Default a => a
def
{-# NOINLINE globalDownloader #-}

-- | Download single URL with default 'DownloaderSettings'.
-- Fails if result is not 'DROK'.
urlGetContents :: String -> IO B.ByteString
urlGetContents :: [Char] -> BodyReader
urlGetContents [Char]
url = do
    DownloadResult
r <- Downloader
-> [Char]
-> Maybe HostAddress
-> DownloadOptions
-> IO DownloadResult
download Downloader
globalDownloader [Char]
url Maybe HostAddress
forall a. Maybe a
Nothing []
    case DownloadResult
r of
        DROK ByteString
c DownloadOptions
_ -> ByteString -> BodyReader
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return ByteString
c
        DownloadResult
e -> [Char] -> BodyReader
forall a. [Char] -> IO a
forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail ([Char] -> BodyReader) -> [Char] -> BodyReader
forall a b. (a -> b) -> a -> b
$ [Char]
"urlGetContents " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ ShowS
forall a. Show a => a -> [Char]
show [Char]
url [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
": " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ DownloadResult -> [Char]
forall a. Show a => a -> [Char]
show DownloadResult
e

-- | Post data and download single URL with default 'DownloaderSettings'.
-- Fails if result is not 'DROK'.
urlGetContentsPost :: String -> B.ByteString -> IO B.ByteString
urlGetContentsPost :: [Char] -> ByteString -> BodyReader
urlGetContentsPost [Char]
url ByteString
dat = do
    DownloadResult
r <- Downloader
-> [Char] -> Maybe HostAddress -> ByteString -> IO DownloadResult
post Downloader
globalDownloader [Char]
url Maybe HostAddress
forall a. Maybe a
Nothing ByteString
dat
    case DownloadResult
r of
        DROK ByteString
c DownloadOptions
_ -> ByteString -> BodyReader
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return ByteString
c
        DownloadResult
e -> [Char] -> BodyReader
forall a. [Char] -> IO a
forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail ([Char] -> BodyReader) -> [Char] -> BodyReader
forall a b. (a -> b) -> a -> b
$ [Char]
"urlGetContentsPost " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ ShowS
forall a. Show a => a -> [Char]
show [Char]
url [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
": " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ DownloadResult -> [Char]
forall a. Show a => a -> [Char]
show DownloadResult
e