{-# LANGUAGE OverloadedStrings, BangPatterns, RecordWildCards, ViewPatterns,
DoAndIfThenElse, PatternGuards, ScopedTypeVariables,
TupleSections #-}
{-# OPTIONS_GHC -fwarn-incomplete-patterns #-}
module Network.HTTP.Conduit.Downloader
(
urlGetContents, urlGetContentsPost
, download, post, downloadG, rawDownload
, DownloadResult(..), RawDownloadResult(..), DownloadOptions
, DownloaderSettings(..)
, Downloader, withDownloader, withDownloaderSettings, newDownloader
, postRequest, sinkByteString
) where
import Control.Monad.Trans
import qualified Data.Text as T
import qualified Data.ByteString.Lazy.Char8 as BL
import qualified Data.ByteString.Char8 as B
import qualified Data.ByteString.Internal as B
import Control.Monad
import qualified Control.Exception as E
import Data.Default as C
import Data.String
import Data.Char
import Data.Maybe
import Data.List
import Foreign
import qualified Network.Socket as NS
import qualified OpenSSL as SSL
import qualified OpenSSL.Session as SSL
import qualified Network.HTTP.Types as N
import qualified Network.HTTP.Conduit as C
import Network.HTTP.Client.Internal (makeConnection, Connection)
import qualified Control.Monad.Trans.Resource as C
import qualified Data.Conduit as C
import System.Timeout
import Codec.Compression.Zlib.Raw as Deflate
import Network.URI
import Data.Time.Format
import Data.Time.Clock
import Data.Time.Clock.POSIX
data DownloadResult
= DROK B.ByteString DownloadOptions
| DRRedirect String
| DRError String
| DRNotModified
deriving (Show, Read, Eq)
data RawDownloadResult
= RawDownloadResult
{ rdrStatus :: N.Status
, rdrHttpVersion :: N.HttpVersion
, rdrHeaders :: N.ResponseHeaders
, rdrBody :: B.ByteString
, rdrCookieJar :: C.CookieJar
}
deriving (Show, Eq)
type DownloadOptions = [String]
data DownloaderSettings
= DownloaderSettings
{ dsUserAgent :: B.ByteString
, dsTimeout :: Int
, dsManagerSettings :: C.ManagerSettings
, dsMaxDownloadSize :: Int
}
instance Default DownloaderSettings where
def =
DownloaderSettings
{ dsUserAgent = "Mozilla/5.0 (compatible; HttpConduitDownloader/1.0; +http://hackage.haskell.org/package/http-conduit-downloader)"
, dsTimeout = 30
, dsManagerSettings =
C.tlsManagerSettings
{ C.managerTlsConnection =
getOpenSSLConnection
}
, dsMaxDownloadSize = 10*1024*1024
}
getOpenSSLConnection :: IO (Maybe NS.HostAddress -> String -> Int
-> IO Connection)
getOpenSSLConnection = do
ctx <- SSL.context
return $ \ mbha host port -> do
sock <- case mbha of
Nothing -> openSocketByName host port
Just ha -> openSocket ha port
ssl <- SSL.connection ctx sock
SSL.setTlsextHostName ssl host
SSL.connect ssl
makeConnection
(SSL.read ssl bufSize
`E.catch`
\ (_ :: SSL.ConnectionAbruptlyTerminated) -> return ""
)
(SSL.write ssl)
(NS.close sock `E.catch` \(_ :: E.IOException) -> return ())
openSocket :: NS.HostAddress
-> Int
-> IO NS.Socket
openSocket ha port =
openSocket' $
NS.AddrInfo
{ NS.addrFlags = []
, NS.addrFamily = NS.AF_INET
, NS.addrSocketType = NS.Stream
, NS.addrProtocol = 6
, NS.addrAddress = NS.SockAddrInet (toEnum port) ha
, NS.addrCanonName = Nothing
}
openSocket' :: NS.AddrInfo -> IO NS.Socket
openSocket' addr = do
E.bracketOnError
(NS.socket (NS.addrFamily addr) (NS.addrSocketType addr)
(NS.addrProtocol addr))
(NS.close)
(\sock -> do
NS.setSocketOption sock NS.NoDelay 1
NS.connect sock (NS.addrAddress addr)
return sock)
openSocketByName :: Show a => NS.HostName -> a -> IO NS.Socket
openSocketByName host port = do
let hints = NS.defaultHints
{ NS.addrFlags = []
, NS.addrFamily = NS.AF_INET
, NS.addrSocketType = NS.Stream
, NS.addrProtocol = 6
}
(addrInfo:_) <- NS.getAddrInfo (Just hints) (Just host) (Just $ show port)
openSocket' addrInfo
data Downloader
= Downloader
{ manager :: C.Manager
, settings :: DownloaderSettings
}
newDownloader :: DownloaderSettings -> IO Downloader
newDownloader s = do
SSL.withOpenSSL $ return ()
m <- C.newManager $ dsManagerSettings s
return $ Downloader m s
withDownloader :: (Downloader -> IO a) -> IO a
withDownloader = withDownloaderSettings def
withDownloaderSettings :: DownloaderSettings -> (Downloader -> IO a) -> IO a
withDownloaderSettings s f = f =<< newDownloader s
parseUrl :: String -> Either E.SomeException C.Request
parseUrl = C.parseRequest . takeWhile (/= '#')
download :: Downloader
-> String
-> Maybe NS.HostAddress
-> DownloadOptions
-> IO DownloadResult
download = downloadG return
post :: Downloader -> String -> Maybe NS.HostAddress -> B.ByteString
-> IO DownloadResult
post d url ha dat =
downloadG (return . postRequest dat) d url ha []
postRequest :: B.ByteString -> C.Request -> C.Request
postRequest dat rq =
rq { C.method = N.methodPost
, C.requestBody = C.RequestBodyBS dat }
downloadG ::
(C.Request -> C.ResourceT IO C.Request)
-> Downloader
-> String
-> Maybe NS.HostAddress
-> DownloadOptions
-> IO (DownloadResult)
downloadG f d u h o = fmap fst $ rawDownload f d u h o
rawDownload ::
(C.Request -> C.ResourceT IO C.Request)
-> Downloader
-> String
-> Maybe NS.HostAddress
-> DownloadOptions
-> IO (DownloadResult, Maybe RawDownloadResult)
rawDownload f (Downloader {..}) url hostAddress opts =
case parseUrl url of
Left e ->
fmap (, Nothing) $
maybe (return $ DRError $ show e) (httpExceptionToDR url)
(E.fromException e)
Right rq -> do
let dl req firstTime = do
r <- (timeout (dsTimeout settings * 1000000) $ C.runResourceT $ do
r <- C.http req manager
mbb <- C.sealConduitT (C.responseBody r) C.$$+-
sinkByteString (dsMaxDownloadSize settings)
case mbb of
Just b -> do
let c = C.responseStatus r
h = C.responseHeaders r
d = tryDeflate h b
curTime <- liftIO $ getCurrentTime
return
(makeDownloadResultC curTime url c h d
, Just $ RawDownloadResult
{ rdrStatus = c
, rdrHttpVersion = C.responseVersion r
, rdrHeaders = h
, rdrBody = d
, rdrCookieJar = C.responseCookieJar r
})
Nothing -> return (DRError "Too much data", Nothing))
`E.catch`
(fmap (Just . (, Nothing)) . httpExceptionToDR url)
`E.catch`
(return . (Just . (, Nothing)) . someException)
case r of
Just (DRError e, _)
| ("EOF reached" `isSuffixOf` e
|| e == "Invalid HTTP status line:\n"
|| e == "Incomplete headers"
) && firstTime ->
dl req False
| "ZlibException" `isPrefixOf` e && firstTime ->
dl (disableCompression req) False
_ ->
return $ fromMaybe (DRError "Timeout", Nothing) r
disableCompression req =
req { C.requestHeaders =
("Accept-Encoding", "") : C.requestHeaders req }
rq1 = rq { C.requestHeaders =
[("Accept", "*/*")
,("User-Agent", dsUserAgent settings)
]
++ map toHeader opts
++ C.requestHeaders rq
, C.redirectCount = 0
, C.responseTimeout = C.responseTimeoutNone
, C.hostAddress = hostAddress
, C.checkResponse = \ _ _ -> return ()
}
req <- C.runResourceT $ f rq1
dl req True
where toHeader :: String -> N.Header
toHeader h = let (a,b) = break (== ':') h in
(fromString a, fromString (tail b))
someException :: E.SomeException -> DownloadResult
someException e = case show e of
"<<timeout>>" -> DRError "Timeout"
s -> DRError s
tryDeflate headers b
| Just d <- lookup "Content-Encoding" headers
, B.map toLower d == "deflate"
= BL.toStrict $ Deflate.decompress $ BL.fromStrict b
| otherwise = b
httpExceptionToDR :: Monad m => String -> C.HttpException -> m DownloadResult
httpExceptionToDR url exn = return $ case exn of
C.HttpExceptionRequest _ ec -> httpExceptionContentToDR url ec
C.InvalidUrlException _ e -> DRError $ "Invalid URL: " ++ e
httpExceptionContentToDR :: String -> C.HttpExceptionContent -> DownloadResult
httpExceptionContentToDR url ec = case ec of
C.StatusCodeException r b ->
makeDownloadResultC (posixSecondsToUTCTime 0) url
(C.responseStatus r) (C.responseHeaders r) b
C.TooManyRedirects _ -> DRError "Too many redirects"
C.OverlongHeaders -> DRError "Overlong HTTP headers"
C.ResponseTimeout -> DRError "Timeout"
C.ConnectionTimeout -> DRError "Connection timeout"
C.ConnectionFailure e -> DRError $ "Connection failed: " ++ show e
C.InvalidStatusLine l -> DRError $ "Invalid HTTP status line:\n" ++ B.unpack l
C.InvalidHeader h -> DRError $ "Invalid HTTP header:\n" ++ B.unpack h
C.InternalException e ->
case show e of
"<<timeout>>" -> DRError "Timeout"
s -> DRError s
C.ProxyConnectException _ _ _ -> DRError "Can't connect to proxy"
C.NoResponseDataReceived -> DRError "No response data received"
C.TlsNotSupported -> DRError "TLS not supported"
C.WrongRequestBodyStreamSize e a ->
DRError $ "The request body provided did not match the expected size "
++ ea e a
C.ResponseBodyTooShort e a -> DRError $ "Response body too short " ++ ea e a
C.InvalidChunkHeaders -> DRError "Invalid chunk headers"
C.IncompleteHeaders -> DRError "Incomplete headers"
C.InvalidDestinationHost _ -> DRError "Invalid destination host"
C.HttpZlibException e -> DRError $ show e
C.InvalidProxyEnvironmentVariable n v ->
DRError $ "Invalid proxy environment variable "
++ show n ++ "=" ++ show v
C.InvalidProxySettings s -> DRError $ "Invalid proxy settings:\n" ++ T.unpack s
C.ConnectionClosed -> DRError "Connection closed"
where ea expected actual =
"(expected " ++ show expected ++ " bytes, actual is "
++ show actual ++ " bytes)"
bufSize :: Int
bufSize = 32 * 1024 - overhead
where overhead = 2 * sizeOf (undefined :: Int)
newBuf :: IO B.ByteString
newBuf = do
fp <- B.mallocByteString bufSize
return $ B.PS fp 0 0
addBs :: [B.ByteString] -> B.ByteString -> B.ByteString
-> IO ([B.ByteString], B.ByteString)
addBs acc (B.PS bfp _ bl) (B.PS sfp offs sl) = do
let cpSize = min (bufSize - bl) sl
bl' = bl + cpSize
withForeignPtr bfp $ \ dst -> withForeignPtr sfp $ \ src ->
B.memcpy (dst `plusPtr` bl) (src `plusPtr` offs) (toEnum cpSize)
if bl' == bufSize then do
buf' <- newBuf
addBs (B.PS bfp 0 bufSize : acc) buf'
(B.PS sfp (offs + cpSize) (sl - cpSize))
else do
return (acc, B.PS bfp 0 bl')
sinkByteString :: MonadIO m => Int -> C.ConduitT B.ByteString C.Void m (Maybe B.ByteString)
sinkByteString limit = do
buf <- liftIO $ newBuf
go 0 [] buf
where go len acc buf = do
mbinp <- C.await
case mbinp of
Just inp -> do
(acc', buf') <- liftIO $ addBs acc buf inp
let len' = len + B.length inp
if len' > limit then
return Nothing
else
go len' acc' buf'
Nothing -> do
return $ Just $ B.concat $ reverse (buf:acc)
makeDownloadResultC :: UTCTime -> String -> N.Status -> N.ResponseHeaders
-> B.ByteString -> DownloadResult
makeDownloadResultC curTime url c headers b = do
if N.statusCode c == 304 then
DRNotModified
else if N.statusCode c `elem`
[ 300
, 301
, 302
, 303
, 307
] then
case lookup "location" headers of
Just (B.unpack -> loc) ->
redirect $
relUri (takeWhile (/= '#') $ dropWhile (== ' ') loc)
_ ->
DRError $ "Redirect status, but no Location field\n"
++ B.unpack (N.statusMessage c) ++ "\n"
++ unlines (map show headers)
else if N.statusCode c >= 300 then
DRError $ "HTTP " ++ show (N.statusCode c) ++ " "
++ B.unpack (N.statusMessage c)
else
DROK b (redownloadOpts [] headers)
where redirect r
| otherwise = DRRedirect r
redownloadOpts acc [] = reverse acc
redownloadOpts _ (("Pragma", B.map toLower -> tag) : _)
| "no-cache" `B.isInfixOf` tag = []
redownloadOpts _ (("Cache-Control", B.map toLower -> tag) : _)
| any (`B.isInfixOf` tag)
["no-cache", "no-store", "must-revalidate", "max-age=0"] = []
redownloadOpts acc (("Expires", time):xs)
| ts <- B.unpack time
, Just t <- parseHttpTime ts
, t > curTime =
redownloadOpts acc xs
| otherwise = []
redownloadOpts acc (("ETag", tag):xs) =
redownloadOpts (("If-None-Match: " ++ B.unpack tag) : acc) xs
redownloadOpts acc (("Last-Modified", time):xs)
| ts <- B.unpack time
, Just t <- parseHttpTime ts
, t <= curTime =
redownloadOpts (("If-Modified-Since: " ++ B.unpack time) : acc) xs
redownloadOpts acc (_:xs) = redownloadOpts acc xs
fixNonAscii =
escapeURIString
(\ x -> ord x <= 0x7f && x `notElem` (" []{}|\"" :: String)) .
trimString
relUri (fixNonAscii -> r) =
fromMaybe r $
fmap (($ "") . uriToString id) $
liftM2 relativeTo
(parseURIReference r)
(parseURI $ fixNonAscii url)
tryParseTime :: [String] -> String -> Maybe UTCTime
tryParseTime formats string =
foldr mplus Nothing $
map (\ fmt -> parseTimeM True defaultTimeLocale fmt (trimString string))
formats
trimString :: String -> String
trimString = reverse . dropWhile isSpace . reverse . dropWhile isSpace
parseHttpTime :: String -> Maybe UTCTime
parseHttpTime =
tryParseTime
["%a, %e %b %Y %k:%M:%S %Z"
,"%A, %e-%b-%y %k:%M:%S %Z"
,"%a %b %e %k:%M:%S %Y"
]
urlGetContents :: String -> IO B.ByteString
urlGetContents url = withDownloader $ \ d -> do
r <- download d url Nothing []
case r of
DROK c _ -> return c
e -> fail $ "urlGetContents " ++ show url ++ ": " ++ show e
urlGetContentsPost :: String -> B.ByteString -> IO B.ByteString
urlGetContentsPost url dat = withDownloader $ \ d -> do
r <- post d url Nothing dat
case r of
DROK c _ -> return c
e -> fail $ "urlGetContentsPost " ++ show url ++ ": " ++ show e