module HttpFetchF(httpFetchF,HttpF,HttpResp(..),HttpResp'(..),HttpReq(..), packResp,unpackResp) where import AllFudgets import Control.Applicative((<|>)) import Data.Char(isAlphaNum) import qualified Data.ByteString as BS import qualified Data.ByteString.Lazy.Char8 as LBS import System.Time(ctMin,ctSec,ctPicosec) import ParseURL(parseURL) import URL(URL(..),url2str,joinURL,docURL) import Http import MimeMessage(getheader,updateHdr) import HeaderNames as HN(userAgent,location,host) import SocketK import DialogueIO hiding (IOError) import IOUtil(getEnvi) -- !! not standard Haskell import HbcUtils(apSnd,breakAt) import Utils2(apFst,strToLower) --import qualified PackedString as PS type HttpResp = HttpResp' LBS.ByteString type HttpResp' body = Either String (URL,Either String (HttpResponse body)) type HttpReq = HttpRequest URL packResp = mapResp LBS.pack unpackResp = mapResp LBS.unpack mapResp :: (a->b) -> HttpResp' a -> HttpResp' b mapResp = fmap . fmap . fmap . fmap {- type HttpSP sp = sp HttpRequest HttpResp type HttpF = HttpSP F -- Ha! type HttpK = HttpSP K -- Ha! -} type HttpF = F HttpReq HttpResp type HttpK = K HttpReq HttpResp type ErrK = String->HttpK type SuccK = HttpResponse LBS.ByteString->HttpK httpFetchF :: HttpF httpFetchF = ioF httpFetchK httpFetchK :: HttpK httpFetchK = putStatusMsg () "Ready" $ httpFetchK' httpFetchK' :: HttpK httpFetchK' = getK $ \ msg -> case msg of High req -> fetchUrlK req errK succK where succK ans = putAns (reqURI req) ans $ httpFetchK' errK err = putErr (reqURI req) err $ httpFetchK' _ -> httpFetchK where putAns url ans = putRes url (Right ans) putErr url msg = putRes url (Left msg) putRes url ans = putK (High (Right (url,ans))) fetchUrlK :: HttpReq -> ErrK -> SuccK -> HttpK fetchUrlK = fetchUrlK' [] fetchUrlK' from httpReq@(HttpReq { reqURI=url }) errK succK = case url of URL (Just proto) opthost optport path _ -> case strToLower proto of p | p `elem` ["http","https"] -> proxySendQueryK p httpReq' url (host,port,path) errK $ \ ans -> followRedirectionK from url httpReq' (parseHttpResp ans) errK succK where port = fromMaybe (if p=="https" then 443 else 80) optport hdrs = reqHdrs httpReq agentHdr = header userAgent "HttpFetchF/0 (http://www.cse.chalmers.se/~hallgren/)" httpReq' = httpReq { reqHdrs = hdrs' } hdrs' = (if getheader hdrs userAgent=="" then (agentHdr:) else id) $ updateHdr (header HN.host (host++maybe "" ((':':).show) optport)) hdrs "gopher" -> sendQueryK host port (gopherQuery path) url errK $ \ ans -> succK (httpResp "gopher" ans) where port = fromMaybe 70 optport "finger" -> sendQueryK host port (fingerQuery path) url errK $ \ ans -> succK (httpResp "finger" ans) where port = fromMaybe 79 optport -- finger is not part of the URL standard _ -> errK ("Protocol "++proto++" is not implemented yet.") where host = fromMaybe "localhost" opthost httpResp proto body = HttpResp (SC 200 ("OK "++proto)) [] body _ -> errK "Can't retrieve documents from incomplete URLs" followRedirectionK from url httpReq ans errK succK = case respCode ans of -- See https://en.wikipedia.org/wiki/List_of_HTTP_status_codes 301 -> follow "permanent" method 302 -> follow "found" HttpGet 303 -> follow "see other" HttpGet 307 -> follow "temporary redirect" method _ -> dontfollow where method = reqMethod httpReq dontfollow = succK ans' where ans' = ans {respHdrs = updateHdr (location,url2str url) (respHdrs ans)} follow descr method = case parseURL (getheader (respHdrs ans) location) of Just url' | aurl `notElem` from && length from<20 -> putStatusMsg url' ("Following "++descr++" redirection") $ fetchUrlK' (url:from) httpReq {reqMethod=method,reqURI=aurl} errK succK where aurl=joinURL url url' _ -> dontfollow proxySendQueryK = case apSnd reads $ breakAt ':' proxy of (proxyhost,[(proxyport,"")]) -> proxySendQueryK' proxyhost proxyport _ -> noProxySendQueryK where noProxySendQueryK proto httpReq url (host,port,path) errK succK = case proto of "http" -> sendQueryK host port reqStr url errK succK "https" -> sslSendQueryK host port reqStr url errK succK _ -> errK (proto ++ " is only supported through a http proxy") where path' = if null path then "/" else path reqStr = showHttpRequest (const path') httpReq proxySendQueryK' proxyhost proxyport proto httpReq url u@(host,_,_) = if host `elem` noproxyhosts then noProxySendQueryK proto httpReq url u else sendQueryK proxyhost proxyport reqStr url where reqStr = showHttpRequest (url2str . docURL) httpReq sslSendQueryK host port req url errK succK = putStatusMsg url ("Sending https request to "++host) $ storeRequestK $ \ base reqPath -> let respPath = base++"resp" cmd = "openssl s_client -quiet -connect "++host++":"++show port++ " < "++reqPath++" > "++respPath in --hIOSucc (WriteFile (base++"cmd") cmd) $ haskellIO (System cmd) $ \ resp -> hIOSucc (DeleteFile reqPath) $ hIO (ReadBinFile respPath) $ \ (Bn s) -> hIOSucc (DeleteFile respPath) $ case resp of Success -> putStatusMsg url "Response received" $ succK s Failure e -> errK ("https: "++show e) where storeRequestK cont = getLocalTime $ \ ct -> let tmpdir = fromMaybe "/tmp" $ getEnvi "TMPDIR" t = ctPicosec ct `quot` 1000000 + 1000000 * fromIntegral (ctSec ct + 60*ctMin ct) u = filter isAlphaNum (url2str url) base = tmpdir++"/"++show t++"-"++u++"." filename = base++"req" in hIOSucc (WriteBinaryFile filename req) $ cont base filename --sendQueryK :: String -> Int -> String -> URL -> Cont HttpK String sendQueryK host port query url errK succK = putStatusMsg url ("Connecting to "++host) $ openSocketK host port errK1 $ \ socket -> putStatusMsg url ("Sending request to "++host) $ writeSocketK socket query $ (if showreq then echoK query else id) $ selectK [BinSocketDe socket] $ putStatusMsg url ("Waiting for response from "++host) $ waitReplyK [] 0 socket url errK succK where errK1 ioerror = errK ("http: "++show ioerror) --waitReplyK :: [PackedString] -> Int -> Socket -> URL -> Cont HttpK String waitReplyK acc n socket url errK succK = getK $ \ msg -> case msg of High req -> -- If a new request arrives, forget about the one in progress closeSocketK socket $ startupK [High req] $ errK ("Aborting the fetch of "++url2str url) Low (DResp (AsyncInput (_,SocketReadBin bs))) -> if BS.null bs then closeSocketK socket $ putStatusMsg url ("Got all "++show n++" bytes") $ succK (revconcat acc) else let acc' = bs:acc n' = n + BS.length bs in seq n' $ -- This also evaluates ps and gets rid of s. putStatusMsg url ("Got "++show n'++" bytes") $ waitReplyK acc' n' socket url errK succK _ -> waitReplyK acc n socket url errK succK --revconcat = foldl (flip (++)) [] --revconcat = concatMap id{-PS.unpackPS-} . reverse revconcat = LBS.fromChunks . reverse parseHttpResp = parseHttpResponse' . rmcrHdr rmcrHdr bs = maybe undefined id $ empty <|> check "\n\r\n" <|> check "\n\n" <|> one where empty = if LBS.null bs then Just ("",LBS.empty) else Nothing one = (\(c,bs)->(if c=='\r' then id else apFst (c:)) (rmcrHdr bs)) <$> LBS.uncons bs check s = (,) "" <$> LBS.stripPrefix (LBS.pack s) bs gopherQuery path = drop 2 (map qt path) ++ "\n\n" -- drops a "/" and the type character -- query shouldn't be URLencoded. !! where qt '?' = '\t' qt c = c fingerQuery path = drop 1 path ++ "\n\n" -- drops the leading "/" putStatusMsg _url msg = putsK [High (Left msg){-,Low Flush-}] proxy = argKey "proxy" httpProxy where httpProxy = fromMaybe "" (getEnvi "HTTP_PROXY") noproxyhosts = argKeyList "noproxy" [] showreq = argFlag "showhttpreq" False