{-# LANGUAGE PatternGuards #-}
module Lambdabot.Util.Browser
( urlPageTitle
, browseLB
) where
import Codec.Binary.UTF8.String
import Control.Applicative
import Control.Monad.Trans
import Lambdabot.Config
import Lambdabot.Config.Reference
import Lambdabot.Monad
import Lambdabot.Util (limitStr)
import Network.Browser
import Network.HTTP
import Network.URI
import Text.HTML.TagSoup
import Text.HTML.TagSoup.Match
import Data.Char (toLower)
import Data.List (isPrefixOf)
browseLB :: MonadLB m => BrowserAction conn a -> m a
browseLB :: forall (m :: * -> *) conn a.
MonadLB m =>
BrowserAction conn a -> m a
browseLB BrowserAction conn a
act = forall (m :: * -> *) a. MonadLB m => LB a -> m a
lb forall a b. (a -> b) -> a -> b
$ do
Proxy
proxy' <- forall (m :: * -> *) a. MonadConfig m => Config a -> m a
getConfig Config Proxy
proxy
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall conn a. BrowserAction conn a -> IO a
browse forall a b. (a -> b) -> a -> b
$ do
forall t. (String -> IO ()) -> BrowserAction t ()
setOutHandler (forall a b. a -> b -> a
const (forall (m :: * -> *) a. Monad m => a -> m a
return ()))
forall t. (String -> IO ()) -> BrowserAction t ()
setErrHandler (forall a b. a -> b -> a
const (forall (m :: * -> *) a. Monad m => a -> m a
return ()))
forall t. Bool -> BrowserAction t ()
setAllowRedirects Bool
True
forall t. Maybe Int -> BrowserAction t ()
setMaxRedirects (forall a. a -> Maybe a
Just Int
5)
forall t. Proxy -> BrowserAction t ()
setProxy Proxy
proxy'
BrowserAction conn a
act
maxTitleLength :: Int
maxTitleLength :: Int
maxTitleLength = Int
80
urlPageTitle :: String -> BrowserAction (HandleStream String) (Maybe String)
urlPageTitle :: String -> BrowserAction (HandleStream String) (Maybe String)
urlPageTitle = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Int -> String -> String
limitStr Int
maxTitleLength)) forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> BrowserAction (HandleStream String) (Maybe String)
rawPageTitle
rawPageTitle :: String -> BrowserAction (HandleStream String) (Maybe String)
rawPageTitle :: String -> BrowserAction (HandleStream String) (Maybe String)
rawPageTitle String
url = forall {m :: * -> *} {a}. Monad m => m (Maybe a) -> m (Maybe a)
checkHTTPS forall a b. (a -> b) -> a -> b
$ do
(URI
_, Response String
result) <- forall ty.
HStream ty =>
Request ty -> BrowserAction (HandleStream ty) (URI, Response ty)
request (String -> Request_String
getRequest (forall a. (a -> Bool) -> [a] -> [a]
takeWhile (forall a. Eq a => a -> a -> Bool
/=Char
'#') String
url))
case forall a. Response a -> ResponseCode
rspCode Response String
result of
(Int
2,Int
0,Int
0) -> do
case forall a. (a -> Bool) -> [a] -> [a]
takeWhile (forall a. Eq a => a -> a -> Bool
/= Char
';') forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> HeaderName -> [Header] -> Maybe String
lookupHeader HeaderName
HdrContentType (forall a. Response a -> [Header]
rspHeaders Response String
result) of
Just String
"text/html" -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ String -> Maybe String
extractTitle (forall a. Response a -> a
rspBody Response String
result)
Just String
"application/pdf" -> String -> BrowserAction (HandleStream String) (Maybe String)
rawPageTitle (String -> String
googleCacheURL String
url)
Maybe String
_ -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ forall a. Maybe a
Nothing
ResponseCode
_ -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a. Maybe a
Nothing
where googleCacheURL :: String -> String
googleCacheURL = (String
gURLforall a. [a] -> [a] -> [a]
++) forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> String -> String
escapeURIString (forall a b. a -> b -> a
const Bool
False)
gURL :: String
gURL = String
"http://www.google.com/search?hl=en&q=cache:"
checkHTTPS :: m (Maybe a) -> m (Maybe a)
checkHTTPS m (Maybe a)
act | String
"https:" forall a. Eq a => [a] -> [a] -> Bool
`isPrefixOf` forall a b. (a -> b) -> [a] -> [b]
map Char -> Char
toLower String
url = forall (m :: * -> *) a. Monad m => a -> m a
return forall a. Maybe a
Nothing
| Bool
otherwise = m (Maybe a)
act
extractTitle :: String -> Maybe String
= [Tag String] -> Maybe String
content forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [Tag String]
tags forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String
decodeString where
tags :: String -> [Tag String]
tags = [Tag String] -> [Tag String]
closing forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Tag String] -> [Tag String]
opening forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall str. StringLike str => [Tag str] -> [Tag str]
canonicalizeTags forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall str. StringLike str => str -> [Tag str]
parseTags
opening :: [Tag String] -> [Tag String]
opening = forall a. (a -> Bool) -> [a] -> [a]
dropWhile (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall str.
Eq str =>
str -> ([Attribute str] -> Bool) -> Tag str -> Bool
tagOpenLit String
"title" (forall a b. a -> b -> a
const Bool
True))
closing :: [Tag String] -> [Tag String]
closing = forall a. (a -> Bool) -> [a] -> [a]
takeWhile (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall str. Eq str => str -> Tag str -> Bool
tagCloseLit String
"title")
content :: [Tag String] -> Maybe String
content = String -> Maybe String
maybeText forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String
format forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall str. StringLike str => [Tag str] -> str
innerText
format :: String -> String
format = [String] -> String
unwords forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [String]
words
maybeText :: String -> Maybe String
maybeText [] = forall a. Maybe a
Nothing
maybeText String
t = forall a. a -> Maybe a
Just (String -> String
encodeString String
t)