{-# LANGUAGE PatternGuards #-}

-- | URL Utility Functions

module Lambdabot.Util.Browser
    ( urlPageTitle
    , browseLB
    ) where

import Codec.Binary.UTF8.String
import Control.Applicative
import Control.Monad.Trans
import Lambdabot.Config
import Lambdabot.Config.Reference
import Lambdabot.Monad
import Lambdabot.Util (limitStr)
import Network.Browser
import Network.HTTP
import Network.URI
import Text.HTML.TagSoup
import Text.HTML.TagSoup.Match
import Data.Char (toLower)
import Data.List (isPrefixOf)

-- | Run a browser action with some standardized settings
browseLB :: MonadLB m => BrowserAction conn a -> m a
browseLB :: forall (m :: * -> *) conn a.
MonadLB m =>
BrowserAction conn a -> m a
browseLB BrowserAction conn a
act = forall (m :: * -> *) a. MonadLB m => LB a -> m a
lb forall a b. (a -> b) -> a -> b
$ do
    Proxy
proxy' <- forall (m :: * -> *) a. MonadConfig m => Config a -> m a
getConfig Config Proxy
proxy
    forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall conn a. BrowserAction conn a -> IO a
browse forall a b. (a -> b) -> a -> b
$ do
        forall t. (String -> IO ()) -> BrowserAction t ()
setOutHandler (forall a b. a -> b -> a
const (forall (m :: * -> *) a. Monad m => a -> m a
return ()))
        forall t. (String -> IO ()) -> BrowserAction t ()
setErrHandler (forall a b. a -> b -> a
const (forall (m :: * -> *) a. Monad m => a -> m a
return ()))

        forall t. Bool -> BrowserAction t ()
setAllowRedirects Bool
True
        forall t. Maybe Int -> BrowserAction t ()
setMaxRedirects (forall a. a -> Maybe a
Just Int
5)
        forall t. Proxy -> BrowserAction t ()
setProxy Proxy
proxy'
        BrowserAction conn a
act

-- | Limit the maximum title length to prevent jokers from spamming
-- the channel with specially crafted HTML pages.
maxTitleLength :: Int
maxTitleLength :: Int
maxTitleLength = Int
80

-- | Fetches a page title suitable for display.  Ideally, other
-- plugins should make use of this function if the result is to be
-- displayed in an IRC channel because it ensures that a consistent
-- look is used (and also lets the URL plugin effectively ignore
-- contextual URLs that might be generated by another instance of
-- lambdabot; the URL plugin matches on 'urlTitlePrompt').
urlPageTitle :: String -> BrowserAction (HandleStream String) (Maybe String)
urlPageTitle :: String -> BrowserAction (HandleStream String) (Maybe String)
urlPageTitle = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Int -> String -> String
limitStr Int
maxTitleLength)) forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> BrowserAction (HandleStream String) (Maybe String)
rawPageTitle

-- | Fetches a page title for the specified URL.  This function should
-- only be used by other plugins if and only if the result is not to
-- be displayed in an IRC channel.  Instead, use 'urlPageTitle'.
rawPageTitle :: String -> BrowserAction (HandleStream String) (Maybe String)
rawPageTitle :: String -> BrowserAction (HandleStream String) (Maybe String)
rawPageTitle String
url = forall {m :: * -> *} {a}. Monad m => m (Maybe a) -> m (Maybe a)
checkHTTPS forall a b. (a -> b) -> a -> b
$ do
    (URI
_, Response String
result) <- forall ty.
HStream ty =>
Request ty -> BrowserAction (HandleStream ty) (URI, Response ty)
request (String -> Request_String
getRequest (forall a. (a -> Bool) -> [a] -> [a]
takeWhile (forall a. Eq a => a -> a -> Bool
/=Char
'#') String
url))
    case forall a. Response a -> ResponseCode
rspCode Response String
result of
        (Int
2,Int
0,Int
0)   -> do
            case forall a. (a -> Bool) -> [a] -> [a]
takeWhile (forall a. Eq a => a -> a -> Bool
/= Char
';') forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> HeaderName -> [Header] -> Maybe String
lookupHeader HeaderName
HdrContentType (forall a. Response a -> [Header]
rspHeaders Response String
result) of
                Just String
"text/html"       -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ String -> Maybe String
extractTitle (forall a. Response a -> a
rspBody Response String
result)
                Just String
"application/pdf" -> String -> BrowserAction (HandleStream String) (Maybe String)
rawPageTitle (String -> String
googleCacheURL String
url)
                Maybe String
_                      -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ forall a. Maybe a
Nothing
        ResponseCode
_         -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a. Maybe a
Nothing
    
    where googleCacheURL :: String -> String
googleCacheURL = (String
gURLforall a. [a] -> [a] -> [a]
++) forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> String -> String
escapeURIString (forall a b. a -> b -> a
const Bool
False)
          gURL :: String
gURL = String
"http://www.google.com/search?hl=en&q=cache:"
          checkHTTPS :: m (Maybe a) -> m (Maybe a)
checkHTTPS m (Maybe a)
act | String
"https:" forall a. Eq a => [a] -> [a] -> Bool
`isPrefixOf` forall a b. (a -> b) -> [a] -> [b]
map Char -> Char
toLower String
url = forall (m :: * -> *) a. Monad m => a -> m a
return forall a. Maybe a
Nothing
                         | Bool
otherwise = m (Maybe a)
act

-- | Given a server response (list of Strings), return the text in
-- between the title HTML element, only if it is text/html content.
-- Now supports all(?) HTML entities thanks to TagSoup.
extractTitle :: String -> Maybe String
extractTitle :: String -> Maybe String
extractTitle = [Tag String] -> Maybe String
content forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [Tag String]
tags forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String
decodeString where
    tags :: String -> [Tag String]
tags = [Tag String] -> [Tag String]
closing forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Tag String] -> [Tag String]
opening forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall str. StringLike str => [Tag str] -> [Tag str]
canonicalizeTags forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall str. StringLike str => str -> [Tag str]
parseTags
    opening :: [Tag String] -> [Tag String]
opening = forall a. (a -> Bool) -> [a] -> [a]
dropWhile (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall str.
Eq str =>
str -> ([Attribute str] -> Bool) -> Tag str -> Bool
tagOpenLit String
"title" (forall a b. a -> b -> a
const Bool
True))
    closing :: [Tag String] -> [Tag String]
closing = forall a. (a -> Bool) -> [a] -> [a]
takeWhile (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall str. Eq str => str -> Tag str -> Bool
tagCloseLit String
"title")

    content :: [Tag String] -> Maybe String
content = String -> Maybe String
maybeText forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String
format forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall str. StringLike str => [Tag str] -> str
innerText
    format :: String -> String
format = [String] -> String
unwords forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [String]
words
    maybeText :: String -> Maybe String
maybeText [] = forall a. Maybe a
Nothing
    maybeText String
t  = forall a. a -> Maybe a
Just (String -> String
encodeString String
t)