{-# LANGUAGE OverloadedStrings #-}
module Network.HTTP.Robots where
import Control.Applicative
import Control.Monad
import Data.Attoparsec.ByteString.Char8 hiding (skipSpace)
import qualified Data.Attoparsec.Text as AT (isEndOfLine)
import Data.ByteString.Char8 (ByteString)
import qualified Data.ByteString.Char8 as BS
import Data.Char (toUpper)
import Data.Either (partitionEithers)
import Data.List (find)
import Data.Maybe (catMaybes)
import Data.Ratio
import Data.Time.Clock
import Data.Time.LocalTime ()
type Robot = ([([UserAgent], [Directive])], [Unparsable])
type Unparsable = ByteString
data UserAgent = Wildcard | Literal ByteString
deriving (Int -> UserAgent -> ShowS
[UserAgent] -> ShowS
UserAgent -> [Char]
(Int -> UserAgent -> ShowS)
-> (UserAgent -> [Char])
-> ([UserAgent] -> ShowS)
-> Show UserAgent
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> UserAgent -> ShowS
showsPrec :: Int -> UserAgent -> ShowS
$cshow :: UserAgent -> [Char]
show :: UserAgent -> [Char]
$cshowList :: [UserAgent] -> ShowS
showList :: [UserAgent] -> ShowS
Show,UserAgent -> UserAgent -> Bool
(UserAgent -> UserAgent -> Bool)
-> (UserAgent -> UserAgent -> Bool) -> Eq UserAgent
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: UserAgent -> UserAgent -> Bool
== :: UserAgent -> UserAgent -> Bool
$c/= :: UserAgent -> UserAgent -> Bool
/= :: UserAgent -> UserAgent -> Bool
Eq)
type Path = ByteString
type TimeInterval = (DiffTime, DiffTime)
data Directive = Allow Path
| Disallow Path
| CrawlDelay { Directive -> Rational
crawlDelay :: Rational
, Directive -> TimeInterval
timeInterval :: TimeInterval
}
| NoArchive Path
| NoSnippet Path
| NoTranslate Path
| NoIndex Path
deriving (Int -> Directive -> ShowS
[Directive] -> ShowS
Directive -> [Char]
(Int -> Directive -> ShowS)
-> (Directive -> [Char])
-> ([Directive] -> ShowS)
-> Show Directive
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> Directive -> ShowS
showsPrec :: Int -> Directive -> ShowS
$cshow :: Directive -> [Char]
show :: Directive -> [Char]
$cshowList :: [Directive] -> ShowS
showList :: [Directive] -> ShowS
Show,Directive -> Directive -> Bool
(Directive -> Directive -> Bool)
-> (Directive -> Directive -> Bool) -> Eq Directive
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: Directive -> Directive -> Bool
== :: Directive -> Directive -> Bool
$c/= :: Directive -> Directive -> Bool
/= :: Directive -> Directive -> Bool
Eq)
subParser :: Parser a -> ByteString -> Parser a
subParser :: forall a. Parser a -> Path -> Parser a
subParser Parser a
p = ([Char] -> Parser a)
-> (a -> Parser a) -> Either [Char] a -> Parser a
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (Parser a -> [Char] -> Parser a
forall a b. a -> b -> a
const Parser a
forall a. Parser Path a
forall (m :: * -> *) a. MonadPlus m => m a
mzero) a -> Parser a
forall a. a -> Parser Path a
forall (m :: * -> *) a. Monad m => a -> m a
return (Either [Char] a -> Parser a)
-> (Path -> Either [Char] a) -> Path -> Parser a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Parser a -> Path -> Either [Char] a
forall a. Parser a -> Path -> Either [Char] a
parseOnly Parser a
p
safeParseRational :: Parser Rational
safeParseRational :: Parser Rational
safeParseRational = do
(Path
bs,Scientific
_) <- Parser Scientific -> Parser (Path, Scientific)
forall a. Parser a -> Parser (Path, a)
match Parser Scientific
scientific
if Char -> Path -> Bool
BS.elem Char
'e' Path
bs Bool -> Bool -> Bool
|| Char -> Path -> Bool
BS.elem Char
'E' Path
bs
then Parser Rational
forall a. Parser Path a
forall (m :: * -> *) a. MonadPlus m => m a
mzero
else Parser Rational -> Path -> Parser Rational
forall a. Parser a -> Path -> Parser a
subParser Parser Rational
forall a. Fractional a => Parser a
rational Path
bs
dropUTF8BOM :: ByteString -> ByteString
dropUTF8BOM :: Path -> Path
dropUTF8BOM Path
bs = if Int -> Path -> Path
BS.take Int
3 Path
bs Path -> Path -> Bool
forall a. Eq a => a -> a -> Bool
== ( Char
'\239' Char -> Path -> Path
`BS.cons`
Char
'\187' Char -> Path -> Path
`BS.cons`
Char
'\191' Char -> Path -> Path
`BS.cons` Path
BS.empty)
then Int -> Path -> Path
BS.drop Int
3 Path
bs
else Path
bs
parseHourMinute :: Parser (Integer,Integer)
parseHourMinute :: Parser (Integer, Integer)
parseHourMinute = Parser (Integer, Integer)
parseWithColon Parser (Integer, Integer)
-> Parser (Integer, Integer) -> Parser (Integer, Integer)
forall a. Parser Path a -> Parser Path a -> Parser Path a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser (Integer, Integer)
parseWithoutColon
where
parseWithColon :: Parser (Integer, Integer)
parseWithColon = do
Integer
hours <- Parser ()
skipSpace Parser () -> Parser Path Integer -> Parser Path Integer
forall a b. Parser Path a -> Parser Path b -> Parser Path b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Path Integer
forall a. Integral a => Parser a
decimal
Parser Path Char -> Parser ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (Parser Path Char -> Parser ()) -> Parser Path Char -> Parser ()
forall a b. (a -> b) -> a -> b
$ Parser ()
skipSpace Parser () -> Parser Path Char -> Parser Path Char
forall a b. Parser Path a -> Parser Path b -> Parser Path b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Char -> Parser Path Char
char Char
':'
Integer
mins <- Parser ()
skipSpace Parser () -> Parser Path Integer -> Parser Path Integer
forall a b. Parser Path a -> Parser Path b -> Parser Path b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Path Integer
forall a. Integral a => Parser a
decimal
(Integer, Integer) -> Parser (Integer, Integer)
forall a. a -> Parser Path a
forall (m :: * -> *) a. Monad m => a -> m a
return (Integer
hours,Integer
mins)
parseWithoutColon :: Parser (Integer, Integer)
parseWithoutColon = do
Integer
h <- Int -> Parser Path
Data.Attoparsec.ByteString.Char8.take Int
2 Parser Path -> (Path -> Parser Path Integer) -> Parser Path Integer
forall a b. Parser Path a -> (a -> Parser Path b) -> Parser Path b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Parser Path Integer -> Path -> Parser Path Integer
forall a. Parser a -> Path -> Parser a
subParser Parser Path Integer
forall a. Integral a => Parser a
decimal
Integer
m <- Int -> Parser Path
Data.Attoparsec.ByteString.Char8.take Int
2 Parser Path -> (Path -> Parser Path Integer) -> Parser Path Integer
forall a b. Parser Path a -> (a -> Parser Path b) -> Parser Path b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Parser Path Integer -> Path -> Parser Path Integer
forall a. Parser a -> Path -> Parser a
subParser Parser Path Integer
forall a. Integral a => Parser a
decimal
(Integer, Integer) -> Parser (Integer, Integer)
forall a. a -> Parser Path a
forall (m :: * -> *) a. Monad m => a -> m a
return (Integer
h,Integer
m)
parseTimeInterval :: Parser TimeInterval
parseTimeInterval :: Parser TimeInterval
parseTimeInterval = do
(Integer
hours_start, Integer
mins_start) <- Parser (Integer, Integer)
parseHourMinute
Parser () -> Parser ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (Parser () -> Parser ()) -> Parser () -> Parser ()
forall a b. (a -> b) -> a -> b
$ (Parser ()
skipSpace Parser () -> Parser Path Char -> Parser Path Char
forall a b. Parser Path a -> Parser Path b -> Parser Path b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Char -> Parser Path Char
char Char
'-' Parser Path Char -> Parser () -> Parser ()
forall a b. Parser Path a -> Parser Path b -> Parser Path b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ()
skipSpace) Parser () -> Parser () -> Parser ()
forall a. Parser Path a -> Parser Path a -> Parser Path a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser ()
skipSpace
(Integer
hours_end , Integer
mins_end ) <- Parser (Integer, Integer)
parseHourMinute
TimeInterval -> Parser TimeInterval
forall a. a -> Parser Path a
forall (m :: * -> *) a. Monad m => a -> m a
return ( Integer -> DiffTime
secondsToDiffTime (Integer
hours_start Integer -> Integer -> Integer
forall a. Num a => a -> a -> a
* Integer
60 Integer -> Integer -> Integer
forall a. Num a => a -> a -> a
* Integer
60 Integer -> Integer -> Integer
forall a. Num a => a -> a -> a
+ Integer
mins_start Integer -> Integer -> Integer
forall a. Num a => a -> a -> a
* Integer
60)
, Integer -> DiffTime
secondsToDiffTime (Integer
hours_end Integer -> Integer -> Integer
forall a. Num a => a -> a -> a
* Integer
60 Integer -> Integer -> Integer
forall a. Num a => a -> a -> a
* Integer
60 Integer -> Integer -> Integer
forall a. Num a => a -> a -> a
+ Integer
mins_end Integer -> Integer -> Integer
forall a. Num a => a -> a -> a
* Integer
60))
allDay :: TimeInterval
allDay :: TimeInterval
allDay = ( Integer -> DiffTime
secondsToDiffTime Integer
0
, Integer -> DiffTime
secondsToDiffTime (Integer
24Integer -> Integer -> Integer
forall a. Num a => a -> a -> a
*Integer
60Integer -> Integer -> Integer
forall a. Num a => a -> a -> a
*Integer
60)
)
parseRequestRate :: Parser Directive
parseRequestRate :: Parser Directive
parseRequestRate = do
Parser Path -> Parser ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (Parser Path -> Parser ()) -> Parser Path -> Parser ()
forall a b. (a -> b) -> a -> b
$ Path -> Parser Path
stringCI Path
"Request-rate:"
Integer
docs <- Parser ()
skipSpace Parser () -> Parser Path Integer -> Parser Path Integer
forall a b. Parser Path a -> Parser Path b -> Parser Path b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Path Integer
forall a. Integral a => Parser a
decimal
Parser Path Char -> Parser ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (Parser Path Char -> Parser ()) -> Parser Path Char -> Parser ()
forall a b. (a -> b) -> a -> b
$ Parser ()
skipSpace Parser () -> Parser Path Char -> Parser Path Char
forall a b. Parser Path a -> Parser Path b -> Parser Path b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Char -> Parser Path Char
char Char
'/'
Integer
ptim <- Parser ()
skipSpace Parser () -> Parser Path Integer -> Parser Path Integer
forall a b. Parser Path a -> Parser Path b -> Parser Path b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Path Integer
forall a. Integral a => Parser a
decimal
Integer
units<- Parser ()
skipSpace Parser () -> Parser Path Integer -> Parser Path Integer
forall a b. Parser Path a -> Parser Path b -> Parser Path b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ( (Char -> Parser Path Char
char Char
's' Parser Path Char -> Parser Path Integer -> Parser Path Integer
forall a b. Parser Path a -> Parser Path b -> Parser Path b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Integer -> Parser Path Integer
forall a. a -> Parser Path a
forall (m :: * -> *) a. Monad m => a -> m a
return ( Integer
1 :: Integer))
Parser Path Integer -> Parser Path Integer -> Parser Path Integer
forall a. Parser Path a -> Parser Path a -> Parser Path a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Parser Path Char
char Char
'm' Parser Path Char -> Parser Path Integer -> Parser Path Integer
forall a b. Parser Path a -> Parser Path b -> Parser Path b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Integer -> Parser Path Integer
forall a. a -> Parser Path a
forall (m :: * -> *) a. Monad m => a -> m a
return ( Integer
60 :: Integer))
Parser Path Integer -> Parser Path Integer -> Parser Path Integer
forall a. Parser Path a -> Parser Path a -> Parser Path a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Parser Path Char
char Char
'h' Parser Path Char -> Parser Path Integer -> Parser Path Integer
forall a b. Parser Path a -> Parser Path b -> Parser Path b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Integer -> Parser Path Integer
forall a. a -> Parser Path a
forall (m :: * -> *) a. Monad m => a -> m a
return (Integer
60Integer -> Integer -> Integer
forall a. Num a => a -> a -> a
*Integer
60 :: Integer))
Parser Path Integer -> Parser Path Integer -> Parser Path Integer
forall a. Parser Path a -> Parser Path a -> Parser Path a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Integer -> Parser Path Integer
forall a. a -> Parser Path a
forall (m :: * -> *) a. Monad m => a -> m a
return ( Integer
1 :: Integer)
)
TimeInterval
tint <- Parser ()
skipSpace Parser () -> Parser TimeInterval -> Parser TimeInterval
forall a b. Parser Path a -> Parser Path b -> Parser Path b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ( Parser TimeInterval
parseTimeInterval Parser TimeInterval -> Parser TimeInterval -> Parser TimeInterval
forall a. Parser Path a -> Parser Path a -> Parser Path a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> TimeInterval -> Parser TimeInterval
forall a. a -> Parser Path a
forall (m :: * -> *) a. Monad m => a -> m a
return TimeInterval
allDay)
Directive -> Parser Directive
forall a. a -> Parser Path a
forall (m :: * -> *) a. Monad m => a -> m a
return (Directive -> Parser Directive) -> Directive -> Parser Directive
forall a b. (a -> b) -> a -> b
$ Rational -> TimeInterval -> Directive
CrawlDelay ((Integer
ptim Integer -> Integer -> Integer
forall a. Num a => a -> a -> a
* Integer
units) Integer -> Integer -> Rational
forall a. Integral a => a -> a -> Ratio a
% Integer
docs) TimeInterval
tint
parseVisitTime :: Parser Directive
parseVisitTime :: Parser Directive
parseVisitTime = do
Parser Path -> Parser ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (Parser Path -> Parser ()) -> Parser Path -> Parser ()
forall a b. (a -> b) -> a -> b
$ Path -> Parser Path
stringCI Path
"Visit-time:"
TimeInterval
tint <- Parser ()
skipSpace Parser () -> Parser TimeInterval -> Parser TimeInterval
forall a b. Parser Path a -> Parser Path b -> Parser Path b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser TimeInterval
parseTimeInterval
Directive -> Parser Directive
forall a. a -> Parser Path a
forall (m :: * -> *) a. Monad m => a -> m a
return (Directive -> Parser Directive) -> Directive -> Parser Directive
forall a b. (a -> b) -> a -> b
$ Rational -> TimeInterval -> Directive
CrawlDelay ( Integer
0 Integer -> Integer -> Rational
forall a. Integral a => a -> a -> Ratio a
% Integer
1) TimeInterval
tint
parseCrawlDelay :: Parser Directive
parseCrawlDelay :: Parser Directive
parseCrawlDelay = do
Rational
delay <- Path -> Parser Path
stringCI Path
"Crawl-Delay:" Parser Path -> Parser () -> Parser ()
forall a b. Parser Path a -> Parser Path b -> Parser Path b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ()
skipSpace Parser () -> Parser Rational -> Parser Rational
forall a b. Parser Path a -> Parser Path b -> Parser Path b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Rational
safeParseRational
Directive -> Parser Directive
forall a. a -> Parser Path a
forall (m :: * -> *) a. Monad m => a -> m a
return (Directive -> Parser Directive) -> Directive -> Parser Directive
forall a b. (a -> b) -> a -> b
$ Rational -> TimeInterval -> Directive
CrawlDelay Rational
delay TimeInterval
allDay
strip :: ByteString -> ByteString
strip :: Path -> Path
strip = Path -> Path
BS.reverse (Path -> Path) -> (Path -> Path) -> Path -> Path
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> Path -> Path
BS.dropWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
' ') (Path -> Path) -> (Path -> Path) -> Path -> Path
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Path -> Path
BS.reverse (Path -> Path) -> (Path -> Path) -> Path -> Path
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> Path -> Path
BS.dropWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
' ')
parseRobots :: ByteString -> Either String Robot
parseRobots :: Path -> Either [Char] Robot
parseRobots Path
input = case Either [Char] Robot
parsed of
Right ([], out :: [Path]
out@(Path
_:[Path]
_)) ->
[Char] -> Either [Char] Robot
forall a b. a -> Either a b
Left ([Char]
"no parsable lines: " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Path] -> [Char]
forall a. Show a => a -> [Char]
show [Path]
out)
Either [Char] Robot
_ -> Either [Char] Robot
parsed
where parsed :: Either [Char] Robot
parsed = Parser Robot -> Path -> Either [Char] Robot
forall a. Parser a -> Path -> Either [Char] a
parseOnly Parser Robot
robotP
(Path -> Either [Char] Robot)
-> (Path -> Path) -> Path -> Either [Char] Robot
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Path] -> Path
BS.unlines
([Path] -> Path) -> (Path -> [Path]) -> Path -> Path
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Path -> Bool) -> [Path] -> [Path]
forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not (Bool -> Bool) -> (Path -> Bool) -> Path -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Path -> Path -> Bool
BS.isPrefixOf Path
"SITEMAP:" (Path -> Bool) -> (Path -> Path) -> Path -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Char) -> Path -> Path
BS.map Char -> Char
toUpper)
([Path] -> [Path]) -> (Path -> [Path]) -> Path -> [Path]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Path -> Bool) -> [Path] -> [Path]
forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not (Bool -> Bool) -> (Path -> Bool) -> Path -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Path -> Path -> Bool
BS.isPrefixOf Path
"HOST:" (Path -> Bool) -> (Path -> Path) -> Path -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Char) -> Path -> Path
BS.map Char -> Char
toUpper)
([Path] -> [Path]) -> (Path -> [Path]) -> Path -> [Path]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Path -> Bool) -> [Path] -> [Path]
forall a. (a -> Bool) -> [a] -> [a]
filter (\Path
x -> Path -> Char
BS.head Path
x Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'#' )
([Path] -> [Path]) -> (Path -> [Path]) -> Path -> [Path]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Path -> Bool) -> [Path] -> [Path]
forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not (Bool -> Bool) -> (Path -> Bool) -> Path -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Path -> Bool
BS.null)
([Path] -> [Path]) -> (Path -> [Path]) -> Path -> [Path]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Path -> Path) -> [Path] -> [Path]
forall a b. (a -> b) -> [a] -> [b]
map Path -> Path
strip
([Path] -> [Path]) -> (Path -> [Path]) -> Path -> [Path]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Path -> [Path]
BS.lines
(Path -> [Path]) -> (Path -> Path) -> Path -> [Path]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> Path -> Path
BS.filter (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'\r')
(Path -> Path) -> (Path -> Path) -> Path -> Path
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Path -> Path
dropUTF8BOM
(Path -> Either [Char] Robot) -> Path -> Either [Char] Robot
forall a b. (a -> b) -> a -> b
$ Path
input
robotP :: Parser Robot
robotP :: Parser Robot
robotP = do
([([UserAgent], [Directive])]
dirs, [Path]
unparsable) <- [Either ([UserAgent], [Directive]) Path] -> Robot
forall a b. [Either a b] -> ([a], [b])
partitionEithers ([Either ([UserAgent], [Directive]) Path] -> Robot)
-> Parser Path [Either ([UserAgent], [Directive]) Path]
-> Parser Robot
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Path (Either ([UserAgent], [Directive]) Path)
-> Parser Path [Either ([UserAgent], [Directive]) Path]
forall a. Parser Path a -> Parser Path [a]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many (Parser Path ([UserAgent], [Directive])
-> Parser Path
-> Parser Path (Either ([UserAgent], [Directive]) Path)
forall (f :: * -> *) a b.
Alternative f =>
f a -> f b -> f (Either a b)
eitherP Parser Path ([UserAgent], [Directive])
agentDirectiveP Parser Path
unparsableP) Parser Robot -> [Char] -> Parser Robot
forall i a. Parser i a -> [Char] -> Parser i a
<?> [Char]
"robot"
Robot -> Parser Robot
forall a. a -> Parser Path a
forall (m :: * -> *) a. Monad m => a -> m a
return ([([UserAgent], [Directive])]
dirs, (Path -> Bool) -> [Path] -> [Path]
forall a. (a -> Bool) -> [a] -> [a]
filter (Path -> Path -> Bool
forall a. Eq a => a -> a -> Bool
/= Path
"") [Path]
unparsable)
unparsableP :: Parser ByteString
unparsableP :: Parser Path
unparsableP = (Char -> Bool) -> Parser Path
takeTill Char -> Bool
AT.isEndOfLine Parser Path -> Parser () -> Parser Path
forall a b. Parser Path a -> Parser Path b -> Parser Path a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
endOfLine
agentDirectiveP :: Parser ([UserAgent],[Directive])
agentDirectiveP :: Parser Path ([UserAgent], [Directive])
agentDirectiveP = (,) ([UserAgent] -> [Directive] -> ([UserAgent], [Directive]))
-> Parser Path [UserAgent]
-> Parser Path ([Directive] -> ([UserAgent], [Directive]))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Path UserAgent -> Parser Path [UserAgent]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many1 Parser Path UserAgent
agentP Parser Path ([Directive] -> ([UserAgent], [Directive]))
-> Parser Path [Directive]
-> Parser Path ([UserAgent], [Directive])
forall a b. Parser Path (a -> b) -> Parser Path a -> Parser Path b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser Directive -> Parser Path [Directive]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many1 Parser Directive
directiveP Parser Path ([UserAgent], [Directive])
-> [Char] -> Parser Path ([UserAgent], [Directive])
forall i a. Parser i a -> [Char] -> Parser i a
<?> [Char]
"agentDirective"
skipSpace :: Parser ()
skipSpace :: Parser ()
skipSpace = (Char -> Bool) -> Parser ()
skipWhile (\Char
x -> Char
xChar -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
' ' Bool -> Bool -> Bool
|| Char
x Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\t')
directiveP :: Parser Directive
directiveP :: Parser Directive
directiveP = do
Parser ()
skipSpace
[Parser Directive] -> Parser Directive
forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice [ Path -> Parser Path
stringCI Path
"Disallow:" Parser Path -> Parser () -> Parser ()
forall a b. Parser Path a -> Parser Path b -> Parser Path b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ()
skipSpace Parser () -> Parser Directive -> Parser Directive
forall a b. Parser Path a -> Parser Path b -> Parser Path b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>>
((Path -> Directive
Disallow (Path -> Directive) -> Parser Path -> Parser Directive
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Path
tokenP) Parser Directive -> Parser Directive -> Parser Directive
forall a. Parser Path a -> Parser Path a -> Parser Path a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|>
(Parser ()
endOfLine Parser () -> Parser Directive -> Parser Directive
forall a b. Parser Path a -> Parser Path b -> Parser Path b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Directive -> Parser Directive
forall a. a -> Parser Path a
forall (m :: * -> *) a. Monad m => a -> m a
return (Path -> Directive
Allow Path
"/")))
, Path -> Parser Path
stringCI Path
"Allow:" Parser Path -> Parser () -> Parser ()
forall a b. Parser Path a -> Parser Path b -> Parser Path b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ()
skipSpace Parser () -> Parser Directive -> Parser Directive
forall a b. Parser Path a -> Parser Path b -> Parser Path b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>>
((Path -> Directive
Allow (Path -> Directive) -> Parser Path -> Parser Directive
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Path
tokenP) Parser Directive -> Parser Directive -> Parser Directive
forall a. Parser Path a -> Parser Path a -> Parser Path a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|>
(Parser ()
endOfLine Parser () -> Parser Directive -> Parser Directive
forall a b. Parser Path a -> Parser Path b -> Parser Path b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Directive -> Parser Directive
forall a. a -> Parser Path a
forall (m :: * -> *) a. Monad m => a -> m a
return (Path -> Directive
Disallow Path
"/")))
, Parser Directive
parseCrawlDelay
, Parser Directive
parseRequestRate
, Parser Directive
parseVisitTime
, Path -> Directive
NoArchive (Path -> Directive) -> Parser Path -> Parser Directive
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Path -> Parser Path
stringCI Path
"Noarchive:" Parser Path -> Parser () -> Parser ()
forall a b. Parser Path a -> Parser Path b -> Parser Path b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ()
skipSpace Parser () -> Parser Path -> Parser Path
forall a b. Parser Path a -> Parser Path b -> Parser Path b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Path
tokenP)
, Path -> Directive
NoSnippet (Path -> Directive) -> Parser Path -> Parser Directive
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Path -> Parser Path
stringCI Path
"Nosnippet:" Parser Path -> Parser () -> Parser ()
forall a b. Parser Path a -> Parser Path b -> Parser Path b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ()
skipSpace Parser () -> Parser Path -> Parser Path
forall a b. Parser Path a -> Parser Path b -> Parser Path b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Path
tokenP)
, Path -> Directive
NoTranslate (Path -> Directive) -> Parser Path -> Parser Directive
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Path -> Parser Path
stringCI Path
"Notranslate:"Parser Path -> Parser () -> Parser ()
forall a b. Parser Path a -> Parser Path b -> Parser Path b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ()
skipSpace Parser () -> Parser Path -> Parser Path
forall a b. Parser Path a -> Parser Path b -> Parser Path b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Path
tokenP)
, Path -> Directive
NoIndex (Path -> Directive) -> Parser Path -> Parser Directive
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Path -> Parser Path
stringCI Path
"Noindex:" Parser Path -> Parser () -> Parser ()
forall a b. Parser Path a -> Parser Path b -> Parser Path b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ()
skipSpace Parser () -> Parser Path -> Parser Path
forall a b. Parser Path a -> Parser Path b -> Parser Path b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Path
tokenP)
] Parser Directive -> Parser () -> Parser Directive
forall a b. Parser Path a -> Parser Path b -> Parser Path a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
commentsP Parser Directive -> [Char] -> Parser Directive
forall i a. Parser i a -> [Char] -> Parser i a
<?> [Char]
"directive"
agentP :: Parser UserAgent
agentP :: Parser Path UserAgent
agentP = do
Parser Path -> Parser ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (Parser Path -> Parser ()) -> Parser Path -> Parser ()
forall a b. (a -> b) -> a -> b
$ Path -> Parser Path
stringCI Path
"user-agent:"
Parser ()
skipSpace
((Path -> Parser Path
string Path
"*" Parser Path -> Parser Path UserAgent -> Parser Path UserAgent
forall a b. Parser Path a -> Parser Path b -> Parser Path b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> UserAgent -> Parser Path UserAgent
forall a. a -> Parser Path a
forall (m :: * -> *) a. Monad m => a -> m a
return UserAgent
Wildcard) Parser Path UserAgent
-> Parser Path UserAgent -> Parser Path UserAgent
forall a. Parser Path a -> Parser Path a -> Parser Path a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|>
(Path -> UserAgent
Literal (Path -> UserAgent) -> Parser Path -> Parser Path UserAgent
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Path
tokenWithSpacesP)) Parser Path UserAgent -> Parser () -> Parser Path UserAgent
forall a b. Parser Path a -> Parser Path b -> Parser Path a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
skipSpace Parser Path UserAgent -> Parser () -> Parser Path UserAgent
forall a b. Parser Path a -> Parser Path b -> Parser Path a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
endOfLine Parser Path UserAgent -> [Char] -> Parser Path UserAgent
forall i a. Parser i a -> [Char] -> Parser i a
<?> [Char]
"agent"
commentsP :: Parser ()
= Parser ()
skipSpace Parser () -> Parser () -> Parser ()
forall a b. Parser Path a -> Parser Path b -> Parser Path b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>>
( (Path -> Parser Path
string Path
"#" Parser Path -> Parser Path -> Parser Path
forall a b. Parser Path a -> Parser Path b -> Parser Path b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> (Char -> Bool) -> Parser Path
takeTill Char -> Bool
AT.isEndOfLine Parser Path -> Parser () -> Parser ()
forall a b. Parser Path a -> Parser Path b -> Parser Path b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ()
endOfLine)
Parser () -> Parser () -> Parser ()
forall a. Parser Path a -> Parser Path a -> Parser Path a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser ()
endOfLine
Parser () -> Parser () -> Parser ()
forall a. Parser Path a -> Parser Path a -> Parser Path a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> () -> Parser ()
forall a. a -> Parser Path a
forall (m :: * -> *) a. Monad m => a -> m a
return ())
tokenP :: Parser ByteString
tokenP :: Parser Path
tokenP = Parser ()
skipSpace Parser () -> Parser Path -> Parser Path
forall a b. Parser Path a -> Parser Path b -> Parser Path b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> (Char -> Bool) -> Parser Path
takeWhile1 (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
isSpace) Parser Path -> Parser () -> Parser Path
forall a b. Parser Path a -> Parser Path b -> Parser Path a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
skipSpace
tokenWithSpacesP :: Parser ByteString
tokenWithSpacesP :: Parser Path
tokenWithSpacesP = Parser ()
skipSpace Parser () -> Parser Path -> Parser Path
forall a b. Parser Path a -> Parser Path b -> Parser Path b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> (Char -> Bool) -> Parser Path
takeWhile1 (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (\Char
c -> Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'#' Bool -> Bool -> Bool
|| Char -> Bool
AT.isEndOfLine Char
c))
Parser Path -> Parser Path -> Parser Path
forall a b. Parser Path a -> Parser Path b -> Parser Path a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* (Char -> Bool) -> Parser Path
takeTill Char -> Bool
AT.isEndOfLine
canAccess :: ByteString -> Robot -> Path -> Bool
canAccess :: Path -> Robot -> Path -> Bool
canAccess Path
_ Robot
_ Path
"/robots.txt" = Bool
True
canAccess Path
agent ([([UserAgent], [Directive])]
robot,[Path]
_) Path
path = case [([UserAgent], [Directive])]
stanzas of
[] -> Bool
True
(([UserAgent]
_,[Directive]
directives):[([UserAgent], [Directive])]
_) -> [Directive] -> Bool
matchingDirective [Directive]
directives
where stanzas :: [([UserAgent], [Directive])]
stanzas = [Maybe ([UserAgent], [Directive])] -> [([UserAgent], [Directive])]
forall a. [Maybe a] -> [a]
catMaybes [(([UserAgent], [Directive]) -> Bool)
-> [([UserAgent], [Directive])] -> Maybe ([UserAgent], [Directive])
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
find ((UserAgent -> Bool) -> [UserAgent] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any (UserAgent -> Path -> Bool
`isLiteralSubstring` Path
agent) ([UserAgent] -> Bool)
-> (([UserAgent], [Directive]) -> [UserAgent])
-> ([UserAgent], [Directive])
-> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([UserAgent], [Directive]) -> [UserAgent]
forall a b. (a, b) -> a
fst) [([UserAgent], [Directive])]
robot,
(([UserAgent], [Directive]) -> Bool)
-> [([UserAgent], [Directive])] -> Maybe ([UserAgent], [Directive])
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
find ((UserAgent
Wildcard UserAgent -> [UserAgent] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem`) ([UserAgent] -> Bool)
-> (([UserAgent], [Directive]) -> [UserAgent])
-> ([UserAgent], [Directive])
-> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([UserAgent], [Directive]) -> [UserAgent]
forall a b. (a, b) -> a
fst) [([UserAgent], [Directive])]
robot]
isLiteralSubstring :: UserAgent -> Path -> Bool
isLiteralSubstring (Literal Path
a) Path
us = Path
a Path -> Path -> Bool
`BS.isInfixOf` Path
us
isLiteralSubstring UserAgent
_ Path
_ = Bool
False
matchingDirective :: [Directive] -> Bool
matchingDirective [] = Bool
True
matchingDirective (Directive
x:[Directive]
xs) = case Directive
x of
Allow Path
robot_path ->
Path
robot_path Path -> Path -> Bool
`BS.isPrefixOf` Path
path Bool -> Bool -> Bool
|| [Directive] -> Bool
matchingDirective [Directive]
xs
Disallow Path
robot_path ->
Bool -> Bool
not (Path
robot_path Path -> Path -> Bool
`BS.isPrefixOf` Path
path) Bool -> Bool -> Bool
&& [Directive] -> Bool
matchingDirective [Directive]
xs
Directive
_ -> [Directive] -> Bool
matchingDirective [Directive]
xs