{-# LANGUAGE OverloadedStrings, RecordWildCards #-}
-- | Shared result types and content-type helpers for the feed crawler.
module Text.Feed.Crawl.Common where

import Data.Char (isSpace, toLower)
import qualified Data.ByteString.Char8 as B
import qualified Data.ByteString.Lazy.Char8 as BL
import Network.HTTP.Conduit (HttpException)

-- | Outcome of a crawl: 'Left' carries a 'CrawlFail', 'Right' a 'CrawlSuccess'.
type CrawlResult = Either CrawlFail CrawlSuccess

-- | Ways a crawl can fail to produce feed content.
data CrawlFail
  = -- | A lazy response body together with a list of 'Link's.
    -- NOTE(review): judging by the field names, this is the case where the
    -- response was not itself a feed but advertised feed links -- confirm
    -- against the code that constructs it.
    CrawlFoundFeedLinks
      { crawlNotFeedResponse :: BL.ByteString
      , crawlNotFeedWithLinks :: [Link]
      }
  | -- | Wraps an 'HttpException' from "Network.HTTP.Conduit".
    CrawlHttpError HttpException
  deriving Show

-- | Payload of a successful crawl.
-- NOTE(review): field meanings below are inferred from their names only.
data CrawlSuccess = CrawlSuccess
  { crawlLastContentType :: Maybe B.ByteString
    -- ^ presumably the content type of the final response, if any
  , crawlLastUrl :: B.ByteString
    -- ^ presumably the URL of the final response
  , crawlFeedContent :: BL.ByteString
    -- ^ presumably the feed body
  } deriving Show

-- | Summary of a response.
-- NOTE(review): presumably mirrors the HTTP status code and the @Location@ /
-- @Content-Type@ headers -- confirm against the code that builds it.
data Status = Status
  { sStatusCode :: Int
  , sLocation :: Maybe B.ByteString
  , sContentType :: Maybe B.ByteString
  } deriving Show

-- | A link described by four 'String' attributes: rel, href, type and title.
data Link = Link
  { linkRel :: String
  , linkHref :: String
  , linkType :: String
  , linkTitle :: String
  } deriving Show

-- | Decide whether a content-type value names one of the 'feedMimeTypes'.
--
-- Only the part before the first @;@ is considered, so parameters such as
-- @charset@ are ignored. Surrounding whitespace is trimmed and the comparison
-- is case-insensitive, so @"Text/XML ; charset=utf-8"@ is accepted.
--
-- Returns 'False' when no content type is given.
isFeedContentType :: Maybe B.ByteString -> Bool
isFeedContentType Nothing = False -- right logic? maybe default to trying to parse unknown type
isFeedContentType (Just bs) =
  -- e.g. input is "text/html; charset=utf-8"
  map toLower (B.unpack (trim mimetype)) `elem` feedMimeTypes
  where
    (mimetype, _) = B.break (== ';') bs
    -- Optional whitespace is allowed around the media type and before the
    -- ';', so strip it from both ends before comparing.
    trim = B.dropWhile isSpace . fst . B.spanEnd isSpace

-- | Lower-case MIME types that 'isFeedContentType' treats as feed content.
feedMimeTypes :: [String]
feedMimeTypes =
  [ "application/rss+xml"
  , "application/rdf+xml"
  , "application/atom+xml"
  , "application/xml"
  , "text/xml"
  ]