{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE DeriveAnyClass #-} {-# LANGUAGE LambdaCase #-} module Data.BrowsCap ( BrowsCap(..) , BrowserEntry(..) , BrowserType(..), DeviceType(..), PointingMethod(..), BitCount(..) , loadBrowsCap , lookupBrowser ) where import Control.DeepSeq import Control.Monad import Control.Monad.Except import qualified Data.ByteString as BS import qualified Data.Text as T import qualified Data.Text.Encoding as TE import qualified Data.HashSet as Set import qualified Data.HashMap.Strict as HM import qualified Data.Cache.LRU.IO as LRU import qualified Data.Ini as Ini import Data.Maybe import Data.Cache.LRU.IO (AtomicLRU) import Data.List import GHC.Generics data BrowsCap = BrowsCap { bcEntries :: ![BrowserEntry] , bcCache :: AtomicLRU BS.ByteString BrowserEntry } data MatchPart = TPart !T.Text | One | Many deriving (Eq, Ord, Show, Read, Generic, NFData) data BrowserType = Browser | Application | BotCrawler | UseragentAnonymizer | OfflineBrowser | MultimediaPlayer | Library | FeedReader | EmailClient | Tool | BrowserTypeUnknown deriving (Read, Show, Eq, Ord, Generic, NFData) readBrowserType :: T.Text -> Except String BrowserType readBrowserType "Browser" = return Browser readBrowserType "Application" = return Application readBrowserType "Bot/Crawler" = return BotCrawler readBrowserType "Useragent Anonymizer" = return UseragentAnonymizer readBrowserType "Offline Browser" = return OfflineBrowser readBrowserType "Multimedia Player" = return MultimediaPlayer readBrowserType "Library" = return Library readBrowserType "Feed Reader" = return FeedReader readBrowserType "Email Client" = return EmailClient readBrowserType "Tool" = return Tool readBrowserType "unknown" = return BrowserTypeUnknown readBrowserType bt = throwError $ "Unspecified browser type: "++T.unpack bt data DeviceType = DeviceMobilePhone | DeviceMobileDevice | DeviceTablet | DeviceDesktop | DeviceTV | DeviceConsole | DeviceFonePad | DeviceEbookReader | DeviceCarEntertainmentSystem | DeviceDigitalCamera | DeviceUnknown deriving (Read, Show, Eq, Ord, Generic, NFData) readDeviceType :: T.Text -> Except String DeviceType readDeviceType "Mobile Phone" = return DeviceMobilePhone readDeviceType "Mobile Device" = return DeviceMobileDevice readDeviceType "Tablet" = return DeviceTablet readDeviceType "Desktop" = return DeviceDesktop readDeviceType "TV Device" = return DeviceTV readDeviceType "Console" = return DeviceConsole readDeviceType "FonePad" = return DeviceFonePad readDeviceType "Ebook Reader" = return DeviceEbookReader readDeviceType "Car Entertainment System" = return DeviceCarEntertainmentSystem readDeviceType "Digital Camera" = return DeviceDigitalCamera readDeviceType "unknown" = return DeviceUnknown readDeviceType d = throwError $ "Unlisted DeviceType: " ++ T.unpack d data PointingMethod = Mouse | Touchscreen | Joystick | Stylus | Clickwheel | Trackpad | Trackball | PointingDeviceUnknown deriving (Read, Show, Eq, Ord, Generic, NFData) readPointingMethod :: T.Text -> Except String PointingMethod readPointingMethod "mouse" = return Mouse readPointingMethod "touchscreen" = return Touchscreen readPointingMethod "joystick" = return Joystick readPointingMethod "stylus" = return Stylus readPointingMethod "clickwheel" = return Clickwheel readPointingMethod "trackpad" = return Trackpad readPointingMethod "trackball" = return Trackball readPointingMethod "unknown" = return PointingDeviceUnknown readPointingMethod pm = throwError $ "Unlisted pointing method: "++T.unpack pm data BitCount = Bits0 | Bits8 | Bits16 | Bits32 | Bits64 deriving (Read, Show, Eq, Ord, Generic, NFData) readBitCount :: T.Text -> Except String BitCount readBitCount "0" = return Bits0 readBitCount "8" = return Bits8 readBitCount "16" = return Bits16 readBitCount "32" = return Bits32 readBitCount "64" = return Bits64 readBitCount b = throwError $ "Invalid bit count: " ++ T.unpack b data BrowserEntry = BrowserEntry { beUserAgent :: !T.Text , beUserAgentMatcher :: ![MatchPart] , beBrowser :: !T.Text , beBrowserType :: !BrowserType , beBrowserBits :: !BitCount , beBrowserMaker :: !T.Text -- Modus skipped , beVersion :: !T.Text , beMajorVer :: !T.Text , beMinorVer :: !T.Text , bePlatform :: !T.Text , bePlatformVersion :: !T.Text , bePlatformDescription :: !T.Text , bePlatformBits :: !BitCount , bePlatformMaker :: !T.Text , beSyndicationReader :: !Bool , beCSSVersion :: !T.Text , beDeviceName :: !T.Text , beDeviceMaker :: !T.Text , beDeviceType :: !DeviceType , beDevicePointingMethod :: !PointingMethod , beDeviceCodeName :: !T.Text , beDeviceBrandName :: !T.Text , beRenderingEngineName :: !T.Text , beRenderingEngineVer :: !T.Text , beRenderingEngineDesc :: !T.Text , beRenderingEngineMaker :: !T.Text } deriving (Eq, Ord, Show, Read, Generic, NFData) -- This would be faster if it returned a list of offsets -- and took the origional string and dropped the already-matched length. -- That is because it would improve match simplification. nextOptions :: [MatchPart] -> T.Text -> [T.Text] nextOptions ((TPart t0):_) rest = maybeToList $ T.stripPrefix t0 rest nextOptions (Many:(TPart t0):_) rest = map snd $ T.breakOnAll t0 rest nextOptions (One:_) rest = maybeToList $ fmap snd $ T.uncons rest nextOptions [Many] _ = [""] nextOptions [] rest = if T.null rest then [""] else [] nextOptions mp _ = error $ "Failed match: " ++ show mp isMatch :: T.Text -> [MatchPart] -> Bool isMatch t mparts = let finalEnds = foldl (\ends mp -> concatMap (Set.toList . Set.fromList . nextOptions mp) ends) [t] $ tails mparts in "" `elem` finalEnds toMatcher :: T.Text -> [MatchPart] toMatcher t = optimize $ map toMatchPart $ T.unpack t where toMatchPart '?' = One toMatchPart '*' = Many toMatchPart c = TPart (T.pack [c]) optimize ((TPart t0):(TPart t1):r) = optimize ((TPart (t0 `T.append` t1)):r) optimize (Many:Many:r) = optimize (Many : r) optimize (One:Many:r) = optimize (Many : r) optimize (Many:One:r) = optimize (Many : r) optimize (x:xs) = x : optimize xs optimize [] = [] lookupBrowser :: BrowsCap -> BS.ByteString -> IO (Maybe BrowserEntry) lookupBrowser BrowsCap {bcEntries=entries, bcCache=cacheRef} ua = do cache <- LRU.lookup ua cacheRef case cache of Just be -> return $ Just be Nothing -> do case bestMatching of Nothing -> return Nothing Just be -> do LRU.insert ua be cacheRef return $ Just be where bestMatching = listToMaybe $ map snd $ sortBy cmpByFst $ map (\be -> (T.length $ beUserAgent be, be)) allMatching allMatching = mapMaybe match entries cmpByFst a b = compare (fst b) (fst a) match :: BrowserEntry -> Maybe BrowserEntry match be = if isMatch (TE.decodeUtf8 ua) $ beUserAgentMatcher be then Just be else Nothing loadBrowsCap :: T.Text -> IO BrowsCap loadBrowsCap txt = do case runExcept ((either throwError return $ Ini.parseIni txt) >>= convertShims) of Left err -> fail err Right bces -> do bccR <- LRU.newAtomicLRU (Just 16) bces `deepseq` return $ BrowsCap { bcEntries = bces , bcCache = bccR } convertShims :: Ini.Ini -> Except String [BrowserEntry] convertShims dPlusVer = mapM buildBE . HM.keys $ d where d = HM.delete "GJK_Browscap_Version" . Ini.unIni $ dPlusVer mErr :: String -> Maybe a -> Except String a mErr err Nothing = throwError err mErr _ (Just a) = return a sectionValue :: T.Text -> T.Text -> Maybe T.Text sectionValue s v = fmap dropQuotes . join . fmap (HM.lookup v) . HM.lookup s $ d beVal :: T.Text -> T.Text -> Except String T.Text beVal b v | b == "DefaultProperties" = mErr ("Could not find "++T.unpack v++" for "++T.unpack b) . sectionValue b $ v | otherwise = do -- All non-DefaultProperty entries must have a parent. p <- mErr ("Could not find Parent for "++T.unpack b) . sectionValue b $ "Parent" maybe (beVal p v) (return) . sectionValue b $ v readBool :: T.Text -> Except String Bool readBool "true" = return True readBool "false" = return False readBool o = throwError $ "Bool format invalid: "++T.unpack o -- If the value is eclosed in quotes, remove them dropQuotes :: T.Text -> T.Text dropQuotes = T.dropWhile (=='"') . T.dropWhileEnd (=='"') buildBE :: T.Text -> Except String BrowserEntry buildBE a = do browser <- beVal a "Browser" bType <- readBrowserType =<< beVal a "Browser_Type" bBits <- readBitCount =<< beVal a "Browser_Bits" bMaker <- beVal a "Browser_Maker" ver <- beVal a "Version" verMaj <- beVal a "MajorVer" verMin <- beVal a "MinorVer" plat <- beVal a "Platform" platVer <- beVal a "Platform_Version" platDesc <- beVal a "Platform_Description" platBits <- readBitCount =<< beVal a "Platform_Bits" platMaker <- beVal a "Platform_Maker" synd <- readBool =<< beVal a "isSyndicationReader" cssVer <- beVal a "CssVersion" devName <- beVal a "Device_Name" devMaker <- beVal a "Device_Maker" devType <- readDeviceType =<< beVal a "Device_Type" devPointMeth <- readPointingMethod =<< beVal a "Device_Pointing_Method" devCodeName <- beVal a "Device_Code_Name" devBrand <- beVal a "Device_Brand_Name" rendName <- beVal a "RenderingEngine_Name" rendVer <- beVal a "RenderingEngine_Version" rendDesc <- beVal a "RenderingEngine_Description" rendMaker <- beVal a "RenderingEngine_Maker" return $ BrowserEntry { beUserAgent = a , beUserAgentMatcher = toMatcher a , beBrowser = browser , beBrowserType = bType , beBrowserBits = bBits , beBrowserMaker = bMaker , beVersion = ver , beMajorVer = verMaj , beMinorVer = verMin , bePlatform = plat , bePlatformVersion = platVer , bePlatformDescription = platDesc , bePlatformBits = platBits , bePlatformMaker = platMaker , beSyndicationReader = synd , beCSSVersion = cssVer , beDeviceName = devName , beDeviceMaker = devMaker , beDeviceType = devType , beDevicePointingMethod = devPointMeth , beDeviceCodeName = devCodeName , beDeviceBrandName = devBrand , beRenderingEngineName = rendName , beRenderingEngineVer = rendVer , beRenderingEngineDesc = rendDesc , beRenderingEngineMaker = rendMaker }