{-# LANGUAGE OverloadedStrings #-}
module Network.PublicSuffixList.Lookup (effectiveTLDPlusOne, effectiveTLDPlusOne', isSuffix, isSuffix') where

import qualified Data.Map          as M
import           Data.Maybe (isNothing)
import qualified Data.Text         as T

import qualified Network.PublicSuffixList.DataStructure as DS
import           Network.PublicSuffixList.Types

{-|
OffEnd's Bool argument represents whether we fell off a
leaf or whether we fell off a non-leaf. True means that
we fell off a leaf. Its Text argument is the component
that pushed us off the end, along with all the components
to the right of that one, interspersed with "."s
-}
data LookupResult = Inside | AtLeaf | OffEnd Bool T.Text
  deriving (Eq)

{-|
This function returns whether or not this domain is owned by a
registrar or a regular person. 'Nothing' means that this is a registrar
domain; 'Just x' means it's owned by a person. This is used to determine
if a cookie is allowed to bet set for a particular domain. For
example, you shouldn't be able to set a cookie for \"com\".

If the value is 'Just x', then the x value is what is known as the
effective TLD plus one. This is one segment more than the suffix of the
domain. For example, the eTLD+1 for "this.is.a.subdom.com" is Just
"subdom.com"

Note that this function expects lowercase ASCII strings. These strings
should be gotten from the toASCII algorithm as described in RFC 3490.
These strings should not start or end with the \'.\' character, and should
not have two \'.\' characters next to each other.
(The toASCII algorithm is implemented in the \'idna\' hackage package,
though that package doesn't always map strings to lowercase)
-}
effectiveTLDPlusOne' :: DataStructure -> T.Text -> Maybe T.Text
effectiveTLDPlusOne' dataStructure s
  -- Any TLD is a suffix
  | length ss == 1 = Nothing
  | otherwise = output rulesResult exceptionResult
  where ss = T.splitOn "." s
        ps = reverse ss
        exceptionResult = recurse ps [] $ snd dataStructure
        rulesResult = recurse ps [] $ fst dataStructure
        -- If we fell off, did we do it at a leaf? Otherwise, what's the
        -- subtree that we're at
        getNext :: Tree T.Text -> T.Text -> Either Bool (Tree T.Text)
        getNext t s' = case M.lookup s' $ children t of
          Nothing -> Left (M.null $ children t)
          Just t' -> Right t'
        -- Look up the component we're looking for...
        getNextWithStar t s' = case getNext t s' of
          -- and if that fails, look up "*"
          Left _ -> getNext t "*"
          r -> r
        recurse :: [T.Text] -> [T.Text] -> Tree T.Text -> LookupResult
        recurse [] _ t
          | M.null $ children t = AtLeaf
          | otherwise = Inside
        recurse (c : cs) prev t = case getNextWithStar t c of
          Left b -> OffEnd b $ T.intercalate "." (c : prev)
          Right t' -> recurse cs (c : prev) t'
        -- Only match against the exception rules if we have a full match
        output _ AtLeaf = Just s
        output _ (OffEnd True x) = Just $ T.intercalate "." $ tail $ T.splitOn "." x
        -- If we have a subdomain on an existing rule, we're not a suffix
        output (OffEnd _ x) _
          -- A single level domain can never be a eTLD+1
          | isNothing $ T.find (== '.') x = Just $ T.intercalate "." $ drop (length ss - 2) ss
          | otherwise = Just x
        -- Otherwise, we're a suffix of a suffix, which is a suffix
        output _ _ = Nothing

-- | >>> effectiveTLDPlusOne = effectiveTLDPlusOne' Network.PublicSuffixList.DataStructure.dataStructure
effectiveTLDPlusOne :: T.Text -> Maybe T.Text
effectiveTLDPlusOne = effectiveTLDPlusOne' DS.dataStructure

-- | >>> isSuffix' dataStructure = isNothing . effectiveTLDPlusOne' dataStructure
isSuffix' :: DataStructure -> T.Text -> Bool
isSuffix' dataStructure = isNothing . effectiveTLDPlusOne' dataStructure

-- | >>> isSuffix = isSuffix' Network.PublicSuffixList.DataStructure.dataStructure
isSuffix :: T.Text -> Bool
isSuffix = isNothing . effectiveTLDPlusOne