module Text.Matchers
  ( Matcher(..)
  , CaseSensitive(..)
  , tdfa
  , pcre
  , within
  , exact
  , CompUTC(..)
  , descUTC
  , compUTCtoCmp
  , date
  ) where

import Control.Applicative ((<$>), (<*>), (<*), (<$), optional, (<|>))
import Control.Monad (replicateM, mzero)
import Control.Monad.Exception.Synchronous
  ( Exceptional (Exception, Success))
import qualified Data.ByteString as BS
import Data.Fixed (Pico)
import Data.Maybe (fromMaybe)
import Data.Text (Text, pack, unpack, toCaseFold, isInfixOf)
import Data.Text.Encoding (encodeUtf8)
import qualified Text.Regex.TDFA as TDFA
import qualified Text.Regex.Base.RegexLike as RL
import qualified Text.Regex.PCRE.Light as PCRE
import Text.Parsec (many, satisfy)
import qualified Text.Parsec as P
import Text.Parsec.Text (Parser)
import qualified Data.Time as Time

data CaseSensitive = Sensitive | Insensitive deriving (Eq, Ord, Show)

data Matcher = Matcher
  { shortDesc :: Text
    -- ^ Short description of this matcher, e.g. @PCRE@ or @Exact@.

  , matchDesc :: Text
    -- ^ Description of a successful match, e.g.
    -- @Matches the PCRE pattern abc@, or
    -- @a valid date with optional time@.

  , match :: Text -> Bool
    -- ^ Function to carry out the match
    }

-- | Uses the regular expression matcher from the regex-tdfa
-- package. This is a POSIX extended regular expression. It should
-- work correctly with Unicode.
tdfa
  :: CaseSensitive

  -> Text
  -- ^ The pattern

  -> Exceptional Text Matcher
  -- ^ The Matcher if the pattern is good; if the pattern is bad,
  -- returns an error message. The error message has a trailing
  -- newline.

tdfa c t = case tdfaPrim c (unpack t) of
  Exception e -> Exception $ pack e
  Success f ->
    let sDesc = pack "POSIX-like regular expression (TDFA)"
        mrDesc = pack $ "matches the POSIX regular expression \""
          ++ unpack t ++ "\"" ++ descSensitive c
        mr = f . unpack
    in return $ Matcher sDesc mrDesc mr

descSensitive :: CaseSensitive -> String
descSensitive c = case c of
  Sensitive -> " (case sensitive)"
  Insensitive -> " (case insensitive)"

-- | Uses the PCRE regular expression engine. Currently the pcre-light
-- package is used, as it has a simpler interface than the
-- regex-pcre-builtin. It should work correctly with Unicode.
pcre
  :: CaseSensitive

  -> Text
  -- ^ Pattern

  -> Exceptional Text Matcher
  -- ^ The Matcher if the pattern is good; if the pattern is bad,
  -- returns an error message.

pcre c t = case pcrePrim c (encodeUtf8 t) of
  Exception e -> Exception $ pack e
  Success f ->
    let sDesc = pack "Perl-compatible regular expression"
        mrDesc = pack $ "matches the PCRE pattern \""
          ++ unpack t ++ "\"" ++ descSensitive c
        mr = f . encodeUtf8
    in return $ Matcher sDesc mrDesc mr

-- | Matcher that succeeds if the pattern text is found anywhere
-- within the subject.
within
  :: CaseSensitive

  -> Text
  -- ^ The pattern

  -> Matcher
within cs t = Matcher sDesc mrDesc mr
  where
    sDesc = pack "within"
    mrDesc = pack $ "contains the text \"" ++ unpack t
             ++ "\"" ++ descSensitive cs
    mr = txtMatch isInfixOf cs t

-- | Matcher that succeeds if the pattern text exactly matches the
-- subject (with case sensitivity as appropriate.)
exact :: CaseSensitive -> Text -> Matcher
exact cs t = Matcher sDesc mrDesc mr
  where
    sDesc = pack "exact"
    mrDesc = pack $ "matches the text \"" ++ unpack t
             ++ "\"" ++ descSensitive cs
    mr = txtMatch (==) cs t

txtMatch :: (Text -> Text -> Bool)
            -> CaseSensitive
            -> Text
            -> Text -> Bool
txtMatch f c p t = pat `f` txt where
  txt = flipCase t
  pat = flipCase p
  flipCase = case c of
    Sensitive -> id
    Insensitive -> toCaseFold

-- | Matcher that succeeds if the subject represents a valid date with
-- an optional time.
date
  :: Maybe (CompUTC, Time.UTCTime)
  -- ^ If Nothing, any valid date and time will succeed as a match;
  -- the matcher will return False if the subject is not a valid
  -- date. If Just, the subject must be a valid date and must fit
  -- within the range indicated.
  -> Matcher
date mayPair = Matcher (pack "date") md mr
  where
    md = case mayPair of
      Nothing -> pack "any valid date with optional time"
      Just (c, t) -> pack $ "valid date and optional time, "
        ++ descUTC c t
    mr x = fromMaybe False $ do
      subjDT <- case P.parse dateTime "" x of
        Left _ -> mzero
        Right g -> return g
      case mayPair of
        Nothing -> return True
        Just (c, t) ->
          let cmp = compUTCtoCmp c
          in return $ subjDT `cmp` t

------------------------------------------------------------
-- StrErr monad
------------------------------------------------------------

-- | This monad exists because using the mtl Monad instance of (Either
-- String) causes problems due to orphan instances.
--
-- See http://www.haskell.org/pipermail/haskell-cafe/2011-December/098079.html

data StrErr a = Good a
              | Bad String
              deriving (Show, Eq)

instance Monad StrErr where
  return = Good
  (Good a) >>= f = f a
  (Bad s) >>= _ = Bad s
  fail s = Bad s

------------------------------------------------------------
-- TDFA primitives
------------------------------------------------------------

tdfaPrim
  :: CaseSensitive -> String -> Exceptional String (String -> Bool)
tdfaPrim c regexStr = case RL.makeRegexOptsM comp exec regexStr of
  (Bad s) -> Exception s
  (Good rx) -> return (RL.matchTest rx)
  where
    comp = RL.defaultCompOpt { TDFA.caseSensitive = case c of
                                  Sensitive -> True
                                  Insensitive -> False
                             , TDFA.newSyntax = True
                             , TDFA.lastStarGreedy = True }
    exec = RL.defaultExecOpt { TDFA.captureGroups = False }

------------------------------------------------------------
-- PCRE primitives
------------------------------------------------------------

pcrePrim :: CaseSensitive
        -> BS.ByteString
        -> Exceptional String (BS.ByteString -> Bool)
pcrePrim c bs = let
  u8 = [PCRE.utf8]
  opts = case c of
    Sensitive -> u8
    Insensitive -> PCRE.caseless:u8 in
  case PCRE.compileM bs opts of
    (Left err) -> Exception err
    (Right rx) -> Success $ \s ->
      case PCRE.match rx s [] of
        (Just _) -> True
        Nothing -> False

------------------------------------------------------------
-- Date parsers
------------------------------------------------------------
year :: Parser Integer
year = read <$> replicateM 4 P.digit

month :: Parser Int
month = read <$> replicateM 2 P.digit

day :: Parser Int
day = read <$> replicateM 2 P.digit

pDate :: Parser Time.Day
pDate = p >>= failOnErr
  where
    p = Time.fromGregorianValid
        <$> year  <* satisfy dateSep
        <*> month <* satisfy dateSep
        <*> day
    failOnErr = maybe (fail "could not parse date") return

dateSep :: Char -> Bool
dateSep c = c == '/' || c == '-'

digit :: Char -> Bool
digit c = c >= '0' && c <= '9'

colon :: Char -> Bool
colon = (== ':')

hours :: Parser Int
hours = p >>= (maybe (fail "could not parse hours") return)
  where
    p = f <$> satisfy digit <*> satisfy digit
    f d1 d2 =
      let r = read [d1,d2]
      in if r < 0 || r > 23
         then Nothing
         else Just r


minutes :: Parser Int
minutes = p >>= maybe (fail "could not parse minutes") return
  where
    p = f <$ satisfy colon <*> satisfy digit <*> satisfy digit
    f d1 d2 =
      let r = read [d1, d2]
      in if r < 0 || r > 59
         then Nothing
         else Just r

seconds :: Parser Pico
seconds = p >>= maybe (fail "could not parse seconds") return
  where
    p = f <$ satisfy colon <*> satisfy digit <*> satisfy digit
    f d1 d2 =
      let r = read [d1, d2] :: Int
      in if r < 0 || r > 59
         then Nothing
         else Just . fromIntegral $ r

time :: Parser Time.TimeOfDay
time = f <$> hours <*> minutes <*> optional seconds
  where
    f h m ms = Time.TimeOfDay h m (fromMaybe 0 ms)

tzSign :: Parser (Int -> Int)
tzSign = (id <$ satisfy plus) <|> (negate <$ satisfy minus)
  where
    plus = (== '+')
    minus = (== '-')

tzNumber :: Parser Int
tzNumber = read <$> replicateM 4 (satisfy digit)

timeZone :: Parser Time.TimeZone
timeZone = p >>= maybe (fail "could not parse time zone") return
  where
    p = f <$> tzSign <*> tzNumber
    f s = minsToOffset . s
    minsToOffset m = if abs m > 840
                     then Nothing
                     else Just (Time.TimeZone m False "")

white :: Char -> Bool
white c = c == ' ' || c == '\t'

timeWithZone :: Parser (Time.TimeOfDay, Maybe Time.TimeZone)
timeWithZone =
  (,) <$> time <* many (satisfy white) <*> optional timeZone


dateTime :: Parser Time.UTCTime
dateTime =
  f <$> pDate <* many (satisfy white) <*> optional timeWithZone
  where
    f d mayTwithZ = Time.zonedTimeToUTC zt
      where
        zt = Time.ZonedTime lt tz
        lt = Time.LocalTime d tod
        (tod, tz) = case mayTwithZ of
          Nothing -> (Time.midnight, Time.utc)
          Just (t, mayZ) -> case mayZ of
            Nothing -> (t, Time.utc)
            Just z -> (t, z)

------------------------------------------------------------
-- Other date things
------------------------------------------------------------

data CompUTC
  = UAfter
  | UOnOrAfter
  | UExactly
  | UBefore
  | UOnOrBefore
  deriving (Eq, Show, Ord)

descUTC :: CompUTC -> Time.UTCTime -> String
descUTC c u = "date is " ++ co ++ " " ++ dt
  where
    co = case c of
      UAfter -> "after"
      UOnOrAfter -> "on or after"
      UExactly -> "on"
      UBefore -> "before"
      UOnOrBefore -> "on or before"
    dt = show dy ++ " " ++ hs ++ ":" ++ ms ++ ":" ++ ss ++ " UTC"
    Time.UTCTime dy difft = u
    Time.TimeOfDay h m s = Time.timeToTimeOfDay difft
    (hs, ms, ss) = (show h, show m, show (round s :: Int))

compUTCtoCmp :: Ord a => CompUTC -> a -> a -> Bool
compUTCtoCmp c = case c of
  UAfter -> (>)
  UOnOrAfter -> (>=)
  UExactly -> (==)
  UBefore -> (<)
  UOnOrBefore -> (<=)