{-# LANGUAGE DeriveDataTypeable, ViewPatterns, OverloadedStrings, TupleSections #-}

-- | Module    : NLP.Antfarm.Internal
-- Copyright   : 2012 Eric Kow (Computational Linguistics Ltd.)
-- License     : BSD3
-- Maintainer  : eric.kow@gmail.com
-- Stability   : experimental
-- Portability : portable
--
-- The heart of the referring expression generation
module NLP.Antfarm.Internal where

import Data.Maybe ( fromMaybe )
import Data.Tree ( Tree(..) )
import Prelude hiding ( lex )
import qualified Data.Set  as Set

import NLP.Minimorph.Number

import NLP.Antfarm.Refex
import NLP.Antfarm.History

-- ----------------------------------------------------------------------
-- concepts (sub-unit of referring expressions)
-- ----------------------------------------------------------------------
-- englishDiscriminator (fromSP agr (srxDet srx))
-- word  = fromSP agr (srxWord srx)

-- | Decide how to realise a referring expression
rx :: RefHistory -> [Tree SubRxInput] -> [Tree SubRx]
rx st = map (subrx st)

-- | Decide how to realise a single unit within a referring expression
--
--   Keep in mind that this is only for one 'DiscourseUnit' within a single rx.
--   An rx may involve multiple discourse units (eg. 3 cats and 1 dog)
subrx :: RefHistory
      -> Tree SubRxInput
      -> Tree SubRx
subrx st (Node srx egs) =
    (Node root kids)
  where
    du    = srxInpEntity srx
    num   = surfaceNumber st du
    discr = discriminate  st du
    root  = SubRx num discr (srxInpDet srx) (srxInpWord srx)
    --
    kids | null egs               = []
         | lastMentions st du > 0 = []
         | otherwise              = map (subrx st) egs

-- ----------------------------------------------------------------------
-- Number
-- ----------------------------------------------------------------------

-- | Helper for 'fromConcept'
--
--   Whether the noun in a 'DiscourseUnit' should be realised as singular
--   or plural.  Note the difference between this and
--   'actualNumber'
surfaceNumber :: RefHistory -> DiscourseUnit -> Number
surfaceNumber st du =
    case subrxNumber du of
        FN_Singular      -> Singular
        FN_Plural        -> Plural
        FN_MaybeSingular -> if override then Plural else Singular
  where
    override = hasTidyBackpointer st du

-- | Whether a 'DiscourseUnit' should be considered *morally*
--   (semantically) singular or plural.  The actual form used may be
--   different (see 'conceptNumber' because of deeper issues that
--   override this).
--
--   Consider one of the *dogs*; here the rx number is 'Singular'
--   — one dog — but on the surface we use the 'Plural' (the NP
--   'the dogs' is itself plural).  This discrepency is partly due
--   to the hacky way we've written this.  A cleaner implementation
--   would recursively realise 'the dogs' as a separate expression
--   with its own number.
subrxNumber :: DiscourseUnit -> FuzzyNumber
subrxNumber (Node rg _) =
    fromMaybe usualAgr (constrAgr bounds)
  where
    bounds  = rgBounds rg
    count   = Set.size (rgIdxes rg)
    usualAgr
        | isClasswide rg  = FN_Plural -- dog*s* like food
        | count == 1      = FN_MaybeSingular
        | otherwise       = FN_Plural
    -- TODO not sure if this really belongs here or more in the surface?
    constrAgr (Bounds [] l u) =
        case (l,u) of
           (Just 1, Nothing)  -> Just FN_Singular -- TODO hmm?
           (Just 1, Just 1)   -> Just FN_Singular
           (Nothing, Just 1)  -> Just FN_Singular -- TODO hmm?
           (Nothing, Nothing) -> Nothing
           _                  -> Just FN_Plural
    constrAgr (Bounds _ _ _) = Just FN_Plural

-- ----------------------------------------------------------------------
-- Discriminator
-- ----------------------------------------------------------------------

-- | Helper for 'fromConcept'
--
--   A discriminator is what we call the optional bit of text that helps
--   you distinguish one set instances of a class from another, eg,
--   “the same” or “another three”, or simply “the“
discriminate :: RefHistory   -- ^ discourse history
             -> DiscourseUnit
             -> Discriminator
discriminate st du@(Node rg _) =
    helper $ boundsText bounds
  where
    keys    = [ (rgClass rg, idx) | idx <- rgIdxList rg ]
    bounds  = rgBounds rg
    count   = Set.size (rgIdxes rg)
    --
    helper (Just bnds) = case keys of
        _ | lastMentions st du > 0             -> TheSame
          | otherwise                          -> Bounded bnds
    helper Nothing = case keys of
        [k] | isTheOther st k                  -> TheOther
        [mentionOrder st -> Just i]            -> TheOrdinal i
        [distractorGroups st -> ds@(_:_)]      -> NewOrdinal (1 + length ds) -- an 8th ant
        _   | isClasswide rg                   -> NilDiscriminator
            | any (hasDistractorGroup st) keys -> Another count
            | all (isFirstMention st)     keys -> PlainCardinal count
            | hasTidyBackpointer st du         -> CardinalOfThe count
            | lastMentions st du > 0           -> The
            | otherwise                        -> PlainCardinal count

-- ----------------------------------------------------------------------
-- determiners
-- ----------------------------------------------------------------------

-- | If there are any unknown constraints, we pick the first one.
--   Otherwise, we generate an expression appropriate for the lower/upper bounds
boundsText :: Bounds -> Maybe BoundsExpr
boundsText (Bounds [] l u)    =
    case (l,u) of
        (Nothing, Nothing)   -> Nothing
        (Nothing, Just x)    -> Just $ SayAtMost  x
        (Just x, Nothing)    -> Just $ SayAtLeast x
        (Just x1, Just x2)
           | x1 == x2        -> Just $ SayExactly x1
           | otherwise       -> Just $ SayBetween x1 x2
boundsText (Bounds (x:_) _ _) = Just $ SayArbitrary x