{-|
Module: Multiwords
Description: Multiwords represent non deterministic words.
License: GPL-3

A multiword is given by some finite set of meaning/gramamr pairs. They
generalize words and can be used to represent reductions where multiple
parsings are possible but also words with multiple acceptations.

This is experimental and it is not part of the standard DisCoCat
framework.
|-}
module Discokitty.Multiwords where

import           Data.List
import           Data.Semigroup
import           Discokitty.HasCups
import           Discokitty.Lambek
import           Discokitty.Words

-- | The probability is given by real numbers.
type Probability = Double

-- | A multiword is given by a list of different words with different
-- probabilities. Note that these words do not need to have the same
-- grammar types.
newtype Multiword m = Multiword [(Words m , Probability)]

-- | Shows a multiword as a list of acceptations.
instance (Show m) => Show (Multiword m) where
  show =
    intercalate "\n" .
    fmap (\ (w, p) -> show w ++ " with p=" ++ show p) .
    toList

toList :: Multiword m -> [(Words m , Probability)]
toList (Multiword a) = a

fromList :: [(Words m , Probability)] -> Multiword m
fromList = Multiword

singleton :: Words m -> Multiword m
singleton w = fromList [(w,1.0)]

-- | Concatenates the meaning of multiple multiwords using the formal
-- cups on the meaning category.
multiconcat :: (HasCups m) => Multiword m -> Multiword m -> Multiword m
multiconcat x y = fromList $ do
  (w , p) <- toList x
  (v , q) <- toList y
  let concats = concatenate w v
  let newprob = (p * q) / fromIntegral (length concats)
  zip concats (repeat newprob)

infixr 4 `multiconcat`

-- | The empty word for a formal cups multiword.
multiempty :: (HasCups m) => Multiword m
multiempty = fromList [( emptyWord , 1 )]

instance (HasCups m) => Semigroup (Multiword m) where
  (<>) = multiconcat

instance (HasCups m) => Monoid (Multiword m) where
  mempty = multiempty
  mappend = multiconcat

-- | Concatenates a whole sentence of multiwords into a single one.
sentence :: (HasCups m) => [Multiword m] -> Multiword m
sentence = mconcat

-- | Filters the acceptations of the multiword that match the given
-- Lambek type.
(@@) :: Multiword m -> Lambek -> Multiword m
ws @@ l = fromList $ fmap (\ (x,p) -> (x , p / totalprob)) newlist
  where
    totalprob = sum $ fmap snd newlist
    newlist = filter (\ (x , _) -> grammar x == l) (toList ws)