{- |
   Module      :   Cookbook.Ingredients.Lists.Stats
   Copyright   :   (c) 2014 by Nate Pisarski
   License     :   BSD3
   Maintainer  :   nathanpisarski@gmail.com
   Stability   :   Stable
   Portability :   Portable (Cookbook)
Library for determining mathematically whether two lists are similar.
-}

module Cookbook.Ingredients.Lists.Stats where

import qualified Cookbook.Ingredients.Lists.Access     as Ac
import qualified Cookbook.Ingredients.Lists.Modify     as Md
import qualified Cookbook.Ingredients.Tupples.Assemble as As
import qualified Cookbook.Recipes.Sanitize             as Sn
import qualified Cookbook.Recipes.Math                 as Ma

-- | Builds an association list pairing elements of the input with the number
-- of times they occur in it.
--
-- Every element @e@ of the input is paired with @Ac.count xs e@, and the
-- resulting pairs are passed through @As.rmDb@ before being returned
-- (presumably to drop the repeated pairs that repeated elements produce;
-- confirm against "Cookbook.Ingredients.Tupples.Assemble").
frequency :: (Eq a) => [a] -> [(a,Int)]
frequency xs = As.rmDb [ (e, Ac.count xs e) | e <- xs ]

-- | Returns at most @n@ of the most frequent elements of a list.
--
-- The 'frequency' table of the input is run through @As.assemble@ and then
-- @Md.rev@, and the first @n@ entries of that result are kept. When several
-- elements are tied, the order in which they appear in the list is meant to
-- take precedence (this depends on @As.assemble@ / @Md.rev@, which are defined
-- in other modules; verify there).
mostFrequent :: (Eq a) => [a] -> Int -> [a]
mostFrequent xs n = take n ranked
  where
    -- Elements of the frequency table as ordered by As.assemble, reversed.
    ranked = (Md.rev . As.assemble . frequency) xs

-- | Scores the similarity of two lists, combining 'freqScore' with a small
-- adjustment for the difference in length.
--
-- The result is @freqScore a b - 0.1@ plus a length bonus of @0.1 / d@, where
-- @d@ is the absolute difference of the two lengths, clamped to be at least 1.
-- Lists whose lengths differ by 0 or by 1 therefore both receive the full 0.1
-- (giving back exactly 'freqScore'); larger gaps receive proportionally less.
wordscore :: (Eq a) => [a] -> [a] -> Double
wordscore a b = (freqScore a b - 0.1) + lengthBonus
  where
    -- Absolute difference between the two list lengths (never negative).
    lengthGap   = abs (length a - length b)
    -- Clamp the divisor to 1 so that equal lengths do not divide by zero.
    lengthBonus = 0.1 / realToFrac (max 1 lengthGap)

-- | Provides a frequency score between two lists.
--
-- Let @k@ be the smaller of the two lists' distinct-element counts (the
-- lengths of their 'frequency' tables). The score is the number of entries
-- among the @k@ most frequent elements of @a@ that also occur among the @k@
-- most frequent elements of @b@, divided by @k@.
--
-- When either list is empty, @k@ is 0 and there is nothing to compare; the
-- score is then defined as 0 instead of the NaN that dividing @0 / 0@ would
-- produce.
freqScore :: (Eq a) => [a] -> [a] -> Double
freqScore a b
  | distinct == 0 = 0
  | otherwise     = fromIntegral shared / fromIntegral distinct
  where
    -- Size of the smaller frequency table; bounds how many top elements
    -- are taken from each list.
    distinct :: Int
    distinct = min (length (frequency a)) (length (frequency b))
    topA = mostFrequent a distinct
    topB = mostFrequent b distinct
    -- How many of a's top elements also appear among b's top elements.
    shared :: Int
    shared = length (filter (`elem` topB) topA)