-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree.


{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE NoRebindableSyntax #-}

module Duckling.Ranking.Rank
  ( rank
  ) where

import qualified Data.HashMap.Strict as HashMap
import Data.HashSet (HashSet)
import qualified Data.HashSet as HashSet
import Data.Maybe
import qualified Data.Set as Set
import Prelude

import Duckling.Dimensions.Types
import Duckling.Ranking.Extraction
import Duckling.Ranking.Types
import Duckling.Types

-- | computes log likelihood of a class
ll :: BagOfFeatures -> ClassData -> Double
ll :: BagOfFeatures -> ClassData -> Double
ll BagOfFeatures
feats ClassData{Double
Int
HashMap Feature Double
n :: ClassData -> Int
likelihoods :: ClassData -> HashMap Feature Double
unseen :: ClassData -> Double
prior :: ClassData -> Double
n :: Int
likelihoods :: HashMap Feature Double
unseen :: Double
prior :: Double
..} =
  Double
prior Double -> Double -> Double
forall a. Num a => a -> a -> a
+
    (Feature -> Int -> Double -> Double)
-> Double -> BagOfFeatures -> Double
forall k v a. (k -> v -> a -> a) -> a -> HashMap k v -> a
HashMap.foldrWithKey
      (\Feature
feat Int
x Double
res ->
       Double
res Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
x Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double -> Feature -> HashMap Feature Double -> Double
forall k v. (Eq k, Hashable k) => v -> k -> HashMap k v -> v
HashMap.lookupDefault Double
unseen Feature
feat HashMap Feature Double
likelihoods)
      Double
0.0
      BagOfFeatures
feats

-- | computes positive class log likelihood
posLL :: Classifier -> BagOfFeatures -> Double
posLL :: Classifier -> BagOfFeatures -> Double
posLL Classifier {ClassData
koData :: Classifier -> ClassData
okData :: Classifier -> ClassData
koData :: ClassData
okData :: ClassData
..} BagOfFeatures
feats = BagOfFeatures -> ClassData -> Double
ll BagOfFeatures
feats ClassData
okData

score :: Classifiers -> Node -> Double
score :: Classifiers -> Node -> Double
score Classifiers
classifiers node :: Node
node@Node {rule :: Node -> Maybe Feature
rule = Just Feature
rule, [Node]
Range
Token
children :: Node -> [Node]
token :: Node -> Token
nodeRange :: Node -> Range
children :: [Node]
token :: Token
nodeRange :: Range
..} =
  case Feature -> Classifiers -> Maybe Classifier
forall k v. (Eq k, Hashable k) => k -> HashMap k v -> Maybe v
HashMap.lookup Feature
rule Classifiers
classifiers of
    Just Classifier
c -> let feats :: BagOfFeatures
feats = Node -> BagOfFeatures
extractFeatures Node
node
      in Classifier -> BagOfFeatures -> Double
posLL Classifier
c BagOfFeatures
feats Double -> Double -> Double
forall a. Num a => a -> a -> a
+ [Double] -> Double
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum ((Node -> Double) -> [Node] -> [Double]
forall a b. (a -> b) -> [a] -> [b]
map (Classifiers -> Node -> Double
score Classifiers
classifiers) [Node]
children)
    Maybe Classifier
Nothing -> Double
0.0
score Classifiers
_ Node {rule :: Node -> Maybe Feature
rule = Maybe Feature
Nothing} = Double
0.0

-- | Return all superior candidates, as defined by the partial ordering
winners :: Ord a => [a] -> [a]
winners :: [a] -> [a]
winners [a]
xs = (a -> Bool) -> [a] -> [a]
forall a. (a -> Bool) -> [a] -> [a]
filter (\a
x -> (a -> Bool) -> [a] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all (Ordering -> Ordering -> Bool
forall a. Eq a => a -> a -> Bool
(/=) Ordering
LT (Ordering -> Bool) -> (a -> Ordering) -> a -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. a -> a -> Ordering
forall a. Ord a => a -> a -> Ordering
compare a
x) [a]
xs) [a]
xs

-- | Return a curated list of tokens
rank
  :: Classifiers
  -> HashSet (Seal Dimension)
  -> [ResolvedToken]
  -> [ResolvedToken]
rank :: Classifiers
-> HashSet (Seal Dimension) -> [ResolvedToken] -> [ResolvedToken]
rank Classifiers
classifiers HashSet (Seal Dimension)
targets [ResolvedToken]
tokens =
  Set ResolvedToken -> [ResolvedToken]
forall a. Set a -> [a]
Set.toList (Set ResolvedToken -> [ResolvedToken])
-> ([Candidate] -> Set ResolvedToken)
-> [Candidate]
-> [ResolvedToken]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [ResolvedToken] -> Set ResolvedToken
forall a. Ord a => [a] -> Set a
Set.fromList
  ([ResolvedToken] -> Set ResolvedToken)
-> ([Candidate] -> [ResolvedToken])
-> [Candidate]
-> Set ResolvedToken
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Candidate -> ResolvedToken) -> [Candidate] -> [ResolvedToken]
forall a b. (a -> b) -> [a] -> [b]
map (\(Candidate ResolvedToken
token Double
_ Bool
_) -> ResolvedToken
token)
  ([Candidate] -> [ResolvedToken])
-> ([Candidate] -> [Candidate]) -> [Candidate] -> [ResolvedToken]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Candidate] -> [Candidate]
forall a. Ord a => [a] -> [a]
winners
  ([Candidate] -> [ResolvedToken]) -> [Candidate] -> [ResolvedToken]
forall a b. (a -> b) -> a -> b
$ (ResolvedToken -> Candidate) -> [ResolvedToken] -> [Candidate]
forall a b. (a -> b) -> [a] -> [b]
map ResolvedToken -> Candidate
makeCandidate [ResolvedToken]
tokens
  where
    makeCandidate :: ResolvedToken -> Candidate
    makeCandidate :: ResolvedToken -> Candidate
makeCandidate token :: ResolvedToken
token@Resolved {node :: ResolvedToken -> Node
node = n :: Node
n@Node {token :: Node -> Token
token = Token Dimension a
d a
_}} =
      ResolvedToken -> Double -> Bool -> Candidate
Candidate ResolvedToken
token (Classifiers -> Node -> Double
score Classifiers
classifiers Node
n) (Bool -> Candidate) -> Bool -> Candidate
forall a b. (a -> b) -> a -> b
$ Seal Dimension -> HashSet (Seal Dimension) -> Bool
forall a. (Eq a, Hashable a) => a -> HashSet a -> Bool
HashSet.member (Dimension a -> Seal Dimension
forall (s :: * -> *) a. s a -> Seal s
Seal Dimension a
d) HashSet (Seal Dimension)
targets