-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree.


{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE NoRebindableSyntax #-}
{-# LANGUAGE RecordWildCards #-}

module Duckling.Api
  ( analyze
  , formatToken
  , parse
  , supportedDimensions
  ) where

import Data.HashMap.Strict (HashMap)
import Data.HashSet (HashSet)
import Data.Text (Text)
import Prelude
import qualified Data.HashMap.Strict as HashMap
import qualified Data.HashSet as HashSet
import qualified Data.Text as Text

import Duckling.Dimensions.Types
import Duckling.Dimensions
import Duckling.Engine
import Duckling.Locale
import Duckling.Ranking.Classifiers
import Duckling.Ranking.Rank
import Duckling.Resolve
import Duckling.Rules
import Duckling.Types

-- | Parses `input` and returns a curated list of entities found.
parse :: Text -> Context -> Options -> [Seal Dimension] -> [Entity]
parse :: Text -> Context -> Options -> [Seal Dimension] -> [Entity]
parse Text
input Context
ctx Options
options = (ResolvedToken -> Entity) -> [ResolvedToken] -> [Entity]
forall a b. (a -> b) -> [a] -> [b]
map (Text -> ResolvedToken -> Entity
formatToken Text
input) ([ResolvedToken] -> [Entity])
-> ([Seal Dimension] -> [ResolvedToken])
-> [Seal Dimension]
-> [Entity]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text
-> Context
-> Options
-> HashSet (Seal Dimension)
-> [ResolvedToken]
analyze Text
input Context
ctx Options
options (HashSet (Seal Dimension) -> [ResolvedToken])
-> ([Seal Dimension] -> HashSet (Seal Dimension))
-> [Seal Dimension]
-> [ResolvedToken]
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
  [Seal Dimension] -> HashSet (Seal Dimension)
forall a. (Eq a, Hashable a) => [a] -> HashSet a
HashSet.fromList

supportedDimensions :: HashMap Lang [Seal Dimension]
supportedDimensions :: HashMap Lang [Seal Dimension]
supportedDimensions =
  [(Lang, [Seal Dimension])] -> HashMap Lang [Seal Dimension]
forall k v. (Eq k, Hashable k) => [(k, v)] -> HashMap k v
HashMap.fromList [ (Lang
l, Lang -> [Seal Dimension]
allDimensions Lang
l) | Lang
l <- [Lang
forall a. Bounded a => a
minBound..Lang
forall a. Bounded a => a
maxBound] ]

-- | Returns a curated list of resolved tokens found
-- When `targets` is non-empty, returns only tokens of such dimensions.
analyze :: Text -> Context -> Options -> HashSet (Seal Dimension)
  -> [ResolvedToken]
analyze :: Text
-> Context
-> Options
-> HashSet (Seal Dimension)
-> [ResolvedToken]
analyze Text
input context :: Context
context@Context{Locale
DucklingTime
locale :: Context -> Locale
referenceTime :: Context -> DucklingTime
locale :: Locale
referenceTime :: DucklingTime
..} Options
options HashSet (Seal Dimension)
targets =
  Classifiers
-> HashSet (Seal Dimension) -> [ResolvedToken] -> [ResolvedToken]
rank (Locale -> Classifiers
classifiers Locale
locale) HashSet (Seal Dimension)
targets
  ([ResolvedToken] -> [ResolvedToken])
-> ([ResolvedToken] -> [ResolvedToken])
-> [ResolvedToken]
-> [ResolvedToken]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (ResolvedToken -> Bool) -> [ResolvedToken] -> [ResolvedToken]
forall a. (a -> Bool) -> [a] -> [a]
filter (\Resolved{node :: ResolvedToken -> Node
node = Node{token :: Node -> Token
token = (Token Dimension a
d a
_)}} ->
      HashSet (Seal Dimension) -> Bool
forall a. HashSet a -> Bool
HashSet.null HashSet (Seal Dimension)
targets Bool -> Bool -> Bool
|| Seal Dimension -> HashSet (Seal Dimension) -> Bool
forall a. (Eq a, Hashable a) => a -> HashSet a -> Bool
HashSet.member (Dimension a -> Seal Dimension
forall (s :: * -> *) a. s a -> Seal s
Seal Dimension a
d) HashSet (Seal Dimension)
targets
    )
  ([ResolvedToken] -> [ResolvedToken])
-> [ResolvedToken] -> [ResolvedToken]
forall a b. (a -> b) -> a -> b
$ [Rule] -> Text -> Context -> Options -> [ResolvedToken]
parseAndResolve (Locale -> HashSet (Seal Dimension) -> [Rule]
rulesFor Locale
locale HashSet (Seal Dimension)
targets) Text
input Context
context Options
options

-- | Converts the resolved token to the API format
formatToken :: Text -> ResolvedToken -> Entity
formatToken :: Text -> ResolvedToken -> Entity
formatToken Text
sentence
  (Resolved (Range Int
start Int
end) node :: Node
node@Node{token :: Node -> Token
token = Token Dimension a
dimension a
_} ResolvedVal
value Bool
latent)
  = Text -> Text -> ResolvedVal -> Int -> Int -> Bool -> Node -> Entity
Entity (Dimension a -> Text
forall a. Dimension a -> Text
toName Dimension a
dimension) Text
body ResolvedVal
value Int
start Int
end Bool
latent Node
node
  where
    body :: Text
body = Int -> Text -> Text
Text.drop Int
start (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Int -> Text -> Text
Text.take Int
end Text
sentence