{-# LANGUAGE GADTs #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE NoRebindableSyntax #-}
module Duckling.Numeral.BG.Rules
( rules
) where
import Data.HashMap.Strict (HashMap)
import Data.Maybe
import Data.Text (Text)
import Prelude
import qualified Data.HashMap.Strict as HashMap
import qualified Data.Text as Text
import Duckling.Dimensions.Types
import Duckling.Numeral.Helpers
import Duckling.Numeral.Types (NumeralData (..))
import Duckling.Regex.Types
import Duckling.Types
import qualified Duckling.Numeral.Types as TNumeral
zeroNineteenMap :: HashMap Text Integer
zeroNineteenMap :: HashMap Text Integer
zeroNineteenMap = [(Text, Integer)] -> HashMap Text Integer
forall k v. (Eq k, Hashable k) => [(k, v)] -> HashMap k v
HashMap.fromList
[ ( Text
"нула", Integer
0 )
, ( Text
"един", Integer
1 )
, ( Text
"една", Integer
1 )
, ( Text
"едно", Integer
1 )
, ( Text
"два", Integer
2 )
, ( Text
"две", Integer
2 )
, ( Text
"три", Integer
3 )
, ( Text
"четири", Integer
4 )
, ( Text
"пет", Integer
5)
, ( Text
"шест", Integer
6)
, ( Text
"седем", Integer
7)
, ( Text
"осем", Integer
8)
, ( Text
"девет", Integer
9)
, ( Text
"десет", Integer
10)
, ( Text
"единадесет", Integer
11 )
, ( Text
"единайсет", Integer
11 )
, ( Text
"дванадесет", Integer
12 )
, ( Text
"дванайсет", Integer
12 )
, ( Text
"тринадесет", Integer
13 )
, ( Text
"тринайсет", Integer
13 )
, ( Text
"четиринадесет", Integer
14)
, ( Text
"четиринайсет", Integer
14)
, ( Text
"петнадесет", Integer
15)
, ( Text
"петнайсет", Integer
15)
, ( Text
"шестнадесет", Integer
16)
, ( Text
"шестнайсет", Integer
16)
, ( Text
"седемнадесет", Integer
17)
, ( Text
"седемнайсет", Integer
17)
, ( Text
"осемнадесет", Integer
18)
, ( Text
"осемнайсет", Integer
18)
, ( Text
"деветнадесет", Integer
19)
, ( Text
"деветнайсет", Integer
19)
]
ruleToNineteen :: Rule
ruleToNineteen :: Rule
ruleToNineteen = Rule :: Text -> Pattern -> Production -> Rule
Rule
{ name :: Text
name = Text
"number (0..19)"
, pattern :: Pattern
pattern =
[ String -> PatternItem
regex String
"(нула|едина(де|й)сет|двана(де|й)сет|трина(де|й)сет|четирина(де|й)сет|петна(де|й)сет|шестна(де|й)сет|седемна(де|й)сет|осемна(де|й)сет|деветна(де|й)сет|един|една|едно|два|две|три|четири|пет|шест|седем|осем|девет|десет)"
]
, prod :: Production
prod = \[Token]
tokens -> case [Token]
tokens of
(Token Dimension a
RegexMatch (GroupMatch (match:_)):[Token]
_) ->
let x :: Text
x = Text -> Text
Text.toLower Text
match in
Text -> HashMap Text Integer -> Maybe Integer
forall k v. (Eq k, Hashable k) => k -> HashMap k v -> Maybe v
HashMap.lookup Text
x HashMap Text Integer
zeroNineteenMap Maybe Integer -> (Integer -> Maybe Token) -> Maybe Token
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Integer -> Maybe Token
integer
[Token]
_ -> Maybe Token
forall a. Maybe a
Nothing
}
ruleTens :: Rule
ruleTens :: Rule
ruleTens = Rule :: Text -> Pattern -> Production -> Rule
Rule
{ name :: Text
name = Text
"integer (20..90)"
, pattern :: Pattern
pattern =
[ String -> PatternItem
regex String
"((два|три|четири|пет|шест|седем|осем|девет)десет)"
]
, prod :: Production
prod = \[Token]
tokens ->
case [Token]
tokens of
(Token Dimension a
RegexMatch (GroupMatch (_:match:_)):[Token]
_) -> do
Integer
x <- Text -> HashMap Text Integer -> Maybe Integer
forall k v. (Eq k, Hashable k) => k -> HashMap k v -> Maybe v
HashMap.lookup (Text -> Text
Text.toLower Text
match) HashMap Text Integer
zeroNineteenMap
Integer -> Maybe Token
integer (Integer -> Maybe Token) -> Integer -> Maybe Token
forall a b. (a -> b) -> a -> b
$ Integer
x Integer -> Integer -> Integer
forall a. Num a => a -> a -> a
* Integer
10
[Token]
_ -> Maybe Token
forall a. Maybe a
Nothing
}
rulePowersOfTen :: Rule
rulePowersOfTen :: Rule
rulePowersOfTen = Rule :: Text -> Pattern -> Production -> Rule
Rule
{ name :: Text
name = Text
"powers of tens"
, pattern :: Pattern
pattern =
[ String -> PatternItem
regex String
"(хиляд(а|и)|милион(а|и)?|милиард(а|и)?)"
]
, prod :: Production
prod = \[Token]
tokens -> case [Token]
tokens of
(Token Dimension a
RegexMatch (GroupMatch (match:_)):[Token]
_) -> case Text -> Text
Text.toLower Text
match of
Text
"хиляд" -> Double -> Maybe Token
double Double
1e3 Maybe Token -> (Token -> Maybe Token) -> Maybe Token
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Int -> Token -> Maybe Token
withGrain Int
3 Maybe Token -> (Token -> Maybe Token) -> Maybe Token
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Token -> Maybe Token
withMultipliable
Text
"милион" -> Double -> Maybe Token
double Double
1e6 Maybe Token -> (Token -> Maybe Token) -> Maybe Token
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Int -> Token -> Maybe Token
withGrain Int
6 Maybe Token -> (Token -> Maybe Token) -> Maybe Token
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Token -> Maybe Token
withMultipliable
Text
"милиард" -> Double -> Maybe Token
double Double
1e9 Maybe Token -> (Token -> Maybe Token) -> Maybe Token
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Int -> Token -> Maybe Token
withGrain Int
9 Maybe Token -> (Token -> Maybe Token) -> Maybe Token
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Token -> Maybe Token
withMultipliable
Text
_ -> Maybe Token
forall a. Maybe a
Nothing
[Token]
_ -> Maybe Token
forall a. Maybe a
Nothing
}
ruleCompositeTens :: Rule
ruleCompositeTens :: Rule
ruleCompositeTens = Rule :: Text -> Pattern -> Production -> Rule
Rule
{ name :: Text
name = Text
"integer 21..99"
, pattern :: Pattern
pattern =
[ [Double] -> PatternItem
oneOf [Double
20, Double
30..Double
90]
, String -> PatternItem
regex String
"и"
, Double -> Double -> PatternItem
numberBetween Double
1 Double
10
]
, prod :: Production
prod = \[Token]
tokens -> case [Token]
tokens of
(Token Dimension a
Numeral NumeralData{TNumeral.value = tens}:
Token
_:
Token Dimension a
Numeral NumeralData{TNumeral.value = units}:
[Token]
_) -> Double -> Maybe Token
double (Double -> Maybe Token) -> Double -> Maybe Token
forall a b. (a -> b) -> a -> b
$ Double
tens Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
units
[Token]
_ -> Maybe Token
forall a. Maybe a
Nothing
}
ruleHundreds :: Rule
ruleHundreds :: Rule
ruleHundreds = Rule :: Text -> Pattern -> Production -> Rule
Rule
{ name :: Text
name = Text
"integer (100..900)"
, pattern :: Pattern
pattern =
[ String -> PatternItem
regex String
"(сто|двеста|триста|(четири|пет|шест|седем|осем|девет)стотин)"
]
, prod :: Production
prod = \[Token]
tokens -> case [Token]
tokens of
(Token Dimension a
RegexMatch (GroupMatch (match:_)):[Token]
_) -> case Text -> Text
Text.toLower Text
match of
Text
"сто" -> Integer -> Maybe Token
integer Integer
100
Text
"двеста" -> Integer -> Maybe Token
integer Integer
200
Text
"триста" -> Integer -> Maybe Token
integer Integer
300
Text
"четиристотин" -> Integer -> Maybe Token
integer Integer
400
Text
"петстотин" -> Integer -> Maybe Token
integer Integer
500
Text
"шестстотин" -> Integer -> Maybe Token
integer Integer
600
Text
"седемстотин" -> Integer -> Maybe Token
integer Integer
700
Text
"осемстотин" -> Integer -> Maybe Token
integer Integer
800
Text
"деветстотин" -> Integer -> Maybe Token
integer Integer
900
Text
_ -> Maybe Token
forall a. Maybe a
Nothing
[Token]
_ -> Maybe Token
forall a. Maybe a
Nothing
}
ruleCompositeHundreds :: Rule
ruleCompositeHundreds :: Rule
ruleCompositeHundreds = Rule :: Text -> Pattern -> Production -> Rule
Rule
{ name :: Text
name = Text
"integer 101..999"
, pattern :: Pattern
pattern =
[ [Double] -> PatternItem
oneOf [Double
200, Double
300..Double
900]
, Double -> Double -> PatternItem
numberBetween Double
1 Double
100
]
, prod :: Production
prod = \[Token]
tokens -> case [Token]
tokens of
(Token Dimension a
Numeral NumeralData{TNumeral.value = hundreds}:
Token Dimension a
Numeral NumeralData{TNumeral.value = tens}:
[Token]
_) -> Double -> Maybe Token
double (Double -> Maybe Token) -> Double -> Maybe Token
forall a b. (a -> b) -> a -> b
$ Double
hundreds Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
tens
[Token]
_ -> Maybe Token
forall a. Maybe a
Nothing
}
ruleDotSpelledOut :: Rule
ruleDotSpelledOut :: Rule
ruleDotSpelledOut = Rule :: Text -> Pattern -> Production -> Rule
Rule
{ name :: Text
name = Text
"one point 2"
, pattern :: Pattern
pattern =
[ Dimension NumeralData -> PatternItem
forall a. Typeable a => Dimension a -> PatternItem
dimension Dimension NumeralData
Numeral
, String -> PatternItem
regex String
"цяло и"
, Predicate -> PatternItem
Predicate (Predicate -> PatternItem) -> Predicate -> PatternItem
forall a b. (a -> b) -> a -> b
$ Bool -> Bool
not (Bool -> Bool) -> Predicate -> Predicate
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Predicate
hasGrain
]
, prod :: Production
prod = \[Token]
tokens -> case [Token]
tokens of
(Token Dimension a
Numeral a
nd1:Token
_:Token Dimension a
Numeral a
nd2:[Token]
_) ->
Double -> Maybe Token
double (Double -> Maybe Token) -> Double -> Maybe Token
forall a b. (a -> b) -> a -> b
$ NumeralData -> Double
TNumeral.value a
NumeralData
nd1 Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double -> Double
decimalsToDouble (NumeralData -> Double
TNumeral.value a
NumeralData
nd2)
[Token]
_ -> Maybe Token
forall a. Maybe a
Nothing
}
ruleDecimals :: Rule
ruleDecimals :: Rule
ruleDecimals = Rule :: Text -> Pattern -> Production -> Rule
Rule
{ name :: Text
name = Text
"decimal number"
, pattern :: Pattern
pattern =
[ String -> PatternItem
regex String
"(\\d*\\.\\d+)"
]
, prod :: Production
prod = \[Token]
tokens -> case [Token]
tokens of
(Token Dimension a
RegexMatch (GroupMatch (match:_)):[Token]
_) -> Bool -> Text -> Maybe Token
parseDecimal Bool
True Text
match
[Token]
_ -> Maybe Token
forall a. Maybe a
Nothing
}
ruleCommas :: Rule
ruleCommas :: Rule
ruleCommas = Rule :: Text -> Pattern -> Production -> Rule
Rule
{ name :: Text
name = Text
"comma-separated numbers"
, pattern :: Pattern
pattern =
[ String -> PatternItem
regex String
"(\\d+(,\\d\\d\\d)+(\\.\\d+)?)"
]
, prod :: Production
prod = \[Token]
tokens -> case [Token]
tokens of
(Token Dimension a
RegexMatch (GroupMatch (match:_)):[Token]
_) ->
Text -> Maybe Double
parseDouble (Text -> Text -> Text -> Text
Text.replace Text
"," Text
Text.empty Text
match) Maybe Double -> (Double -> Maybe Token) -> Maybe Token
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Double -> Maybe Token
double
[Token]
_ -> Maybe Token
forall a. Maybe a
Nothing
}
ruleSuffixes :: Rule
ruleSuffixes :: Rule
ruleSuffixes = Rule :: Text -> Pattern -> Production -> Rule
Rule
{ name :: Text
name = Text
"suffixes (K,M,G))"
, pattern :: Pattern
pattern =
[ Dimension NumeralData -> PatternItem
forall a. Typeable a => Dimension a -> PatternItem
dimension Dimension NumeralData
Numeral
, String -> PatternItem
regex String
"((к|м|г)|(К|М|Г))(?=[\\W$€¢£]|$)"
]
, prod :: Production
prod = \[Token]
tokens ->
case [Token]
tokens of
(Token Dimension a
Numeral a
nd : Token Dimension a
RegexMatch (GroupMatch (match : _)):[Token]
_) -> do
Double
x <- case Text -> Text
Text.toLower Text
match of
Text
"к" -> Double -> Maybe Double
forall a. a -> Maybe a
Just Double
1e3
Text
"К" -> Double -> Maybe Double
forall a. a -> Maybe a
Just Double
1e3
Text
"м" -> Double -> Maybe Double
forall a. a -> Maybe a
Just Double
1e6
Text
"М" -> Double -> Maybe Double
forall a. a -> Maybe a
Just Double
1e6
Text
"г" -> Double -> Maybe Double
forall a. a -> Maybe a
Just Double
1e9
Text
"Г" -> Double -> Maybe Double
forall a. a -> Maybe a
Just Double
1e9
Text
_ -> Maybe Double
forall a. Maybe a
Nothing
Double -> Maybe Token
double (Double -> Maybe Token) -> Double -> Maybe Token
forall a b. (a -> b) -> a -> b
$ NumeralData -> Double
TNumeral.value a
NumeralData
nd Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
x
[Token]
_ -> Maybe Token
forall a. Maybe a
Nothing
}
ruleNegative :: Rule
ruleNegative :: Rule
ruleNegative = Rule :: Text -> Pattern -> Production -> Rule
Rule
{ name :: Text
name = Text
"negative numbers"
, pattern :: Pattern
pattern =
[ String -> PatternItem
regex String
"-|минус\\s?"
, Dimension NumeralData -> PatternItem
forall a. Typeable a => Dimension a -> PatternItem
dimension Dimension NumeralData
Numeral
]
, prod :: Production
prod = \[Token]
tokens -> case [Token]
tokens of
(Token
_:Token Dimension a
Numeral a
nd:[Token]
_) -> Double -> Maybe Token
double (NumeralData -> Double
TNumeral.value a
NumeralData
nd Double -> Double -> Double
forall a. Num a => a -> a -> a
* (-Double
1))
[Token]
_ -> Maybe Token
forall a. Maybe a
Nothing
}
rules :: [Rule]
rules :: [Rule]
rules =
[ Rule
ruleToNineteen
, Rule
ruleTens
, Rule
rulePowersOfTen
, Rule
ruleCompositeTens
, Rule
ruleHundreds
, Rule
ruleCompositeHundreds
, Rule
ruleDotSpelledOut
, Rule
ruleDecimals
, Rule
ruleCommas
, Rule
ruleSuffixes
, Rule
ruleNegative
]