{-# LANGUAGE GADTs #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
module Duckling.Numeral.CA.Rules (rules) where
import Data.HashMap.Strict (HashMap)
import qualified Data.HashMap.Strict as HashMap
import Data.Maybe
import Data.String
import Data.Text (Text)
import qualified Data.Text as Text
import Prelude
import Duckling.Dimensions.Types
import Duckling.Numeral.Helpers
import Duckling.Numeral.Types (NumeralData(..))
import qualified Duckling.Numeral.Types as TNumeral
import Duckling.Regex.Types
import Duckling.Types
-- | Lookup table from lower-case Catalan number words to the integers 0..15.
--
-- Several spellings can share one value: @"u"@, @"un"@ and @"una"@ are all 1,
-- and @"dos"@ and @"dues"@ are both 2.
zeroToFifteenMap :: HashMap Text Integer
zeroToFifteenMap = HashMap.fromList
  [ (spelling, number)
  | (number, spellings) <- table
  , spelling <- spellings
  ]
  where
    -- One row per value, listing every accepted spelling of that value.
    table :: [(Integer, [Text])]
    table =
      [ (0, ["zero"])
      , (1, ["u", "un", "una"])
      , (2, ["dos", "dues"])
      , (3, ["tres"])
      , (4, ["quatre"])
      , (5, ["cinc"])
      , (6, ["sis"])
      , (7, ["set"])
      , (8, ["vuit"])
      , (9, ["nou"])
      , (10, ["deu"])
      , (11, ["onze"])
      , (12, ["dotze"])
      , (13, ["tretze"])
      , (14, ["catorze"])
      , (15, ["quinze"])
      ]
-- | Rule for the number words 0..15.
--
-- The first regex capture group is lower-cased and looked up in
-- 'zeroToFifteenMap'; the value found is handed to 'integer'. A word with no
-- entry in the map produces no token.
ruleZeroToFifteen :: Rule
ruleZeroToFifteen = Rule
  { name = "number (0..15)"
  , pattern =
      [ regex "(zero|u(na|n)?|d(o|ue)s|tres|quatre|cinc|sis|set|vuit|nou|deu|onze|dotze|tretze|catorze|quinze)"
      ]
  , prod = fromWord
  }
  where
    fromWord :: Production
    fromWord (Token RegexMatch (GroupMatch (word:_)) : _) =
      integer =<< HashMap.lookup (Text.toLower word) zeroToFifteenMap
    fromWord _ = Nothing
-- | Rule for a numeral preceded by @-@ or @menys@.
--
-- The second token must be a numeral accepted by 'isPositive'; the result is
-- that numeral's value negated and passed to 'double'.
ruleNumeralsPrefixWithNegativeOrMinus :: Rule
ruleNumeralsPrefixWithNegativeOrMinus = Rule
  { name = "numbers prefix with -, negative or minus"
  , pattern =
      [ regex "-|menys"
      , Predicate isPositive
      ]
  , prod = negated
  }
  where
    negated :: Production
    -- The sign token itself carries no information, so it is ignored.
    negated (_ : Token Numeral NumeralData{TNumeral.value = amount} : _) =
      double (negate amount)
    negated _ = Nothing
-- | Lookup table from lower-case Catalan words for the tens to the integers
-- 20, 30, ..., 90.
tensMap :: HashMap Text Integer
tensMap = HashMap.fromList (zip tensWords [20, 30 .. 90])
  where
    -- Listed in ascending order so each word lines up with its multiple of 10.
    tensWords :: [Text]
    tensWords =
      [ "vint"
      , "trenta"
      , "quaranta"
      , "cinquanta"
      , "seixanta"
      , "setanta"
      , "vuitanta"
      , "noranta"
      ]
-- | Rule for the words naming the tens 20..90.
--
-- The first regex capture group is lower-cased and looked up in 'tensMap';
-- the value found is handed to 'integer'.
ruleTens :: Rule
ruleTens = Rule
  { name = "number (20..90)"
  , pattern =
      [ regex "(vint|(tre|quara|cinqua|seixa|seta|vuita|nora)nta)"
      ]
  , prod = fromWord
  }
  where
    fromWord :: Production
    fromWord (Token RegexMatch (GroupMatch (word:_)) : _) =
      integer =<< HashMap.lookup (Text.toLower word) tensMap
    fromWord _ = Nothing
-- | Lookup table from lower-case Catalan number words to the integers
-- 16..19 and 21..29.
--
-- The table has an entry for every spelling the regex in
-- 'ruleLowerTensWithOnes' can match. That regex builds 17, 18 and 19 from
-- alternations (@d(i|e|è)sset@, @d(e|i)(v|h)uit@, @d(i|e|è)nou@), so each
-- combination needs its own key; otherwise the rule matches the text but the
-- lookup fails and no token is produced. @"desset"@ and @"dehuit"@ are
-- included for that reason.
sixteenToTwentyNineMap :: HashMap Text Integer
sixteenToTwentyNineMap =
  HashMap.fromList
    [ ("setze", 16)
    -- 17: d(i|e|è)sset
    , ("disset", 17)
    , ("desset", 17)
    , ("dèsset", 17)
    -- 18: d(e|i)(v|h)uit
    , ("devuit", 18)
    , ("dehuit", 18)
    , ("divuit", 18)
    , ("dihuit", 18)
    -- 19: d(i|e|è)nou
    , ("dinou", 19)
    , ("dènou", 19)
    , ("denou", 19)
    -- 21..29
    , ("vint-i-u", 21)
    , ("vint-i-una", 21)
    , ("vint-i-dos", 22)
    , ("vint-i-tres", 23)
    , ("vint-i-quatre", 24)
    , ("vint-i-cinc", 25)
    , ("vint-i-sis", 26)
    , ("vint-i-set", 27)
    , ("vint-i-vuit", 28)
    , ("vint-i-nou", 29)
    ]
-- | Rule for the single-word numbers 16..19 and 21..29.
--
-- The first regex capture group is lower-cased and looked up in
-- 'sixteenToTwentyNineMap'; the value found is handed to 'integer'. A match
-- with no entry in the map produces no token.
ruleLowerTensWithOnes :: Rule
ruleLowerTensWithOnes = Rule
  { name = "number (16..19 21..29)"
  , pattern =
      [ regex "(setze|d(i|e|è)sset|d(e|i)(v|h)uit|d(i|e|è)nou|vint-i-u(na)?|vint-i-dos|vint-i-tres|vint-i-quatre|vint-i-cinc|vint-i-sis|vint-i-set|vint-i-vuit|vint-i-nou)"
      ]
  , prod = fromWord
  }
  where
    fromWord :: Production
    fromWord (Token RegexMatch (GroupMatch (word:_)) : _) =
      integer =<< HashMap.lookup (Text.toLower word) sixteenToTwentyNineMap
    fromWord _ = Nothing
-- | Rule for hyphenated compounds of a ten (30..90) and a unit, e.g. 31..39.
--
-- Pattern: a numeral equal to one of 30, 40, ..., 90, a literal @-@, then a
-- unit numeral. The result is the sum of the two numeral values.
--
-- The unit bound is @numberBetween 1 10@, matching how the sibling rules in
-- this module request "1 through 9" ('ruleTwenties',
-- 'ruleBelowTenWithTwoDigits'). The previous bound of 9 disagreed with the
-- rule's own name, which lists 39, 49, ..., 99.
-- NOTE(review): this assumes 'numberBetween' treats its upper bound as
-- exclusive, as the sibling rules imply; confirm against
-- "Duckling.Numeral.Helpers".
ruleHigherTensWithOnes :: Rule
ruleHigherTensWithOnes = Rule
  { name = "number (31..39 41..49 51..59 61..69 71..79 81..89 91..99)"
  , pattern =
      [ oneOf [30, 40, 50, 60, 70, 80, 90]
      , regex "-"
      , numberBetween 1 10
      ]
  , prod = \case
      (Token Numeral NumeralData{TNumeral.value = tens}:
       _:
       Token Numeral NumeralData{TNumeral.value = ones}:
       _) -> double $ tens + ones
      _ -> Nothing
  }
-- | Rule for a numeral followed by a @k@, @m@ or @g@ suffix.
--
-- The captured suffix letter is lower-cased and mapped to a multiplier
-- (k = 1e3, m = 1e6, g = 1e9); the numeral's value times that multiplier is
-- passed to 'double'. Any other captured text produces no token.
ruleNumeralsSuffixesKMG :: Rule
ruleNumeralsSuffixesKMG = Rule
  { name = "numbers suffixes (K, M, G)"
  , pattern =
      [ dimension Numeral
      , regex "([kmg])(?=[\\W\\$€]|$)"
      ]
  , prod = scaled
  }
  where
    scaled :: Production
    scaled (Token Numeral NumeralData{TNumeral.value = base} :
            Token RegexMatch (GroupMatch (suffix:_)) :
            _) = do
      factor <- lookup (Text.toLower suffix) multipliers
      double (base * factor)
    scaled _ = Nothing

    -- Suffix letter paired with the factor it stands for.
    multipliers :: [(Text, Double)]
    multipliers =
      [ ("k", 1e3)
      , ("m", 1e6)
      , ("g", 1e9)
      ]
-- | Lookup table from lower-case Catalan words to the hundreds 100..900 and
-- to 1000.
--
-- Both @"cent"@ and @"cents"@ map to 100.
oneHundredToThousandMap :: HashMap Text Integer
oneHundredToThousandMap = HashMap.fromList $
     [ ("cent", 100), ("cents", 100) ]
  ++ zip multiples [200, 300 .. 900]
  ++ [ ("mil", 1000) ]
  where
    -- Listed in ascending order so each word lines up with its multiple of 100.
    multiples :: [Text]
    multiples =
      [ "dos-cents"
      , "tres-cents"
      , "quatre-cents"
      , "cinc-cents"
      , "sis-cents"
      , "set-cents"
      , "vuit-cents"
      , "nou-cents"
      ]
-- | Rule for compounds of 20 and a following numeral joined by @-i-@ or
-- @ i @.
--
-- Pattern: a numeral equal to 20, the connector, then a numeral accepted by
-- @numberBetween 1 10@. The result is the sum of the two numeral values.
ruleTwenties :: Rule
ruleTwenties = Rule
  { name = "number (21..29)"
  , pattern =
      [ oneOf [20]
      , regex "(-i-| i )"
      , numberBetween 1 10
      ]
  , prod = summed
  }
  where
    summed :: Production
    -- The connector token in the middle contributes nothing to the value.
    summed (Token Numeral NumeralData{TNumeral.value = twenty} :
            _ :
            Token Numeral NumeralData{TNumeral.value = ones} :
            _) = double (twenty + ones)
    summed _ = Nothing
-- | Rule for the words naming the hundreds 100..900 and 1000.
--
-- The first regex capture group is lower-cased and looked up in
-- 'oneHundredToThousandMap'; the value found is handed to 'integer'.
ruleHundreds :: Rule
ruleHundreds = Rule
  { name = "number 100..1000 "
  , pattern =
      [ regex "(cent(s)?|dos-cents|tres-cents|quatre-cents|cinc-cents|sis-cents|set-cents|vuit-cents|nou-cents|mil)"
      ]
  , prod = fromWord
  }
  where
    fromWord :: Production
    fromWord (Token RegexMatch (GroupMatch (word:_)) : _) =
      integer =<< HashMap.lookup (Text.toLower word) oneHundredToThousandMap
    fromWord _ = Nothing
-- | Rule for compounds of the form @\<digit\>-\<hundred\> \<remainder\>@.
--
-- The pattern has four items:
--
--   1. a numeral accepted by @numberBetween 2 10@ (the hundreds digit),
--   2. a literal @-@,
--   3. a numeral whose value is exactly 100,
--   4. a numeral accepted by @numberBetween 0 100@ (the remainder).
--
-- The result is @100 * digit + remainder@.
--
-- The production must therefore skip two tokens (the hyphen and the 100
-- numeral) before reading the remainder. Skipping only one binds the 100
-- numeral as the remainder, so every match evaluates to @100 * digit + 100@
-- and the real remainder is ignored.
ruleNumerals :: Rule
ruleNumerals = Rule
  { name = "numbers 200..999"
  , pattern =
      [ numberBetween 2 10
      , regex "-"
      , numberWith TNumeral.value (== 100)
      , numberBetween 0 100
      ]
  , prod = \case
      (Token Numeral NumeralData{TNumeral.value = hundredsDigit}:
       _:  -- the "-" separator
       _:  -- the numeral equal to 100
       Token Numeral NumeralData{TNumeral.value = remainder}:
       _) -> double $ 100 * hundredsDigit + remainder
      _ -> Nothing
  }
-- | Rule for a spelled-out decimal: numeral, the word @coma@, numeral.
--
-- The third token must be one for which 'hasGrain' is false. The result is
-- the first value plus 'decimalsToDouble' applied to the second value.
ruleNumeralDotNumeral :: Rule
ruleNumeralDotNumeral = Rule
  { name = "number dot number"
  , pattern =
      [ dimension Numeral
      , regex "coma"
      , Predicate (not . hasGrain)
      ]
  , prod = combined
  }
  where
    combined :: Production
    combined (Token Numeral NumeralData{TNumeral.value = whole} :
              _ :
              Token Numeral NumeralData{TNumeral.value = decimals} :
              _) = double (whole + decimalsToDouble decimals)
    combined _ = Nothing
-- | Rule for a numeral written with a leading @zero@ or @0@.
--
-- Pattern: the leading zero, then a numeral accepted by @numberBetween 1 10@.
-- The leading zero is discarded and the numeral's value is passed to 'double'
-- unchanged.
ruleBelowTenWithTwoDigits :: Rule
ruleBelowTenWithTwoDigits = Rule
  { name = "integer (0-9) with two digits"
  , pattern =
      [ regex "zero|0"
      , numberBetween 1 10
      ]
  , prod = dropLeadingZero
  }
  where
    dropLeadingZero :: Production
    dropLeadingZero (_ : Token Numeral NumeralData{TNumeral.value = digit} : _) =
      double digit
    dropLeadingZero _ = Nothing
-- | Rule for digit strings that use @.@ between groups of three digits and
-- @,@ before the fractional part.
--
-- The matched text is rewritten in two steps (every @.@ removed, then every
-- @,@ replaced by @.@) and the result is given to 'parseDouble' and then to
-- 'double'.
ruleDecimalWithThousandsSeparator :: Rule
ruleDecimalWithThousandsSeparator = Rule
  { name = "decimal with thousands separator ."
  , pattern =
      [ regex "(\\d+(\\.\\d\\d\\d)+,\\d+)"
      ]
  , prod = parsed
  }
  where
    parsed :: Production
    parsed (Token RegexMatch (GroupMatch (raw:_)) : _) =
      double =<< parseDouble normalized
      where
        -- Order matters: the dots must go before the comma becomes a dot.
        withoutGroupDots = Text.replace "." Text.empty raw
        normalized = Text.replace "," "." withoutGroupDots
    parsed _ = Nothing
-- | Rule for digit strings with a @,@ before the fractional part; the digits
-- before the comma are optional.
--
-- The matched text is passed to 'parseDecimal' with 'False' as its first
-- argument.
ruleDecimalNumeral :: Rule
ruleDecimalNumeral = Rule
  { name = "decimal number ,"
  , pattern =
      [ regex "(\\d*,\\d+)"
      ]
  , prod = parsed
  }
  where
    parsed :: Production
    parsed (Token RegexMatch (GroupMatch (raw:_)) : _) = parseDecimal False raw
    parsed _ = Nothing
-- | Rule for integers written with @.@ between groups of three digits:
-- one to three leading digits followed by one to five @.ddd@ groups.
--
-- Every @.@ is removed from the matched text and the remaining digits are
-- given to 'parseDouble' and then to 'double'.
ruleIntegerWithThousandsSeparator :: Rule
ruleIntegerWithThousandsSeparator = Rule
  { name = "integer with thousands separator ."
  , pattern =
      [ regex "(\\d{1,3}(\\.\\d\\d\\d){1,5})"
      ]
  , prod = parsed
  }
  where
    parsed :: Production
    parsed (Token RegexMatch (GroupMatch (raw:_)) : _) =
      double =<< parseDouble digitsOnly
      where
        digitsOnly = Text.replace "." Text.empty raw
    parsed _ = Nothing
-- | Every numeral rule defined in this module; the module's only export.
rules :: [Rule]
rules =
  -- Spelled-out numbers and their compounds.
  [ ruleBelowTenWithTwoDigits
  , ruleZeroToFifteen
  , ruleTens
  , ruleTwenties
  , ruleLowerTensWithOnes
  , ruleHigherTensWithOnes
  , ruleHundreds
  , ruleNumeralDotNumeral
  , ruleNumerals

  -- Sign prefix and magnitude suffix.
  , ruleNumeralsPrefixWithNegativeOrMinus
  , ruleNumeralsSuffixesKMG

  -- Digit strings with "," and "." separators.
  , ruleDecimalNumeral
  , ruleDecimalWithThousandsSeparator
  , ruleIntegerWithThousandsSeparator
  ]