module Duckling.Numeral.KO.Rules
( rules ) where
import Data.Maybe
import qualified Data.Text as Text
import Prelude
import Data.String
import Duckling.Dimensions.Types
import Duckling.Numeral.Helpers
import Duckling.Numeral.Types (NumeralData (..))
import qualified Duckling.Numeral.Types as TNumeral
import Duckling.Regex.Types
import Duckling.Types
-- | Numeral rule for the five short words captured by the regex, which the
-- rule name describes as the integers 1..4 used for ordinals.
--
-- Two of the alternatives yield 1; the remaining three yield 2, 3 and 4.
-- Any other captured text (or an unexpected token shape) yields 'Nothing'.
ruleIntegerForOrdinals :: Rule
ruleIntegerForOrdinals = Rule
  { name = "integer (1..4) - for ordinals"
  , pattern =
    [ regex "(\xd55c|\xccab|\xb450|\xc138|\xb124)"
    ]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (word:_)):_) ->
        lookup word valueByWord >>= integer
      _ -> Nothing
  }
  where
    -- Captured text paired with the integer it stands for.
    valueByWord :: [(Text.Text, Integer)]
    valueByWord =
      [ ("\xd55c", 1)
      , ("\xccab", 1)
      , ("\xb450", 2)
      , ("\xc138", 3)
      , ("\xb124", 4)
      ]
-- | Numeral rule for a run of 1 to 18 digits written with numerals.
--
-- The captured text is handed to 'parseInt'; on success the result is
-- widened with 'toInteger' and wrapped by 'integer'.
ruleIntegerNumeric :: Rule
ruleIntegerNumeric = Rule
  { name = "integer (numeric)"
  , pattern =
    [ regex "(\\d{1,18})"
    ]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (digits:_)):_) ->
        parseInt digits >>= integer . toInteger
      _ -> Nothing
  }
-- | Numeral rule for the single-character word in the regex (glossed as
-- \"few\" by the rule name). It always produces the fixed integer 3,
-- regardless of the matched tokens.
ruleFew :: Rule
ruleFew = Rule
  { name = "few 몇"
  , pattern =
    [ regex "\xba87"
    ]
  , prod = const (integer 3)
  }
-- | Numeral rule for a decimal literal whose integer part uses @,@ as a
-- thousands separator: one or more digits, at least one @,ddd@ group, a
-- dot and a fractional part.
--
-- Every comma is removed from the captured text before it is passed to
-- 'parseDouble'; a successful parse is wrapped by 'double'.
ruleDecimalWithThousandsSeparator :: Rule
ruleDecimalWithThousandsSeparator = Rule
  { name = "decimal with thousands separator"
  , pattern =
    [ regex "(\\d+(,\\d\\d\\d)+\\.\\d+)"
    ]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (raw:_)):_) ->
        double =<< parseDouble (dropCommas raw)
      _ -> Nothing
  }
  where
    -- Replaces each "," with the empty text.
    dropCommas = Text.replace (Text.singleton ',') Text.empty
-- | Numeral rule for a plain decimal literal: optional leading digits, a
-- dot, then one or more digits.
--
-- The captured text is passed to 'parseDecimal' with its first argument
-- set to 'True' (the meaning of that flag is defined by the helper, not
-- here).
ruleDecimalNumeral :: Rule
ruleDecimalNumeral = Rule
  { name = "decimal number"
  , pattern =
    [ regex "(\\d*\\.\\d+)"
    ]
  , prod = fromMatch
  }
  where
    fromMatch :: [Token] -> Maybe Token
    fromMatch (Token RegexMatch (GroupMatch (literal:_)):_) =
      parseDecimal True literal
    fromMatch _ = Nothing
-- | Numeral rule for @<numeral> / <numeral>@ written with a slash.
--
-- Produces the first numeral's value divided by the second's. The token
-- for the slash itself is ignored.
ruleFraction2 :: Rule
ruleFraction2 = Rule
  { name = "fraction"
  , pattern =
    [ dimension Numeral
    , regex "/"
    , dimension Numeral
    ]
  , prod = \tokens -> case tokens of
      (Token Numeral numer:_:Token Numeral denom:_) ->
        double (TNumeral.value numer / TNumeral.value denom)
      _ -> Nothing
  }
-- | Numeral rule for a numeral preceded by a minus marker: a literal @-@
-- or either of the two spelled-out forms in the regex, each optionally
-- followed by one whitespace character.
--
-- Produces the numeral's value with its sign flipped. Any other token
-- shape yields 'Nothing'.
ruleNumeralsPrefixWithOr :: Rule
ruleNumeralsPrefixWithOr = Rule
  { name = "numbers prefix with -, 마이너스, or 마이나스"
  , pattern =
    [ regex "-|\xb9c8\xc774\xb108\xc2a4\\s?|\xb9c8\xc774\xb098\xc2a4\\s?"
    , dimension Numeral
    ]
  , prod = \tokens -> case tokens of
      -- The prefix means "minus", so the factor must be -1; multiplying by
      -- a positive 1 would hand the numeral back unchanged.
      (_:Token Numeral nd:_) -> double (TNumeral.value nd * (-1))
      _ -> Nothing
  }
-- | Numeral rule for the single-character word in the regex (glossed as
-- \"half\" by the rule name). It always produces 0.5, regardless of the
-- matched tokens.
ruleHalf :: Rule
ruleHalf = Rule
  { name = "half - 반"
  , pattern =
    [ regex "\xbc18"
    ]
  , prod = const (double 0.5)
  }
-- | Numeral rule for zero: any of the three single-character alternatives
-- in the regex produces the integer 0, regardless of the matched tokens.
ruleInteger :: Rule
ruleInteger = Rule
  { name = "integer 0"
  , pattern =
    [ regex "\xc601|\xacf5|\xbe75"
    ]
  , prod = const (integer 0)
  }
-- | Numeral rule for the nine \"tens\" words captured by the regex.
--
-- Although the rule name says 20..90, the first alternative maps to 10,
-- so the produced values are 10, 20, ..., 90 in the order the words are
-- listed. Unrecognised text yields 'Nothing'.
ruleIntegerTypeAndOrdinals :: Rule
ruleIntegerTypeAndOrdinals = Rule
  { name = "integer (20..90) - TYPE 2 and ordinals"
  , pattern =
    [ regex "(\xc5f4|\xc2a4\xbb3c|\xc11c\xb978|\xb9c8\xd754|\xc270|\xc608\xc21c|\xc77c\xd754|\xc5ec\xb4e0|\xc544\xd754)"
    ]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (word:_)):_) ->
        lookup word (zip tensWords [10, 20 .. 90]) >>= integer
      _ -> Nothing
  }
  where
    -- Listed in ascending order so that position i stands for (i + 1) * 10.
    tensWords :: [Text.Text]
    tensWords =
      [ "\xc5f4"
      , "\xc2a4\xbb3c"
      , "\xc11c\xb978"
      , "\xb9c8\xd754"
      , "\xc270"
      , "\xc608\xc21c"
      , "\xc77c\xd754"
      , "\xc5ec\xb4e0"
      , "\xc544\xd754"
      ]
-- | Numeral rule for the ten single-character digit words captured by the
-- regex, producing the integers 0 through 9 in the order the words are
-- listed. Unrecognised text yields 'Nothing'.
ruleIntegerType1 :: Rule
ruleIntegerType1 = Rule
  { name = "integer - TYPE 1"
  , pattern =
    [ regex "(\xc601|\xc77c|\xc774|\xc0bc|\xc0ac|\xc624|\xc721|\xce60|\xd314|\xad6c)"
    ]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (word:_)):_) ->
        lookup word (zip digitWords [0 ..]) >>= integer
      _ -> Nothing
  }
  where
    -- Listed in ascending order so that position i stands for the digit i.
    digitWords :: [Text.Text]
    digitWords =
      [ "\xc601"
      , "\xc77c"
      , "\xc774"
      , "\xc0bc"
      , "\xc0ac"
      , "\xc624"
      , "\xc721"
      , "\xce60"
      , "\xd314"
      , "\xad6c"
      ]
-- | Numeral rule for the six power-of-ten words captured by the regex.
--
-- Each word produces a numeral of value 10, 1e2, 1e3, 1e4, 1e8 or 1e12,
-- tagged via 'withGrain' with the matching exponent (1, 2, 3, 4, 8, 12)
-- and then passed through 'withMultipliable'.
ruleIntegerType1PowersOfTen :: Rule
ruleIntegerType1PowersOfTen = Rule
  { name = "integer - TYPE 1: powers of ten"
  , pattern =
    [ regex "(\xc2ed|\xbc31|\xcc9c|\xb9cc|\xc5b5|\xc870)"
    ]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (word:_)):_) -> case word of
        "\xc2ed" -> power 10 1
        "\xbc31" -> power 1e2 2
        "\xcc9c" -> power 1e3 3
        "\xb9cc" -> power 1e4 4
        "\xc5b5" -> power 1e8 8
        "\xc870" -> power 1e12 12
        _ -> Nothing
      _ -> Nothing
  }
  where
    -- Shared pipeline: build the numeral, attach its grain, then apply
    -- 'withMultipliable'.
    power amount exponent10 =
      double amount >>= withGrain exponent10 >>= withMultipliable
-- | Numeral rule that adds two adjacent numerals.
--
-- The first numeral must carry a grain greater than 1 (a missing grain is
-- treated as 0 by the pattern); the second must not be multipliable. The
-- sum is produced only when the second value is strictly smaller than
-- @10 ^ grain@ of the first; otherwise the rule yields 'Nothing'.
ruleSum :: Rule
ruleSum = Rule
  { name = "intersect 2 numbers"
  , pattern =
    [ numberWith (fromMaybe 0 . TNumeral.grain) (>1)
    , numberWith TNumeral.multipliable not
    ]
  , prod = \tokens -> case tokens of
      (Token Numeral (NumeralData {TNumeral.value = high, TNumeral.grain = Just g}):
       Token Numeral low:
       _)
        | TNumeral.value low < 10 ** fromIntegral g ->
            double (high + TNumeral.value low)
      _ -> Nothing
  }
-- | Numeral rule that combines any numeral with a following multipliable
-- numeral by delegating to 'multiply' on the two tokens.
ruleMultiply :: Rule
ruleMultiply = Rule
  { name = "compose by multiplication"
  , pattern =
    [ dimension Numeral
    , numberWith TNumeral.multipliable id
    ]
  , prod = combine
  }
  where
    combine :: [Token] -> Maybe Token
    combine (lhs:rhs:_) = multiply lhs rhs
    combine _ = Nothing
-- | Numeral rule for the nine number words captured by the regex,
-- producing the integers 1 through 9 in the order the words are listed.
--
-- The rule name mentions 1..10, but only nine words are matched here.
-- Unrecognised text yields 'Nothing'.
ruleIntegerType2 :: Rule
ruleIntegerType2 = Rule
  { name = "integer (1..10) - TYPE 2"
  , pattern =
    [ regex "(\xd558\xb098|\xb458|\xc14b|\xb137|\xb2e4\xc12f|\xc5ec\xc12f|\xc77c\xacf1|\xc5ec\xb35f|\xc544\xd649)"
    ]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (word:_)):_) ->
        lookup word (zip unitWords [1 ..]) >>= integer
      _ -> Nothing
  }
  where
    -- Listed in ascending order so that position i stands for i + 1.
    unitWords :: [Text.Text]
    unitWords =
      [ "\xd558\xb098"
      , "\xb458"
      , "\xc14b"
      , "\xb137"
      , "\xb2e4\xc12f"
      , "\xc5ec\xc12f"
      , "\xc77c\xacf1"
      , "\xc5ec\xb35f"
      , "\xc544\xd649"
      ]
-- | Numeral rule for a spoken fraction: two numerals joined by the
-- connective matched by the regex.
--
-- The operands are in the opposite order from the slash form: the result
-- is the /second/ numeral divided by the /first/.
ruleFraction :: Rule
ruleFraction = Rule
  { name = "fraction"
  , pattern =
    [ dimension Numeral
    , regex "\xbd84(\xc758|\xc5d0)"
    , dimension Numeral
    ]
  , prod = \tokens -> case tokens of
      (Token Numeral denom:_:Token Numeral numer:_) ->
        double (TNumeral.value numer / TNumeral.value denom)
      _ -> Nothing
  }
-- | Numeral rule for a spoken decimal: a numeral, one of the two \"dot\"
-- words in the regex, then one or more digit words.
--
-- The digit words (second capture group) are transliterated character by
-- character into \"0\"..\"9\", concatenated and parsed with 'parseDouble'.
-- The parsed number goes through 'decimalsToDouble' and is added to the
-- leading numeral's value.
-- NOTE(review): 'decimalsToDouble' presumably turns the digit run into its
-- fractional value; confirm against the helpers module.
ruleNumeralDotNumeral :: Rule
ruleNumeralDotNumeral = Rule
  { name = "number dot number - 삼점사"
  , pattern =
    [ dimension Numeral
    , regex "(\xc810|\xca5c)((\xc601|\xc77c|\xc774|\xc0bc|\xc0ac|\xc624|\xc721|\xce60|\xd314|\xad6c)+)"
    ]
  , prod = \tokens -> case tokens of
      (Token Numeral whole:
       Token RegexMatch (GroupMatch (_:spoken:_)):
       _) ->
        parseDouble (Text.concat (mapMaybe digitText (Text.unpack spoken)))
          >>= double . (TNumeral.value whole +) . decimalsToDouble
      _ -> Nothing
  }
  where
    -- Maps one digit word to its numeral text; other characters are dropped
    -- by the surrounding 'mapMaybe'.
    digitText :: Char -> Maybe Text.Text
    digitText c = lookup c
      [ ('\xc601', "0")
      , ('\xc77c', "1")
      , ('\xc774', "2")
      , ('\xc0bc', "3")
      , ('\xc0ac', "4")
      , ('\xc624', "5")
      , ('\xc721', "6")
      , ('\xce60', "7")
      , ('\xd314', "8")
      , ('\xad6c', "9")
      ]
-- | Numeral rule for a tens numeral (one of 10, 20, ..., 90) immediately
-- followed by a units numeral (one of 1..9); produces their sum.
ruleIntegerType3 :: Rule
ruleIntegerType3 = Rule
  { name = "integer (21..99) - TYPE 2"
  , pattern =
    [ oneOf [10, 20 .. 90]
    , oneOf [1 .. 9]
    ]
  , prod = \tokens -> case tokens of
      (Token Numeral tens:Token Numeral units:_) ->
        double (TNumeral.value tens + TNumeral.value units)
      _ -> Nothing
  }
-- | Numeral rule for an integer literal grouped with @,@: one to three
-- leading digits followed by one to five @,ddd@ groups.
--
-- Every comma is removed from the captured text before it is passed to
-- 'parseDouble'; a successful parse is wrapped by 'double'.
ruleIntegerWithThousandsSeparator :: Rule
ruleIntegerWithThousandsSeparator = Rule
  { name = "integer with thousands separator ,"
  , pattern =
    [ regex "(\\d{1,3}(,\\d\\d\\d){1,5})"
    ]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (raw:_)):_) ->
        double =<< parseDouble (dropCommas raw)
      _ -> Nothing
  }
  where
    -- Replaces each "," with the empty text.
    dropCommas = Text.replace (Text.singleton ',') Text.empty
-- | Every numeral rule defined in this module, in the order they are
-- exported to the rest of the system. The order of the list is preserved
-- exactly; do not re-sort it without checking how consumers use it.
rules :: [Rule]
rules =
  [ ruleDecimalNumeral
  , ruleDecimalWithThousandsSeparator
  , ruleFew
  , ruleFraction
  , ruleFraction2
  , ruleHalf
  , ruleInteger
  , ruleIntegerForOrdinals
  , ruleIntegerNumeric
  , ruleIntegerType1
  , ruleIntegerType1PowersOfTen
  , ruleSum
  , ruleMultiply
  , ruleIntegerType2
  , ruleIntegerType3
  , ruleIntegerTypeAndOrdinals
  , ruleIntegerWithThousandsSeparator
  , ruleNumeralDotNumeral
  , ruleNumeralsPrefixWithOr
  ]