{-# LANGUAGE ViewPatterns #-}
module NLP.Minimorph.English where
#if !(MIN_VERSION_base(4,11,0))
import Data.Semigroup
#endif
import Data.Char (isSpace, isUpper, toLower)
import Data.Text (Text)
import qualified Data.Text as T
import NLP.Minimorph.Util
commas :: Text -> [Text] -> Text
commas :: Text -> [Text] -> Text
commas Text
_ [] = Text
""
commas Text
_ [Text
x] = Text
x
commas Text
et [Text]
xs = Text -> [Text] -> Text
T.intercalate Text
", " ([Text] -> [Text]
forall a. [a] -> [a]
init [Text]
xs) Text -> Text -> Text
<+> Text
et Text -> Text -> Text
<+> [Text] -> Text
forall a. [a] -> a
last [Text]
xs
cardinal :: Int -> Text
cardinal :: Int -> Text
cardinal Int
n = case Int
n of
Int
0 -> Text
"zero"
Int
1 -> Text
"one"
Int
2 -> Text
"two"
Int
3 -> Text
"three"
Int
4 -> Text
"four"
Int
5 -> Text
"five"
Int
6 -> Text
"six"
Int
7 -> Text
"seven"
Int
8 -> Text
"eight"
Int
9 -> Text
"nine"
Int
10 -> Text
"ten"
Int
_ -> Int -> Text
forall a. Show a => a -> Text
tshow Int
n
ordinalNotSpelled :: Int -> Text
ordinalNotSpelled :: Int -> Text
ordinalNotSpelled Int
k = case Int -> Int
forall a. Num a => a -> a
abs Int
k Int -> Int -> Int
forall a. Integral a => a -> a -> a
`rem` Int
100 of
Int
n | Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
3 Bool -> Bool -> Bool
&& Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
21 -> Int
k Int -> Text -> Text
forall a. Show a => a -> Text -> Text
`suf` Text
"th"
| Int
n Int -> Int -> Int
forall a. Integral a => a -> a -> a
`rem` Int
10 Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1 -> Int
k Int -> Text -> Text
forall a. Show a => a -> Text -> Text
`suf` Text
"st"
| Int
n Int -> Int -> Int
forall a. Integral a => a -> a -> a
`rem` Int
10 Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
2 -> Int
k Int -> Text -> Text
forall a. Show a => a -> Text -> Text
`suf` Text
"nd"
| Int
n Int -> Int -> Int
forall a. Integral a => a -> a -> a
`rem` Int
10 Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
3 -> Int
k Int -> Text -> Text
forall a. Show a => a -> Text -> Text
`suf` Text
"rd"
| Bool
otherwise -> Int
k Int -> Text -> Text
forall a. Show a => a -> Text -> Text
`suf` Text
"th"
where
a
num suf :: a -> Text -> Text
`suf` Text
s = a -> Text
forall a. Show a => a -> Text
tshow a
num Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
s
ordinal :: Int -> Text
ordinal :: Int -> Text
ordinal Int
n = case Int
n of
Int
0 -> Text
"zeroth"
Int
1 -> Text
"first"
Int
2 -> Text
"second"
Int
3 -> Text
"third"
Int
4 -> Text
"fourth"
Int
5 -> Text
"fifth"
Int
6 -> Text
"sixth"
Int
7 -> Text
"seventh"
Int
8 -> Text
"eighth"
Int
9 -> Text
"ninth"
Int
10 -> Text
"tenth"
Int
k -> Int -> Text
ordinalNotSpelled Int
k
defaultNounPlural :: Text -> Text
defaultNounPlural :: Text -> Text
defaultNounPlural Text
x
| Text
"is" Text -> Text -> Bool
`T.isSuffixOf` Text
x = Text
thesis
| Text -> Bool
hasSibilantSuffix Text
x = Text
sibilant_o
| Text -> Bool
hasCoSuffix Text
x = Text
sibilant_o
| Text -> Bool
hasCySuffix Text
x = Text
y_final
| Text
"ff" Text -> Text -> Bool
`T.isSuffixOf` Text
x = Text
ff_final
| Text
"f" Text -> Text -> Bool
`T.isSuffixOf` Text
x = Text
f_final
| Bool
otherwise = Text
plain
where
plain :: Text
plain = Text
x Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
"s"
sibilant_o :: Text
sibilant_o = Text
x Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
"es"
y_final :: Text
y_final = Text -> Text
T.init Text
x Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
"ies"
f_final :: Text
f_final = Text -> Text
T.init Text
x Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
"ves"
ff_final :: Text
ff_final = Int -> Text -> Text
T.dropEnd Int
2 Text
x Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
"ves"
thesis :: Text
thesis = Int -> Text -> Text
T.dropEnd Int
2 Text
x Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
"es"
defaultVerbStuff :: Text -> (Text, Text)
defaultVerbStuff :: Text -> (Text, Text)
defaultVerbStuff Text
x
| Text -> Bool
hasSibilantSuffix Text
x = (Text, Text)
sibilant_o
| Text -> Bool
hasCoSuffix Text
x = (Text, Text)
sibilant_o
| Text -> Bool
hasCySuffix Text
x = (Text, Text)
y_final
| Text
"e" Text -> Text -> Bool
`T.isSuffixOf` Text
x = (Text, Text)
e_final
| Bool
otherwise = (Text, Text)
plain
where
plain :: (Text, Text)
plain = (Text
x Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
"s" , Text
x Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
"ed")
sibilant_o :: (Text, Text)
sibilant_o = (Text
x Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
"es" , Text
x Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
"ed")
e_final :: (Text, Text)
e_final = (Text
x Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
"s" , Text
x Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
"d")
y_final :: (Text, Text)
y_final = (Text -> Text
T.init Text
x Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
"ies", Text -> Text
T.init Text
x Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
"ied")
defaultPossesive :: Text -> Text
defaultPossesive :: Text -> Text
defaultPossesive Text
t =
case Text -> Char
T.last Text
t of
Char
's' -> Text
t Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
"'"
Char
'S' -> Text
t Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
"'"
Char
'\'' -> Text
t Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
"s"
Char
_ -> Text
t Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
"'s"
anNumerals :: [Text]
anNumerals :: [Text]
anNumerals = [ Text
"11", Text
"11th", Text
"18", Text
"18th" ]
indefiniteDet :: Text -> Text
indefiniteDet :: Text -> Text
indefiniteDet Text
t = if Text -> Bool
wantsAn Text
t then Text
"an" else Text
"a"
wantsAn :: Text -> Bool
wantsAn :: Text -> Bool
wantsAn Text
t_ =
if Text -> Bool
startsWithAcronym Text
t_
then Text -> Bool
acronymWantsAn Text
t_
else Bool
useAn0 Bool -> Bool -> Bool
|| Bool
useAn1
where
t :: Text
t = (Text, Text) -> Text
forall a b. (a, b) -> a
fst ((Text, Text) -> Text) -> (Text, Text) -> Text
forall a b. (a -> b) -> a -> b
$ (Char -> Bool) -> Text -> (Text, Text)
T.break Char -> Bool
isSep (Text -> (Text, Text)) -> Text -> (Text, Text)
forall a b. (a -> b) -> a -> b
$ Text -> Text
T.toLower Text
t_
useAn0 :: Bool
useAn0 = Text
t Text -> [Text] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Text]
anNumerals
useAn1 :: Bool
useAn1 = case Text -> Maybe (Char, Text)
T.uncons Text
t of
Just (Char
h, Text
"") -> Char -> Bool
isLetterWithInitialVowelSound Char
h
Just (Char
'8',Text
_) -> Bool
True
Just (Char
'u',Text
_) -> Text -> Bool
hasVowel_U_Prefix Text
t
Just (Char
h, Text
_) -> Char -> Bool
isVowel Char
h Bool -> Bool -> Bool
`butNot` Text -> Bool
hasSemivowelPrefix Text
t
Maybe (Char, Text)
Nothing -> Bool
False
Bool
x butNot :: Bool -> Bool -> Bool
`butNot` Bool
y = Bool
x Bool -> Bool -> Bool
&& Bool -> Bool
not Bool
y
isSep :: Char -> Bool
isSep Char
c = Char -> Bool
isSpace Char
c Bool -> Bool -> Bool
|| Char
c Char -> [Char] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` ([Char]
"-" :: String)
acronymWantsAn :: Text -> Bool
acronymWantsAn :: Text -> Bool
acronymWantsAn (Text -> Text
T.toLower -> Text
t) =
Bool
useAn0 Bool -> Bool -> Bool
|| Bool
useAn1
where
useAn0 :: Bool
useAn0 = Text
t Text -> [Text] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Text]
anNumerals
useAn1 :: Bool
useAn1 = case Text -> Maybe (Char, Text)
T.uncons Text
t of
Just (Char
'8',Text
_) -> Bool
True
Just (Char
h,Text
_) -> Char -> Bool
isLetterWithInitialVowelSound Char
h
Maybe (Char, Text)
Nothing -> Bool
False
looksLikeAcronym :: Text -> Bool
looksLikeAcronym :: Text -> Bool
looksLikeAcronym Text
"" = Bool
False
looksLikeAcronym Text
x = (Char -> Bool) -> Text -> Bool
T.all Char -> Bool
isUpper (if Text -> Int
T.length Text
x Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
1 then Int -> Text -> Text
T.drop Int
1 Text
x else Text
x)
startsWithAcronym :: Text -> Bool
startsWithAcronym :: Text -> Bool
startsWithAcronym =
Text -> Bool
looksLikeAcronym (Text -> Bool) -> (Text -> Text) -> Text -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
firstWord
where
firstWord :: Text -> Text
firstWord = (Text, Text) -> Text
forall a b. (a, b) -> a
fst ((Text, Text) -> Text) -> (Text -> (Text, Text)) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> Text -> (Text, Text)
T.break Char -> Bool
isSep
isSep :: Char -> Bool
isSep Char
c = Char -> Bool
isSpace Char
c Bool -> Bool -> Bool
|| Char
c Char -> [Char] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` ([Char]
"-" :: String)
hasSibilantSuffix :: Text -> Bool
hasSibilantSuffix :: Text -> Bool
hasSibilantSuffix Text
x = (Text -> Bool) -> [Text] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any (Text -> Text -> Bool
`T.isSuffixOf` Text
x) [Text
"x",Text
"s",Text
"ch",Text
"sh",Text
"z",Text
"j"]
hasSemivowelPrefix :: Text -> Bool
hasSemivowelPrefix :: Text -> Bool
hasSemivowelPrefix Text
ls = (Text -> Bool) -> [Text] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any (Text -> Text -> Bool
`T.isPrefixOf` Text
ls) [Text
"y",Text
"w",Text
"eu",Text
"ewe"]
hasVowel_U_Prefix :: Text -> Bool
hasVowel_U_Prefix :: Text -> Bool
hasVowel_U_Prefix Text
t =
case Text -> [Char]
T.unpack Text
t of
[Char
'u'] -> Bool
False
[Char
'u',Char
_] -> Bool
True
(Char
'u':Char
c:Char
v:[Char]
_) -> Bool -> Bool
not (Char -> Bool
isConsonant Char
c Bool -> Bool -> Bool
&& Char -> Bool
isVowel Char
v)
[Char]
_ -> Bool
False
hasCySuffix :: Text -> Bool
hasCySuffix :: Text -> Bool
hasCySuffix (Text -> [Char]
T.unpack (Text -> [Char]) -> (Text -> Text) -> Text -> [Char]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Text -> Text
T.takeEnd Int
2 -> [Char
x, Char
'y']) = Char -> Bool
isConsonant Char
x
hasCySuffix Text
_ = Bool
False
hasCoSuffix :: Text -> Bool
hasCoSuffix :: Text -> Bool
hasCoSuffix (Text -> [Char]
T.unpack (Text -> [Char]) -> (Text -> Text) -> Text -> [Char]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Text -> Text
T.takeEnd Int
2 -> [Char
x, Char
'o']) = Char -> Bool
isConsonant Char
x
hasCoSuffix Text
_ = Bool
False
isVowel :: Char -> Bool
isVowel :: Char -> Bool
isVowel = (Char -> [Char] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` ([Char]
"aeiou" :: String)) (Char -> Bool) -> (Char -> Char) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Char
toLower
isLetterWithInitialVowelSound :: Char -> Bool
isLetterWithInitialVowelSound :: Char -> Bool
isLetterWithInitialVowelSound = (Char -> [Char] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` ([Char]
"aeiofhlmnrsx" :: String)) (Char -> Bool) -> (Char -> Char) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Char
toLower
isConsonant :: Char -> Bool
isConsonant :: Char -> Bool
isConsonant = (Char -> [Char] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` ([Char]
"bcdfghjklmnpqrstvwxyz" :: String)) (Char -> Bool) -> (Char -> Char) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Char
toLower