{-# LANGUAGE ViewPatterns, OverloadedStrings #-} -- TODO : learn how to use Functional Morphology instead -- | Module : NLP.Minimorph.English -- Copyright : 2012 Eric Kow (Computational Linguistics Ltd.) -- License : BSD3 -- Maintainer : eric.kow@gmail.com -- Stability : experimental -- Portability : portable -- -- Simple default rules for English morphology module NLP.Minimorph.English where import Data.Text ( Text ) import qualified Data.Text as T import NLP.Minimorph.Util -- | No Oxford commas, alas. -- -- > commas "and" "foo bar" == "foo and bar" -- > commas "and" "foo, bar, baz" == "foo, bar and baz" commas :: Text -> [Text] -> Text commas _ [] = "" commas _ [x] = x commas et xs = T.intercalate ", " (init xs) <+> et <+> last xs -- | > cardinal 1 == "one" -- > cardinal 2 == "two" -- > cardinal 3 == "three" -- > cardinal 4 == "4" cardinal :: Int -> Text cardinal n = case n of 1 -> "one" 2 -> "two" 3 -> "three" _ -> T.pack (show n) -- | > cardinal 1 == "first" -- > cardinal 2 == "second" -- > cardinal 3 == "third" -- > cardinal 4 == "4th" ordinal :: Int -> Text ordinal n = case n of 1 -> "first" 2 -> "second" 3 -> "third" n | n < 21 -> n `suf` "th" | n `rem` 10 == 2 -> n `suf` "nd" | n `rem` 10 == 3 -> n `suf` "rd" | otherwise -> n `suf` "th" where n `suf` s = T.pack (show n) <> s -- | Heuristics for English plural for an unknown noun -- -- > defaultNounPlural "egg" == "eggs" -- > defaultNounPlural "patch" == "patches" -- > defaultNounPlural "boy" == "boys" -- > defaultNounPlural "spy" == "spies" -- > defaultNounPlural "thesis" == "theses" -- -- http://www.paulnoll.com/Books/Clear-English/English-plurals-1.html defaultNounPlural :: Text -> Text defaultNounPlural x | "is" `T.isSuffixOf` x = thesis | hasSibilantSuffix x = es | hasCySuffix x = y_ies | "f" `T.isSuffixOf` x = f_ves | otherwise = plain where plain = x <> "s" es = x <> "es" y_ies = T.init x <> "ies" f_ves = T.init x <> "ves" thesis = tDropEnd 2 x <> "es" -- | Heuristics for 3rd person singular and past participle -- for an unknown regular verb -- -- > defaultVerbStuff "walk" == ("walks", "walked") -- > defaultVerbStuff "push" == ("pushes", "pushed") -- > defaultVerbStuff "play" == ("plays", "played") -- > defaultVerbStuff "cry" == ("cries", "cried") defaultVerbStuff :: Text -> (Text, Text) defaultVerbStuff v | hasSibilantSuffix v = sibilant_o v | "o" `T.isSuffixOf` v = sibilant_o v | "e" `T.isSuffixOf` v = e_final v | hasCySuffix v = y_final v | otherwise = plain v where plain x = (x <> "s" , x <> "ed") sibilant_o x = (x <> "es" , x <> "ed") e_final x = (x <> "s" , x <> "d") y_final x = (T.init x <> "ies", T.init x <> "ied") -- | > indefiniteDet "dog" == "a" -- > indefiniteDet "egg" == "an" -- > indefiniteDet "ewe" == "a" -- > indefiniteDet "ewok" == "an" -- > indefiniteDet "8th" == "an" indefiniteDet :: Text -> Text indefiniteDet (T.toLower -> t) = if useAn then "an" else "a" where useAn = t `elem` [ "11", "11th" ] || useAn' useAn' = case T.uncons t of Just ('8',_) -> True Just (h,_) -> isVowel h `butNot` hasSemivowelPrefix t Nothing -> False x `butNot` y = x && not y -- | Ends with a sh sound hasSibilantSuffix :: Text -> Bool hasSibilantSuffix x = any (`T.isSuffixOf` x) ["x","s","ch","sh"] -- | Starts with a semivowel hasSemivowelPrefix :: Text -> Bool hasSemivowelPrefix ls = any (`T.isPrefixOf` ls) ["y","w","eu","ewe"] -- | Last two letters are a consonant and 'y' hasCySuffix :: Text -> Bool hasCySuffix (T.unpack . tTakeEnd 2 -> [x, 'y']) = isConsonant x hasCySuffix _ = False -- | Is a vowel isVowel :: Char -> Bool isVowel = (`elem` "aeiouAEIOU") -- | Is a consonant isConsonant :: Char -> Bool isConsonant = not . isVowel