Safe Haskell | Safe-Inferred |
---|---|
Language | Haskell2010 |
Simple default rules for English morphology
Synopsis
- commas :: Text -> [Text] -> Text
- cardinal :: Int -> Text
- ordinalNotSpelled :: Int -> Text
- ordinal :: Int -> Text
- defaultNounPlural :: Text -> Text
- defaultVerbStuff :: Text -> (Text, Text)
- defaultPossesive :: Text -> Text
- anNumerals :: [Text]
- indefiniteDet :: Text -> Text
- wantsAn :: Text -> Bool
- acronymWantsAn :: Text -> Bool
- looksLikeAcronym :: Text -> Bool
- startsWithAcronym :: Text -> Bool
- hasSibilantSuffix :: Text -> Bool
- hasSemivowelPrefix :: Text -> Bool
- hasVowel_U_Prefix :: Text -> Bool
- hasCySuffix :: Text -> Bool
- hasCoSuffix :: Text -> Bool
- isVowel :: Char -> Bool
- isLetterWithInitialVowelSound :: Char -> Bool
- isConsonant :: Char -> Bool
Punctuation
commas :: Text -> [Text] -> Text Source #
No Oxford commas, alas.
commas "and" ["foo", "bar"] == "foo and bar" commas "and" ["foo", "bar", "baz"] == "foo, bar and baz"
Numbers
cardinal :: Int -> Text Source #
cardinal 0 == "zero" cardinal 1 == "one" cardinal 2 == "two" cardinal 10 == "ten" cardinal 11 == "11"
ordinalNotSpelled :: Int -> Text Source #
ordinalNotSpelled 1 == "1st" ordinalNotSpelled 2 == "2nd" ordinalNotSpelled 11 == "11th"
ordinal :: Int -> Text Source #
ordinal 1 == "first" ordinal 2 == "second" ordinal 3 == "third" ordinal 11 == "11th" ordinal 42 == "42nd"
Nouns and verbs
defaultNounPlural :: Text -> Text Source #
Heuristics for the English plural of an unknown noun.
defaultNounPlural "egg" == "eggs" defaultNounPlural "patch" == "patches" defaultNounPlural "boy" == "boys" defaultNounPlural "spy" == "spies" defaultNounPlural "thesis" == "theses"
http://www.paulnoll.com/Books/Clear-English/English-plurals-1.html
defaultVerbStuff :: Text -> (Text, Text) Source #
Heuristics for 3rd person singular and past participle for an unknown regular verb. Doubling of final consonants can be handled via a table of (partially) irregular verbs.
defaultVerbStuff "walk" == ("walks", "walked") defaultVerbStuff "push" == ("pushes", "pushed") defaultVerbStuff "play" == ("plays", "played") defaultVerbStuff "cry" == ("cries", "cried")
defaultPossesive :: Text -> Text Source #
Heuristics for the possessive form of an unknown noun.
defaultPossesive "pass" == "pass'" defaultPossesive "SOS" == "SOS'" defaultPossesive "Mr Blinkin'" == "Mr Blinkin's" defaultPossesive "cry" == "cry's"
Determiners
anNumerals :: [Text] Source #
indefiniteDet :: Text -> Text Source #
indefiniteDet "dog" == "a" indefiniteDet "egg" == "an" indefiniteDet "ewe" == "a" indefiniteDet "ewok" == "an" indefiniteDet "8th" == "an"
wantsAn :: Text -> Bool Source #
True if the indefinite determiner for a word would normally be 'an' as opposed to 'a'.
acronymWantsAn :: Text -> Bool Source #
Variant of wantsAn that assumes the input string is pronounced one letter at a time.
wantsAn "x-ray" == False acronymWantsAn "x-ray" == True
Note that this won't do the right thing for words like "SCUBA". You really have to reserve it for those separate-letter acronyms.
Acronyms
looksLikeAcronym :: Text -> Bool Source #
True if every letter from the second one onwards is upper case (the first letter may be lower case, as in "tRNA").
looksLikeAcronym "DNA" == True looksLikeAcronym "tRNA" == True looksLikeAcronym "x" == False looksLikeAcronym "DnA" == False
startsWithAcronym :: Text -> Bool Source #
True if the first word (separating on either hyphen or space) looks like an acronym.
Sounds
hasSibilantSuffix :: Text -> Bool Source #
Ends with a 'sh' sound.
hasSemivowelPrefix :: Text -> Bool Source #
Starts with a semivowel.
hasVowel_U_Prefix :: Text -> Bool Source #
Starts with a vowel-y 'U' sound.
hasCySuffix :: Text -> Bool Source #
Last two letters are a consonant and 'y'.
hasCoSuffix :: Text -> Bool Source #
Last two letters are a consonant and 'o'.
isLetterWithInitialVowelSound :: Char -> Bool Source #
Letters that when pronounced independently in English sound like they begin with vowels.
isLetterWithInitialVowelSound 'r' == True isLetterWithInitialVowelSound 'k' == False
(In the above, 'r' is pronounced "are", but 'k' is pronounced "kay".)