-- This file contains functions about the phonological and morphotactical -- rules of the Turkish language. The rule descriptions in the Haddock comments -- are borrowed from a zemberek-nlp document about Turkish morphology. module Guguk.Morphology.Phonology (vowelHarmony, aTypeHarmony, iTypeHarmony, firstConsonantAlteration, lastConsonantAlteration) where import Data.Maybe import qualified Data.List as L import qualified Data.Text as T import qualified Guguk.TurkishAlphabet as Alph import qualified Guguk.Phonetics as Ph import qualified Guguk.Syllabification as Syl same :: (Eq a) => [a] -> Bool same xs = and $ zipWith (==) xs (tail xs) -- Vowel harmony functions lastSyllable :: T.Text -> String lastSyllable w = T.unpack $ last (Syl.syllabify w) lastVowel :: T.Text -> Char lastVowel w = fromMaybe (error "Syllable without a vowel.") (L.find Alph.isVowel $ lastSyllable w) -- | Checks for the vowel harmony in the entire word. vowelHarmony :: T.Text -> Bool vowelHarmony t = same $ map Ph.vowelLocation vowelPhoneme where vowelList = T.unpack $ T.filter Alph.isVowel t vowelPhoneme = map (head . Alph.getPhonemes) vowelList {-| A syllable's vowels are formed according to the last vowel of the word it appends to. There is an A-Type harmony; if last vowel of the word is a back vowel (a, ı, o, u), a is produced. If last vowel is a frontal vowel (e, i, ö, ü), e is produced. For plural suffix [-lAr] vowel is formed as these; kalem-ler, kitap-lar. -} aTypeHarmony :: T.Text -> Char aTypeHarmony w = case loc of Ph.Back -> 'a' Ph.Front -> 'e' _ -> error "Unknown vowel location." where loc = Ph.vowelLocation $ head $ Ph.getBySurfaceForm $ T.singleton (lastVowel w) {-| There is also an I-Type harmony; if last vowel is back and unrounded (a, ı), ı is produced. if it is back and rounded (o,u), u is produced. if it is frontal and unrounded (e, i), i is produced and lastly if it is frontal and rounded (ö, ü), ü is produced. For genitive suffix [-(n)In] eight outcomes are possible, four of them: kalem-in, kitab-ın, okul-un, üzüm-ün. -} iTypeHarmony :: T.Text -> Char iTypeHarmony w = case (Ph.vowelLocation lV, Ph.vowelRoundedness lV) of (Ph.Back, Ph.Unrounded) -> 'ı' (Ph.Back, Ph.Rounded) -> 'u' (Ph.Front, Ph.Unrounded) -> 'i' (Ph.Front, Ph.Rounded) -> 'ü' _ -> error "Unknown vowel location and roundedness." where lV = head $ Ph.getBySurfaceForm $ T.singleton $ lastVowel w -- Consonant alteration functions harden :: Char -> Char harden 'c' = 'ç' harden 'd' = 't' harden x = x soften :: Char -> Char soften 'p' = 'b' soften 'ç' = 'c' soften 'k' = 'ğ' soften 't' = 'd' soften 'g' = 'ğ' soften x = x {-| When a suffix starting with some voiced consonants (c,d) is appended to a word ending with a voiceless consonant (f,s,t,k,ç,ş,h,p) first consonant of the suffix becomes voiceless (ç,t). fıstık->fıstık-çı (not fıstık-cı), muzip->muzip-tir (not muzip-dir) This function returns the two texts in a tuple, with the first consonant alteration done if necessary. -} firstConsonantAlteration :: T.Text -> T.Text -> (T.Text, T.Text) firstConsonantAlteration w1 w2 | isVoiceless (T.last w1) = (w1, T.cons (harden $ T.head w2) (T.tail w2)) | otherwise = (w1, w2) where isVoiceless c = Ph.Voiceless == Ph.consonantVoice firstPhoneme where firstPhoneme = head $ Ph.getBySurfaceForm $ T.singleton c {-| If last letter of a word or suffix is a stop consonant (tr: süreksiz sert sessiz), and a suffix that starts with a vowel is appended to that word, last letter changes (voicing). Changes are p-b, ç-c, k-ğ, t-d, g-ğ. kitap->kitab-a, pabuç->pabuc-u, çocuk->çocuğ-a, hasat->hasad-ı This function returns the two texts in a tuple, with the last consonant alteration done if necessary. -} lastConsonantAlteration :: T.Text -> T.Text -> (T.Text, T.Text) lastConsonantAlteration w1 w2 | Alph.isVowel (T.head w2) = (T.snoc (T.init w1) (soften $ T.last w1), w2) | otherwise = (w1, w2)