{-# LANGUAGE DeriveDataTypeable, FlexibleInstances #-}
module Languages.Phonetic.Ukrainian.Syllable where
import Prelude hiding (mappend)
import Data.Monoid
import Data.Typeable
import qualified Data.Vector.Unboxed as V
import qualified Data.Vector as VB
import qualified Data.List as L (groupBy)
import Melodics.ByteString.Ukrainian
import CaseBi.Unboxed (getBFst')
import qualified CaseBi as X (getBFst')
import Data.List.InnToOut.Basic (mapI)
data UZPP a b = UZ a b deriving ( Eq, Typeable )
instance (Ord a, Ord b) => Ord (UZPP a b) where
compare (UZ x1 y1) (UZ x2 y2) =
case compare x1 x2 of
EQ -> compare y1 y2
~z -> z
data PhoneticType = W | S | O | D | K | L | M | N | E deriving ( Eq, Ord, Typeable )
type UZPP2 = UZPP Char PhoneticType
instance Show (UZPP Char PhoneticType) where
show (UZ x y)
| y `notElem` [O,K,M] =
X.getBFst' ("", VB.fromList [('-'," "),('0'," "),('1'," "),('A',"дз"),('B',"ж"),('C',"й"),('D',"сь"),('E',"ч"),('F',"ш"),('G',"щ"),('L',"\700"),('M',"\8217"),
('a',"а"),('b',"б"),('c',"ц"),('d',"д"),('e',"е"),('f',"ф"),('g',"ґ"),('h',"г"),('i',"і"),('j',"дж"),('k',"к"),('l',"л"),('m',"м"),('n',"н"),('o',"о"),('p',"п"),('q',"ь"),
('r',"р"),('s',"с"),('t',"т"),('u',"у"),('v',"в"),('w',"ць"),('x',"х"),('y',"и"),('z',"з")]) x
| otherwise =
X.getBFst' ("", VB.fromList [('-'," "),('0'," "),('1'," "),('A',"дзь"),('B',"жь"),('E',"чь"),('F',"шь"),('G',"щь"),('b',"бь"),('d',"дь"),('f',"фь"),('g',"ґь"),
('h',"гь"),('j',"джь"),('k',"кь"),('l',"ль"),('m',"мь"),('n',"нь"),('p',"пь"),('q',"ь"),('r',"рь"),('t',"ть"),('v',"вь"),('x',"хь"),('z',"зь")]) x
phoneType :: UZPP2 -> PhoneticType
phoneType (UZ _ y) = y
{-# INLINE phoneType #-}
charUkr :: UZPP2 -> Char
charUkr (UZ x _) = x
{-# INLINE charUkr #-}
vec2UZPP2s :: V.Vector Char -> [UZPP2]
vec2UZPP2s v
| V.null v = []
| getBFst' (False, V.fromList [('a',True),('e',True),('i',True),('o',True),('u',True),('y',True)]) . V.unsafeHead $ v = UZ (V.unsafeHead v) W:vec2UZPP2s (V.unsafeTail v)
| V.unsafeHead v == 'D' || V.unsafeHead v == 'w' = UZ (V.unsafeHead v) N:vec2UZPP2s (V.unsafeTail v)
| ((V.null . V.unsafeTail $ v) || (V.unsafeIndex v 1 /= 'q')) && getBFst' (False, V.fromList [('C',True),('l',True),('m',True),('n',True),('r',True),('v',True)]) (V.unsafeHead v) =
UZ (V.unsafeHead v) S:vec2UZPP2s (V.unsafeTail v)
| ((V.null . V.unsafeTail $ v) || (V.unsafeIndex v 1 /= 'q')) &&
getBFst' (False, V.fromList [('A',True),('B',True),('b',True),('d',True),('g',True),('h',True),('j',True),('z',True)]) (V.unsafeHead v) =
UZ (V.unsafeHead v) D:vec2UZPP2s (V.unsafeTail v)
| ((V.null . V.unsafeTail $ v) || (V.unsafeIndex v 1 /= 'q')) = UZ (V.unsafeHead v) L:vec2UZPP2s (V.unsafeTail v)
| getBFst' (False, V.fromList [('l',True),('m',True),('n',True),('r',True),('v',True)]) (V.unsafeHead v) = UZ (V.unsafeHead v) O:vec2UZPP2s (V.unsafeDrop 2 v)
| getBFst' (False, V.fromList [('A',True),('B',True),('b',True),('d',True),('g',True),('h',True),('j',True),('z',True)]) (V.unsafeHead v) =
UZ (V.unsafeHead v) K:vec2UZPP2s (V.unsafeDrop 2 v)
| otherwise = UZ (V.unsafeHead v) M:vec2UZPP2s (V.unsafeDrop 2 v)
isVowel1 :: UZPP2 -> Bool
isVowel1 = (== W) . phoneType
{-# INLINE isVowel1 #-}
isVwl :: Char -> Bool
isVwl = getBFst' (False, (V.fromList [('a',True),('e',True),('i',True),('o',True),('u',True),('y',True)]))
{-# INLINE isVwl #-}
isSonorous1 :: UZPP2 -> Bool
isSonorous1 = (`elem` [S,O]) . phoneType
{-# INLINE isSonorous1 #-}
isVoicedC1 :: UZPP2 -> Bool
isVoicedC1 = (`elem` [D,K]) . phoneType
{-# INLINE isVoicedC1 #-}
isVoicelessC1 :: UZPP2 -> Bool
isVoicelessC1 = (`elem` [L,M]) . phoneType
{-# INLINE isVoicelessC1 #-}
isNotVowel2 :: UZPP2 -> UZPP2 -> Bool
isNotVowel2 x y
| phoneType x == W || phoneType y == W = False
| otherwise = True
{-# INLINE isNotVowel2 #-}
notEqC :: UZPP2 -> UZPP2 -> Bool
notEqC x y
| charUkr x == 's' || charUkr x == 'D' =
case charUkr y of
's' -> False
'D' -> False
_ -> True
| charUkr x == 'w' || charUkr x == 'c' =
case charUkr y of
'w' -> False
'c' -> False
_ -> True
| otherwise = charUkr x /= charUkr y
sndGroups :: [UZPP2] -> [[UZPP2]]
sndGroups ys@(_:_) = L.groupBy isNotVowel2 ys
sndGroups _ = []
vecWords :: V.Vector Char -> [V.Vector Char]
vecWords v | V.null v = []
| V.unsafeHead v == '-' || V.unsafeHead v == '0' || V.unsafeHead v == '1' = vecWords (V.unsafeTail v)
| otherwise =
let (v1, v2) = V.break (\x -> x == '-' || x == '0' || x == '1') v
v3 = snd . V.span (\x -> x == '-' || x == '0' || x == '1') $ v2 in v1:vecWords v3
groupSnds :: [UZPP2] -> [[UZPP2]]
groupSnds = L.groupBy (\x y -> ((== W) . phoneType $ x) == ((== W) . phoneType $ y))
divCnsnts :: [UZPP2] -> ([UZPP2] -> [UZPP2],[UZPP2] -> [UZPP2])
divCnsnts xs@(x:ys@(_:_:_:_))
| (isSonorous1 x) || (isVoicedC1 x) = ((`mappend` [x]),(ys `mappend`))
| otherwise = ((id),(xs `mappend`))
divCnsnts xs@(x:ys@(y:zs@(_:_)))
| isSonorous1 x = ((`mappend` [x]),(ys `mappend`))
| isSonorous1 y = ((`mappend` [x,y]),(zs `mappend`))
| otherwise = ((id),(xs `mappend`))
divCnsnts xs@(x:ys@(y:_))
| ((isSonorous1 x) && (x `notEqC` y)) || ((isVoicedC1 x) && (isVoicelessC1 y)) = ((`mappend` [x]),(ys `mappend`))
| otherwise = ((id),(xs `mappend`))
divCnsnts xs = ((id),(xs `mappend`))
reSyllableCntnts :: [[UZPP2]] -> [[UZPP2]]
reSyllableCntnts (xs:ys:zs:xss)
| (/= W) . phoneType . last $ ys = fst (divCnsnts ys) xs:reSyllableCntnts (snd (divCnsnts ys) zs:xss)
| otherwise = reSyllableCntnts ((xs `mappend` ys):zs:xss)
reSyllableCntnts (xs:ys:_) = [(xs `mappend` ys)]
reSyllableCntnts xss = xss
divVwls :: [[UZPP2]] -> [[UZPP2]]
divVwls = mapI (\ws -> (length . filter ((== W) . phoneType) $ ws) > 1) h3
where h3 us = [ys `mappend` take 1 zs] `mappend` (L.groupBy (\x y -> phoneType x == W && phoneType y /= W) . drop 1 $ zs)
where (ys,zs) = span (\t -> phoneType t /= W) us
createSyllablesUkr :: String -> [[[UZPP2]]]
createSyllablesUkr = map (divVwls . reSyllableCntnts . groupSnds . vec2UZPP2s) . vecWords . V.filter (/='0') . convertToProperUkrainian . map (\x -> if x == '-' then ' ' else x)
{-# INLINE createSyllablesUkr #-}
createSyllablesUkrP :: String -> [[[UZPP2]]]
createSyllablesUkrP = map (map representProlonged . divVwls . reSyllableCntnts . groupSnds . vec2UZPP2s) . vecWords . V.filter (/='0') . convertToProperUkrainian . map (\x -> if x == '-' then ' ' else x)
{-# INLINE createSyllablesUkrP #-}
createSyllablesUkrV :: VB.Vector Char -> [[[UZPP2]]]
createSyllablesUkrV = map (divVwls . reSyllableCntnts . groupSnds . vec2UZPP2s) . vecWords . V.filter (/='0') . convertToProperUkrainianV . VB.map (\x -> if x == '-' then ' ' else x)
{-# INLINE createSyllablesUkrV #-}
createSyllablesUkrVP :: VB.Vector Char -> [[[UZPP2]]]
createSyllablesUkrVP = map (map representProlonged . divVwls . reSyllableCntnts . groupSnds . vec2UZPP2s) . vecWords . V.filter (/='0') . convertToProperUkrainianV . VB.map (\x -> if x == '-' then ' ' else x)
{-# INLINE createSyllablesUkrVP #-}
representProlonged :: [UZPP2] -> [UZPP2]
representProlonged (x:y:xs)
| isVowel1 x = x:representProlonged (y:xs)
| not . notEqC x $ y = y:representProlonged xs
| otherwise = x:representProlonged (y:xs)
representProlonged xs = xs
str2Durat1 :: String -> Float
str2Durat1 = X.getBFst' ((0.153016), VB.fromList [("-", (0.101995)), ("0", (0.051020)), ("1", (0.153016)), ("а", 0.138231), ("б", 0.057143),
("в", 0.082268), ("г", 0.076825), ("д", 0.072063), ("дж", 0.048934), ("дз", 0.055601), ("е", 0.093605), ("ж", 0.070658), ("з", 0.056054),
("и", 0.099955), ("й", 0.057143), ("к", 0.045351), ("л", 0.064036), ("м", 0.077370), ("н", 0.074240), ("о", 0.116463), ("п", 0.134830),
("р", 0.049206), ("с", 0.074603), ("сь", 0.074558), ("т", 0.110658), ("у", 0.109070), ("ф", 0.062268), ("х", 0.077188), ("ц", 0.053061),
("ць", 0.089342), ("ч", 0.057596), ("ш", 0.066077), ("ь", 0.020227), ("і", 0.094150), ("ґ", 0.062948)])
uzpp2Durat1 :: UZPP2 -> Float
uzpp2Durat1 = X.getBFst' (0.051020, VB.fromList [(UZ 'A' D, 0.055601), (UZ 'A' K, 0.055601), (UZ 'B' D, 0.070658), (UZ 'B' K, 0.070658), (UZ 'C' S, 0.057143), (UZ 'D' N, 0.074558),
(UZ 'E' L, 0.057596), (UZ 'E' M, 0.057596), (UZ 'F' L, 0.066077), (UZ 'F' M, 0.066077), (UZ 'a' W, 0.138231), (UZ 'b' D, 0.057143), (UZ 'b' K, 0.057143), (UZ 'c' D, 0.053061),
(UZ 'd' D, 0.072063), (UZ 'd' K, 0.072063), (UZ 'e' W, 0.093605), (UZ 'f' L, 0.062268), (UZ 'f' M, 0.062268), (UZ 'g' D, 0.062948), (UZ 'g' K, 0.062948), (UZ 'h' D, 0.076825),
(UZ 'h' K, 0.076825), (UZ 'i' W, 0.094150), (UZ 'j' D, 0.048934), (UZ 'j' K, 0.048934), (UZ 'k' L, 0.045351), (UZ 'k' M, 0.045351), (UZ 'l' S, 0.064036), (UZ 'l' O, 0.064036),
(UZ 'm' S, 0.077370), (UZ 'm' O, 0.077370), (UZ 'n' S, 0.074240), (UZ 'n' O, 0.074240), (UZ 'o' W, 0.116463), (UZ 'p' L, 0.134830), (UZ 'p' M, 0.134830),
(UZ 'q' E, 0.020227), (UZ 'r' S, 0.049206), (UZ 'r' O, 0.049206), (UZ 's' L, 0.074603), (UZ 't' L, 0.110658), (UZ 't' M, 0.110658), (UZ 'u' W, 0.109070), (UZ 'v' S, 0.082268),
(UZ 'v' O, 0.082268), (UZ 'w' N, 0.089342), (UZ 'x' L, 0.077188), (UZ 'x' M, 0.077188), (UZ 'y' W, 0.099955), (UZ 'z' D, 0.056054), (UZ 'z' K, 0.056054)])
syllableDurations :: [[[UZPP2]]] -> [[Float]]
syllableDurations = fmap (fmap (sum . fmap (uzpp2Durat1)))