-- | -- Module : Melodics.ByteString.Ukrainian -- Copyright : (c) OleksandrZhabenko 2019-2020 -- License : MIT -- Maintainer : olexandr543@yahoo.com -- -- Functions provide functionality of a musical instrument synthesizer or for Ukrainian speech synthesis -- especially for poets, translators and writers. Is rewritten from the module Melodics.Ukrainian from the -- @mmsyn6ukr@ package : 'https://hackage.haskell.org/package/mmsyn6ukr' -- Phonetic material is taken from the : -- -- Solomija Buk, Ján Mačutek, Andrij Rovenchak. Some properties of -- the Ukrainian writing system. [Electronic resource] https://arxiv.org/ftp/arxiv/papers/0802/0802.4198.pdf {-# LANGUAGE OverloadedStrings #-} module Melodics.ByteString.Ukrainian ( -- * Basic functions convertToProperUkrainianS , convertToProperUkrainian , convertToProperUkrainianX , convertToProperUkrainianB , isUkrainianL , linkFileName , showInteresting ) where import qualified Data.String as S import Data.Maybe (fromJust) import Data.Char import qualified Data.Vector.Unboxed as V import qualified Data.Vector as VB import qualified Data.ByteString.Char8 as B import CaseBi.Unboxed (getBFst') import qualified CaseBi as X (getBFst') import Data.List.InnToOut.Basic (mapI) {- -- Inspired by: https://mail.haskell.org/pipermail/beginners/2011-October/008649.html -} data Triple = Z | O | T deriving (Eq,Ord,Show) -- | The function that converts a written Ukrainian text into the sounding in the program phonetical respesentation. -- It is not exact phonetically but you can make for yourself a general impression of the Ukrainian sounding. convertToProperUkrainian :: String -> V.Vector Char convertToProperUkrainian ys = toVector . convertToProperUkrainianS $ ys {-# INLINE convertToProperUkrainian #-} -- | Unlike the 'convertToProperUkrainian', it returns a boxed 'VB.Vector' instead of unboxed one 'V.Vector', -- @ since 0.1.2.0 convertToProperUkrainianX :: String -> VB.Vector Char convertToProperUkrainianX ys = toVectorB . convertToProperUkrainianS $ ys {-# INLINE convertToProperUkrainianX #-} convertToProperUkrainianS :: String -> String convertToProperUkrainianS ys = correctB . correctA . applyChanges . bsToCharUkr . createTuplesByAnalysis . secondConv . filterUkr . changeJotted . filter (\x -> isUkrainianL x || isSpace x || isControl x || isPunctuation x) . map toLower $ ys {-# INLINE convertToProperUkrainianS #-} isUkrainianL :: Char -> Bool isUkrainianL y | (y >= '\1070' && y <= '\1097') = True | otherwise = getBFst' (False, V.fromList . map (\x -> (x, True)) $ "'-\700\1028\1030\1031\1068\1100\1102\1103\1108\1110\1111\1168\1169\8217") y {-# INLINE isUkrainianL #-} changeJotted :: String -> String changeJotted (x:y:zs) | (y `elem` ("\1102\1103\1108\1110"::String)) && isConsNotJ x = x:'\1100':(case y of '\1102' -> '\1091' '\1103' -> '\1072' '\1108' -> '\1077' _ -> '\1110'):changeJotted zs | otherwise = x:changeJotted (y:zs) changeJotted xs = xs isConsNotJ :: Char -> Bool isConsNotJ = getBFst' (False, V.fromList $ zip "\1073\1074\1075\1076\1078\1079\1082\1083\1084\1085\1087\1088\1089\1090\1092\1093\1094\1095\1096\\1097\1169" (repeat True)) filterUkr :: String -> B.ByteString filterUkr = B.pack . map toBSUkr toBSUkr :: Char -> Char toBSUkr x = getBFst' (x, V.fromList . zip "\700\1072\1073\1074\1075\1076\1077\1078\1079\1080\1081\1082\1083\1084\1085\1086\1087\1088\1089\1090\1091\1092\1093\1094\1095\1096\1097\1100\1102\1103\1108\1110\1111\1169\8217" $ "LabvhdeBzyCklmnoprstufxcEFGqHIJiKgM") x secondConv :: B.ByteString -> B.ByteString secondConv = B.concatMap f where f y | isSpace y || isControl y = B.singleton '1' | otherwise = X.getBFst' (B.singleton y, VB.fromList $ zip "'-GHIJKLM" [B.singleton '0',B.singleton '0',"FE","Cu","Ca","Ce","Ci",B.singleton '0',B.singleton '0']) y {-# INLINE secondConv #-} createTuplesByAnalysis :: B.ByteString -> [(B.ByteString, Triple)] createTuplesByAnalysis x | B.null x = [] | getBFst' (False, V.fromList $ zip "BEFcdfhknpstxz" (repeat True)) . B.head $ x = initialA x | not (B.null . B.tail $ x) && (B.index x 1 == 'C' && isConsNotJ (B.head x)) = (B.copy . B.singleton . B.head $ x, T):(B.singleton 'q', Z):createTuplesByAnalysis (B.drop 2 x) | otherwise = (B.copy . B.singleton . B.head $ x, Z):createTuplesByAnalysis (B.tail x) initialA :: B.ByteString -> [(B.ByteString, Triple)] initialA t1 | B.null t1 = [] | canChange t == O = (B.singleton '1', Z):initialA ts | canChange t == Z = (B.singleton t, Z):initialA ts | getBFst' (False, V.fromList $ zip "cdnstx" (repeat True)) t = let (us,vs) = B.splitAt 2 t1 in if X.getBFst' (False, VB.fromList $ zip ["cq","dB","dz","nt","sq","st","tq","ts","xh"] (repeat True)) us then (B.copy us, T):initialA vs else (B.singleton t, T):initialA ts | otherwise = case getBFst' (False, V.fromList . zip "BEFfhkpz" . repeat $ True) t of ~True -> (B.singleton t, T):initialA ts where (t,ts) = fromJust . B.uncons $ t1 canChange :: Char -> Triple canChange x | isSpace x || isControl x || x == '-' = O | getBFst' (False, V.fromList $ zip "BEFcdfhknpstxz" (repeat True)) x = T | otherwise = Z {-# INLINE canChange #-} bsToCharUkr :: [(B.ByteString,Triple)] -> [(Char,Triple)] bsToCharUkr = map (\(xs,y) -> (X.getBFst' (B.head xs, VB.fromList . zip ["cq","dB","dz","nt","sq","st","tq","ts","xh"] $ "wjANDOPch") xs,y)) applyChanges :: [(Char, Triple)] -> [(Char, Triple)] applyChanges [] = [] applyChanges [(x, _)] = [(x, Z)] applyChanges xs | snd z == T = X.getBFst' ((fst z, Z), VB.fromList . zip "ABDEFNOPcdfhkpstwxz" $ [дзT zs, жT zs, сьT zs, чT zs, шT zs, нтT zs, стT zs, тьT zs, цT zs, дT zs, фT zs, гT zs, кT zs, пT zs, сT zs, тT zs, цьT zs, хT zs, зT zs]) (fst z):applyChanges zs | otherwise = z:applyChanges zs where z = head xs zs = tail xs isVoicedObstruent :: Char -> Bool isVoicedObstruent = getBFst' (False, V.fromList $ zip "ABbdghjz" (repeat True)) isSoftDOrL :: [(Char, Triple)] -> Bool isSoftDOrL = X.getBFst' (False, VB.fromList . zip ["bq","cq","dq","fq","lq","mq","nq","pq","sq","tq","vq"] $ (repeat True)) . takeFromFT_ 2 isSoftDen :: [(Char, Triple)] -> Bool isSoftDen = X.getBFst' (False, VB.fromList . zip ["Aq","cq","dq","lq","nq","sq","tq","zq"] $ (repeat True)) . takeFromFT_ 2 -- in the further ??T functions the last (, T) means that it must be afterwards be separated with the soft sign into two tuples (1 additional function in the composition) -- need further processing means that there should be additional checks and may be transformations. May be they can be omitted гT :: [(Char, Triple)] -> (Char, Triple) гT (t:_) | fst t == 'k' || fst t == 't' = ('x', Z) | otherwise = ('h', Z) гT _ = ('h', Z) дT :: [(Char, Triple)] -> (Char, Triple) дT t1@(_:_) | takeFromFT_ 1 t1 `elem` ["B","E","F"] = ('j', Z) -- need further processing д дж | takeFromFT_ 2 t1 `elem` ["sq","cq"] = ('Q', T) -- need further processing д дзь | takeFromFT_ 1 t1 `elem` ["D","w"] = ('Q', T) -- need further processing д дзь | takeFromFT_ 1 t1 `elem` ["z","s","c"] = ('A', Z) -- need further processing д дз | otherwise = ('d', Z) дT _ = ('d', Z) дзT :: [(Char, Triple)] -> (Char, Triple) дзT t1@(_:_) | isSoftDOrL t1 = ('Q', T) | otherwise = ('A', Z) дзT _ = ('A', Z) жT :: [(Char, Triple)] -> (Char, Triple) жT t1@(_:_) | takeFromFT 2 t1 `elem` ["sq","cq"] = ('R', T) | takeFromFT 1 t1 `elem` ["D","w"] = ('R', T) | otherwise = ('B', Z) жT _ = ('B', Z) зT :: [(Char, Triple)] -> (Char, Triple) зT t1@(_:_) | takeFromFT_ 1 t1 `elem` ["B","E","F"] || takeFromFT_ 2 t1 == "dB" || takeFromFT_ 1 t1 == "j" = ('B', Z) | isSoftDOrL t1 = ('R', T) | takeFromFT 1 t1 `elem` ["E","F"] = ('F', Z) -- need further processing з ш | takeFromFT 1 t1 `elem` ["s","c"] || takeFromFT_ 1 t1 `elem` ["k","p","t","f","x"] = ('s', Z) -- need further processing з с | otherwise = ('z', Z) зT _ = ('z', Z) кT :: [(Char, Triple)] -> (Char, Triple) кT t1@(_:_) | isVoicedObstruent . B.head . takeFromFT_ 1 $ t1 = ('g', Z) | otherwise = ('k', Z) кT _ = ('k', Z) нтT :: [(Char, Triple)] -> (Char, Triple) нтT t1@(_:_) | takeFromFT 2 t1 == "st" || takeFromFT 1 t1 == "O" = ('n', Z) | takeFromFT 3 t1 == "sqk" || takeFromFT 2 t1 == "Dk" = ('S', T) | otherwise = ('N', Z) нтT _ = ('N', T) пT :: [(Char, Triple)] -> (Char, Triple) пT t1@(_:_) | isVoicedObstruent . B.head . takeFromFT_ 1 $ t1 = ('b', Z) | otherwise = ('p', Z) пT _ = ('p', Z) сT :: [(Char, Triple)] -> (Char, Triple) сT t1@(_:_) | ((isVoicedObstruent . B.head . takeFromFT_ 1 $ t1) && B.drop 1 (takeFromFT_ 2 t1) == "q") = ('R', T) | isVoicedObstruent . B.head . takeFromFT_ 1 $ t1 = ('b', Z) | isSoftDOrL t1 = ('D', Z) | takeFromFT_ 1 t1 == "F" = ('F', Z) | otherwise = ('s', Z) сT _ = ('s', Z) стT :: [(Char, Triple)] -> (Char, Triple) стT t1@(_:_) | isVoicedObstruent . B.head . takeFromFT_ 1 $ t1 = ('z', Z) | takeFromFT_ 3 t1 == "sqk" || (takeFromFT_ 2 t1 `elem` ["Dk","cq"]) || takeFromFT_ 1 t1 == "w" = ('D', Z) | takeFromFT_ 1 t1 `elem` ["s","n"] = ('s', Z) | takeFromFT_ 1 t1 == "E" = ('F', Z) | otherwise = ('O', T) стT _ = ('O', T) сьT :: [(Char, Triple)] -> (Char, Triple) сьT t1@(_:_) | isVoicedObstruent . B.head . takeFromFT_ 1 $ t1 = ('R', T) | otherwise = ('D', Z) сьT _ = ('D', Z) тT :: [(Char, Triple)] -> (Char, Triple) тT t1@(_:_) | (isVoicedObstruent . B.head . takeFromFT_ 1 $ t1) && B.drop 1 (takeFromFT_ 2 t1) == "q" = ('T', T) | isVoicedObstruent . B.head . takeFromFT_ 1 $ t1 = ('d', Z) | takeFromFT_ 2 t1 == "cq" || takeFromFT_ 1 t1 == "w" = ('w', Z) | takeFromFT_ 1 t1 == "c" = ('c', Z) | isSoftDen t1 = ('P', T) | takeFromFT_ 1 t1 `elem` ["E","F"] = ('E', Z) | otherwise = ('t', Z) тT _ = ('t', Z) тьT :: [(Char, Triple)] -> (Char, Triple) тьT t1@(_:_) | isVoicedObstruent . B.head . takeFromFT_ 1 $ t1 = ('T', T) | takeFromFT_ 3 t1 == "sqa" || takeFromFT_ 2 t1 == "Da" = ('w', Z) | otherwise = ('P', T) тьT _ = ('P', T) фT :: [(Char, Triple)] -> (Char, Triple) фT t1@(_:_) | isVoicedObstruent . B.head . takeFromFT_ 1 $ t1 = ('v', Z) | otherwise = ('f', Z) фT _ = ('f', Z) хT :: [(Char, Triple)] -> (Char, Triple) хT t1@(_:_) | isVoicedObstruent . B.head . takeFromFT_ 1 $ t1 = ('h', Z) | otherwise = ('x', Z) хT _ = ('х', Z) цT :: [(Char, Triple)] -> (Char, Triple) цT t1@(_:_) | (isVoicedObstruent . B.head . takeFromFT_ 1 $ t1) && B.drop 1 (takeFromFT_ 2 t1) == "q" = ('Q', T) | isSoftDOrL t1 = ('w', Z) | isVoicedObstruent . B.head . takeFromFT_ 1 $ t1 = ('A', Z) | otherwise = ('c', Z) цT _ = ('c', Z) цьT :: [(Char, Triple)] -> (Char, Triple) цьT t1@(_:_) | (isVoicedObstruent . B.head . takeFromFT_ 1 $ t1) && B.drop 1 (takeFromFT_ 2 t1) == "q" = ('Q', T) | otherwise = ('w', Z) цьT _ = ('w', Z) чT :: [(Char, Triple)] -> (Char, Triple) чT t1@(_:_) | takeFromFT_ 2 t1 `elem` ["sq","cq"] || takeFromFT_ 1 t1 `elem` ["D","w"] = ('w', Z) | isVoicedObstruent . B.head . takeFromFT_ 1 $ t1 = ('j', Z) | otherwise = ('E', Z) чT _ = ('E', Z) шT :: [(Char, Triple)] -> (Char, Triple) шT t1@(_:_) | takeFromFT_ 2 t1 `elem` ["sq","cq"] || takeFromFT_ 1 t1 `elem` ["D","w"] = ('D', Z) | isVoicedObstruent . B.head . takeFromFT_ 1 $ t1 = ('B', Z) | otherwise = ('F', Z) шT _ = ('F', Z) takeFromFT :: Int -> [(Char, Triple)] -> B.ByteString takeFromFT n ts | if compare 0 n /= LT then True else null ts = B.empty | compare 1 n /= LT = B.singleton k | otherwise = k `B.cons` takeFromFT (n - 1) (take (n - 1) ts) where k = fst (head ts) takeFromFT2 :: Int -> [Char] -> [Char] takeFromFT2 n ts | if compare 0 n /= LT then True else null ts = [] | compare 1 n /= LT = [ks] | otherwise = ks:takeFromFT2 (n - 1) (tail ts) where ks = head ts dropFromFT2 :: Int -> [Char] -> [Char] dropFromFT2 n ts | if compare 0 n /= LT then True else null ts = [] | compare 1 n /= LT = tail ts | otherwise = dropFromFT2 (n - 1) (tail ts) takeFromFT_ :: Int -> [(Char, Triple)] -> B.ByteString takeFromFT_ n = takeFromFT n . filter (\(x, _) -> x /= '1' && x /= '0') {-#INLINE takeFromFT_ #-} correctA :: [(Char, Triple)] -> [Char] correctA = correctSomeW . separateSoftS {-# INLINE correctA #-} separateSoftS :: [(Char, Triple)] -> [Char] separateSoftS = map fst . mapI (\x -> snd x == T) divideToParts {-# INLINE separateSoftS #-} correctSomeW :: [Char] -> [Char] correctSomeW (x:y:z:xs) | x == 'w' && y == 'D' && z == 'a' = x:'w':z:correctSomeW xs | (x == '1' || x == '0') && y == 'C' && z == 'a' = if take 2 xs == "En" then x:y:z:'F':correctSomeW (tail xs) else x:correctSomeW (y:z:xs) | otherwise = x:correctSomeW (y:z:xs) correctSomeW zs = zs divideToParts :: (Char, Triple) -> [(Char, Triple)] divideToParts (x, z) = X.getBFst' ([(x, z)], VB.fromList . zip "NOPQRST" $ [[('n', Z), ('t', Z)], [('s', Z), ('t', Z)], [('t', Z), ('q', Z)], [('A', Z), ('q', Z)], [('z', Z), ('q', Z)], [('n', Z), ('q', Z)], [('d', Z), ('q', Z)]]) . fst $ (x, z) {-# INLINE divideToParts #-} toVector :: [Char] -> V.Vector Char toVector ts = V.fromList . correctB $ ts {-# INLINE toVector #-} toVectorB :: [Char] -> VB.Vector Char toVectorB ts = VB.fromList . correctB $ ts {-# INLINE toVectorB #-} correctB :: [Char] -> [Char] correctB ys@(x:xs) | compare (length . filter (== '1') . takeFromFT2 6 $ ys) 1 == GT = map (\t -> if t == '1' || isPunctuation t then '-' else t) (takeFromFT2 6 ys) ++ correctB (dropFromFT2 6 ys) | otherwise = (if isPunctuation x then '-' else x):correctB xs correctB _ = [] -- | A variant of the 'convertToProperUkrainian' with the 'B.ByteString' result. convertToProperUkrainianB :: String -> B.ByteString convertToProperUkrainianB ys = B.pack . convertToProperUkrainianS $ ys {-# INLINE convertToProperUkrainianB #-} linkFileName :: Char -> Char linkFileName x = getBFst' (x,V.fromList . zip "ABCDEFLMabcdefghijklmnopqrstuvwxyz" $ "GILUbc00ABZEHXfDeFMNOPQRdSTVWCaYKJ") x showInteresting :: String -> B.ByteString showInteresting = S.fromString . V.toList . convertToProperUkrainian