{-# OPTIONS_HADDOCK show-extensions #-} {-# LANGUAGE BangPatterns #-} -- | -- Module : Data.Phonetic.Languages.Base -- Copyright : (c) OleksandrZhabenko 2021 -- License : MIT -- Stability : Experimental -- Maintainer : olexandr543@yahoo.com -- -- This is a computational scheme for generalized usage of the phonetic languages approach. -- It is intended to be exported qualified, so that the functions in every language -- implementation have the same names and signatures as these ones and the data type used here. -- It is may be not the most efficient implementation. -- module Data.Phonetic.Languages.Base ( -- * Phonetics representation data type for the phonetic languages approach. PhoneticsRepresentationPL(..) , PhoneticsRepresentationPLX(..) , Generations , InterGenerationsString , WritingSystemPRPLX , GWritingSystemPRPLX , PhoneticRepresentationXInter , IGWritingSystemPRPLX , fromX2PRPL , fromPhoneticRX -- * Functions to work with the one. -- ** Predicates , isPRC , isPRAfterC , isPRBeforeC , isPREmptyC -- ** Convert to the 'PhoneticsRepresentationPLX'. , stringToXSG , stringToXG , stringToXS --, stringToX , string2X -- ** Apply conversion from 'PhoneticsRepresentationPLX'. , rulesX ) where import Data.List (sortBy,groupBy,nub,(\\),find,partition) import GHC.Int (Int8(..)) import Data.Maybe (isJust,fromJust) import Data.Either -- | The intended conversion to the syllables for a written word is: -- @ -- toSyllables . map rulesPR . stringToPRPL -- @ -- The syllable after this is encoded with the representation with every 'Char' being some phonetic language phenomenon. -- To see its usual written representation, use the defined 'showRepr' function (please, implement your own one). data PhoneticsRepresentationPL = PR { string :: String, afterString :: String, beforeString :: String } | PRAfter { string :: String, afterString :: String } | PRBefore { string :: String, beforeString :: String } | PREmpty { string :: String } deriving (Eq, Ord) -- | Extended variant of the 'PhoneticRepresentationPL' data type where the information for the 'Char' is encoded into the -- data itself. Is easier to implement the rules in the separate file by just specifying the proper and complete list of -- 'PhoneticsRepresentationPLX' values. data PhoneticsRepresentationPLX = PRC { stringX :: String, afterStringX :: String, beforeStringX :: String, char :: Char } | PRAfterC { stringX :: String, afterStringX :: String, char :: Char } | PRBeforeC { stringX :: String, beforeStringX :: String, char :: Char } | PREmptyC { stringX :: String, char :: Char } deriving (Eq, Ord) isPRC :: PhoneticsRepresentationPLX -> Bool isPRC (PRC _ _ _ _) = True isPRC _ = False isPRAfterC :: PhoneticsRepresentationPLX -> Bool isPRAfterC (PRAfterC _ _ _) = True isPRAfterC _ = False isPRBeforeC :: PhoneticsRepresentationPLX -> Bool isPRBeforeC (PRBeforeC _ _ _) = True isPRBeforeC _ = False isPREmptyC :: PhoneticsRepresentationPLX -> Bool isPREmptyC (PREmptyC _ _) = True isPREmptyC _ = False fromX2PRPL :: PhoneticsRepresentationPLX -> PhoneticsRepresentationPL fromX2PRPL (PREmptyC xs _) = PREmpty xs fromX2PRPL (PRAfterC xs ys _) = PRAfter xs ys fromX2PRPL (PRBeforeC xs zs _) = PRBefore xs zs fromX2PRPL (PRC xs ys zs _) = PR xs ys zs {-# INLINE fromX2PRPL #-} -- | An analogue of the 'rulesPR' function for 'PhoneticsRepresentationPLX'. rulesX :: PhoneticsRepresentationPLX -> Char rulesX = char {-# INLINE rulesX #-} stringToXS :: WritingSystemPRPLX -> String -> [String] stringToXS xs ys = ks : stringToX' zss l ts where !zss = nub . map stringX $ xs !l = maximum . map length $ zss f ys l zss = splitAt ((\xs -> if null xs then 1 else head xs) . filter (\n -> elem (take n ys) zss) $ [l,l-1..1]) ys {-# INLINE f #-} (!ks,!ts) = f ys l zss stringToX' rss m vs = bs : stringToX' rss m us where (!bs,!us) = f vs m rss {-| Uses the simplest variant of the 'GWritingSystemPRPLX' with just two generations where all the 'PREmptyC' elements in the 'WritingSystemPRPLX' are used in the last order. Can be suitable for simple languages (e. g. Esperanto). -} string2X :: WritingSystemPRPLX -> String -> [PhoneticsRepresentationPLX] string2X xs = stringToXG [(zs,1),(ys,0)] where (ys,zs) = partition isPREmptyC xs {-# INLINE string2X #-} {-| Each generation represents a subset of rules for representation transformation. The 'PhoneticsRepresentationPLX' are groupped by the generations so that in every group with the same generation number ('Int8' value, typically starting from 1) the rules represented have no conflicts with each other (this guarantees that they can be applied simultaneously without the danger of incorrect interference). Usage of 'Generations' is a design decision and is inspired by the GHC RULES pragma and the GHC compilation multistage process. -} type Generations = Int8 {-| Each value represents temporary intermediate resulting 'String' data to be transformed further into the representation. -} type InterGenerationsString = String {-| If the list here is proper and complete, then it usually represents the whole writing system of the language. For proper usage, the list must be sorted in the ascending order. -} type WritingSystemPRPLX = [PhoneticsRepresentationPLX] {-| The \'dynamic\' representation of the general writing system that specifies what transformations are made simultaneously during the conversion to the phonetic languages phonetics representation. During transformations those elements that have greater 'Generations' are used earlier than others. The last ones are used those elements with the 'Generations' element equal to 0 that must correspond to the 'PREmptyC' constructor-built records. For proper usage, the lists on the first place of the tuples must be sorted in the ascending order. -} type GWritingSystemPRPLX = [([PhoneticsRepresentationPLX],Generations)] type PhoneticRepresentationXInter = Either PhoneticsRepresentationPLX InterGenerationsString fromPhoneticRX :: [PhoneticsRepresentationPLX] -> [PhoneticRepresentationXInter] -> [PhoneticsRepresentationPLX] fromPhoneticRX ts = concatMap (fromInter2X ts) where fromInter2X :: [PhoneticsRepresentationPLX] -> PhoneticRepresentationXInter -> [PhoneticsRepresentationPLX] fromInter2X _ (Left x) = [x] fromInter2X ys (Right z) = filter ((== z) . stringX) ys {-| The \'dynamic\' representation of the process of transformation for the general writing system during the conversion. Is not intended to be produced by hand, but automatically by programs. -} type IGWritingSystemPRPLX = [(PhoneticRepresentationXInter,Generations)] stringToXSG :: GWritingSystemPRPLX -> Generations -> String -> IGWritingSystemPRPLX stringToXSG xs n ys | any ((== n) . snd) xs && n > 0 = stringToXSGI (xs \\ ts) (n - 1) . xsG zs n $ pss | otherwise = error "Data.Phonetic.Languages.Base.stringToXSG: Not defined for these first two arguments. " where !pss = stringToXS (concatMap fst xs) ys -- ps :: [String] !ts = filter ((== n) . snd) $ xs -- ts :: GWritingSystemPRPLX !zs = if null ts then [] else fst . head $ ts -- zs :: PhoneticRepresentationX xsG rs n (k1s:k2s:k3s:kss) -- xsG :: [PhoneticRepresentationPLX] -> [String] -> Generations -> IGWritingSystemPRPLX | any (\rec -> afterStringX rec == k3s && beforeStringX rec == k1s) . filter ((== k2s) . stringX) $ r2s = (Right k1s,n - 1):(Left . fromJust . find (\rec -> afterStringX rec == k3s && beforeStringX rec == k1s && stringX rec == k2s) $ r2s,n):xsG rs n (k3s:kss) | any (\rec -> afterStringX rec == k2s) . filter ((== k1s) . stringX) $ r3s = (Left . fromJust . find (\rec -> afterStringX rec == k2s && stringX rec == k1s) $ r3s,n):xsG rs n (k2s:k3s:kss) | any (\rec -> beforeStringX rec == k1s) . filter ((== k2s) . stringX) $ r4s = (Right k1s,n - 1):(Left . fromJust . find (\rec -> beforeStringX rec == k1s && stringX rec == k2s) $ r4s,n):xsG rs n (k3s:kss) | any ((== k1s) . stringX) r5s = (Left . fromJust . find (\rec -> stringX rec == k1s) $ r5s,n):xsG rs n (k2s:k3s:kss) | otherwise = (Right k1s,n - 1):xsG rs n (k2s:k3s:kss) where [!r2s,!r3s,!r4s,!r5s] = map (\f -> filter f rs) [isPRC, isPRAfterC, isPRBeforeC, isPREmptyC] xsG rs n (k1s:k2s:kss) | any (\rec -> afterStringX rec == k2s) . filter ((== k1s) . stringX) $ r3s = (Left . fromJust . find (\rec -> afterStringX rec == k2s && stringX rec == k1s) $ r3s,n):xsG rs n (k2s:kss) | any (\rec -> beforeStringX rec == k1s) . filter ((== k2s) . stringX) $ r4s = (Right k1s,n - 1):(Left . fromJust . find (\rec -> beforeStringX rec == k1s && stringX rec == k2s) $ r4s,n):xsG rs n (kss) | any ((== k1s) . stringX) r5s = (Left . fromJust . find (\rec -> stringX rec == k1s) $ r5s,n):xsG rs n (k2s:kss) | otherwise = (Right k1s,n - 1):xsG rs n (k2s:kss) where [r3s,!r4s,!r5s] = map (\f -> filter f rs) [isPRAfterC, isPRBeforeC, isPREmptyC] xsG rs n [k1s] | any ((== k1s) . stringX) r5s = [(Left . fromJust . find (\rec -> stringX rec == k1s) $ r5s,n)] | otherwise = [(Right k1s,n - 1)] where !r5s = filter isPREmptyC rs xsG rs n [] = [] {-| Is used internally in the 'stringToXSG' and 'stringToXG' functions respectively. -} stringToXSGI :: GWritingSystemPRPLX -> Generations -> IGWritingSystemPRPLX -> IGWritingSystemPRPLX stringToXSGI xs n ys | n > 0 = stringToXSGI (xs \\ ts) (n - 1) . xsGI zs n $ ys | otherwise = ys where !ts = filter ((== n) . snd) xs -- ts :: GWritingSystemPRPLX !zs = concatMap fst ts -- zs :: PhoneticRepresentationX xsGI rs n (k1s:k2s:k3s:kss) -- xsGI :: [PhoneticRepresentationPLX] -> Generations -> IGWritingSystemPRPLX -> IGWritingSystemPRPLX | snd k2s == n && (any (\rec -> either (const False) (== afterStringX rec) (fst k3s) && either (const False) (== beforeStringX rec) (fst k1s)) . filter (\rec -> either (const False) (== stringX rec) (fst k2s)) $ r2s) = (fst k1s,n - 1):(Left . fromJust . find (\rec -> either (const False) (== afterStringX rec) (fst k3s) && either (const False) (== beforeStringX rec) (fst k1s) && either (const False) (== stringX rec) (fst k2s)) $ r2s,n) : xsGI rs n (k3s:kss) | snd k1s == n && (any (\rec -> either (const False) (== afterStringX rec) (fst k2s)) . filter (\rec -> either (const False) (== stringX rec) (fst k1s)) $ r3s) = (Left . fromJust . find (\rec -> either (const False) (== afterStringX rec) (fst k2s) && either (const False) (== stringX rec) (fst k1s)) $ r3s,n):xsGI rs n (k2s:k3s:kss) | snd k2s == n && (any (\rec -> either (const False) (== beforeStringX rec) (fst k1s)) . filter (\rec -> either (const False) (== stringX rec) (fst k2s)) $ r4s) = (fst k1s,n - 1):(Left . fromJust . find (\rec -> either (const False) (== beforeStringX rec) (fst k1s) && either (const False) (== stringX rec) (fst k2s)) $ r4s,n):xsGI rs n (k3s:kss) | snd k1s == n && (any (\rec -> either (const False) (== stringX rec) (fst k1s)) r5s) = (Left . fromJust . find (\rec -> either (const False) (== stringX rec) (fst k1s)) $ r5s, n):xsGI rs n (k2s:k3s:kss) | otherwise = (fst k1s,n - 1):xsGI rs n (k2s:k3s:kss) where [!r2s,!r3s,!r4s,!r5s] = map (\f -> filter f rs) [isPRC, isPRAfterC, isPRBeforeC, isPREmptyC] xsGI rs n (k1s:k2s:kss) | snd k1s == n && (any (\rec -> either (const False) (== afterStringX rec) (fst k2s)) . filter (\rec -> either (const False) (== stringX rec) (fst k1s)) $ r3s) = (Left . fromJust . find (\rec -> either (const False) (== afterStringX rec) (fst k2s) && either (const False) (== stringX rec) (fst k1s)) $ r3s,n):xsGI rs n (k2s:kss) | snd k2s == n && (any (\rec -> either (const False) (== beforeStringX rec) (fst k1s)) . filter (\rec -> either (const False) (== stringX rec) (fst k2s)) $ r4s) = (fst k1s,n - 1):(Left . fromJust . find (\rec -> either (const False) (== beforeStringX rec) (fst k1s) && either (const False) (== stringX rec) (fst k2s)) $ r4s,n):xsGI rs n (kss) | snd k1s == n && (any (\rec -> either (const False) (== stringX rec) (fst k1s)) r5s) = (Left . fromJust . find (\rec -> either (const False) (== stringX rec) (fst k1s)) $ r5s,n):xsGI rs n (k2s:kss) | otherwise = (fst k1s,n - 1):xsGI rs n (k2s:kss) where [r3s,!r4s,!r5s] = map (\f -> filter f rs) [isPRAfterC, isPRBeforeC, isPREmptyC] xsGI rs n [k1s] | snd k1s == n && (any (\rec -> either (const False) (== stringX rec) (fst k1s)) r5s) = [(Left . fromJust . find (\rec -> either (const False) (== stringX rec) (fst k1s)) $ r5s,n)] | otherwise = [(fst k1s,n - 1)] where !r5s = filter isPREmptyC rs xsGI rs n [] = [] {-| The full conversion function. Applies conversion into representation using the 'GWritingSystemPRPLX' provided. -} stringToXG :: GWritingSystemPRPLX -> String -> [PhoneticsRepresentationPLX] stringToXG xs ys = fromPhoneticRX ts . map fst . stringToXSG xs n $ ys where n = maximum . map snd $ xs !ts = concatMap fst . filter ((== 0) . snd) $ xs