{-# OPTIONS_HADDOCK show-extensions #-}
{-# OPTIONS_GHC -funbox-strict-fields -fobject-code #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE UnboxedTuples #-}
{-# LANGUAGE MagicHash #-}

-- |
-- Module      :  Data.Phonetic.Languages.Syllables
-- Copyright   :  (c) OleksandrZhabenko 2021
-- License     :  MIT
-- Stability   :  Experimental
-- Maintainer  :  olexandr543@yahoo.com
--
-- This module works with syllable segmentation. The generalized version for the module
-- 'Languages.Phonetic.Ukrainian.Syllable.Arr' from @ukrainian-phonetics-basic-array@ package.
-- 

module Data.Phonetic.Languages.Syllables (
  -- * Data types and type synonyms
  PRS(..)
  , PhoneticType(..)
  , CharPhoneticClassification
  , StringRepresentation
  , SegmentationInfo1(..)
  , SegmentationPredFunction(..)
  , DListFunctionResult
  , SegmentationLineFunction(..)
  , SegmentationRules1(..)
  , SegmentRulesG
  -- * Basic functions
  , str2PRSs
  , sndGroups
  , groupSnds
  , divCnsnts
  , reSyllableCntnts
  , divVwls
  , createSyllablesPL
  -- * Auxiliary functions
  , gBF4
  , findC
  , isVowel1
  , isSonorous1
  , isVoicedC1
  , isVoicelessC1
  , isNotVowel2
  , notEqC
) where

import Prelude hiding (mappend)
import Data.Monoid
import qualified Data.List as L (groupBy,find)
import Data.Phonetic.Languages.Base
import CaseBi.Arr
import GHC.Arr
import GHC.Exts
import Data.List.InnToOut.Basic (mapI)
import Data.Maybe (mapMaybe,fromJust)
import GHC.Int

-- Inspired by: https://github.com/OleksandrZhabenko/mm1/releases/tag/0.2.0.0

-- CAUTION: Please, do not mix with the show7s functions, they are not interoperable.

data PRS = SylS {
  charS :: !Char, -- ^ Phonetic languages phenomenon representation. Usually, a phoneme, but it can be otherwise something different.
  phoneType :: !PhoneticType -- ^ Some encoded type.
} deriving ( Eq )

instance Ord PRS where
  compare (SylS x1 y1) (SylS x2 y2) =
    case compare x1 x2 of
      EQ -> compare y1 y2
      ~z -> z

data PhoneticType = P !Int8 deriving (Eq, Ord)

{-| The 'Array' 'Int' must be sorted in the ascending order to be used in the module correctly.
-}
type CharPhoneticClassification = Array Int PRS

{-| The 'String' of converted phonetic language representation 'Char' data is converted to this type to apply syllable
segmentation or other transformations.
-}
type StringRepresentation = [PRS]

-- | Is somewhat rewritten from the 'CaseBi.Arr.gBF3' function (not exported) from the @mmsyn2-array@ package.
gBF4
  :: (Ix i) => (# Int#, PRS #)
  -> (# Int#, PRS #)
  -> Char
  -> Array i PRS
  -> Maybe PRS
gBF4 (# !i#, k #) (# !j#, m #) c arr
 | isTrue# ((j# -# i#) ># 1# ) = 
    case compare c (charS p) of
     GT -> gBF4 (# n#, p #) (# j#, m #) c arr
     LT  -> gBF4 (# i#, k #) (# n#, p #) c arr
     _ -> Just p
 | c == charS m = Just m
 | c == charS k = Just k
 | otherwise = Nothing
     where !n# = (i# +# j#) `quotInt#` 2#
           !p = unsafeAt arr (I# n#)
{-# INLINABLE gBF4 #-}

findC
  :: Char
  -> Array Int PRS
  -> Maybe PRS
findC c arr = gBF4 (# i#, k #) (# j#, m #) c arr 
     where !(I# i#,I# j#) = bounds arr
           !k = unsafeAt arr (I# i#)
           !m = unsafeAt arr (I# i#)

str2PRSs :: CharPhoneticClassification -> String -> StringRepresentation
str2PRSs arr = map (\c -> fromJust . findC c $ arr)
  
-- | Function-predicate 'isVowel1' checks whether its argument is a vowel representation in the 'PRS' format.
isVowel1 :: PRS -> Bool
isVowel1 = (== P 0) . phoneType
{-# INLINE isVowel1 #-}

-- | Function-predicate 'isSonorous1' checks whether its argument is a sonorous consonant representation in the 'PRS' format.
isSonorous1 :: PRS -> Bool
isSonorous1 =  (`elem` [P 1,P 2]) . phoneType
{-# INLINE isSonorous1 #-}

-- | Function-predicate 'isVoicedC1' checks whether its argument is a voiced consonant representation in the 'PRS' format.
isVoicedC1 ::  PRS -> Bool
isVoicedC1 = (`elem` [P 3,P 4]) . phoneType
{-# INLINE isVoicedC1 #-}

-- | Function-predicate 'isVoiceless1' checks whether its argument is a voiceless consonant representation in the 'PRS' format.
isVoicelessC1 ::  PRS -> Bool
isVoicelessC1 =  (`elem` [P 5,P 6]) . phoneType
{-# INLINE isVoicelessC1 #-}

-- | Binary function-predicate 'isNotVowel2' checks whether its arguments are both consonant representations in the 'PRS' format.
isNotVowel2 :: PRS -> PRS -> Bool
isNotVowel2 x y
  | phoneType x == P 0 || phoneType y == P 0 = False
  | otherwise = True
{-# INLINE isNotVowel2 #-}

-- | Binary function-predicate 'notEqC' checks whether its arguments are not the same consonant sound representations (not taking palatalization into account).
notEqC
 :: [(Char,Char)] -- ^ The pairs of the 'Char' that corresponds to the similar phonetic languages consonant phenomenon (e. g. allophones). Must be sorted in the ascending order to be used correctly. 
 -> PRS
 -> PRS
 -> Bool
notEqC xs x y
  | (== cy) . getBFstLSorted' cx xs $ cx = False
  | otherwise = cx /= cy
      where !cx = charS x
            !cy = charS y

-- | Function 'sndGroups' converts a word being a list of 'PRS' to the list of phonetically similar (consonants grouped with consonants and each vowel separately)
-- sounds representations in 'PRS' format.
sndGroups :: [PRS] -> [[PRS]]
sndGroups ys@(_:_) = L.groupBy isNotVowel2 ys
sndGroups _ = []

groupSnds :: [PRS] -> [[PRS]]
groupSnds = L.groupBy (\x y -> ((== P 0) . phoneType $ x) == ((== P 0) . phoneType $ y))

data SegmentationInfo1 = SI {
 fieldN :: !Int8,  -- ^ Number of fields in the pattern matching that are needed to apply the segmentation rules. Not less than 1.
 predicateN :: Int8 -- ^ Number of predicates in the definition for the 'fieldN' that are needed to apply the segmentation rules.
} deriving (Eq)

data SegmentationPredFunction = PF (SegmentationInfo1 -> [(Char, Char)] -> [PRS] -> Bool)

type DListFunctionResult = ([PRS] -> [PRS],[PRS] -> [PRS])

data SegmentationLineFunction = LFS {
  predF :: SegmentationPredFunction,  -- ^ The predicate to check the needed rule for segmentation.
  resF :: DListFunctionResult -- ^ The result if the 'predF' returns 'True' for its arguments.
}

data SegmentationRules1 = SR1 {
  infoS :: SegmentationInfo1, 
  lineFs :: [SegmentationLineFunction] -- ^ The list must be sorted in the appropriate order of the guards usage for the predicates.
  -- The length of the list must be equal to the ('fromEnum' . 'predicateN' . 'infoS') value.
}  

{-| List of the 'SegmentationRules1' sorted in the descending order by the 'fieldN' 'SegmentationInfo1' data and where the
length of all the 'SegmentationPredFunction' lists of 'PRS' are equal to the 'fieldN' 'SegmentationInfo1' data by definition.
-}
type SegmentRulesG = [SegmentationRules1]

-- | Function 'divCnsnts' is used to divide groups of consonants into two-elements lists that later are made belonging to
-- different neighbour syllables if the group is between two vowels in a word. The group must be not empty, but this is not checked.
-- The example phonetical information for the proper performance in Ukrainian can be found from the:
-- https://msn.khnu.km.ua/pluginfile.php/302375/mod_resource/content/1/%D0%9B.3.%D0%86%D0%86.%20%D0%A1%D0%BA%D0%BB%D0%B0%D0%B4.%D0%9D%D0%B0%D0%B3%D0%BE%D0%BB%D0%BE%D1%81.pdf
-- The example of the 'divCnsnts' can be found at: https://hackage.haskell.org/package/ukrainian-phonetics-basic-array-0.1.2.0/docs/src/Languages.Phonetic.Ukrainian.Syllable.Arr.html#divCnsnts
divCnsnts :: [(Char,Char)] -> SegmentRulesG -> [PRS] -> DListFunctionResult
divCnsnts ks gs xs@(_:_) = resF . fromJust . L.find ((\(PF f) -> f (infoS js) ks xs) . predF). lineFs $ js
  where !l = length xs
        !js = fromJust . L.find ((== l) . fromEnum . fieldN . infoS) $ gs -- js :: SegmentationRules1
divCnsnts _ _ [] = (id,id)

reSyllableCntnts :: [(Char,Char)] -> SegmentRulesG -> [[PRS]] -> [[PRS]]
reSyllableCntnts ks gs (xs:ys:zs:xss)
  | (/= P 0) . phoneType . last $ ys = fst (divCnsnts ks gs ys) xs:reSyllableCntnts ks gs (snd (divCnsnts ks gs ys) zs:xss)
  | otherwise = reSyllableCntnts ks gs ((xs `mappend` ys):zs:xss)
reSyllableCntnts _ _ (xs:ys:_) = [(xs `mappend` ys)]
reSyllableCntnts _ _ xss = xss

divVwls :: [[PRS]] -> [[PRS]]
divVwls = mapI (\ws -> (length . filter ((== P 0) . phoneType) $ ws) > 1) h3
  where h3 us = [ys `mappend` take 1 zs] `mappend` (L.groupBy (\x y -> phoneType x == P 0 && phoneType y /= P 0) . drop 1 $ zs)
                  where (ys,zs) = span (\t -> phoneType t /= P 0) us

createSyllablesPL
  :: GWritingSystemPRPLX
  -> [(Char,Char)]
  -> CharPhoneticClassification
  -> SegmentRulesG
  -> String -- ^ Corresponds to the \'0\' symbol delimiter in the @ukrainian-phonetics-basic-array@ package.
  -> String -- ^ Corresponds to the \'1\' and \'-\' symbol delimiters in the @ukrainian-phonetics-basic-array@ package.
  -> String -- ^ Actually the converted 'String'.
  -> [[[PRS]]]
createSyllablesPL wrs ks arr gs us vs = map (divVwls . reSyllableCntnts ks gs . groupSnds . str2PRSs arr) . words1 . mapMaybe g . convertToProperPL . map (\x -> if x == '-' then ' ' else x)
  where g x
          | x `elem` us = Nothing
          | x `notElem` vs = Just x
          | otherwise = Just ' '
        words1 xs = if null ts then [] else w : words1 s'' -- Practically this is an optimized version for this case 'words' function from Prelude.
          where ts = dropWhile (== ' ') xs
                (w, s'') = span (/= ' ') ts
        {-# NOINLINE words1 #-}
        convertToProperPL = map char . stringToXG wrs
{-# INLINE createSyllablesPL #-}