module Guguk.Syllabification
(syllabify, Syllable) where

import           Data.Char  (isAlpha, toLower)
import           Data.Maybe (fromJust, isJust, isNothing)
import qualified Data.Text as T

import qualified Guguk.TurkishAlphabet as Alph

-- | A syllable as produced by 'syllabify', represented as a strict 'T.Text'.
type Syllable = T.Text

{-|
  Safe, zero-based indexing into a 'T.Text'.

  Returns @Just c@, where @c@ is the character at index @i@,
       or @Nothing@ if @i@ is out of bounds, i.e. negative or not
       smaller than the length of the text.
-}
charAt :: T.Text -> Int -> Maybe Char
charAt xs i
  -- 'T.drop' treats a negative count as zero, so without this guard a
  -- negative index would silently behave like index 0.
  | i < 0     = Nothing
  -- Dropping past the end leaves an empty text, for which 'T.uncons'
  -- returns Nothing; otherwise its first component is the wanted character.
  | otherwise = fmap fst (T.uncons (T.drop i xs))

-- | Slice of a text in the manner of Java's @substring(begin, end)@:
-- the text is first truncated to its leading @end@ characters and then
-- the leading @begin@ characters of that prefix are discarded.
--
-- Out-of-range indices never fail; 'T.take' and 'T.drop' simply saturate
-- at the ends of the text.
substring :: Int -> Int -> T.Text -> T.Text
substring begin end txt = T.drop begin (T.take end txt)

{-|
  Membership test for a character in a text.

  Returns True if the character occurs at least once in the text,
       or False otherwise.
-}
elemT :: Char -> T.Text -> Bool
elemT needle = isJust . T.find (== needle)

-- | List of Turkish syllables of the given text.
--
-- Syllables don't contain apostrophes and are all in lower case; every
-- character that is not a letter (per 'isAlpha') is dropped before splitting.
-- Note that the apostrophe is a definite syllable separator
-- for words of Arabic origin which contain the sound "ع".
--
-- Input without any letters yields @[]@. A chunk of letters that contains
-- no vowel is returned as a single element.
--
-- NOTE(review): 'toLower' from "Data.Char" is locale-independent, so a
-- capital I becomes i rather than the Turkish dotless ı. Confirm whether
-- Turkish-aware lowercasing is wanted here.
syllabify :: T.Text -> [Syllable]
syllabify s
  -- Base case
  | T.null s = []
  -- Split on apostrophes; the pieces contain none, so each one is handled
  -- by the guards below.
  | '\'' `elemT` s = concatMap syllabify (T.splitOn "'" s)
  -- Only digits, punctuation or whitespace: there are no syllables at all,
  -- rather than a single empty one.
  | T.null xs = []
  -- Return the same if there is no vowel
  | otherwise = maybe [xs] splitAfterVowel (T.findIndex Alph.isVowel xs)
  where
    xs = (T.filter isAlpha . T.map toLower) s
    len = T.length xs
    -- Three-consonant clusters that are broken after their first consonant
    -- instead of after the second one.
    exceptions = ["str", "ktr", "mtr", "nsp"]

    -- Decide where the first syllable ends by inspecting up to three
    -- characters that follow the first vowel (at index fVI), then recurse
    -- on the remainder. V = vowel, C = non-vowel, $ = end, | = break.
    splitAfterVowel fVI =
      case (vowelAt 1, vowelAt 2, vowelAt 3) of
        (Nothing, _, _)   -> [xs]          -- V$
        (Just True, _, _) -> breakAfter 1  -- V|V
        (_, Nothing, _)   -> [xs]          -- VC$
        (_, Just True, _) -> breakAfter 1  -- V|CV
        (_, _, Nothing)   -> [xs]          -- VCC$
        (_, _, Just True) -> breakAfter 2  -- VC|CV
        -- NOTE(review): if the text ends right after the cluster, the
        -- remainder has no vowel and comes back as an element of its own.
        _ | cluster `elem` exceptions -> breakAfter 2  -- VC|CC
          | otherwise                 -> breakAfter 3  -- VCC|C
      where
        -- Just True/False for the character i places after the first
        -- vowel, Nothing when the text ends before that position.
        vowelAt i = fmap Alph.isVowel (charAt xs (fVI + i))
        -- The three characters right after the first vowel. The range is
        -- relative to fVI so that the exception lookup is also correct
        -- when the vowel is not at index 1.
        cluster = substring (fVI + 1) (fVI + 4) xs
        -- First syllable: everything up to n characters past the vowel's
        -- index; the rest is syllabified recursively.
        breakAfter n =
          substring 0 (fVI + n) xs : syllabify (substring (fVI + n) len xs)