{-# LANGUAGE BangPatterns #-}

-- |
-- Module      : Data.Unicode.Properties.DecomposeHangul
-- Copyright   : (c) 2016 Harendra Kumar
--
-- License     : BSD-3-Clause
-- Maintainer  : harendra.kumar@gmail.com
-- Stability   : experimental
--
module Data.Unicode.Properties.DecomposeHangul
    (decomposeCharHangul
    , hangulFirst
    , hangulLast
    , isHangul
    , isHangulLV

    , isJamo
    , jamoLFirst
    , jamoLIndex
    , jamoLLast

    , jamoVFirst
    , jamoVIndex
    , jamoVLast

    , jamoTFirst
    , jamoTCount
    , jamoTIndex
    , jamoLast

    , jamoNCount
    )
where

import Control.Exception              (assert)
import Data.Char                      (ord)
import GHC.Base                       (unsafeChr)
import Data.Unicode.Internal.Division (quotRem21, quotRem28)

-- Hangul characters can be decomposed algorithmically instead of via mappings

-------------------------------------------------------------------------------
-- General utilities used by decomposition as well as composition
-------------------------------------------------------------------------------

-- * https://www.unicode.org/versions/Unicode13.0.0/ch03.pdf
-- * https://en.wikipedia.org/wiki/List_of_Hangul_jamo
-- * https://www.unicode.org/reports/tr15/tr15-18.html#Hangul

-- D134   Standard Korean syllable block: A sequence of one or more L followed
-- by a sequence of  one  or  more  V  and  a  sequence  of  zero  or  more  T,
-- or any other sequence that is canonically equivalent

-- jamo leading
jamoLFirst, jamoLCount, jamoLLast :: Int
jamoLFirst  = 0x1100
jamoLCount = 19
jamoLLast = jamoLFirst + jamoLCount - 1

-- jamo vowel
jamoVFirst, jamoVCount, jamoVLast :: Int
jamoVFirst  = 0x1161
jamoVCount = 21
jamoVLast = jamoVFirst + jamoVCount - 1

-- jamo trailing
-- jamoTFirst does not represent a valid T, it represents a missing T i.e. LV
-- without a T. See comments under jamoTIndex .
jamoTFirst, jamoTCount :: Int
jamoTFirst  = 0x11a7
jamoTCount = 28

jamoLast :: Int
jamoLast = jamoTFirst + jamoTCount - 1

-- VCount * TCount
jamoNCount :: Int
jamoNCount = 588

-- hangul
hangulFirst, hangulLast :: Int
hangulFirst = 0xac00
hangulLast = hangulFirst + jamoLCount * jamoVCount * jamoTCount - 1

isHangul :: Char -> Bool
isHangul c = n >= hangulFirst && n <= hangulLast
    where n = ord c

isHangulLV :: Char -> Bool
isHangulLV c = assert (jamoTCount == 28)
    snd (quotRem28 (ord c - hangulFirst)) == 0

isJamo :: Char -> Bool
isJamo c = n >= jamoLFirst && n <= jamoLast
    where n = ord c

-- if it is a jamo L char return the index
jamoLIndex :: Char -> Maybe Int
jamoLIndex c
  | index >= 0 && index < jamoLCount = Just index
  | otherwise = Nothing
    where index = ord c - jamoLFirst

jamoVIndex :: Char -> Maybe Int
jamoVIndex c
  | index >= 0 && index < jamoVCount = Just index
  | otherwise = Nothing
    where index = ord c - jamoVFirst

-- Note that index 0 is not a valid index for a trailing consonant. Index 0
-- means no T, only LV syllable.
-- See Unicode 9.0.0: 3.12 (Hangul Syllable Decomposition)
-- TBase is set to one less than the beginning of the range of trailing
-- consonants, which starts at U+11A8.
jamoTIndex :: Char -> Maybe Int
jamoTIndex c
  | index > 0 && index < jamoTCount = Just index
  | otherwise = Nothing
    where index = ord c - jamoTFirst

-------------------------------------------------------------------------------
-- Hangul decomposition
-------------------------------------------------------------------------------

{-# INLINE decomposeCharHangul #-}
decomposeCharHangul :: Char -> (Char, Char, Char)
decomposeCharHangul c = (l, v, t)
    where
        i = (ord c) - hangulFirst
        !(tn, ti) = assert (jamoTCount == 28) quotRem28 i
        !(li, vi) = assert (jamoVCount == 21) quotRem21 tn

        l = unsafeChr (jamoLFirst + li)
        v = unsafeChr (jamoVFirst + vi)
        t = unsafeChr (jamoTFirst + ti)