{-# LANGUAGE LambdaCase #-}

-- |
-- Module      : Unicode.Char.General
-- Copyright   : (c) 2020 Composewell Technologies and Contributors
-- License     : Apache-2.0
-- Maintainer  : streamly@composewell.com
-- Stability   : experimental
--
-- General character property related functions.
--
module Unicode.Char.General
    (
    -- * Unicode general categories
      GeneralCategory(..)
    , generalCategoryAbbr
    , generalCategory

    -- * Character classification
    , isAlphabetic
    , isAlphaNum
    , isControl
    , isMark
    , isPrint
    , isPunctuation
    , isSeparator
    , isSymbol
    , isWhiteSpace
    , isLetter
    , isSpace
    -- ** Re-export
    , isAscii
    , isLatin1
    , isAsciiUpper
    , isAsciiLower

    -- * Korean Hangul Characters
    -- | The Hangul script used in the Korean writing system consists of
    -- individual consonant and vowel letters (jamo) that are visually combined
    -- into square display cells to form entire syllable blocks. Hangul
    -- syllables may be encoded directly as precomposed combinations of
    -- individual jamo or as decomposed sequences of conjoining jamo. Modern
    -- Hangul syllable blocks can be expressed with either two or three jamo,
    -- either in the form consonant + vowel or in the form consonant +
    -- vowel + consonant. The leading consonant is represented as L, the vowel
    -- as V and the trailing consonant as T.
    --
    -- The Unicode Standard contains both a large set of precomposed modern
    -- Hangul syllables and a set of conjoining Hangul jamo, which can be used
    -- to encode archaic Korean syllable blocks as well as modern Korean
    -- syllable blocks.
    --
    -- Hangul characters can be composed or decomposed algorithmically instead
    -- of via mappings.  These APIs are used mainly for Unicode normalization
    -- of Hangul text.
    --
    -- Please refer to the following resources for more information:
    --
    -- * The @Hangul@ section of the @East Asia@ chapter of the [Unicode Standard](https://www.unicode.org/versions/latest)
    -- * Conformance chapter of the [Unicode Standard](https://www.unicode.org/versions/latest)
    -- * [Unicode® Standard Annex #15 - Unicode Normalization Forms](https://www.unicode.org/reports/tr15)
    -- * UCD file @HangulSyllableType.txt@
    -- * https://en.wikipedia.org/wiki/Hangul_Jamo_(Unicode_block)
    -- * https://en.wikipedia.org/wiki/List_of_Hangul_jamo

    -- ** Conjoining Jamo
    -- | Jamo L, V and T letters.
    , isJamo
    , jamoNCount

    -- *** Jamo Leading (L)
    , jamoLFirst
    , jamoLCount
    , jamoLIndex
    , jamoLLast

    -- *** Jamo Vowel (V)
    , jamoVFirst
    , jamoVCount
    , jamoVIndex
    , jamoVLast

    -- *** Jamo Trailing (T)
    , jamoTFirst
    , jamoTCount
    , jamoTIndex
    , jamoTLast

    -- ** Hangul Syllables
    -- | Precomposed Hangul syllables.
    , hangulFirst
    , hangulLast
    , isHangul
    , isHangulLV
    )
where

import Control.Exception (assert)
import Data.Char (isAscii, isLatin1, isAsciiUpper, isAsciiLower, ord)
import Data.Ix (Ix)
import Unicode.Internal.Division (quotRem28)

import qualified Unicode.Internal.Char.DerivedCoreProperties as P
import qualified Unicode.Internal.Char.PropList as P
import qualified Unicode.Internal.Char.UnicodeData.GeneralCategory as UC

{-| Unicode General Categories.

These classes are defined in the
[Unicode Character Database](http://www.unicode.org/reports/tr44/tr44-14.html#GC_Values_Table),
part of the Unicode standard.

__Note:__ the classes must be in the same order they are listed in the Unicode Standard,
because some functions (e.g. 'generalCategory') rely on the 'Enum' instance.

@since 0.3.0
-}
data GeneralCategory
    -- L: Letter
    = UppercaseLetter       -- ^ @Lu@: Letter, Uppercase
    | LowercaseLetter       -- ^ @Ll@: Letter, Lowercase
    | TitlecaseLetter       -- ^ @Lt@: Letter, Titlecase
    | ModifierLetter        -- ^ @Lm@: Letter, Modifier
    | OtherLetter           -- ^ @Lo@: Letter, Other

    -- M: Mark
    | NonSpacingMark        -- ^ @Mn@: Mark, Non-Spacing
    | SpacingCombiningMark  -- ^ @Mc@: Mark, Spacing Combining
    | EnclosingMark         -- ^ @Me@: Mark, Enclosing

    -- N: Number
    | DecimalNumber         -- ^ @Nd@: Number, Decimal
    | LetterNumber          -- ^ @Nl@: Number, Letter
    | OtherNumber           -- ^ @No@: Number, Other

    -- P: Punctuation
    | ConnectorPunctuation  -- ^ @Pc@: Punctuation, Connector
    | DashPunctuation       -- ^ @Pd@: Punctuation, Dash
    | OpenPunctuation       -- ^ @Ps@: Punctuation, Open
    | ClosePunctuation      -- ^ @Pe@: Punctuation, Close
    | InitialQuote          -- ^ @Pi@: Punctuation, Initial quote
    | FinalQuote            -- ^ @Pf@: Punctuation, Final quote
    | OtherPunctuation      -- ^ @Po@: Punctuation, Other

    -- S: Symbol
    | MathSymbol            -- ^ @Sm@: Symbol, Math
    | CurrencySymbol        -- ^ @Sc@: Symbol, Currency
    | ModifierSymbol        -- ^ @Sk@: Symbol, Modifier
    | OtherSymbol           -- ^ @So@: Symbol, Other

    -- Z: Separator
    | Space                 -- ^ @Zs@: Separator, Space
    | LineSeparator         -- ^ @Zl@: Separator, Line
    | ParagraphSeparator    -- ^ @Zp@: Separator, Paragraph

    -- C: Other
    | Control               -- ^ @Cc@: Other, Control
    | Format                -- ^ @Cf@: Other, Format
    | Surrogate             -- ^ @Cs@: Other, Surrogate
    | PrivateUse            -- ^ @Co@: Other, Private Use
    | NotAssigned           -- ^ @Cn@: Other, Not Assigned
    deriving ( Show
             , Eq
             , Ord
             , Enum
             , Bounded
             , Ix
             )

-- | Abbreviation of 'GeneralCategory' used in the Unicode standard.
--
-- Each constructor maps to its two-letter abbreviation, e.g.
-- 'UppercaseLetter' gives @\"Lu\"@ and 'NotAssigned' gives @\"Cn\"@.
--
-- @since 0.3.0
generalCategoryAbbr :: GeneralCategory -> String
generalCategoryAbbr = \case
    UppercaseLetter      -> "Lu"
    LowercaseLetter      -> "Ll"
    TitlecaseLetter      -> "Lt"
    ModifierLetter       -> "Lm"
    OtherLetter          -> "Lo"
    NonSpacingMark       -> "Mn"
    SpacingCombiningMark -> "Mc"
    EnclosingMark        -> "Me"
    DecimalNumber        -> "Nd"
    LetterNumber         -> "Nl"
    OtherNumber          -> "No"
    ConnectorPunctuation -> "Pc"
    DashPunctuation      -> "Pd"
    OpenPunctuation      -> "Ps"
    ClosePunctuation     -> "Pe"
    InitialQuote         -> "Pi"
    FinalQuote           -> "Pf"
    OtherPunctuation     -> "Po"
    MathSymbol           -> "Sm"
    CurrencySymbol       -> "Sc"
    ModifierSymbol       -> "Sk"
    OtherSymbol          -> "So"
    Space                -> "Zs"
    LineSeparator        -> "Zl"
    ParagraphSeparator   -> "Zp"
    Control              -> "Cc"
    Format               -> "Cf"
    Surrogate            -> "Cs"
    PrivateUse           -> "Co"
    NotAssigned          -> "Cn"

{-| The Unicode general category of the character.

This property is defined in the column 2 of the @UnicodeData@ table.

This relies on the 'Enum' instance of 'GeneralCategory', which must remain in the
same order as the categories are presented in the Unicode standard: the
internal lookup yields an 'Int' which is decoded with 'toEnum'.

prop> show (generalCategory c) == show (Data.Char.generalCategory c)

@since 0.3.0
-}
{-# INLINE generalCategory #-}
generalCategory :: Char -> GeneralCategory
generalCategory = toEnum . UC.generalCategory

{-| Returns 'True' for alphabetic Unicode characters (lower-case, upper-case
and title-case letters, plus letters of caseless scripts and modifier
letters).

__Note:__ this function is /not/ equivalent to
'Unicode.Char.General.Compat.isAlpha' / 'Unicode.Char.General.Compat.isLetter':

* 'Unicode.Char.General.Compat.isAlpha' matches the following general categories:

    * 'UppercaseLetter' (@Lu@)
    * 'LowercaseLetter' (@Ll@)
    * 'TitlecaseLetter' (@Lt@)
    * 'ModifierLetter' (@Lm@)
    * 'OtherLetter' (@Lo@)

* whereas 'isAlphabetic' matches:

    * @Uppercase@ [property](https://www.unicode.org/reports/tr44/#Uppercase)
    * @Lowercase@ [property](https://www.unicode.org/reports/tr44/#Lowercase)
    * 'TitlecaseLetter' (@Lt@)
    * 'ModifierLetter' (@Lm@)
    * 'OtherLetter' (@Lo@)
    * 'LetterNumber' (@Nl@)
    * @Other_Alphabetic@ [property](https://www.unicode.org/reports/tr44/#Other_Alphabetic)

@since 0.3.0
-}
{-# INLINE isAlphabetic #-}
isAlphabetic :: Char -> Bool
isAlphabetic = P.isAlphabetic

{-| Selects alphabetic or numeric Unicode characters.

This function returns 'True' if its argument has one of the
following 'GeneralCategory's, or 'False' otherwise:

* 'UppercaseLetter'
* 'LowercaseLetter'
* 'TitlecaseLetter'
* 'ModifierLetter'
* 'OtherLetter'
* 'DecimalNumber'
* 'LetterNumber'
* 'OtherNumber'

prop> isAlphaNum c == Data.Char.isAlphaNum c

@since 0.3.0
-}
isAlphaNum :: Char -> Bool
isAlphaNum c = case generalCategory c of
    UppercaseLetter -> True
    LowercaseLetter -> True
    TitlecaseLetter -> True
    ModifierLetter  -> True
    OtherLetter     -> True
    DecimalNumber   -> True
    LetterNumber    -> True
    OtherNumber     -> True
    _               -> False

{-| Selects control characters, which are the non-printing characters
of the Latin-1 subset of Unicode.

This function returns 'True' if its argument has the 'GeneralCategory' 'Control'.

prop> isControl c == Data.Char.isControl c

@since 0.3.0
-}
isControl :: Char -> Bool
isControl c = case generalCategory c of
    Control -> True
    _       -> False

{-| Selects Unicode mark characters, for example accents and the
like, which combine with preceding characters.

This function returns 'True' if its argument has one of the
following 'GeneralCategory's, or 'False' otherwise:

* 'NonSpacingMark'
* 'SpacingCombiningMark'
* 'EnclosingMark'

prop> isMark c == Data.Char.isMark c

@since 0.3.0
-}
isMark :: Char -> Bool
isMark c = case generalCategory c of
    NonSpacingMark       -> True
    SpacingCombiningMark -> True
    EnclosingMark        -> True
    _                    -> False

{-| Selects printable Unicode characters (letters, numbers, marks, punctuation,
symbols and spaces).

This function returns 'False' if its argument has one of the
following 'GeneralCategory's, or 'True' otherwise:

* 'LineSeparator'
* 'ParagraphSeparator'
* 'Control'
* 'Format'
* 'Surrogate'
* 'PrivateUse'
* 'NotAssigned'

prop> isPrint c == Data.Char.isPrint c

@since 0.3.0
-}
isPrint :: Char -> Bool
isPrint c = case generalCategory c of
    LineSeparator      -> False
    ParagraphSeparator -> False
    Control            -> False
    Format             -> False
    Surrogate          -> False
    PrivateUse         -> False
    NotAssigned        -> False
    _                  -> True

{-| Selects Unicode punctuation characters, including various kinds
of connectors, brackets and quotes.

This function returns 'True' if its argument has one of the
following 'GeneralCategory's, or 'False' otherwise:

* 'ConnectorPunctuation'
* 'DashPunctuation'
* 'OpenPunctuation'
* 'ClosePunctuation'
* 'InitialQuote'
* 'FinalQuote'
* 'OtherPunctuation'

prop> isPunctuation c == Data.Char.isPunctuation c

@since 0.3.0
-}
isPunctuation :: Char -> Bool
isPunctuation c = case generalCategory c of
    ConnectorPunctuation -> True
    DashPunctuation      -> True
    OpenPunctuation      -> True
    ClosePunctuation     -> True
    InitialQuote         -> True
    FinalQuote           -> True
    OtherPunctuation     -> True
    _                    -> False

{- | Returns 'True' for any whitespace characters, and the control
characters @\\t@, @\\n@, @\\r@, @\\f@, @\\v@.

See: [Unicode @White_Space@](https://www.unicode.org/reports/tr44/#White_Space).

__Note:__ 'isWhiteSpace' is /not/ equivalent to 'Unicode.Char.General.Compat.isSpace'.
'isWhiteSpace' selects the same characters as 'Unicode.Char.General.Compat.isSpace'
plus the following:

* @U+0085@ NEXT LINE (NEL)
* @U+2028@ LINE SEPARATOR
* @U+2029@ PARAGRAPH SEPARATOR

@since 0.3.0
-}
{-# INLINE isWhiteSpace #-}
isWhiteSpace :: Char -> Bool
isWhiteSpace = P.isWhite_Space

{-| Selects Unicode space and separator characters.

This function returns 'True' if its argument has one of the
following 'GeneralCategory's, or 'False' otherwise:

* 'Space'
* 'LineSeparator'
* 'ParagraphSeparator'

prop> isSeparator c == Data.Char.isSeparator c

@since 0.3.0
-}
isSeparator :: Char -> Bool
isSeparator c = case generalCategory c of
    Space              -> True
    LineSeparator      -> True
    ParagraphSeparator -> True
    _                  -> False

{-| Selects Unicode symbol characters, including mathematical and currency symbols.

This function returns 'True' if its argument has one of the
following 'GeneralCategory's, or 'False' otherwise:

* 'MathSymbol'
* 'CurrencySymbol'
* 'ModifierSymbol'
* 'OtherSymbol'

prop> isSymbol c == Data.Char.isSymbol c

@since 0.3.0
-}
isSymbol :: Char -> Bool
isSymbol c = case generalCategory c of
    MathSymbol     -> True
    CurrencySymbol -> True
    ModifierSymbol -> True
    OtherSymbol    -> True
    _              -> False

-- | Returns 'True' for alphabetic Unicode characters (lower-case, upper-case
-- and title-case letters, plus letters of caseless scripts and modifier
-- letters).
--
-- This is the same predicate as 'isAlphabetic'; it is kept only for backward
-- compatibility.
--
-- @since 0.1.0
{-# INLINE isLetter #-}
{-# DEPRECATED isLetter "Use isAlphabetic instead. Note that the behavior of this function does not match base:Data.Char.isLetter. See Unicode.Char.General.Compat for behavior compatible with base:Data.Char." #-}
isLetter :: Char -> Bool
isLetter = P.isAlphabetic

-- | Returns 'True' for any whitespace characters, and the control
-- characters @\\t@, @\\n@, @\\r@, @\\f@, @\\v@.
--
-- This is the same predicate as 'isWhiteSpace'; it is kept only for backward
-- compatibility.
--
-- @since 0.1.0
{-# INLINE isSpace #-}
{-# DEPRECATED isSpace "Use isWhiteSpace instead. Note that the behavior of this function does not match base:Data.Char.isSpace. See Unicode.Char.General.Compat for behavior compatible with base:Data.Char." #-}
isSpace :: Char -> Bool
isSpace = P.isWhite_Space

-------------------------------------------------------------------------------
-- Korean Hangul
-------------------------------------------------------------------------------

-- jamo leading
jamoLFirst, jamoLCount, jamoLLast :: Int

-- | First leading consonant jamo.
--
-- @since 0.1.0
jamoLFirst  = 0x1100

-- | Total count of leading consonant jamo.
--
-- @since 0.3.0
jamoLCount = 19

-- | Last leading consonant jamo.
--
-- Derived from 'jamoLFirst' and 'jamoLCount'.
--
-- @since 0.1.0
jamoLLast = jamoLFirst + jamoLCount - 1

-- jamo vowel
jamoVFirst, jamoVCount, jamoVLast :: Int

-- | First vowel jamo.
--
-- @since 0.1.0
jamoVFirst  = 0x1161

-- | Total count of vowel jamo.
--
-- @since 0.1.0
jamoVCount = 21

-- | Last vowel jamo.
--
-- Derived from 'jamoVFirst' and 'jamoVCount'.
--
-- @since 0.1.0
jamoVLast = jamoVFirst + jamoVCount - 1

-- jamo trailing
jamoTFirst, jamoTCount :: Int

-- | The first trailing consonant jamo.
--
-- Note that 'jamoTFirst' does not represent a valid T, it represents a missing
-- T i.e. LV without a T. See comments under 'jamoTIndex' .
--
-- @since 0.1.0
jamoTFirst  = 0x11a7

-- | Total count of trailing consonant jamo.
--
-- This count includes the \"missing T\" slot represented by 'jamoTFirst'.
--
-- @since 0.1.0
jamoTCount = 28

-- | Last trailing consonant jamo.
--
-- Derived from 'jamoTFirst' and 'jamoTCount'.
--
-- @since 0.1.0
jamoTLast :: Int
jamoTLast = jamoTFirst + jamoTCount - 1

-- | Number of vowel / trailing consonant combinations, i.e. the number of
-- precomposed Hangul syllables that share a single leading consonant.
--
-- @jamoNCount = jamoVCount * jamoTCount@ (that is, @21 * 28 = 588@).
--
-- @since 0.1.0
jamoNCount :: Int
jamoNCount = 588

-- Hangul
hangulFirst, hangulLast :: Int

-- | Codepoint of the first pre-composed Hangul character.
--
-- @since 0.1.0
hangulFirst = 0xac00

-- | Codepoint of the last Hangul character.
--
-- The precomposed block holds one syllable for every (L, V, T) combination,
-- hence @jamoLCount * jamoVCount * jamoTCount@ code points starting at
-- 'hangulFirst'.
--
-- @since 0.1.0
hangulLast = hangulFirst + jamoLCount * jamoVCount * jamoTCount - 1

-- | Determine if the given character is a precomposed Hangul syllable.
--
-- This is an inclusive range check of the code point against
-- 'hangulFirst' and 'hangulLast'.
--
-- @since 0.1.0
isHangul :: Char -> Bool
isHangul c = n >= hangulFirst && n <= hangulLast
    where n = ord c

-- | Determine if the given character is a Hangul LV syllable.
--
-- __Note:__ this function requires a precomposed Hangul syllable but does /not/
-- check it. Use 'isHangul' to check the input character before passing it to
-- 'isHangulLV'.
--
-- A syllable is LV when its offset from 'hangulFirst' leaves no remainder,
-- i.e. its trailing consonant index is 0.
--
-- @since 0.1.0
isHangulLV :: Char -> Bool
isHangulLV c =
    -- The assertion documents that the specialised division helper and
    -- 'jamoTCount' must agree (presumably 'quotRem28' hard-codes the divisor
    -- 28; verify against "Unicode.Internal.Division").
    assert (jamoTCount == 28)
        (snd (quotRem28 (ord c - hangulFirst)) == 0)

-- | Determine whether a character is a jamo L, V or T character.
--
-- __Note:__ this is a single inclusive range check from 'jamoLFirst' to
-- 'jamoTLast', so it also accepts the code points lying between the L, V and
-- T sub-ranges. Use 'jamoLIndex', 'jamoVIndex' and 'jamoTIndex' for an exact
-- classification.
--
-- @since 0.1.0
isJamo :: Char -> Bool
isJamo c = n >= jamoLFirst && n <= jamoTLast
    where n = ord c

-- | Given a Unicode character, if it is a leading jamo, return its index in
-- the list of leading jamo consonants, otherwise return 'Nothing'.
--
-- The index is the offset from 'jamoLFirst' and lies in
-- @[0, jamoLCount - 1]@.
--
-- @since 0.1.0
jamoLIndex :: Char -> Maybe Int
jamoLIndex c
  | index >= 0 && index < jamoLCount = Just index
  | otherwise = Nothing
    where index = ord c - jamoLFirst

-- | Given a Unicode character, if it is a vowel jamo, return its index in the
-- list of vowel jamo, otherwise return 'Nothing'.
--
-- The index is the offset from 'jamoVFirst' and lies in
-- @[0, jamoVCount - 1]@.
--
-- @since 0.1.0
jamoVIndex :: Char -> Maybe Int
jamoVIndex c
  | index >= 0 && index < jamoVCount = Just index
  | otherwise = Nothing
    where index = ord c - jamoVFirst

-- | Given a Unicode character, if it is a trailing jamo consonant, return its
-- index in the list of trailing jamo consonants, otherwise return 'Nothing'.
--
-- Note that index 0 is not a valid index for a trailing consonant. Index 0
-- corresponds to an LV syllable, without a T.  See "Hangul Syllable
-- Decomposition" in the Conformance chapter of the Unicode standard for more
-- details.
--
-- Consequently the returned index is the offset from 'jamoTFirst' and lies in
-- @[1, jamoTCount - 1]@; 'jamoTFirst' itself yields 'Nothing'.
--
-- @since 0.1.0
jamoTIndex :: Char -> Maybe Int
jamoTIndex c
  | index > 0 && index < jamoTCount = Just index
  | otherwise = Nothing
    where index = ord c - jamoTFirst