{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveGeneric  #-}
{-# LANGUAGE ViewPatterns   #-}

-- |
-- Module    : Data.Kanji.Types
-- Copyright : (c) Colin Woodbury, 2015, 2016
-- License   : GPL3
-- Maintainer: Colin Woodbury <colin@fosskers.ca>
--
-- Types for this library. While a constructor for `Kanji` is made available
-- here, you should prefer the `kanji` "smart constructor" unless you know
-- for sure that the `Char` in question falls within the correct Unicode range.

module Data.Kanji.Types where

import Control.DeepSeq (NFData)
import Data.Aeson
import Data.Bool (bool)
import Data.Char (isLetter, isNumber, isPunctuation, ord)
import Data.Hashable
import GHC.Generics

---

-- | A single symbol of Kanji. Japanese Kanji were borrowed from China
-- over several waves during the last 1,500 years. Japan names 2,136 of
-- these as their standard set, with rarer characters being the domain
-- of academia and esoteric writers.
--
-- Japanese has several Japan-only Kanji, including:
--
-- * 畑 (a type of rice field)
-- * 峠 (a narrow mountain pass)
-- * 働 (to do physical labour)
newtype Kanji = Kanji Char deriving (Kanji -> Kanji -> Bool
(Kanji -> Kanji -> Bool) -> (Kanji -> Kanji -> Bool) -> Eq Kanji
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Kanji -> Kanji -> Bool
$c/= :: Kanji -> Kanji -> Bool
== :: Kanji -> Kanji -> Bool
$c== :: Kanji -> Kanji -> Bool
Eq, Eq Kanji
Eq Kanji
-> (Kanji -> Kanji -> Ordering)
-> (Kanji -> Kanji -> Bool)
-> (Kanji -> Kanji -> Bool)
-> (Kanji -> Kanji -> Bool)
-> (Kanji -> Kanji -> Bool)
-> (Kanji -> Kanji -> Kanji)
-> (Kanji -> Kanji -> Kanji)
-> Ord Kanji
Kanji -> Kanji -> Bool
Kanji -> Kanji -> Ordering
Kanji -> Kanji -> Kanji
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Kanji -> Kanji -> Kanji
$cmin :: Kanji -> Kanji -> Kanji
max :: Kanji -> Kanji -> Kanji
$cmax :: Kanji -> Kanji -> Kanji
>= :: Kanji -> Kanji -> Bool
$c>= :: Kanji -> Kanji -> Bool
> :: Kanji -> Kanji -> Bool
$c> :: Kanji -> Kanji -> Bool
<= :: Kanji -> Kanji -> Bool
$c<= :: Kanji -> Kanji -> Bool
< :: Kanji -> Kanji -> Bool
$c< :: Kanji -> Kanji -> Bool
compare :: Kanji -> Kanji -> Ordering
$ccompare :: Kanji -> Kanji -> Ordering
$cp1Ord :: Eq Kanji
Ord, Int -> Kanji -> ShowS
[Kanji] -> ShowS
Kanji -> String
(Int -> Kanji -> ShowS)
-> (Kanji -> String) -> ([Kanji] -> ShowS) -> Show Kanji
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Kanji] -> ShowS
$cshowList :: [Kanji] -> ShowS
show :: Kanji -> String
$cshow :: Kanji -> String
showsPrec :: Int -> Kanji -> ShowS
$cshowsPrec :: Int -> Kanji -> ShowS
Show, (forall x. Kanji -> Rep Kanji x)
-> (forall x. Rep Kanji x -> Kanji) -> Generic Kanji
forall x. Rep Kanji x -> Kanji
forall x. Kanji -> Rep Kanji x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep Kanji x -> Kanji
$cfrom :: forall x. Kanji -> Rep Kanji x
Generic, [Kanji] -> Encoding
[Kanji] -> Value
Kanji -> Encoding
Kanji -> Value
(Kanji -> Value)
-> (Kanji -> Encoding)
-> ([Kanji] -> Value)
-> ([Kanji] -> Encoding)
-> ToJSON Kanji
forall a.
(a -> Value)
-> (a -> Encoding)
-> ([a] -> Value)
-> ([a] -> Encoding)
-> ToJSON a
toEncodingList :: [Kanji] -> Encoding
$ctoEncodingList :: [Kanji] -> Encoding
toJSONList :: [Kanji] -> Value
$ctoJSONList :: [Kanji] -> Value
toEncoding :: Kanji -> Encoding
$ctoEncoding :: Kanji -> Encoding
toJSON :: Kanji -> Value
$ctoJSON :: Kanji -> Value
ToJSON, Value -> Parser [Kanji]
Value -> Parser Kanji
(Value -> Parser Kanji)
-> (Value -> Parser [Kanji]) -> FromJSON Kanji
forall a.
(Value -> Parser a) -> (Value -> Parser [a]) -> FromJSON a
parseJSONList :: Value -> Parser [Kanji]
$cparseJSONList :: Value -> Parser [Kanji]
parseJSON :: Value -> Parser Kanji
$cparseJSON :: Value -> Parser Kanji
FromJSON, Eq Kanji
Eq Kanji
-> (Int -> Kanji -> Int) -> (Kanji -> Int) -> Hashable Kanji
Int -> Kanji -> Int
Kanji -> Int
forall a. Eq a -> (Int -> a -> Int) -> (a -> Int) -> Hashable a
hash :: Kanji -> Int
$chash :: Kanji -> Int
hashWithSalt :: Int -> Kanji -> Int
$chashWithSalt :: Int -> Kanji -> Int
$cp1Hashable :: Eq Kanji
Hashable, Kanji -> ()
(Kanji -> ()) -> NFData Kanji
forall a. (a -> ()) -> NFData a
rnf :: Kanji -> ()
$crnf :: Kanji -> ()
NFData)

-- | The original `Char` of a `Kanji`.
--
-- Simply unwraps the newtype; no validation is performed.
_kanji :: Kanji -> Char
_kanji (Kanji k) = k

-- | Construct a `Kanji` value from some `Char` if it falls in the correct
-- Unicode code-point range, as decided by `isKanji`.
--
-- Returns `Nothing` when `isKanji` rejects the character, and the wrapped
-- character in `Just` otherwise.
kanji :: Char -> Maybe Kanji
kanji c = bool Nothing (Just (Kanji c)) (isKanji c)

-- | A Level or "Kyuu" (級) of Japanese Kanji ranking. There are 12 of these,
-- from 10 to 1, including intermediate levels between 3 and 2, and 2 and 1.
--
-- Japanese students will typically have Level-5 ability by the time they
-- finish elementary school. Level-5 accounts for 1,006 characters.
--
-- By the end of middle school, they would have covered up to Level-3
-- (1607 Kanji) in their Japanese class curriculum.
--
-- While Level-2 (2,136 Kanji) is considered "standard adult" ability,
-- many adults could not pass the Level-2, or even the Level-Pre2 (1940 Kanji)
-- exam without considerable study.
--
-- Level data for Kanji above Level-2 is currently not provided by
-- this library.
data Level
  -- NOTE: the derived `Ord` and `Enum` instances follow declaration order
  -- (`Ten` is the smallest value, `Unknown` the largest), so the
  -- constructors must not be reordered.
  = Ten
  | Nine
  | Eight
  | Seven
  | Six
  | Five     -- ^ Typical ability at the end of elementary school (1,006 Kanji).
  | Four
  | Three    -- ^ Covered by the end of middle school (1,607 Kanji).
  | PreTwo   -- ^ Intermediate level between 3 and 2 (1,940 Kanji).
  | Two      -- ^ "Standard adult" ability (2,136 Kanji).
  | PreOne   -- ^ Intermediate level between 2 and 1.
  | One
  | Unknown  -- ^ Presumably for Kanji without level data - confirm against callers.
  deriving (Eq, Ord, Enum, Show, Generic, Hashable, NFData, ToJSON, FromJSON, ToJSONKey)

-- | Legal Kanji appear between Unicode code points 19968 (U+4E00) and
-- 40959 (U+9FFF), inclusive on both ends.
--
-- The test is on the character's code point as given by `ord`; it has
-- nothing to do with how the character is encoded (UTF-8 or otherwise).
isKanji :: Char -> Bool
isKanji c = 0x4e00 <= code && code <= 0x9fff
  where
    -- 0x4e00 == 19968 and 0x9fff == 40959.
    code = ord c
{-# INLINE isKanji #-}

-- | Is the character in the Hiragana range, U+3040 to U+309F inclusive?
--
-- This covers あ to ん, but note that the tested range is wider than that:
-- it also accepts every other code point from U+3040 to U+309F, such as the
-- small ぁ (U+3041) that precedes あ.
isHiragana :: Char -> Bool
isHiragana c = 0x3040 <= code && code <= 0x309f
  where
    code = ord c
{-# INLINE isHiragana #-}

-- | Is the character in the Katakana range, U+30A0 to U+30FF inclusive?
--
-- This covers ア to ン, but note that the tested range is wider than that:
-- every code point from U+30A0 to U+30FF is accepted, including the
-- prolonged sound mark ー (U+30FC).
isKatakana :: Char -> Bool
isKatakana c = 0x30a0 <= code && code <= 0x30ff
  where
    code = ord c
{-# INLINE isKatakana #-}

-- | General categories for characters, at least as is useful for thinking about
-- Japanese.
--
-- Japanese "full-width" numbers and letters will be counted as `Numeral`
-- and `RomanLetter` respectively, alongside their usual ASCII forms.
data CharCat
  = Hanzi        -- ^ Characters accepted by `isKanji`.
  | Hiragana     -- ^ Characters accepted by `isHiragana`.
  | Katakana     -- ^ Characters accepted by `isKatakana`.
  | Numeral      -- ^ Numbers, including Japanese "full-width" forms.
  | RomanLetter  -- ^ Letters, including Japanese "full-width" forms.
  | Punctuation  -- ^ Punctuation marks.
  | Other        -- ^ Anything that fits none of the categories above.
  deriving (Eq, Ord, Show, Generic, Hashable, NFData, ToJSON, FromJSON, ToJSONKey)

-- | Classify a `Char` into its `CharCat`.
--
-- The guards are tried from top to bottom and the first match wins, so the
-- Japanese ranges (`isKanji`, `isHiragana`, `isKatakana`) take priority over
-- the general `Data.Char` predicates.
--
-- Note that `isLetter` accepts any Unicode letter, so a letter from a
-- non-Roman script that is not caught by the Japanese ranges is still
-- reported as `RomanLetter`.
category :: Char -> CharCat
category c
  | isKanji c       = Hanzi
  | isHiragana c    = Hiragana
  | isKatakana c    = Katakana
  | isLetter c      = RomanLetter
  | isNumber c      = Numeral
  | isPunctuation c = Punctuation
  | otherwise       = Other