{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE ViewPatterns #-}

-- |
-- Module    : Data.Kanji.Types
-- Copyright : (c) Colin Woodbury, 2015, 2016
-- License   : GPL3
-- Maintainer: Colin Woodbury
--
-- Types for this library. While a constructor for `Kanji` is made available
-- here, you should prefer the `kanji` "smart constructor" unless you know
-- for sure that the `Char` in question falls within the correct Unicode
-- code point range (see `isKanji`).

module Data.Kanji.Types where

import Control.DeepSeq (NFData)
import Data.Aeson
import Data.Bool (bool)
import Data.Char (isLetter, isNumber, isPunctuation, ord)
import Data.Hashable
import GHC.Generics

---

-- | A single symbol of Kanji. Japanese Kanji were borrowed from China
-- over several waves during the last 1,500 years. Japan names 2,136 of
-- these as their standard set, with rarer characters being the domain
-- of academia and esoteric writers.
--
-- Japanese has several Japan-only Kanji, including:
--
-- * 畑 (a type of rice field)
-- * 峠 (a narrow mountain pass)
-- * 働 (to do physical labour)
--
-- The constructor performs no validation; use `kanji` to build a value
-- from an arbitrary `Char`.
newtype Kanji = Kanji Char
  deriving (Eq, Ord, Show, Generic, ToJSON, FromJSON, Hashable, NFData)

-- | The original `Char` of a `Kanji`.
_kanji :: Kanji -> Char
_kanji (Kanji k) = k

-- | Construct a `Kanji` value from some `Char` if it falls in the code point
-- range accepted by `isKanji`. Yields `Nothing` for every other character.
kanji :: Char -> Maybe Kanji
kanji c = bool Nothing (Just (Kanji c)) (isKanji c)

-- | A Level or "Kyuu" (級) of Japanese Kanji ranking. There are 12 of these,
-- from 10 to 1, including intermediate levels between 3 and 2, and 2 and 1.
-- A thirteenth constructor, `Unknown`, is also provided.
--
-- Japanese students will typically have Level-5 ability by the time they
-- finish elementary school. Level-5 accounts for 1,006 characters.
--
-- By the end of middle school, they would have covered up to Level-3
-- (1607 Kanji) in their Japanese class curriculum.
--
-- While Level-2 (2,136 Kanji) is considered "standard adult" ability,
-- many adults could not pass the Level-2, or even the Level-Pre2 (1940 Kanji)
-- exam without considerable study.
--
-- Level data for Kanji above Level-2 is currently not provided by
-- this library.
--
-- The derived `Ord` and `Enum` instances follow declaration order, so
-- @Ten < Nine < ... < One < Unknown@.
data Level
  = Ten
  | Nine
  | Eight
  | Seven
  | Six
  | Five
  | Four
  | Three
  | PreTwo
  | Two
  | PreOne
  | One
  | Unknown
  deriving (Eq, Ord, Enum, Show, Generic, Hashable, NFData, ToJSON, FromJSON, ToJSONKey)

-- | Legal Kanji appear between Unicode code points 19968 (U+4E00) and
-- 40959 (U+9FFF), inclusive. The comparison is made on the code point
-- returned by `ord`, not on any UTF-8 byte encoding.
isKanji :: Char -> Bool
isKanji (ord -> c) = 19968 <= c && c <= 40959
{-# INLINE isKanji #-}

-- | Is the character inside the Unicode Hiragana block, U+3040 to U+309F
-- inclusive? This is a superset of あ to ん.
isHiragana :: Char -> Bool
isHiragana (ord -> c) = 0x3040 <= c && c <= 0x309f
{-# INLINE isHiragana #-}

-- | Is the character inside the Unicode Katakana block, U+30A0 to U+30FF
-- inclusive? This is a superset of ア to ン.
isKatakana :: Char -> Bool
isKatakana (ord -> c) = 0x30a0 <= c && c <= 0x30ff
{-# INLINE isKatakana #-}

-- | General categories for characters, at least as is useful for thinking about
-- Japanese.
--
-- Japanese "full-width" numbers and letters will be counted as `Numeral`
-- and `RomanLetter` respectively, alongside their usual ASCII forms.
data CharCat
  = Hanzi
  | Hiragana
  | Katakana
  | Numeral
  | RomanLetter
  | Punctuation
  | Other
  deriving (Eq, Ord, Show, Generic, Hashable, NFData, ToJSON, FromJSON, ToJSONKey)

-- | Classify a single `Char` into a `CharCat`.
--
-- The checks run in the order written below and the first match wins, so
-- Kanji and kana are recognised before the generic `isLetter` test is tried.
--
-- NOTE: `isLetter` accepts any Unicode letter, not only Latin ones, so a
-- letter from another script that is not caught by the earlier guards is
-- also reported as `RomanLetter`.
category :: Char -> CharCat
category c
  | isKanji c       = Hanzi
  | isHiragana c    = Hiragana
  | isKatakana c    = Katakana
  | isLetter c      = RomanLetter
  | isNumber c      = Numeral
  | isPunctuation c = Punctuation
  | otherwise       = Other