module Data.Kanji
(
AsKanji(..)
, Kanji(..)
, kanji
, allKanji
, isKanji
, hasLevel
, kanjiDensity
, elementaryKanjiDensity
, percentSpread
, Level(..)
, Rank
, rankNums
, level
, levels
, isKanjiInLevel
, levelDist
, levelFromRank
, averageLevel
) where
import qualified Data.ByteString.Char8 as SB
import qualified Data.ByteString.Lazy.Char8 as LB
import Data.Char (ord)
import Data.List (sort, group)
import qualified Data.Set as S
import qualified Data.Text as ST
import qualified Data.Text.Lazy as LT
import Lens.Micro
import Data.Kanji.TenthQ
import Data.Kanji.NinthQ
import Data.Kanji.EighthQ
import Data.Kanji.SeventhQ
import Data.Kanji.SixthQ
import Data.Kanji.FifthQ
import Data.Kanji.FourthQ
import Data.Kanji.ThirdQ
import Data.Kanji.PreSecondQ
import Data.Kanji.SecondQ
class AsKanji a where
_Kanji :: Traversal' a Kanji
len :: Num b => a -> b
asKanji :: a -> [Kanji]
asKanji a = a ^.. _Kanji
instance AsKanji Char where
_Kanji f c = if isKanji c then _kanji <$> f (Kanji c) else pure c
len = const 1
instance AsKanji [Char] where
_Kanji = traverse . _Kanji
len = fromIntegral . length
instance AsKanji ST.Text where
_Kanji = packed . _Kanji
where packed f b = ST.pack <$> f (ST.unpack b)
len = fromIntegral . ST.length
instance AsKanji LT.Text where
_Kanji = packed . _Kanji
where packed f b = LT.pack <$> f (LT.unpack b)
len = fromIntegral . LT.length
instance AsKanji SB.ByteString where
_Kanji = packed . _Kanji
where packed f b = SB.pack <$> f (SB.unpack b)
len = fromIntegral . SB.length
instance AsKanji LB.ByteString where
_Kanji = packed . _Kanji
where packed f b = LB.pack <$> f (LB.unpack b)
len = fromIntegral . LB.length
newtype Kanji = Kanji { _kanji :: Char } deriving (Eq, Ord, Show)
kanji :: Traversal' Char Kanji
kanji = _Kanji
data Level = Level { _allKanji :: S.Set Kanji
, _rank :: Rank
} deriving (Eq, Show)
type Rank = Float
rankNums :: [Rank]
rankNums = [10,9,8,7,6,5,4,3,2.5,2,1.5,1]
allKanji :: [[Kanji]]
allKanji = map asKanji ks
where ks = [tenthQ, ninthQ, eighthQ, seventhQ, sixthQ,
fifthQ, fourthQ, thirdQ, preSecondQ, secondQ]
isKanji :: Char -> Bool
isKanji c = lowLimit <= c' && c' <= highLimit
where c' = ord c
lowLimit = 19968
highLimit = 40959
hasLevel :: [Level] -> Kanji -> Bool
hasLevel qs k = has _Just $ level qs k
kanjiDensity :: AsKanji a => a -> Float
kanjiDensity ks = length' (asKanji ks) / len ks
where length' = fromIntegral . length
elementaryKanjiDensity :: AsKanji a => a -> Float
elementaryKanjiDensity ks = foldl (\acc (_,p) -> acc + p) 0 elementaryQs
where elementaryQs = filter (\(qn,_) -> qn `elem` [5..10]) distributions
distributions = levelDist levels $ asKanji ks
makeLevel :: [Kanji] -> Rank -> Level
makeLevel ks n = Level (S.fromDistinctAscList ks) n
levels :: [Level]
levels = map f $ zip allKanji rankNums
where f (ks,n) = makeLevel ks n
level :: [Level] -> Kanji -> Maybe Level
level [] _ = Nothing
level (q:qs) k | isKanjiInLevel q k = Just q
| otherwise = level qs k
isKanjiInLevel :: Level -> Kanji -> Bool
isKanjiInLevel q k = S.member k $ _allKanji q
rank :: [Level] -> Kanji -> Rank
rank qs k = maybe 0 _rank $ level qs k
levelFromRank :: [Level] -> Rank -> Maybe Level
levelFromRank [] _ = Nothing
levelFromRank (q:qs) qn | _rank q == qn = Just q
| otherwise = levelFromRank qs qn
averageLevel :: [Level] -> [Kanji] -> Float
averageLevel qs ks = average $ map (rank qs) ks
where average ns = (sum ns) / (fromIntegral $ length ns)
levelDist :: [Level] -> [Kanji] -> [(Rank,Float)]
levelDist qs ks = map toNumPercentPair $ group sortedRanks
where sortedRanks = sort $ map (rank qs) ks
toNumPercentPair qns = (head qns, length' qns / length' sortedRanks)
length' n = fromIntegral $ length n
percentSpread :: [Kanji] -> [(Kanji,Float)]
percentSpread ks = map getPercent kQuants
where getPercent (k,q) = (k, fromIntegral q / totalKanji)
kQuants = kanjiQuantities ks
totalKanji = fromIntegral $ foldl (\acc (_,q) -> q + acc) 0 kQuants
kanjiQuantities :: [Kanji] -> [(Kanji,Int)]
kanjiQuantities = map (\ks -> (head ks, length ks)) . group . sort