{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TupleSections #-}

-- | Data types for unicode character data and functions for
-- extracting it from @UnicodeData.txt@.
module Text.Collate.UnicodeData
  ( UChar(..),
    GeneralCategory(..),
    BidiClass(..),
    DecompositionType(..),
    parseUnicodeData,
    toCanonicalCombiningClassMap,
    toCanonicalDecompositionMap,
    genCanonicalCombiningClassMap,
    genCanonicalDecompositionMap,
    readCodePoints,
  )
where

import qualified Data.ByteString as B
import qualified Data.IntMap as M
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import qualified Data.Text.Read as TR
import GHC.Generics (Generic)
import Instances.TH.Lift ()
import Language.Haskell.TH
import Language.Haskell.TH.Syntax (qAddDependentFile)

-- | Path to @UnicodeData.txt@.
unicodeDataPath :: FilePath
unicodeDataPath :: [Char]
unicodeDataPath = [Char]
"data/UnicodeData.txt"

readUtf8Text :: FilePath -> IO Text
readUtf8Text :: [Char] -> IO Text
readUtf8Text [Char]
fp = ByteString -> Text
TE.decodeUtf8 (ByteString -> Text) -> IO ByteString -> IO Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Char] -> IO ByteString
B.readFile [Char]
fp

-- | Generate map of code points to canonical combining class,
-- from @UnicodeData.txt@.
genCanonicalCombiningClassMap :: Q Exp
genCanonicalCombiningClassMap :: Q Exp
genCanonicalCombiningClassMap = do
  [Char] -> Q ()
forall (m :: * -> *). Quasi m => [Char] -> m ()
qAddDependentFile [Char]
unicodeDataPath
  IntMap Int
cccmap <-
    IntMap UChar -> IntMap Int
toCanonicalCombiningClassMap (IntMap UChar -> IntMap Int)
-> (Text -> IntMap UChar) -> Text -> IntMap Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> IntMap UChar
parseUnicodeData
      (Text -> IntMap Int) -> Q Text -> Q (IntMap Int)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> IO Text -> Q Text
forall a. IO a -> Q a
runIO ([Char] -> IO Text
readUtf8Text [Char]
unicodeDataPath)
  [|cccmap|]

-- | Generate map of code points to canonical decompositions,
-- from @UnicodeData.txt@.
genCanonicalDecompositionMap :: Q Exp
genCanonicalDecompositionMap :: Q Exp
genCanonicalDecompositionMap = do
  [Char] -> Q ()
forall (m :: * -> *). Quasi m => [Char] -> m ()
qAddDependentFile [Char]
unicodeDataPath
  IntMap [Int]
dmap <-
    IntMap UChar -> IntMap [Int]
toCanonicalDecompositionMap (IntMap UChar -> IntMap [Int])
-> (Text -> IntMap UChar) -> Text -> IntMap [Int]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> IntMap UChar
parseUnicodeData
      (Text -> IntMap [Int]) -> Q Text -> Q (IntMap [Int])
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> IO Text -> Q Text
forall a. IO a -> Q a
runIO ([Char] -> IO Text
readUtf8Text [Char]
unicodeDataPath)
  [|dmap|]

-- | Parse @UnicodeData.txt@ into a map of 'UChar' records.
parseUnicodeData :: Text -> M.IntMap UChar
parseUnicodeData :: Text -> IntMap UChar
parseUnicodeData = (Text -> IntMap UChar -> IntMap UChar)
-> IntMap UChar -> [Text] -> IntMap UChar
forall a b. (a -> b -> b) -> b -> [a] -> b
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr Text -> IntMap UChar -> IntMap UChar
parseLine IntMap UChar
forall a. Monoid a => a
mempty ([Text] -> IntMap UChar)
-> (Text -> [Text]) -> Text -> IntMap UChar
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> [Text]
T.lines

-- | Convert unicode data to a map from code points to canonical combining
-- classes.
toCanonicalCombiningClassMap :: M.IntMap UChar -> M.IntMap Int
toCanonicalCombiningClassMap :: IntMap UChar -> IntMap Int
toCanonicalCombiningClassMap =
  (UChar -> Int) -> IntMap UChar -> IntMap Int
forall a b. (a -> b) -> IntMap a -> IntMap b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap UChar -> Int
uCanonicalCombiningClass (IntMap UChar -> IntMap Int)
-> (IntMap UChar -> IntMap UChar) -> IntMap UChar -> IntMap Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (UChar -> Bool) -> IntMap UChar -> IntMap UChar
forall a. (a -> Bool) -> IntMap a -> IntMap a
M.filter ((Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
0) (Int -> Bool) -> (UChar -> Int) -> UChar -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. UChar -> Int
uCanonicalCombiningClass)

-- | Convert unicode data to a map from code points to canonical decompositions.
toCanonicalDecompositionMap :: M.IntMap UChar -> M.IntMap [Int]
toCanonicalDecompositionMap :: IntMap UChar -> IntMap [Int]
toCanonicalDecompositionMap =
  (UChar -> [Int]) -> IntMap UChar -> IntMap [Int]
forall a b. (a -> b) -> IntMap a -> IntMap b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap UChar -> [Int]
uDecompositionMapping
    (IntMap UChar -> IntMap [Int])
-> (IntMap UChar -> IntMap UChar) -> IntMap UChar -> IntMap [Int]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (UChar -> Bool) -> IntMap UChar -> IntMap UChar
forall a. (a -> Bool) -> IntMap a -> IntMap a
M.filter
      ( \UChar
x ->
          UChar -> DecompositionType
uDecompositionType UChar
x DecompositionType -> DecompositionType -> Bool
forall a. Eq a => a -> a -> Bool
== DecompositionType
Canonical
            Bool -> Bool -> Bool
&& Bool -> Bool
not ([Int] -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null (UChar -> [Int]
uDecompositionMapping UChar
x))
      )

data GeneralCategory
  = Lu
  | Ll
  | Lt
  | Lm
  | Lo
  | Mn
  | Mc
  | Me
  | Nd
  | Nl
  | No
  | Pc
  | Pd
  | Ps
  | Pe
  | Pi
  | Pf
  | Po
  | Sm
  | Sc
  | Sk
  | So
  | Zs
  | Zl
  | Zp
  | Cc
  | Cf
  | Cs
  | Co
  | Cn
  deriving (Int -> GeneralCategory -> ShowS
[GeneralCategory] -> ShowS
GeneralCategory -> [Char]
(Int -> GeneralCategory -> ShowS)
-> (GeneralCategory -> [Char])
-> ([GeneralCategory] -> ShowS)
-> Show GeneralCategory
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> GeneralCategory -> ShowS
showsPrec :: Int -> GeneralCategory -> ShowS
$cshow :: GeneralCategory -> [Char]
show :: GeneralCategory -> [Char]
$cshowList :: [GeneralCategory] -> ShowS
showList :: [GeneralCategory] -> ShowS
Show, ReadPrec [GeneralCategory]
ReadPrec GeneralCategory
Int -> ReadS GeneralCategory
ReadS [GeneralCategory]
(Int -> ReadS GeneralCategory)
-> ReadS [GeneralCategory]
-> ReadPrec GeneralCategory
-> ReadPrec [GeneralCategory]
-> Read GeneralCategory
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
$creadsPrec :: Int -> ReadS GeneralCategory
readsPrec :: Int -> ReadS GeneralCategory
$creadList :: ReadS [GeneralCategory]
readList :: ReadS [GeneralCategory]
$creadPrec :: ReadPrec GeneralCategory
readPrec :: ReadPrec GeneralCategory
$creadListPrec :: ReadPrec [GeneralCategory]
readListPrec :: ReadPrec [GeneralCategory]
Read, GeneralCategory -> GeneralCategory -> Bool
(GeneralCategory -> GeneralCategory -> Bool)
-> (GeneralCategory -> GeneralCategory -> Bool)
-> Eq GeneralCategory
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: GeneralCategory -> GeneralCategory -> Bool
== :: GeneralCategory -> GeneralCategory -> Bool
$c/= :: GeneralCategory -> GeneralCategory -> Bool
/= :: GeneralCategory -> GeneralCategory -> Bool
Eq, Eq GeneralCategory
Eq GeneralCategory =>
(GeneralCategory -> GeneralCategory -> Ordering)
-> (GeneralCategory -> GeneralCategory -> Bool)
-> (GeneralCategory -> GeneralCategory -> Bool)
-> (GeneralCategory -> GeneralCategory -> Bool)
-> (GeneralCategory -> GeneralCategory -> Bool)
-> (GeneralCategory -> GeneralCategory -> GeneralCategory)
-> (GeneralCategory -> GeneralCategory -> GeneralCategory)
-> Ord GeneralCategory
GeneralCategory -> GeneralCategory -> Bool
GeneralCategory -> GeneralCategory -> Ordering
GeneralCategory -> GeneralCategory -> GeneralCategory
forall a.
Eq a =>
(a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
$ccompare :: GeneralCategory -> GeneralCategory -> Ordering
compare :: GeneralCategory -> GeneralCategory -> Ordering
$c< :: GeneralCategory -> GeneralCategory -> Bool
< :: GeneralCategory -> GeneralCategory -> Bool
$c<= :: GeneralCategory -> GeneralCategory -> Bool
<= :: GeneralCategory -> GeneralCategory -> Bool
$c> :: GeneralCategory -> GeneralCategory -> Bool
> :: GeneralCategory -> GeneralCategory -> Bool
$c>= :: GeneralCategory -> GeneralCategory -> Bool
>= :: GeneralCategory -> GeneralCategory -> Bool
$cmax :: GeneralCategory -> GeneralCategory -> GeneralCategory
max :: GeneralCategory -> GeneralCategory -> GeneralCategory
$cmin :: GeneralCategory -> GeneralCategory -> GeneralCategory
min :: GeneralCategory -> GeneralCategory -> GeneralCategory
Ord, Int -> GeneralCategory
GeneralCategory -> Int
GeneralCategory -> [GeneralCategory]
GeneralCategory -> GeneralCategory
GeneralCategory -> GeneralCategory -> [GeneralCategory]
GeneralCategory
-> GeneralCategory -> GeneralCategory -> [GeneralCategory]
(GeneralCategory -> GeneralCategory)
-> (GeneralCategory -> GeneralCategory)
-> (Int -> GeneralCategory)
-> (GeneralCategory -> Int)
-> (GeneralCategory -> [GeneralCategory])
-> (GeneralCategory -> GeneralCategory -> [GeneralCategory])
-> (GeneralCategory -> GeneralCategory -> [GeneralCategory])
-> (GeneralCategory
    -> GeneralCategory -> GeneralCategory -> [GeneralCategory])
-> Enum GeneralCategory
forall a.
(a -> a)
-> (a -> a)
-> (Int -> a)
-> (a -> Int)
-> (a -> [a])
-> (a -> a -> [a])
-> (a -> a -> [a])
-> (a -> a -> a -> [a])
-> Enum a
$csucc :: GeneralCategory -> GeneralCategory
succ :: GeneralCategory -> GeneralCategory
$cpred :: GeneralCategory -> GeneralCategory
pred :: GeneralCategory -> GeneralCategory
$ctoEnum :: Int -> GeneralCategory
toEnum :: Int -> GeneralCategory
$cfromEnum :: GeneralCategory -> Int
fromEnum :: GeneralCategory -> Int
$cenumFrom :: GeneralCategory -> [GeneralCategory]
enumFrom :: GeneralCategory -> [GeneralCategory]
$cenumFromThen :: GeneralCategory -> GeneralCategory -> [GeneralCategory]
enumFromThen :: GeneralCategory -> GeneralCategory -> [GeneralCategory]
$cenumFromTo :: GeneralCategory -> GeneralCategory -> [GeneralCategory]
enumFromTo :: GeneralCategory -> GeneralCategory -> [GeneralCategory]
$cenumFromThenTo :: GeneralCategory
-> GeneralCategory -> GeneralCategory -> [GeneralCategory]
enumFromThenTo :: GeneralCategory
-> GeneralCategory -> GeneralCategory -> [GeneralCategory]
Enum, (forall x. GeneralCategory -> Rep GeneralCategory x)
-> (forall x. Rep GeneralCategory x -> GeneralCategory)
-> Generic GeneralCategory
forall x. Rep GeneralCategory x -> GeneralCategory
forall x. GeneralCategory -> Rep GeneralCategory x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cfrom :: forall x. GeneralCategory -> Rep GeneralCategory x
from :: forall x. GeneralCategory -> Rep GeneralCategory x
$cto :: forall x. Rep GeneralCategory x -> GeneralCategory
to :: forall x. Rep GeneralCategory x -> GeneralCategory
Generic)

data BidiClass
  = L
  | LRE
  | LRO
  | R
  | AL
  | RLE
  | RLO
  | PDF
  | EN
  | ES
  | ET
  | AN
  | CS
  | NSM
  | BN
  | B
  | S
  | WS
  | ON
  | LRI
  | RLI
  | FSI
  | PDI
  deriving (Int -> BidiClass -> ShowS
[BidiClass] -> ShowS
BidiClass -> [Char]
(Int -> BidiClass -> ShowS)
-> (BidiClass -> [Char])
-> ([BidiClass] -> ShowS)
-> Show BidiClass
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> BidiClass -> ShowS
showsPrec :: Int -> BidiClass -> ShowS
$cshow :: BidiClass -> [Char]
show :: BidiClass -> [Char]
$cshowList :: [BidiClass] -> ShowS
showList :: [BidiClass] -> ShowS
Show, ReadPrec [BidiClass]
ReadPrec BidiClass
Int -> ReadS BidiClass
ReadS [BidiClass]
(Int -> ReadS BidiClass)
-> ReadS [BidiClass]
-> ReadPrec BidiClass
-> ReadPrec [BidiClass]
-> Read BidiClass
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
$creadsPrec :: Int -> ReadS BidiClass
readsPrec :: Int -> ReadS BidiClass
$creadList :: ReadS [BidiClass]
readList :: ReadS [BidiClass]
$creadPrec :: ReadPrec BidiClass
readPrec :: ReadPrec BidiClass
$creadListPrec :: ReadPrec [BidiClass]
readListPrec :: ReadPrec [BidiClass]
Read, BidiClass -> BidiClass -> Bool
(BidiClass -> BidiClass -> Bool)
-> (BidiClass -> BidiClass -> Bool) -> Eq BidiClass
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: BidiClass -> BidiClass -> Bool
== :: BidiClass -> BidiClass -> Bool
$c/= :: BidiClass -> BidiClass -> Bool
/= :: BidiClass -> BidiClass -> Bool
Eq, Eq BidiClass
Eq BidiClass =>
(BidiClass -> BidiClass -> Ordering)
-> (BidiClass -> BidiClass -> Bool)
-> (BidiClass -> BidiClass -> Bool)
-> (BidiClass -> BidiClass -> Bool)
-> (BidiClass -> BidiClass -> Bool)
-> (BidiClass -> BidiClass -> BidiClass)
-> (BidiClass -> BidiClass -> BidiClass)
-> Ord BidiClass
BidiClass -> BidiClass -> Bool
BidiClass -> BidiClass -> Ordering
BidiClass -> BidiClass -> BidiClass
forall a.
Eq a =>
(a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
$ccompare :: BidiClass -> BidiClass -> Ordering
compare :: BidiClass -> BidiClass -> Ordering
$c< :: BidiClass -> BidiClass -> Bool
< :: BidiClass -> BidiClass -> Bool
$c<= :: BidiClass -> BidiClass -> Bool
<= :: BidiClass -> BidiClass -> Bool
$c> :: BidiClass -> BidiClass -> Bool
> :: BidiClass -> BidiClass -> Bool
$c>= :: BidiClass -> BidiClass -> Bool
>= :: BidiClass -> BidiClass -> Bool
$cmax :: BidiClass -> BidiClass -> BidiClass
max :: BidiClass -> BidiClass -> BidiClass
$cmin :: BidiClass -> BidiClass -> BidiClass
min :: BidiClass -> BidiClass -> BidiClass
Ord, Int -> BidiClass
BidiClass -> Int
BidiClass -> [BidiClass]
BidiClass -> BidiClass
BidiClass -> BidiClass -> [BidiClass]
BidiClass -> BidiClass -> BidiClass -> [BidiClass]
(BidiClass -> BidiClass)
-> (BidiClass -> BidiClass)
-> (Int -> BidiClass)
-> (BidiClass -> Int)
-> (BidiClass -> [BidiClass])
-> (BidiClass -> BidiClass -> [BidiClass])
-> (BidiClass -> BidiClass -> [BidiClass])
-> (BidiClass -> BidiClass -> BidiClass -> [BidiClass])
-> Enum BidiClass
forall a.
(a -> a)
-> (a -> a)
-> (Int -> a)
-> (a -> Int)
-> (a -> [a])
-> (a -> a -> [a])
-> (a -> a -> [a])
-> (a -> a -> a -> [a])
-> Enum a
$csucc :: BidiClass -> BidiClass
succ :: BidiClass -> BidiClass
$cpred :: BidiClass -> BidiClass
pred :: BidiClass -> BidiClass
$ctoEnum :: Int -> BidiClass
toEnum :: Int -> BidiClass
$cfromEnum :: BidiClass -> Int
fromEnum :: BidiClass -> Int
$cenumFrom :: BidiClass -> [BidiClass]
enumFrom :: BidiClass -> [BidiClass]
$cenumFromThen :: BidiClass -> BidiClass -> [BidiClass]
enumFromThen :: BidiClass -> BidiClass -> [BidiClass]
$cenumFromTo :: BidiClass -> BidiClass -> [BidiClass]
enumFromTo :: BidiClass -> BidiClass -> [BidiClass]
$cenumFromThenTo :: BidiClass -> BidiClass -> BidiClass -> [BidiClass]
enumFromThenTo :: BidiClass -> BidiClass -> BidiClass -> [BidiClass]
Enum, (forall x. BidiClass -> Rep BidiClass x)
-> (forall x. Rep BidiClass x -> BidiClass) -> Generic BidiClass
forall x. Rep BidiClass x -> BidiClass
forall x. BidiClass -> Rep BidiClass x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cfrom :: forall x. BidiClass -> Rep BidiClass x
from :: forall x. BidiClass -> Rep BidiClass x
$cto :: forall x. Rep BidiClass x -> BidiClass
to :: forall x. Rep BidiClass x -> BidiClass
Generic)

data DecompositionType
  = Font
  | NoBreak
  | Initial
  | Medial
  | Final
  | Isolated
  | Circle
  | Super
  | Sub
  | Vertical
  | Wide
  | Narrow
  | Small
  | Square
  | Fraction
  | Compat
  | Canonical
  deriving (Int -> DecompositionType -> ShowS
[DecompositionType] -> ShowS
DecompositionType -> [Char]
(Int -> DecompositionType -> ShowS)
-> (DecompositionType -> [Char])
-> ([DecompositionType] -> ShowS)
-> Show DecompositionType
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> DecompositionType -> ShowS
showsPrec :: Int -> DecompositionType -> ShowS
$cshow :: DecompositionType -> [Char]
show :: DecompositionType -> [Char]
$cshowList :: [DecompositionType] -> ShowS
showList :: [DecompositionType] -> ShowS
Show, ReadPrec [DecompositionType]
ReadPrec DecompositionType
Int -> ReadS DecompositionType
ReadS [DecompositionType]
(Int -> ReadS DecompositionType)
-> ReadS [DecompositionType]
-> ReadPrec DecompositionType
-> ReadPrec [DecompositionType]
-> Read DecompositionType
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
$creadsPrec :: Int -> ReadS DecompositionType
readsPrec :: Int -> ReadS DecompositionType
$creadList :: ReadS [DecompositionType]
readList :: ReadS [DecompositionType]
$creadPrec :: ReadPrec DecompositionType
readPrec :: ReadPrec DecompositionType
$creadListPrec :: ReadPrec [DecompositionType]
readListPrec :: ReadPrec [DecompositionType]
Read, DecompositionType -> DecompositionType -> Bool
(DecompositionType -> DecompositionType -> Bool)
-> (DecompositionType -> DecompositionType -> Bool)
-> Eq DecompositionType
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: DecompositionType -> DecompositionType -> Bool
== :: DecompositionType -> DecompositionType -> Bool
$c/= :: DecompositionType -> DecompositionType -> Bool
/= :: DecompositionType -> DecompositionType -> Bool
Eq, Eq DecompositionType
Eq DecompositionType =>
(DecompositionType -> DecompositionType -> Ordering)
-> (DecompositionType -> DecompositionType -> Bool)
-> (DecompositionType -> DecompositionType -> Bool)
-> (DecompositionType -> DecompositionType -> Bool)
-> (DecompositionType -> DecompositionType -> Bool)
-> (DecompositionType -> DecompositionType -> DecompositionType)
-> (DecompositionType -> DecompositionType -> DecompositionType)
-> Ord DecompositionType
DecompositionType -> DecompositionType -> Bool
DecompositionType -> DecompositionType -> Ordering
DecompositionType -> DecompositionType -> DecompositionType
forall a.
Eq a =>
(a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
$ccompare :: DecompositionType -> DecompositionType -> Ordering
compare :: DecompositionType -> DecompositionType -> Ordering
$c< :: DecompositionType -> DecompositionType -> Bool
< :: DecompositionType -> DecompositionType -> Bool
$c<= :: DecompositionType -> DecompositionType -> Bool
<= :: DecompositionType -> DecompositionType -> Bool
$c> :: DecompositionType -> DecompositionType -> Bool
> :: DecompositionType -> DecompositionType -> Bool
$c>= :: DecompositionType -> DecompositionType -> Bool
>= :: DecompositionType -> DecompositionType -> Bool
$cmax :: DecompositionType -> DecompositionType -> DecompositionType
max :: DecompositionType -> DecompositionType -> DecompositionType
$cmin :: DecompositionType -> DecompositionType -> DecompositionType
min :: DecompositionType -> DecompositionType -> DecompositionType
Ord, Int -> DecompositionType
DecompositionType -> Int
DecompositionType -> [DecompositionType]
DecompositionType -> DecompositionType
DecompositionType -> DecompositionType -> [DecompositionType]
DecompositionType
-> DecompositionType -> DecompositionType -> [DecompositionType]
(DecompositionType -> DecompositionType)
-> (DecompositionType -> DecompositionType)
-> (Int -> DecompositionType)
-> (DecompositionType -> Int)
-> (DecompositionType -> [DecompositionType])
-> (DecompositionType -> DecompositionType -> [DecompositionType])
-> (DecompositionType -> DecompositionType -> [DecompositionType])
-> (DecompositionType
    -> DecompositionType -> DecompositionType -> [DecompositionType])
-> Enum DecompositionType
forall a.
(a -> a)
-> (a -> a)
-> (Int -> a)
-> (a -> Int)
-> (a -> [a])
-> (a -> a -> [a])
-> (a -> a -> [a])
-> (a -> a -> a -> [a])
-> Enum a
$csucc :: DecompositionType -> DecompositionType
succ :: DecompositionType -> DecompositionType
$cpred :: DecompositionType -> DecompositionType
pred :: DecompositionType -> DecompositionType
$ctoEnum :: Int -> DecompositionType
toEnum :: Int -> DecompositionType
$cfromEnum :: DecompositionType -> Int
fromEnum :: DecompositionType -> Int
$cenumFrom :: DecompositionType -> [DecompositionType]
enumFrom :: DecompositionType -> [DecompositionType]
$cenumFromThen :: DecompositionType -> DecompositionType -> [DecompositionType]
enumFromThen :: DecompositionType -> DecompositionType -> [DecompositionType]
$cenumFromTo :: DecompositionType -> DecompositionType -> [DecompositionType]
enumFromTo :: DecompositionType -> DecompositionType -> [DecompositionType]
$cenumFromThenTo :: DecompositionType
-> DecompositionType -> DecompositionType -> [DecompositionType]
enumFromThenTo :: DecompositionType
-> DecompositionType -> DecompositionType -> [DecompositionType]
Enum, (forall x. DecompositionType -> Rep DecompositionType x)
-> (forall x. Rep DecompositionType x -> DecompositionType)
-> Generic DecompositionType
forall x. Rep DecompositionType x -> DecompositionType
forall x. DecompositionType -> Rep DecompositionType x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cfrom :: forall x. DecompositionType -> Rep DecompositionType x
from :: forall x. DecompositionType -> Rep DecompositionType x
$cto :: forall x. Rep DecompositionType x -> DecompositionType
to :: forall x. Rep DecompositionType x -> DecompositionType
Generic)

-- | A 'UChar' encodes the data in one line of @UnicodeData.txt@.
data UChar = UChar
  { UChar -> Int
uCodePoint :: Int,
    UChar -> Text
uName :: Text,
    UChar -> GeneralCategory
uGeneralCategory :: GeneralCategory,
    UChar -> Int
uCanonicalCombiningClass :: Int,
    UChar -> BidiClass
uBidiClass :: BidiClass,
    UChar -> DecompositionType
uDecompositionType :: DecompositionType,
    UChar -> [Int]
uDecompositionMapping :: [Int],
    UChar -> (Maybe Int, Maybe Int, Maybe Int)
uNumericTypeAndValue :: (Maybe Int, Maybe Int, Maybe Int),
    UChar -> Bool
uBidiMirrored :: Bool,
    UChar -> Text
uUnicode1Name :: Text,
    UChar -> Text
uISOComment :: Text,
    UChar -> Int
uSimpleUppercaseMapping :: Int,
    UChar -> Int
uSimpleLowercaseMapping :: Int,
    UChar -> Int
uSimpleTitlecaseMappping :: Int
  }
  deriving (Int -> UChar -> ShowS
[UChar] -> ShowS
UChar -> [Char]
(Int -> UChar -> ShowS)
-> (UChar -> [Char]) -> ([UChar] -> ShowS) -> Show UChar
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> UChar -> ShowS
showsPrec :: Int -> UChar -> ShowS
$cshow :: UChar -> [Char]
show :: UChar -> [Char]
$cshowList :: [UChar] -> ShowS
showList :: [UChar] -> ShowS
Show, UChar -> UChar -> Bool
(UChar -> UChar -> Bool) -> (UChar -> UChar -> Bool) -> Eq UChar
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: UChar -> UChar -> Bool
== :: UChar -> UChar -> Bool
$c/= :: UChar -> UChar -> Bool
/= :: UChar -> UChar -> Bool
Eq, Eq UChar
Eq UChar =>
(UChar -> UChar -> Ordering)
-> (UChar -> UChar -> Bool)
-> (UChar -> UChar -> Bool)
-> (UChar -> UChar -> Bool)
-> (UChar -> UChar -> Bool)
-> (UChar -> UChar -> UChar)
-> (UChar -> UChar -> UChar)
-> Ord UChar
UChar -> UChar -> Bool
UChar -> UChar -> Ordering
UChar -> UChar -> UChar
forall a.
Eq a =>
(a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
$ccompare :: UChar -> UChar -> Ordering
compare :: UChar -> UChar -> Ordering
$c< :: UChar -> UChar -> Bool
< :: UChar -> UChar -> Bool
$c<= :: UChar -> UChar -> Bool
<= :: UChar -> UChar -> Bool
$c> :: UChar -> UChar -> Bool
> :: UChar -> UChar -> Bool
$c>= :: UChar -> UChar -> Bool
>= :: UChar -> UChar -> Bool
$cmax :: UChar -> UChar -> UChar
max :: UChar -> UChar -> UChar
$cmin :: UChar -> UChar -> UChar
min :: UChar -> UChar -> UChar
Ord, (forall x. UChar -> Rep UChar x)
-> (forall x. Rep UChar x -> UChar) -> Generic UChar
forall x. Rep UChar x -> UChar
forall x. UChar -> Rep UChar x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cfrom :: forall x. UChar -> Rep UChar x
from :: forall x. UChar -> Rep UChar x
$cto :: forall x. Rep UChar x -> UChar
to :: forall x. Rep UChar x -> UChar
Generic)

readCodePoint :: Text -> Int
readCodePoint :: Text -> Int
readCodePoint Text
t =
  case Reader Int
forall a. Integral a => Reader a
TR.hexadecimal Text
t of
    Left [Char]
e -> [Char] -> Int
forall a. HasCallStack => [Char] -> a
error [Char]
e -- ok to error at compile-time
    Right (Int
codepoint, Text
_) -> Int
codepoint

-- | Read a sequence of space-separated hex numbers.
readCodePoints :: Text -> ([Int], Text)
readCodePoints :: Text -> ([Int], Text)
readCodePoints Text
t =
  case Reader Int
forall a. Integral a => Reader a
TR.hexadecimal Text
t of
    Left [Char]
_ -> ([], Text
t)
    Right (Int
codepoint, Text
rest) ->
      let ([Int]
cps, Text
t') = Text -> ([Int], Text)
readCodePoints ((Char -> Bool) -> Text -> Text
T.dropWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
' ') Text
rest)
       in (Int
codepoint Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
: [Int]
cps, Text
t')

parseDecomp :: Text -> (DecompositionType, [Int])
parseDecomp :: Text -> (DecompositionType, [Int])
parseDecomp Text
bs =
  case Text -> Maybe (Char, Text)
T.uncons Text
bs of
    Just (Char
'<', Text
rest) -> (DecompositionType
ty, [Int]
xs)
      where
        xs :: [Int]
xs = ([Int], Text) -> [Int]
forall a b. (a, b) -> a
fst (([Int], Text) -> [Int]) -> ([Int], Text) -> [Int]
forall a b. (a -> b) -> a -> b
$ Text -> ([Int], Text)
readCodePoints Text
cps
        (Text
x, Text
y) = (Char -> Bool) -> Text -> (Text, Text)
T.break (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'>') Text
rest
        cps :: Text
cps = (Char -> Bool) -> Text -> Text
T.dropWhile (\Char
c -> Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'>' Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
' ') Text
y
        ty :: DecompositionType
ty = case Text
x of
          Text
"font" -> DecompositionType
Font
          Text
"noBreak" -> DecompositionType
NoBreak
          Text
"initial" -> DecompositionType
Initial
          Text
"medial" -> DecompositionType
Medial
          Text
"final" -> DecompositionType
Final
          Text
"isolate" -> DecompositionType
Isolated
          Text
"circle" -> DecompositionType
Circle
          Text
"super" -> DecompositionType
Super
          Text
"sub" -> DecompositionType
Sub
          Text
"vertical" -> DecompositionType
Vertical
          Text
"wide" -> DecompositionType
Wide
          Text
"narrow" -> DecompositionType
Narrow
          Text
"small" -> DecompositionType
Small
          Text
"square" -> DecompositionType
Square
          Text
"fraction" -> DecompositionType
Fraction
          Text
"compat" -> DecompositionType
Compat
          Text
_ -> DecompositionType
Compat
    Maybe (Char, Text)
_ -> (DecompositionType
Canonical,) ([Int] -> (DecompositionType, [Int]))
-> (([Int], Text) -> [Int])
-> ([Int], Text)
-> (DecompositionType, [Int])
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([Int], Text) -> [Int]
forall a b. (a, b) -> a
fst (([Int], Text) -> (DecompositionType, [Int]))
-> ([Int], Text) -> (DecompositionType, [Int])
forall a b. (a -> b) -> a -> b
$ Text -> ([Int], Text)
readCodePoints Text
bs

parseLine :: Text -> M.IntMap UChar -> M.IntMap UChar
parseLine :: Text -> IntMap UChar -> IntMap UChar
parseLine Text
t =
  case HasCallStack => Text -> Text -> [Text]
Text -> Text -> [Text]
T.splitOn Text
";" Text
t of
    [Text
f0, Text
f1, Text
f2, Text
f3, Text
f4, Text
f5, Text
f6, Text
f7, Text
f8, Text
f9, Text
f10, Text
f11, Text
f12, Text
f13, Text
f14] ->
      Int -> UChar -> IntMap UChar -> IntMap UChar
forall a. Int -> a -> IntMap a -> IntMap a
M.insert Int
codepoint UChar
uchar
      where
        codepoint :: Int
codepoint = Text -> Int
readCodePoint Text
f0
        (DecompositionType
decompType, [Int]
decompMapping) = Text -> (DecompositionType, [Int])
parseDecomp Text
f5
        readNumericValue :: Text -> Maybe a
readNumericValue Text
x =
          case Reader a
forall a. Integral a => Reader a
TR.decimal Text
x of
            Left [Char]
_ -> Maybe a
forall a. Maybe a
Nothing
            Right (a
v, Text
_) -> a -> Maybe a
forall a. a -> Maybe a
Just a
v
        uchar :: UChar
uchar =
          UChar
            { uCodePoint :: Int
uCodePoint = Int
codepoint,
              uName :: Text
uName = Text
f1,
              uGeneralCategory :: GeneralCategory
uGeneralCategory = [Char] -> GeneralCategory
forall a. Read a => [Char] -> a
read (Text -> [Char]
T.unpack Text
f2),
              uCanonicalCombiningClass :: Int
uCanonicalCombiningClass = ([Char] -> Int)
-> ((Int, Text) -> Int) -> Either [Char] (Int, Text) -> Int
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (Int -> [Char] -> Int
forall a b. a -> b -> a
const Int
0) (Int, Text) -> Int
forall a b. (a, b) -> a
fst (Reader Int
forall a. Integral a => Reader a
TR.decimal Text
f3),
              uBidiClass :: BidiClass
uBidiClass = [Char] -> BidiClass
forall a. Read a => [Char] -> a
read (Text -> [Char]
T.unpack Text
f4),
              uDecompositionType :: DecompositionType
uDecompositionType = DecompositionType
decompType,
              uDecompositionMapping :: [Int]
uDecompositionMapping = [Int]
decompMapping,
              uNumericTypeAndValue :: (Maybe Int, Maybe Int, Maybe Int)
uNumericTypeAndValue =
                ( Text -> Maybe Int
forall {a}. Integral a => Text -> Maybe a
readNumericValue Text
f6,
                  Text -> Maybe Int
forall {a}. Integral a => Text -> Maybe a
readNumericValue Text
f7,
                  Text -> Maybe Int
forall {a}. Integral a => Text -> Maybe a
readNumericValue Text
f8
                ),
              uBidiMirrored :: Bool
uBidiMirrored = Text
f9 Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"Y",
              uUnicode1Name :: Text
uUnicode1Name = Text
f10,
              uISOComment :: Text
uISOComment = Text
f11,
              uSimpleUppercaseMapping :: Int
uSimpleUppercaseMapping = Text -> Int
readCodePoint Text
f12,
              uSimpleLowercaseMapping :: Int
uSimpleLowercaseMapping = Text -> Int
readCodePoint Text
f13,
              uSimpleTitlecaseMappping :: Int
uSimpleTitlecaseMappping = Text -> Int
readCodePoint Text
f14
            }
    [Text]
_ -> [Char] -> IntMap UChar -> IntMap UChar
forall a. HasCallStack => [Char] -> a
error ([Char] -> IntMap UChar -> IntMap UChar)
-> [Char] -> IntMap UChar -> IntMap UChar
forall a b. (a -> b) -> a -> b
$ [Char]
"Wrong number of fields in record:\n" [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ Text -> [Char]
forall a. Show a => a -> [Char]
show Text
t