{-# LANGUAGE UndecidableInstances #-} -- naturally -- | Type-level UTF-8 codepoint conversion. module Data.Type.Symbol.Utf8 ( type SymbolToUtf8 , type CharToUtf8 , type UnicodeCodePointToUtf8 ) where import GHC.TypeLits import Raehik.Data.Type.Common ( type IfNatLte, type (++) ) -- | Convert a type-level symbol to UTF-8. -- -- Each 'Natural' in the return list is "byte-sized" (0 <= n <= 0xFF). type SymbolToUtf8 sym = SymbolToUtf8' '[] (UnconsSymbol sym) type family SymbolToUtf8' bs (mchsym :: Maybe (Char, Symbol)) where SymbolToUtf8' bs ('Just '(ch, sym)) = SymbolToUtf8' (bs ++ CharToUtf8 ch) (UnconsSymbol sym) SymbolToUtf8' bs 'Nothing = bs -- | Convert a type-level character to UTF-8. -- -- Each 'Natural' in the return list is "byte-sized" (0 <= n <= 0xFF). type CharToUtf8 ch = UnicodeCodePointToUtf8 (CharToNat ch) -- | Convert a type-level Unicode code point to UTF-8 -- -- Emits a type error if you give it an invalid codepoint. type UnicodeCodePointToUtf8 n = IfNatLte n 0x7F '[n] ( IfNatLte n 0x07FF (UCP2 n) ( IfNatLte n 0xFFFF (UCP3 n) ( IfNatLte n 0x10FFFF (UCP4 n) ( TypeError ('Text "not a Unicode codepoint: " :<>: 'ShowType n) )))) type UCP2 n = [UCP21 n, UCP22 n] type UCP21 n = 0b11000000 + (n `Div` (2^6)) type UCP22 n = 0b10000000 + (n `Mod` (2^6)) type UCP3 n = [UCP31 n, UCP32 n, UCP33 n] type UCP31 n = 0b11100000 + (n `Div` (2^12)) type UCP32 n = 0b10000000 + ((n `Div` (2^6)) `Mod` (2^6)) type UCP33 n = 0b10000000 + (n `Mod` (2^6)) type UCP4 n = [UCP41 n, UCP42 n, UCP43 n, UCP44 n] type UCP41 n = 0b11110000 + (n `Div` (2^18)) type UCP42 n = 0b10000000 + ((n `Div` (2^12)) `Mod` (2^12)) type UCP43 n = 0b10000000 + ((n `Div` (2^6)) `Mod` (2^6)) type UCP44 n = 0b10000000 + (n `Mod` (2^6))