module UTF8 where

import Data.Word
import Data.Bits
import Data.Char

{-
-- Could also be imported:

import Codec.Binary.UTF8.Light as UTF8
 
encode :: Char -> [Word8]
encode c = head (UTF8.encodeUTF8' [UTF8.c2w c])

-}

-- | Encode a Haskell String to a list of Word8 values, in UTF8 format.
encode :: Char -> [Word8]
encode :: Char -> [Word8]
encode = (Int -> Word8) -> [Int] -> [Word8]
forall a b. (a -> b) -> [a] -> [b]
map Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral ([Int] -> [Word8]) -> (Char -> [Int]) -> Char -> [Word8]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> [Int]
forall a. (Ord a, Num a, Bits a) => a -> [a]
go (Int -> [Int]) -> (Char -> Int) -> Char -> [Int]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Int
ord
 where
  go :: a -> [a]
go a
oc
   | a
oc a -> a -> Bool
forall a. Ord a => a -> a -> Bool
<= a
0x7f       = [a
oc]

   | a
oc a -> a -> Bool
forall a. Ord a => a -> a -> Bool
<= a
0x7ff      = [ a
0xc0 a -> a -> a
forall a. Num a => a -> a -> a
+ (a
oc a -> Int -> a
forall a. Bits a => a -> Int -> a
`shiftR` Int
6)
                        , a
0x80 a -> a -> a
forall a. Num a => a -> a -> a
+ a
oc a -> a -> a
forall a. Bits a => a -> a -> a
.&. a
0x3f
                        ]

   | a
oc a -> a -> Bool
forall a. Ord a => a -> a -> Bool
<= a
0xffff     = [ a
0xe0 a -> a -> a
forall a. Num a => a -> a -> a
+ (a
oc a -> Int -> a
forall a. Bits a => a -> Int -> a
`shiftR` Int
12)
                        , a
0x80 a -> a -> a
forall a. Num a => a -> a -> a
+ ((a
oc a -> Int -> a
forall a. Bits a => a -> Int -> a
`shiftR` Int
6) a -> a -> a
forall a. Bits a => a -> a -> a
.&. a
0x3f)
                        , a
0x80 a -> a -> a
forall a. Num a => a -> a -> a
+ a
oc a -> a -> a
forall a. Bits a => a -> a -> a
.&. a
0x3f
                        ]
   | Bool
otherwise        = [ a
0xf0 a -> a -> a
forall a. Num a => a -> a -> a
+ (a
oc a -> Int -> a
forall a. Bits a => a -> Int -> a
`shiftR` Int
18)
                        , a
0x80 a -> a -> a
forall a. Num a => a -> a -> a
+ ((a
oc a -> Int -> a
forall a. Bits a => a -> Int -> a
`shiftR` Int
12) a -> a -> a
forall a. Bits a => a -> a -> a
.&. a
0x3f)
                        , a
0x80 a -> a -> a
forall a. Num a => a -> a -> a
+ ((a
oc a -> Int -> a
forall a. Bits a => a -> Int -> a
`shiftR` Int
6) a -> a -> a
forall a. Bits a => a -> a -> a
.&. a
0x3f)
                        , a
0x80 a -> a -> a
forall a. Num a => a -> a -> a
+ a
oc a -> a -> a
forall a. Bits a => a -> a -> a
.&. a
0x3f
                        ]