{-# LANGUAGE DeriveDataTypeable #-}
{- | This module implements UTF-32 encoding and decoding.
     See <http://en.wikipedia.org/wiki/UTF-32> for more information.
 -}
module Data.Encoding.UTF32
    (UTF32(..))
    where

import Data.Encoding.Base
import Data.Encoding.ByteSink
import Data.Encoding.ByteSource
import Data.Encoding.Exception

import Data.Char
import Data.Typeable


data UTF32
	= UTF32		-- ^ Detects big or little endian through the use of the BOM (Byte Order Mask) character. Defaults to big endian if not present.
	| UTF32BE	-- ^ Encodes and decodes using the big endian encoding.
	| UTF32LE	-- ^ Encodes and decodes using the little endian encoding.
	deriving (UTF32 -> UTF32 -> Bool
(UTF32 -> UTF32 -> Bool) -> (UTF32 -> UTF32 -> Bool) -> Eq UTF32
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: UTF32 -> UTF32 -> Bool
$c/= :: UTF32 -> UTF32 -> Bool
== :: UTF32 -> UTF32 -> Bool
$c== :: UTF32 -> UTF32 -> Bool
Eq,Int -> UTF32 -> ShowS
[UTF32] -> ShowS
UTF32 -> String
(Int -> UTF32 -> ShowS)
-> (UTF32 -> String) -> ([UTF32] -> ShowS) -> Show UTF32
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [UTF32] -> ShowS
$cshowList :: [UTF32] -> ShowS
show :: UTF32 -> String
$cshow :: UTF32 -> String
showsPrec :: Int -> UTF32 -> ShowS
$cshowsPrec :: Int -> UTF32 -> ShowS
Show,Typeable)

instance Encoding UTF32 where
    encodeChar :: forall (m :: * -> *). ByteSink m => UTF32 -> Char -> m ()
encodeChar UTF32
UTF32LE Char
ch = Word32 -> m ()
forall (m :: * -> *). ByteSink m => Word32 -> m ()
pushWord32le (Int -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word32) -> Int -> Word32
forall a b. (a -> b) -> a -> b
$ Char -> Int
ord Char
ch)
    encodeChar UTF32
_ Char
ch = Word32 -> m ()
forall (m :: * -> *). ByteSink m => Word32 -> m ()
pushWord32be (Int -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word32) -> Int -> Word32
forall a b. (a -> b) -> a -> b
$ Char -> Int
ord Char
ch)
    decodeChar :: forall (m :: * -> *). ByteSource m => UTF32 -> m Char
decodeChar UTF32
UTF32LE = do
                          Word32
wrd <- m Word32
forall (m :: * -> *). ByteSource m => m Word32
fetchWord32le
                          Char -> m Char
forall (m :: * -> *) a. Monad m => a -> m a
return (Char -> m Char) -> Char -> m Char
forall a b. (a -> b) -> a -> b
$ Int -> Char
chr (Int -> Char) -> Int -> Char
forall a b. (a -> b) -> a -> b
$ Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word32
wrd
    decodeChar UTF32
_ = do
                          Word32
wrd <- m Word32
forall (m :: * -> *). ByteSource m => m Word32
fetchWord32be
                          Char -> m Char
forall (m :: * -> *) a. Monad m => a -> m a
return (Char -> m Char) -> Char -> m Char
forall a b. (a -> b) -> a -> b
$ Int -> Char
chr (Int -> Char) -> Int -> Char
forall a b. (a -> b) -> a -> b
$ Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word32
wrd
    encode :: forall (m :: * -> *). ByteSink m => UTF32 -> String -> m ()
encode UTF32
UTF32 String
str = do
      UTF32 -> Char -> m ()
forall enc (m :: * -> *).
(Encoding enc, ByteSink m) =>
enc -> Char -> m ()
encodeChar UTF32
UTF32 Char
'\xFEFF'
      (Char -> m ()) -> String -> m ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ (UTF32 -> Char -> m ()
forall enc (m :: * -> *).
(Encoding enc, ByteSink m) =>
enc -> Char -> m ()
encodeChar UTF32
UTF32) String
str
    encode UTF32
enc String
str = (Char -> m ()) -> String -> m ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ (UTF32 -> Char -> m ()
forall enc (m :: * -> *).
(Encoding enc, ByteSink m) =>
enc -> Char -> m ()
encodeChar UTF32
enc) String
str

    decode :: forall (m :: * -> *). ByteSource m => UTF32 -> m String
decode UTF32
UTF32 = do
      Word32
ch <- m Word32
forall (m :: * -> *). ByteSource m => m Word32
fetchWord32be
      case Word32
ch of
        Word32
0x0000FEFF -> m Bool -> m Char -> m String
forall (m :: * -> *) a. Monad m => m Bool -> m a -> m [a]
untilM m Bool
forall (m :: * -> *). ByteSource m => m Bool
sourceEmpty (UTF32 -> m Char
forall enc (m :: * -> *).
(Encoding enc, ByteSource m) =>
enc -> m Char
decodeChar UTF32
UTF32BE)
        Word32
0xFFFE0000 -> m Bool -> m Char -> m String
forall (m :: * -> *) a. Monad m => m Bool -> m a -> m [a]
untilM m Bool
forall (m :: * -> *). ByteSource m => m Bool
sourceEmpty (UTF32 -> m Char
forall enc (m :: * -> *).
(Encoding enc, ByteSource m) =>
enc -> m Char
decodeChar UTF32
UTF32LE)
        Word32
_ -> do
          String
rest <- m Bool -> m Char -> m String
forall (m :: * -> *) a. Monad m => m Bool -> m a -> m [a]
untilM m Bool
forall (m :: * -> *). ByteSource m => m Bool
sourceEmpty (UTF32 -> m Char
forall enc (m :: * -> *).
(Encoding enc, ByteSource m) =>
enc -> m Char
decodeChar UTF32
UTF32)
          String -> m String
forall (m :: * -> *) a. Monad m => a -> m a
return ((Int -> Char
chr (Int -> Char) -> Int -> Char
forall a b. (a -> b) -> a -> b
$ Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word32
ch)Char -> ShowS
forall a. a -> [a] -> [a]
:String
rest)
    decode UTF32
enc = m Bool -> m Char -> m String
forall (m :: * -> *) a. Monad m => m Bool -> m a -> m [a]
untilM m Bool
forall (m :: * -> *). ByteSource m => m Bool
sourceEmpty (UTF32 -> m Char
forall enc (m :: * -> *).
(Encoding enc, ByteSource m) =>
enc -> m Char
decodeChar UTF32
enc)
    encodeable :: UTF32 -> Char -> Bool
encodeable UTF32
_ Char
_ = Bool
True