{-# LANGUAGE MagicHash #-}
module Basement.String.Encoding.UTF16
( UTF16(..)
, UTF16_Invalid(..)
) where
import GHC.Prim
import GHC.Word
import GHC.Types
import Data.Bits
import qualified Prelude
import Basement.Compat.Base
import Basement.Types.OffsetSize
import Basement.Monad
import Basement.Numerical.Additive
import Basement.UArray
import Basement.UArray.Mutable (MUArray)
import Basement.MutableBuilder
import Basement.String.Encoding.Encoding
data UTF16_Invalid
= InvalidContinuation
| InvalidUnicode Char
deriving (Show, Eq, Typeable)
instance Exception UTF16_Invalid
data UTF16 = UTF16
instance Encoding UTF16 where
type Unit UTF16 = Word16
type Error UTF16 = UTF16_Invalid
encodingNext _ = next
encodingWrite _ = write
next :: (Offset Word16 -> Word16)
-> Offset Word16
-> Either UTF16_Invalid (Char, Offset Word16)
next getter off
| h < 0xd800 = Right (toChar hh, off + Offset 1)
| h >= 0xe000 = Right (toChar hh, off + Offset 1)
| otherwise = nextContinuation
where
h :: Word16
!h@(W16# hh) = getter off
toChar :: Word# -> Char
toChar w = C# (chr# (word2Int# w))
to32 :: Word16 -> Word32
to32 (W16# w) = W32# w
nextContinuation
| cont >= 0xdc00 && cont < 0xe00 =
let !(W32# w) = ((to32 h .&. 0x3ff) `shiftL` 10)
.|. (to32 cont .&. 0x3ff)
in Right (toChar w, off + Offset 2)
| otherwise = Left InvalidContinuation
where
cont :: Word16
!cont = getter $ off + Offset 1
write :: (PrimMonad st, Monad st)
=> Char
-> Builder (UArray Word16) (MUArray Word16) Word16 st err ()
write c
| c < toEnum 0xd800 = builderAppend $ w16 c
| c > toEnum 0x10000 = let (w1, w2) = wHigh c in builderAppend w1 >> builderAppend w2
| c > toEnum 0x10ffff = throw $ InvalidUnicode c
| c >= toEnum 0xe000 = builderAppend $ w16 c
| otherwise = throw $ InvalidUnicode c
where
w16 :: Char -> Word16
w16 (C# ch) = W16# (int2Word# (ord# ch))
to16 :: Word32 -> Word16
to16 = Prelude.fromIntegral
wHigh :: Char -> (Word16, Word16)
wHigh (C# ch) =
let v = W32# (minusWord# (int2Word# (ord# ch)) 0x10000##)
in (0xdc00 .|. to16 (v `shiftR` 10), 0xd800 .|. to16 (v .&. 0x3ff))