-- | A module that extends the builder monoid from BlazeHtml with a number of
-- functions to insert unicode as UTF-8.
--
module Text.Blaze.Builder.Utf8
    ( 
      -- * Custom writes to the builder
      writeChar

      -- * Creating builders
    , fromChar
    , fromString
    , fromText
    ) where

import Foreign
import Data.Char (ord)
import Data.Monoid (mempty, mappend)

import Data.Text (Text)
import qualified Data.Text as T

import Text.Blaze.Builder.Core

-- | Write a Unicode character, encoding it as UTF-8.
--
writeChar :: Char   -- ^ Character to write
          -> Write  -- ^ Resulting write
writeChar = encodeCharUtf8 f1 f2 f3 f4
  where
    f1 x = Write 1 $ \ptr -> poke ptr x

    f2 x1 x2 = Write 2 $ \ptr -> do poke ptr x1
                                    poke (ptr `plusPtr` 1) x2

    f3 x1 x2 x3 = Write 3 $ \ptr -> do poke ptr x1
                                       poke (ptr `plusPtr` 1) x2
                                       poke (ptr `plusPtr` 2) x3

    f4 x1 x2 x3 x4 = Write 4 $ \ptr -> do poke ptr x1
                                          poke (ptr `plusPtr` 1) x2
                                          poke (ptr `plusPtr` 2) x3
                                          poke (ptr `plusPtr` 3) x4
{-# INLINE writeChar #-}

-- | Encode a Unicode character to another datatype, using UTF-8. This function
-- acts as an abstract way of encoding characters, as it is unaware of what
-- needs to happen with the resulting bytes: you have to specify functions to
-- deal with those.
--
encodeCharUtf8 :: (Word8 -> a)                             -- ^ 1-byte UTF-8
               -> (Word8 -> Word8 -> a)                    -- ^ 2-byte UTF-8
               -> (Word8 -> Word8 -> Word8 -> a)           -- ^ 3-byte UTF-8
               -> (Word8 -> Word8 -> Word8 -> Word8 -> a)  -- ^ 4-byte UTF-8
               -> Char                                     -- ^ Input 'Char'
               -> a                                        -- ^ Result
encodeCharUtf8 f1 f2 f3 f4 c = case ord c of
    x | x <= 0x7F -> f1 $ fromIntegral x
      | x <= 0x07FF ->
           let x1 = fromIntegral $ (x `shiftR` 6) + 0xC0
               x2 = fromIntegral $ (x .&. 0x3F)   + 0x80
           in f2 x1 x2
      | x <= 0xFFFF ->
           let x1 = fromIntegral $ (x `shiftR` 12) + 0xE0
               x2 = fromIntegral $ ((x `shiftR` 6) .&. 0x3F) + 0x80
               x3 = fromIntegral $ (x .&. 0x3F) + 0x80
           in f3 x1 x2 x3
      | otherwise ->
           let x1 = fromIntegral $ (x `shiftR` 18) + 0xF0
               x2 = fromIntegral $ ((x `shiftR` 12) .&. 0x3F) + 0x80
               x3 = fromIntegral $ ((x `shiftR` 6) .&. 0x3F) + 0x80
               x4 = fromIntegral $ (x .&. 0x3F) + 0x80
           in f4 x1 x2 x3 x4
{-# INLINE encodeCharUtf8 #-}

-- | An unescaped, utf8 encoded character.
--
fromChar :: Char     -- ^ 'Char' to insert
         -> Builder  -- ^ Resulting 'Builder'
fromChar = writeSingleton writeChar

-- | A list of unescaped, utf8 encoded characters.
--
fromString :: String   -- ^ 'String' to insert
           -> Builder  -- ^ Resulting 'Builder'
fromString = writeList writeChar

-- | Create an UTF-8 encoded 'Builder' from some 'Text'.
--
fromText :: Text     -- ^ 'Text' to insert
         -> Builder  -- ^ Resulting 'Builder'
fromText = writeSingleton (T.foldl (\w c -> w `mappend` writeChar c) mempty)