-- |
-- Module      : Streamly.Internal.Unicode.Utf8
-- Copyright   : (c) 2021 Composewell Technologies
-- License     : BSD-3-Clause
-- Maintainer  : streamly@composewell.com
-- Stability   : experimental
-- Portability : GHC

-- XXX We can move this to stream-core/streamly-unicode-core, and provide an
-- additional module in streamly-unicode for case conversions (because it
-- depends on unicode-data). Or just keep all of it in streamly-unicode
-- which will have a dependency on unicode-data.

module Streamly.Internal.Unicode.Utf8
    (
    -- * Type
      Utf8

    -- * Creation and elimination
    , pack
    , unpack
    , toArray
    )
where

--------------------------------------------------------------------------------
-- Imports
--------------------------------------------------------------------------------

import Data.Word (Word8)
import Streamly.Data.Array (Array)
import System.IO.Unsafe (unsafePerformIO)

import qualified Streamly.Data.Fold as Fold
import qualified Streamly.Data.Stream as Stream
import qualified Streamly.Internal.Data.Array as Array
    ( fromStreamN
    , read
    )
import qualified Streamly.Internal.Unicode.Stream as Unicode

--------------------------------------------------------------------------------
-- Type
--------------------------------------------------------------------------------

-- | A space efficient, packed, unboxed Unicode container.
--
-- This is a newtype wrapper around an @Array Word8@. 'pack' fills the array
-- with the UTF-8 encoding of a 'String', 'unpack' decodes it back, and
-- 'toArray' exposes the underlying bytes.
newtype Utf8 = Utf8 (Array Word8)

--------------------------------------------------------------------------------
-- Functions
--------------------------------------------------------------------------------

-- | Unwrap a 'Utf8' value, returning the underlying array of bytes.
--
-- No copying or re-encoding takes place; this only removes the newtype
-- constructor.
{-# INLINE toArray #-}
toArray :: Utf8 -> Array Word8
toArray (Utf8 arr) = arr


-- | Convert a 'String' into a 'Utf8' value by encoding its characters with
-- @Unicode.encodeUtf8'@ and collecting the resulting bytes into an array.
--
-- The array is sized by the number of encoded /bytes/, not by the number of
-- characters. @Array.fromStreamN@ keeps at most the requested number of
-- elements, so sizing it with @length s@ would silently drop the trailing
-- bytes of any string that contains a non-ASCII character.
--
-- 'unsafePerformIO' is used only to run the array-building stream in 'IO';
-- the stream is built from the pure list @s@ and nothing else.
{-# INLINEABLE pack #-}
pack :: String -> Utf8
pack s =
    Utf8
        $ unsafePerformIO
        $ Array.fromStreamN byteLen
        $ Unicode.encodeUtf8'
        $ Stream.fromList s

    where

    -- Exact number of bytes in the UTF-8 encoding of @s@.
    byteLen :: Int
    byteLen = sum (map utf8Width s)

    -- Number of bytes UTF-8 uses to encode a single code point.
    utf8Width :: Char -> Int
    utf8Width c
        | c < '\x80' = 1
        | c < '\x800' = 2
        | c < '\x10000' = 3
        | otherwise = 4

-- | Convert a 'Utf8' value back into a 'String'.
--
-- The underlying bytes are streamed out of the array, decoded with
-- @Unicode.decodeUtf8'@ and collected into a list.
--
-- 'unsafePerformIO' is used only to run the stream fold in 'IO'; the stream
-- is read from the array held by the argument and nothing else.
--
-- NOTE(review): the primed decoder presumably fails on malformed input rather
-- than substituting a replacement character -- confirm against
-- "Streamly.Internal.Unicode.Stream".
{-# INLINEABLE unpack #-}
unpack :: Utf8 -> String
unpack u =
    unsafePerformIO
        $ Stream.fold Fold.toList
        $ Unicode.decodeUtf8'
        $ Array.read
        $ toArray u