-- |
-- Module      : Basement.String.Encoding.Encoding
-- License     : BSD-style
-- Maintainer  : Foundation
-- Stability   : experimental
-- Portability : portable
--

{-# LANGUAGE FlexibleContexts #-}

module Basement.String.Encoding.Encoding
    ( Encoding(..)
    , convertFromTo
    ) where

import           Basement.Compat.Base
import           Basement.Types.OffsetSize
import           Basement.Monad
import           Basement.PrimType
import           Basement.MutableBuilder
import           Basement.Numerical.Additive
import           Basement.UArray (UArray)
import           Basement.UArray.Mutable (MUArray)
import qualified Basement.UArray as Vec

class Encoding encoding where
    -- | the unit element use for the encoding.
    -- i.e. Word8 for ASCII7 or UTF8, Word16 for UTF16...
    --
    type Unit encoding

    -- | define the type of error handling you want to use for the
    -- next function.
    --
    -- > type Error UTF8 = Either UTF8_Invalid
    --
    type Error encoding

    -- | consume an `Unit encoding` and return the Unicode point and the position
    -- of the next possible `Unit encoding`
    --
    encodingNext :: encoding
                      -- ^ only used for type deduction
                -> (Offset (Unit encoding) -> Unit encoding)
                      -- ^ method to access a given `Unit encoding`
                      -- (see `unsafeIndexer`)
                -> Offset (Unit encoding)
                      -- ^ offset of the `Unit encoding` where starts the
                      -- encoding of a given unicode
                -> Either (Error encoding) (Char, Offset (Unit encoding)) -- ^ either successfully validated the `Unit encoding`
                      -- and returned the next offset or fail with an
                      -- `Error encoding`

    -- Write a unicode point encoded into one or multiple `Unit encoding`
    --
    -- > build 64 $ sequence_ (write UTF8) "this is a simple list of char..."
    --
    encodingWrite :: (PrimMonad st, Monad st)
                  => encoding
                      -- ^ only used for type deduction
                  -> Char
                      -- ^ the unicode character to encode
                  -> Builder (UArray (Unit encoding))
                             (MUArray (Unit encoding))
                             (Unit encoding) st err ()

-- | helper to convert a given Array in a given encoding into an array
-- with another encoding.
--
-- This is a helper to convert from one String encoding to another.
-- This function is (quite) slow and needs some work.
--
-- ```
-- let s16 = ... -- string in UTF16
-- -- create s8, a UTF8 String
-- let s8  = runST $ convertWith UTF16 UTF8 (toBytes s16)
--
-- print s8
-- ```
--
convertFromTo :: ( PrimMonad st, Monad st
                 , Encoding input, PrimType (Unit input)
                 , Encoding output, PrimType (Unit output)
                 )
              => input
                -- ^ Input's encoding type
              -> output
                -- ^ Output's encoding type
              -> UArray (Unit input)
                -- ^ the input raw array
              -> st (Either (Offset (Unit input), Error input) (UArray (Unit output)))
convertFromTo :: forall (st :: * -> *) input output.
(PrimMonad st, Monad st, Encoding input, PrimType (Unit input),
 Encoding output, PrimType (Unit output)) =>
input
-> output
-> UArray (Unit input)
-> st
     (Either (Offset (Unit input), Error input) (UArray (Unit output)))
convertFromTo input
inputEncodingTy output
outputEncodingTy UArray (Unit input)
bytes
    | forall ty. UArray ty -> Bool
Vec.null UArray (Unit input)
bytes = forall (m :: * -> *) a. Monad m => a -> m a
return forall {k} (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ forall a. Monoid a => a
mempty
    | Bool
otherwise      = forall (prim :: * -> *) ty a.
(PrimMonad prim, PrimType ty) =>
UArray ty -> ((Offset ty -> ty) -> prim a) -> prim a
Vec.unsafeIndexer UArray (Unit input)
bytes forall a b. (a -> b) -> a -> b
$ \Offset (Unit input) -> Unit input
t -> forall ty (m :: * -> *) err.
(PrimType ty, PrimMonad m) =>
Int
-> Builder (UArray ty) (MUArray ty) ty m err ()
-> m (Either err (UArray ty))
Vec.builderBuild Int
64 (Offset (Unit input)
-> (Offset (Unit input) -> Unit input)
-> Builder
     (UArray (Unit output))
     (MUArray (Unit output))
     (Unit output)
     st
     (Offset (Unit input), Error input)
     ()
loop forall a. Additive a => a
azero Offset (Unit input) -> Unit input
t)
  where
    lastUnit :: CountOf (Unit input)
lastUnit = forall ty. UArray ty -> CountOf ty
Vec.length UArray (Unit input)
bytes

    loop :: Offset (Unit input)
-> (Offset (Unit input) -> Unit input)
-> Builder
     (UArray (Unit output))
     (MUArray (Unit output))
     (Unit output)
     st
     (Offset (Unit input), Error input)
     ()
loop Offset (Unit input)
off Offset (Unit input) -> Unit input
getter
      | Offset (Unit input)
off forall ty. Offset ty -> CountOf ty -> Bool
.==# CountOf (Unit input)
lastUnit = forall (m :: * -> *) a. Monad m => a -> m a
return ()
      | Bool
otherwise = case forall encoding.
Encoding encoding =>
encoding
-> (Offset (Unit encoding) -> Unit encoding)
-> Offset (Unit encoding)
-> Either (Error encoding) (Char, Offset (Unit encoding))
encodingNext input
inputEncodingTy Offset (Unit input) -> Unit input
getter Offset (Unit input)
off of
          Left Error input
err -> forall (m :: * -> *). MonadFailure m => Failure m -> m ()
mFail (Offset (Unit input)
off, Error input
err)
          Right (Char
c, Offset (Unit input)
noff) -> forall encoding (st :: * -> *) err.
(Encoding encoding, PrimMonad st, Monad st) =>
encoding
-> Char
-> Builder
     (UArray (Unit encoding))
     (MUArray (Unit encoding))
     (Unit encoding)
     st
     err
     ()
encodingWrite output
outputEncodingTy Char
c forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Offset (Unit input)
-> (Offset (Unit input) -> Unit input)
-> Builder
     (UArray (Unit output))
     (MUArray (Unit output))
     (Unit output)
     st
     (Offset (Unit input), Error input)
     ()
loop Offset (Unit input)
noff Offset (Unit input) -> Unit input
getter