{-# LANGUAGE MagicHash, DeriveDataTypeable, UnliftedFFITypes #-}

-- |
-- Stability   : experimental
-- Portability : GHC
--
-- This module is intended to be imported @qualified@ in order to
-- avoid name clashes with the "Prelude", "Data.ByteString", and
-- "Data.ByteString.Lazy" modules. E.g.:
--
-- > import qualified Data.ByteString.Plain as PB
--
module Data.ByteString.Plain (
    -- * The plain @ByteString@ type and representation
      ByteString
    -- * Introducing and eliminating 'ByteString's
    , empty
    , fromStrict
    , toStrict
    -- * Basic operations
    , null
    , length
    ) where

import           Control.DeepSeq (NFData(rnf))
import qualified Data.ByteString as B
import qualified Data.ByteString.Internal as B
import           Data.Hashable (Hashable(hashWithSalt))
import           Data.Typeable            (Typeable)
import           GHC.ForeignPtr
import           GHC.Prim
import           GHC.Types
import           Prelude hiding (length, null)
import           System.IO.Unsafe (unsafePerformIO)

-- |Compact heap representation a (strict) 'B.ByteString' can be (un)wrapped to/from.
--
-- This data type depends on the ordinary 'B.ByteString' type to be
-- useful but comes with a different cost-model.
--
-- This representation avoids the 'ForeignPtr' indirection, and the
-- offset/length slice representation for shared 'B.ByteString's, and
-- is therefore suitable if you need to store many small strings in a
-- data records or for use as keys in container types. On the other
-- hand, string operations on 'ByteString' would require
-- re-allocations, and thus are not supported. If you need to perform
-- such operations convert and operate on conventional 'B.ByteString's
-- instead.
--
-- This structure supports @UNPACK@, and then has an overhead
-- of only 3 words (beyond the word-padded storage of the byte-string
-- payload), as it's basically just a pointer to a
-- 'MutableByteArray#':
--
-- > data ByteString = PBS !(MutableByteArray# RealWorld)
--
-- In contrast, a single non-shared unpacked ('PlainPtr'-backed)
-- 'ByteString' field exhibits an overhead of 8 words:
--
-- > data ByteString = PS {-# UNPACK #-} !(ForeignPtr Word8) -- payload (2 words)
-- >                      {-# UNPACK #-} !Int                -- offset (1 word)
-- >                      {-# UNPACK #-} !Int                -- length (1 word)
-- >
-- > data ForeignPtr a = ForeignPtr Addr# ForeignPtrContents -- 2 words w/o info-ptr
-- >
-- > data ForeignPtrContents -- 1 word needed for info-ptr
-- >     = PlainForeignPtr {...}
-- >     | MallocPtr {...}
-- >     | PlainPtr (MutableByteArray# RealWorld)  -- common case (1 word)
-- >
-- > data MutableByteArray# s -- 2 words + payload
--
-- As an optimization, all zero-length strings are mapped to the
-- singleton 'empty' value.
data ByteString = PBS !(MutableByteArray# RealWorld)
                  deriving Typeable

-- |Singleton value the 'B.empty' 'B.ByteString' is mapped to/from.
empty :: ByteString
empty = unsafePerformIO $ do
    (ForeignPtr _ (PlainPtr mbarr#)) <- mallocPlainForeignPtrBytes 0
    return $! PBS mbarr#
{-# NOINLINE empty #-}

-- |Extract 'ByteString' from 'ByteString'
--
-- If possible, the internally used 'MutableByteArray#' is shared with
-- the original 'ByteString' in which case the conversion is cheap.
--
-- However, if necessary, a trimmed copy of the original 'ByteString'
-- will be created via 'B.copy' resulting in a newly allocated
-- 'MutableByteArray#'.
--
-- N.B.: Because strict 'B.ByteString's use pinned memory internally
-- also plain 'ByteString's use pinned memory and thereby increase the
-- potential for memory fragmentation as the garbage collector is not
-- allowed to move pinned memory areas.
--
-- Depending on the use case, it might be beneficial to apply some
-- form of memoizing to the 'fromStrict' conversion (also known as
-- <http://en.wikipedia.org/wiki/Hash_consing Hash consing> or
-- <http://en.wikipedia.org/wiki/String_interning String interning>).

fromStrict :: B.ByteString -> ByteString
fromStrict (B.PS _ _ 0) = empty
fromStrict (B.PS (ForeignPtr _ (PlainPtr mbarr#)) 0 l)
  | l' == l            = PBS mbarr#
  where
    l' = I# (sizeofMutableByteArray# mbarr#)
fromStrict bs = fromStrict (B.copy bs) -- this assumes
                    -- ByteString internals return a trimmed
                    -- ForeignPtr/PlainPtr string; otherwise there's
                    -- risk of infinite looping
{-# INLINE fromStrict #-}

-- |Convert a plain 'ByteString' back into a 'ByteString'.
--
-- This effectively wraps the plain 'ByteString' into a 'ForeignPtr'
-- and a plain 'B.ByteString' type.
toStrict :: ByteString -> B.ByteString
toStrict (PBS mbarr#) | l == 0     = B.empty
                      | otherwise  = B.PS fp 0 l
  where
    l  = I# (sizeofMutableByteArray# mbarr#)
    addr = byteArrayContents# (unsafeCoerce# mbarr#)
    fp = ForeignPtr addr (PlainPtr mbarr#)
{-# INLINE toStrict #-}

null :: ByteString -> Bool
null = (==0) . length
{-# INLINE null #-}

length :: ByteString -> Int
length (PBS mbarr#) = I# (sizeofMutableByteArray# mbarr#)
{-# INLINE length #-}

-- WHNF == NF
instance NFData ByteString where rnf x = seq x ()

-- the following instances are implement via strict 'B.ByteString's;
-- In the future "native" implementations shall be provided if they
-- result being faster.
instance Eq ByteString where
    x == y  = toStrict x == toStrict y
    {-# INLINE (==) #-}

instance Ord ByteString where
    compare x y  = compare (toStrict x) (toStrict y)
    {-# INLINE compare #-}

instance Hashable ByteString where
    hashWithSalt salt = hashWithSalt salt . toStrict
    {-# INLINE hashWithSalt #-}

-- Mostly for convenience
instance Show ByteString where
    showsPrec p rbs = showsPrec p (toStrict rbs)

instance Read ByteString where
    readsPrec p str = [ (fromStrict x, y) | (x, y) <- readsPrec p str ]