{-# LANGUAGE FlexibleContexts, BangPatterns #-}

-- |Monadic Iteratees:
-- incremental input parsers, processors, and transformers
--
-- Iteratees for parsing binary data.

module Bio.Iteratee.Bytes (
  -- * Types
  Endian (..)
  -- * Endian multi-byte iteratees
  ,endianRead2
  ,endianRead3
  ,endianRead3i
  ,endianRead4
  ,endianRead8

  -- * Iteratees treating Bytes as list of Word8
  ,headStreamBS
  ,tryHeadBS
  ,peekStreamBS
  ,takeStreamBS
  ,dropStreamBS
  ,dropWhileStreamBS

  -- * Iteratees treating Bytes as list of Char
  ,enumLinesBS
  ,enumWordsBS
)
where

import Bio.Iteratee.Base
import Bio.Iteratee.Iteratee
import Bio.Prelude

import qualified Data.ByteString              as B
import qualified Data.ByteString.Char8        as C
import qualified Data.ByteString.Unsafe       as B

-- ------------------------------------------------------------------------
-- Binary Random IO Iteratees

-- Iteratees to read unsigned integers written in Big- or Little-endian ways

-- | Indicate endian-ness.
data Endian = MSB -- ^ Most Significant Byte is first (big-endian)
  | LSB           -- ^ Least Significan Byte is first (little-endian)
  deriving (Eq, Ord, Show, Enum)

endianRead2 :: Endian -> Iteratee Bytes m Word16
endianRead2 e = endianReadN e 2 word16'
{-# INLINE endianRead2 #-}

endianRead3 :: Endian -> Iteratee Bytes m Word32
endianRead3 e = endianReadN e 3 (word32' . (0:))
{-# INLINE endianRead3 #-}

-- |Read 3 bytes in an endian manner.  If the first bit is set (negative),
-- set the entire first byte so the Int32 will be negative as
-- well.
endianRead3i :: Monad m => Endian -> Iteratee Bytes m Int32
endianRead3i e = do
  c1 <- headStreamBS
  c2 <- headStreamBS
  c3 <- headStreamBS
  case e of
    MSB -> return $ (((fromIntegral c1
                        `shiftL` 8) .|. fromIntegral c2)
                        `shiftL` 8) .|. fromIntegral c3
    LSB ->
     let m :: Int32
         m = shiftR (shiftL (fromIntegral c3) 24) 8
     in return $ (((fromIntegral c3
                        `shiftL` 8) .|. fromIntegral c2)
                        `shiftL` 8) .|. fromIntegral m
{-# INLINE endianRead3i #-}

endianRead4 :: Endian -> Iteratee Bytes m Word32
endianRead4 e = endianReadN e 4 word32'
{-# INLINE endianRead4 #-}

endianRead8 :: Endian -> Iteratee Bytes m Word64
endianRead8 e = endianReadN e 8 word64'
{-# INLINE endianRead8 #-}

-- This function does all the parsing work, depending upon provided arguments
endianReadN ::
  Endian
  -> Int
  -> ([Word8] -> b)
  -> Iteratee Bytes m b
endianReadN MSB n0 cnct = liftI (step n0 [])
 where
  step !n acc (Chunk c)
    | B.null c        = liftI (step n acc)
    | B.length c >= n = let (this,next) = B.splitAt n c
                            !result     = cnct $ acc ++ B.unpack this
                        in idone result (Chunk next)
    | otherwise        = liftI (step (n - B.length c) (acc ++ B.unpack c))
  step !n acc (EOF Nothing)  = icont (step n acc) (Just $ toException EofException)
  step !n acc (EOF (Just e)) = icont (step n acc) (Just e)
endianReadN LSB n0 cnct = liftI (step n0 [])
 where
  step !n acc (Chunk c)
    | B.null c        = liftI (step n acc)
    | B.length c >= n = let (this,next) = B.splitAt n c
                            !result = cnct $ B.unpack (B.reverse this) ++ acc
                        in idone result (Chunk next)
    | otherwise        = liftI (step (n - B.length c)
                                     (B.unpack (B.reverse c) ++ acc))
  step !n acc (EOF Nothing)  = icont (step n acc)
                                    (Just $ toException EofException)
  step !n acc (EOF (Just e)) = icont (step n acc) (Just e)
{-# INLINE endianReadN #-}


word16' :: [Word8] -> Word16
word16' [c1,c2] = word16 c1 c2
word16' _ = error "iteratee: internal error in word16'"

word16 :: Word8 -> Word8 -> Word16
word16 c1 c2 = (fromIntegral c1 `shiftL`  8) .|.  fromIntegral c2
{-# INLINE word16 #-}

word32' :: [Word8] -> Word32
word32' [c1,c2,c3,c4] = word32 c1 c2 c3 c4
word32' _ = error "iteratee: internal error in word32'"

word32 :: Word8 -> Word8 -> Word8 -> Word8 -> Word32
word32 c1 c2 c3 c4 =
  (fromIntegral c1 `shiftL` 24) .|.
  (fromIntegral c2 `shiftL` 16) .|.
  (fromIntegral c3 `shiftL`  8) .|.
   fromIntegral c4
{-# INLINE word32 #-}

word64' :: [Word8] -> Word64
word64' [c1,c2,c3,c4,c5,c6,c7,c8] = word64 c1 c2 c3 c4 c5 c6 c7 c8
word64' _ = error "iteratee: internal error in word64'"
{-# INLINE word64' #-}

word64
  :: Word8 -> Word8 -> Word8 -> Word8
  -> Word8 -> Word8 -> Word8 -> Word8
  -> Word64
word64 c1 c2 c3 c4 c5 c6 c7 c8 =
  (fromIntegral c1 `shiftL` 56) .|.
  (fromIntegral c2 `shiftL` 48) .|.
  (fromIntegral c3 `shiftL` 40) .|.
  (fromIntegral c4 `shiftL` 32) .|.
  (fromIntegral c5 `shiftL` 24) .|.
  (fromIntegral c6 `shiftL` 16) .|.
  (fromIntegral c7 `shiftL`  8) .|.
   fromIntegral c8
{-# INLINE word64 #-}

headStreamBS :: Iteratee Bytes m Word8
headStreamBS = liftI step
  where
  step (Chunk c)
    | B.null c = icont step Nothing
    | otherwise  = idone (B.unsafeHead c) (Chunk (B.unsafeTail c))
  step stream          = icont step (Just (setEOF stream))
{-# INLINE headStreamBS #-}

peekStreamBS :: Iteratee Bytes m (Maybe Word8)
peekStreamBS = liftI step
  where
    step s@(Chunk vec)
      | B.null vec = liftI step
      | otherwise   = idone (Just $ B.unsafeHead vec) s
    step stream     = idone Nothing stream
{-# INLINE peekStreamBS #-}

tryHeadBS :: Iteratee Bytes m (Maybe Word8)
tryHeadBS = liftI step
  where
  step (Chunk vec)
    | B.null vec = liftI step
    | otherwise  = idone (Just (B.unsafeHead vec)) (Chunk (B.unsafeTail vec))
  step stream          = idone Nothing stream
{-# INLINE tryHeadBS #-}

dropStreamBS :: Int -> Iteratee Bytes m ()
dropStreamBS 0  = idone () (Chunk emptyP)
dropStreamBS n' = liftI (step n')
  where
    step n (Chunk str)
      | B.length str < n = liftI (step (n - B.length str))
      | otherwise        = idone () (Chunk (B.drop n str))
    step _ stream        = idone () stream
{-# INLINE dropStreamBS #-}

dropWhileStreamBS :: (Word8 -> Bool) -> Iteratee Bytes m ()
dropWhileStreamBS p = liftI step
  where
    step (Chunk str)
      | B.null rest  = liftI step
      | otherwise    = idone () (Chunk rest)
      where
        rest = B.dropWhile p str
    step stream      = idone () stream
{-# INLINE dropWhileStreamBS #-}

takeStreamBS ::
  Monad m
  => Int   -- ^ number of elements to consume
  -> Enumeratee Bytes Bytes m a
takeStreamBS n' iter
 | n' <= 0   = return iter
 | otherwise = Iteratee $ \od oc -> runIter iter (on_done od oc) (on_cont od oc)
  where
    on_done od oc x _ = runIter (dropStreamBS n' >> return (return x)) od oc
    on_cont od oc k Nothing = if n' == 0 then od (liftI k) (Chunk mempty)
                                 else runIter (liftI (step n' k)) od oc
    on_cont od oc _ (Just e) = runIter (dropStreamBS n' >> throwErr e) od oc
    step n k (Chunk str)
      | B.null str        = liftI (step n k)
      | B.length str <= n = takeStreamBS (n - B.length str) $ k (Chunk str)
      | otherwise          = idone (k (Chunk s1)) (Chunk s2)
      where (s1, s2) = B.splitAt n str
    step _n k stream       = idone (liftI k) stream
{-# INLINE takeStreamBS #-}

-- Like enumWords, but operates on ByteStrings.
-- This is provided as a higher-performance alternative to enumWords, and
-- is equivalent to treating the stream as a Data.ByteString.Char8.ByteString.
enumWordsBS :: Monad m => Enumeratee Bytes [Bytes] m a
enumWordsBS = convStream getter
  where
    getter = liftI step
    lChar = isSpace . C.last
    step (Chunk xs)
      | C.null xs  = getter
      | lChar xs   = idone (C.words xs) (Chunk C.empty)
      | otherwise  = icont (step' xs) Nothing
    step str       = idone mempty str
    step' xs (Chunk ys)
      | C.null ys  = icont (step' xs) Nothing
      | lChar ys   = idone (C.words . C.append xs $ ys) mempty
      | otherwise  = let w' = C.words . C.append xs $ ys
                         ws = init w'
                         ck = last w'
                     in idone ws (Chunk ck)
    step' xs str   = idone (C.words xs) str
{-# INLINE enumWordsBS #-}

-- Like enumLines, but operates on ByteStrings.
-- This is provided as a higher-performance alternative to enumLines, and
-- is equivalent to treating the stream as a Data.ByteString.Char8.ByteString.
enumLinesBS :: Monad m => Enumeratee Bytes [Bytes] m a
enumLinesBS = convStream getter
  where
    getter = icont step Nothing
    lChar = (== '\n') . C.last
    step (Chunk xs)
      | C.null xs  = getter
      | lChar xs   = idone (C.lines xs) (Chunk C.empty)
      | otherwise  = icont (step' xs) Nothing
    step str       = idone mempty str
    step' xs (Chunk ys)
      | C.null ys  = icont (step' xs) Nothing
      | lChar ys   = idone (C.lines . C.append xs $ ys) mempty
      | otherwise  = let w' = C.lines $ C.append xs ys
                         ws = init w'
                         ck = last w'
                     in idone ws (Chunk ck)
    step' xs str   = idone (C.lines xs) str
{-# INLINE enumLinesBS #-}