{-# LANGUAGE ScopedTypeVariables, FlexibleContexts #-}

-- | Utilities for Char-based iteratee processing.

module Bio.Iteratee.Char (
  -- * Word and Line processors
  printLines
  ,printLinesUnterminated
  ,enumLines
  ,enumLinesBS
  ,enumWords
  ,enumWordsBS
)

where

import           Bio.Iteratee.Iteratee
import           Bio.Iteratee.ListLike
import           Data.Char
import           Data.Monoid
import qualified Data.ListLike as LL
import           Control.Monad (liftM)
import           Control.Monad.IO.Class
import qualified Data.ByteString.Char8 as BC
import           Prelude


-- |Print lines as they are received. This is the first `impure' iteratee
-- with non-trivial actions during chunk processing
--
--  Only lines ending with a newline are printed,
--  data terminated with EOF is not printed.
printLines :: Iteratee String IO ()
printLines = lines'
 where
  lines' = breakStream (\c -> c == '\r' || c == '\n') >>= \l -> terminators >>= check l
  check _  0 = return ()
  check "" _ = return ()
  check l  _ = liftIO (putStrLn l) >> lines'

-- |Print lines as they are received.
--
--  All lines are printed, including a line with a terminating EOF.
--  If the final line is terminated by EOF without a newline,
--  no newline is printed.
--  this function should be used in preference to printLines when possible,
--  as it is more efficient with long lines.
printLinesUnterminated :: forall s el.
                       (Eq el, Nullable s, LL.StringLike s, LL.ListLike s el)
                       => Iteratee s IO ()
printLinesUnterminated = lines'
 where
  lines' = do
    joinI $ breakE (`LL.elem` t1) (mapChunksM_ (putStr . LL.toString))
    terminators >>= check
  check 0 = return ()
  check _ = liftIO (putStrLn "") >> lines'
  t1 :: s
  t1 = LL.fromString "\r\n"

terminators :: (Eq el, Nullable s, LL.StringLike s, LL.ListLike s el)
            => Iteratee s IO Int
terminators = do
  l <- heads (LL.fromString "\r\n")
  if l == 0 then heads (LL.fromString "\n") else return l


-- |Convert the stream of characters to the stream of lines, and
-- apply the given iteratee to enumerate the latter.
-- The stream of lines is normally terminated by the empty line.
-- When the stream of characters is terminated, the stream of lines
-- is also terminated.
-- This is the first proper iteratee-enumerator: it is the iteratee of the
-- character stream and the enumerator of the line stream.

enumLines
  :: (LL.ListLike s el, LL.StringLike s, Nullable s, Monad m) =>
     Enumeratee s [s] m a
enumLines = convStream getter
  where
    getter = icont step Nothing
    lChar = (== '\n') . last . LL.toString
    step (Chunk xs)
      | LL.null xs = getter
      | lChar xs   = idone (LL.lines xs) mempty
      | otherwise  = icont (step' xs) Nothing
    step _str      = getter
    step' xs (Chunk ys)
      | LL.null ys = icont (step' xs) Nothing
      | lChar ys   = idone (LL.lines . mappend xs $ ys) mempty
      | otherwise  = let w' = LL.lines $ mappend xs ys
                         ws = init w'
                         ck = last w'
                     in idone ws (Chunk ck)
    step' xs str   = idone (LL.lines xs) str

-- |Convert the stream of characters to the stream of words, and
-- apply the given iteratee to enumerate the latter.
-- Words are delimited by white space.
-- This is the analogue of List.words
enumWords :: (LL.ListLike s Char, Nullable s, Monad m) => Enumeratee s [s] m a
enumWords = convStream $ dropWhileStream isSpace >> liftM (:[]) (breakStream isSpace)
{-# INLINE enumWords #-}

-- Like enumWords, but operates on ByteStrings.
-- This is provided as a higher-performance alternative to enumWords, and
-- is equivalent to treating the stream as a Data.ByteString.Char8.ByteString.
enumWordsBS
  :: (Monad m) => Enumeratee BC.ByteString [BC.ByteString] m a
enumWordsBS iter = convStream getter iter
  where
    getter = liftI step
    lChar = isSpace . BC.last
    step (Chunk xs)
      | BC.null xs = getter
      | lChar xs   = idone (BC.words xs) (Chunk BC.empty)
      | otherwise  = icont (step' xs) Nothing
    step str       = idone mempty str
    step' xs (Chunk ys)
      | BC.null ys = icont (step' xs) Nothing
      | lChar ys   = idone (BC.words . BC.append xs $ ys) mempty
      | otherwise  = let w' = BC.words . BC.append xs $ ys
                         ws = init w'
                         ck = last w'
                     in idone ws (Chunk ck)
    step' xs str   = idone (BC.words xs) str

{-# INLINE enumWordsBS #-}

-- Like enumLines, but operates on ByteStrings.
-- This is provided as a higher-performance alternative to enumLines, and
-- is equivalent to treating the stream as a Data.ByteString.Char8.ByteString.
enumLinesBS :: (Monad m) => Enumeratee BC.ByteString [BC.ByteString] m a
enumLinesBS = convStream getter
  where
    getter = icont step Nothing
    lChar = (== '\n') . BC.last
    step (Chunk xs)
      | BC.null xs = getter
      | lChar xs   = idone (BC.lines xs) (Chunk BC.empty)
      | otherwise  = icont (step' xs) Nothing
    step str       = idone mempty str
    step' xs (Chunk ys)
      | BC.null ys = icont (step' xs) Nothing
      | lChar ys   = idone (BC.lines . BC.append xs $ ys) mempty
      | otherwise  = let w' = BC.lines $ BC.append xs ys
                         ws = init w'
                         ck = last w'
                     in idone ws (Chunk ck)
    step' xs str   = idone (BC.lines xs) str