{-# LANGUAGE OverloadedStrings #-}

-- |
--
-- This module builds on module "Control.Foldl.Text", adding stateful
-- transducers and grouping operations.
module Control.Foldl.Transduce.Text (
        -- * Decoding transducers
        decoder
    ,   utf8
    ,   utf8lenient 
    ,   utf8strict
    ,   decoderE
    ,   utf8E
        -- * Other transducers
    ,   newline
    ,   stripStart
    ,   stripEnd
        -- * Splitters
    ,   lines
    ) where

import Prelude hiding (lines)
import Data.Char
import qualified Data.ByteString as B
import qualified Data.Text 
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import qualified Data.Text.Encoding.Error as T
import Control.Applicative
import Control.Monad.Trans.Except
import Control.Monad.IO.Class
import Control.Exception.Base 
import qualified Control.Foldl.Transduce as L
import Control.Foldl.Transduce.Internal (Pair(..))

{- $setup

>>> import Data.String hiding (lines)
>>> import Data.Text (Text)
>>> import Control.Applicative
>>> import Control.Monad.Trans.Except
>>> import qualified Control.Foldl as L
>>> import Control.Foldl.Transduce

-}

{-| Builds a decoding 'Transducer' out of a stream-oriented decoding function
    from "Data.Text.Encoding" and an error handler from
    "Data.Text.Encoding.Error".		

-}
decoder :: (B.ByteString -> T.Decoding) -> T.OnDecodeError -> L.Transducer B.ByteString T.Text ()
decoder _step onLeftovers = L.Transducer step (Pair mempty _step) done
    where
    step (Pair _ next) i = 
        let 
            T.Some txt leftovers next' = next i 
        in
        (Pair leftovers next', [txt])
    done (Pair leftovers _) = 
        if B.null leftovers
            then ((), [])
            else ((), foldMap (pure . T.singleton) onLeftovers')
    onLeftovers' = onLeftovers "leftovers" Nothing

{-| Builds a UTF8-decoding 'Transducer'. Takes an error handler from
    "Data.Text.Encoding.Error".		

-}
utf8 :: T.OnDecodeError -> L.Transducer B.ByteString T.Text ()
utf8 onDecodeError = 
    decoder (T.streamDecodeUtf8With onDecodeError)  onDecodeError

{-| UTF8-decoding 'Transducer' that replaces invalid input bytes with the
    Unicode replacement character U+FFFD.

>>> L.fold (transduce utf8lenient L.list) (map fromString ["decode","this"])
["decode","this"]

>>> L.fold (transduce utf8lenient L.list) (map fromString ["across \xe2","\x98\x83 boundaries"])
["across ","\9731 boundaries"]

>>> L.fold (transduce utf8lenient L.list) (map fromString ["invalid \xc3\x28 sequence"])
["invalid \65533 sequence"]

>>> L.fold (transduce utf8lenient L.list) (map fromString ["incomplete \xe2"])
["incomplete ","\65533"]
-}
utf8lenient :: L.Transducer B.ByteString T.Text ()
utf8lenient = utf8 T.lenientDecode

{-| __/BEWARE!/__ 
    This 'Transducer' may throw 'UnicodeException'.
    __/BEWARE!/__ 

>>> L.fold (transduce utf8strict L.list) (map fromString ["invalid \xc3\x28 sequence"])
*** Exception: Cannot decode byte '\x28': Data.Text.Internal.Encoding.streamDecodeUtf8With: Invalid UTF-8 stream

>>> L.fold (transduce utf8strict L.list) (map fromString ["incomplete \xe2"])
*** Exception: Cannot decode input: leftovers
-}
utf8strict :: L.Transducer B.ByteString T.Text ()
utf8strict = utf8 T.strictDecode

{-| Similar to 'decoder', but catches 'UnicodeException' in 'IO' and uses
    'Control.Monad.Trans.Except' to communicate the error.		

-}
decoderE :: MonadIO m
         => (T.OnDecodeError -> B.ByteString -> T.Decoding)
         -> L.TransducerM (ExceptT T.UnicodeException m) B.ByteString T.Text ()   
decoderE next = L.TransducerM step (pure (Pair mempty next')) done
    where
        step (Pair _ next1) i = do
            emc <- liftIO . try . evaluate $ next1 i 
            case emc of 
                Left ue -> do
                    throwE ue
                Right (T.Some txt leftovers next2) -> do
                    return (Pair leftovers next2, [txt])
        done (Pair leftovers _) = do
            if B.null leftovers
                then return ((), [])
                else do
                    emc <- liftIO . try . evaluate $ onLeftovers'
                    case emc of
                        Left ue -> do
                            throwE ue
                        Right mc -> do
                            return ((), foldMap (pure . T.singleton) mc)
        next' = next T.strictDecode  
        onLeftovers' = T.strictDecode "leftovers" Nothing

{-| Like 'utf8strict', but catches 'UnicodeException' in 'IO' and uses
    'Control.Monad.Trans.Except' to communicate the error.		

>>> runExceptT $ L.foldM (transduceM utf8E (L.generalize L.list)) (map fromString ["invalid \xc3\x28 sequence"])
Left Cannot decode byte '\x28': Data.Text.Internal.Encoding.streamDecodeUtf8With: Invalid UTF-8 stream

>>> runExceptT $ L.foldM (transduceM utf8E (L.generalize L.list)) (map fromString ["incomplete \xe2"])
Left Cannot decode input: leftovers
-}
utf8E :: MonadIO m => L.TransducerM (ExceptT T.UnicodeException m) B.ByteString T.Text ()   
utf8E = decoderE T.streamDecodeUtf8With

{-| Appends a newline at the end of the stream.		

>>> L.fold (transduce newline L.list) (map T.pack ["without","newline"])
["without","newline","\n"]
-}
newline :: L.Transducer T.Text T.Text ()
newline = L.surround [] ["\n"]

blank :: T.Text -> Bool
blank = Data.Text.all isSpace

{-| Remove leading white space from a stream of 'Text'.		

>>> L.fold (transduce stripStart L.list) (map T.pack ["   ","", "   text "])
["text "]
-}
stripStart :: L.Transducer T.Text T.Text ()
stripStart = L.Transducer step False done
    where
        step True i = (True, [i])
        step False i =
            if blank i 
                then (False, [])
                else (True, [T.stripStart i])
        done _  = ((),[])

{-| Remove trailing white space from a stream of 'Text'.		

    __/BEWARE!/__ 
    This function naively accumulates in memory any arriving "blank blocks" of
    text until a non-blank block or end-of-stream arrives, and therefore it is
    potentially dangerous. Do not use with untrusted inputs.

>>> L.fold (transduce stripEnd L.list) (map T.pack [" ", " \n  text ", "   ", "" , " "])
[" "," \n  text"]
-}
stripEnd :: L.Transducer T.Text T.Text ()
stripEnd = L.Transducer step [] done
    where
        step txts i =
            if blank i
                -- dangerous!
                then (i:txts, [])
                else ([i], reverse txts)
        done txts = case reverse txts of
            txt : _ -> ((), [T.stripEnd txt])
            _ -> ((), [])

{-| Splits a stream into lines, removing the newlines.

>>> L.fold (L.groups lines id L.list) (map T.pack ["line 1\n line 2\n"])
["line 1"," line 2"]

>>> L.fold (L.groups lines (transduce newline) L.list) (map T.pack ["line 1\n line 2\n"])
["line 1","\n"," line 2","\n"]
-}
lines :: L.Splitter T.Text
lines = L.Splitter step False done 
    where
        step previousnl txt | Data.Text.null txt = (previousnl,[],[]) 
        step previousnl txt = do
            let
                lastc = Data.Text.last txt == '\n'
                txts = T.lines txt
            case (previousnl,txts) of
                (_,[]) -> error "never happens"
                (True,_) -> (lastc, [], map pure txts)
                (False,t:ts) -> (lastc, [t], map pure ts)
        done _ = []