{-# LANGUAGE RankNTypes #-}

-- | This module uses the stream decoding functions from
--  <http://hackage.haskell.org/package/streaming-commons streaming-commons>
--  package to define decoding functions and lenses.  The exported names
--  conflict with names in @Data.Text.Encoding@ but not with the @Prelude@
module Pipes.Text.Encoding
  ( -- * Decoding ByteStrings and Encoding Texts

    -- ** Simple usage
    -- $usage

    -- ** Lens usage
    -- $lenses

    -- * Basic lens operations
    Codec,
    decode,
    eof,

    -- * Decoding lenses
    utf8,
    utf8Pure,
    utf16LE,
    utf16BE,
    utf32LE,
    utf32BE,

    -- * Non-lens decoding functions
    -- $decoders
    decodeUtf8,
    decodeUtf8Pure,
    decodeUtf16LE,
    decodeUtf16BE,
    decodeUtf32LE,
    decodeUtf32BE,

    -- * Re-encoding functions
    -- $encoders
    encodeUtf8,
    encodeUtf16LE,
    encodeUtf16BE,
    encodeUtf32LE,
    encodeUtf32BE,

    -- * Functions for latin and ascii text
    -- $ascii
    encodeAscii,
    decodeAscii,
    encodeIso8859_1,
    decodeIso8859_1,
  )
where

import Control.Monad (join)
import Data.ByteString as B
import Data.ByteString.Char8 as B8
import Data.Char (ord)
import Data.Functor.Constant (Constant (..))
import Data.Streaming.Text (DecodeResult (..))
import qualified Data.Streaming.Text as Stream
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import Pipes

-- $usage
--    Encoding is of course simple. Given
--
-- >   text :: Producer Text IO ()
--
--    we can encode it with @Data.Text.Encoding.encodeUtf8@
--
-- >   TE.encodeUtf8 :: Text -> ByteString
--
--    and ordinary pipe operations:
--
-- >   text >-> P.map TE.encodeUtf8 :: Producer.ByteString IO ()
--
--    or, equivalently
--
-- >   for text (yield . TE.encodeUtf8)
--
--    But, using this module, we might use
--
-- >   encodeUtf8 :: Text -> Producer ByteString m ()
--
--    to write
--
-- >   for text encodeUtf8 :: Producer.ByteString IO ()
--
--    All of the above come to the same.
--
--
--    Given
--
-- >   bytes :: Producer ByteString IO ()
--
--    we can apply a decoding function from this module:
--
-- >   decodeUtf8 bytes :: Producer Text IO (Producer ByteString IO ())
--
--    The Text producer ends wherever decoding first fails. The un-decoded
--    material is returned. If we are confident it is of no interest, we can
--    write:
--
-- >   void $ decodeUtf8 bytes :: Producer Text IO ()
--
--    Thus we can re-encode
--    as uft8 as much of our byte stream as is decodeUtf16BE decodable, with, e.g.
--
-- >   for (decodeUtf16BE bytes) encodeUtf8 :: Producer ByteString IO (Producer ByteString IO ())
--
--    The bytestring producer that is returned begins with where utf16BE decoding
--    failed; if it didn't fail the producer is empty.

-- $lenses
--    We get a bit more flexibility, particularly in the use of pipes-style "parsers",
--    if we use a lens like @utf8@ or @utf16BE@
--    that focusses on the text in an appropriately encoded byte stream.
--
-- >   type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a)
--
--    is just an alias for a Prelude type.  We abbreviate this further, for our use case, as
--
-- >   type Codec
-- >     =  forall m r .  Monad m => Lens' (Producer ByteString m r) (Producer Text m (Producer ByteString m r))
--
--    and call the decoding lenses @utf8@, @utf16BE@ \"codecs\", since they can
--    re-encode what they have decoded.  Thus you use any particular codec with
--    the @view@ / @(^.)@ , @zoom@ and @over@ functions from the standard lens libraries;
--    <http://hackage.haskell.org/package/lens lens>,
--    <http://hackage.haskell.org/package/lens-family lens-family>,
--    <http://hackage.haskell.org/package/lens-simple lens-simple>, or one of the
--    and <http://hackage.haskell.org/package/microlens microlens> packages will all work
--    the same, since we already have access to the types they require.
--
--    Each decoding lens looks into a byte stream that is supposed to contain text.
--    The particular lenses are named in accordance with the expected
--    encoding, 'utf8', 'utf16LE' etc. To turn a such a lens or @Codec@
--    into an ordinary function, use @view@ / @(^.)@ -- here also called 'decode':
--
-- >   view utf8 :: Producer ByteString m r -> Producer Text m (Producer ByteString m r)
-- >   decode utf8 Byte.stdin :: Producer Text IO (Producer ByteString IO r)
-- >   Bytes.stdin ^. utf8 ::  Producer Text IO (Producer ByteString IO r)
--
--    Of course, we could always do this with the specialized decoding functions, e.g.
--
-- >   decodeUtf8 ::  Producer ByteString m r -> Producer Text m (Producer ByteString m r)
-- >   decodeUtf8 Byte.stdin :: Producer Text IO (Producer ByteString IO r)
--
--    As with these functions, the stream of text that a @Codec@ \'sees\'
--    in the stream of bytes begins at its head.
--    At any point of decoding failure, the stream of text ends and reverts to (returns)
--    the original byte stream. Thus if the first bytes are already
--    un-decodable, the whole ByteString producer will be returned, i.e.
--
-- >   view utf8 bad_bytestream
--
--    will just come to the same as
--
-- >   return bad_bytestream
--
--    Where there is no decoding failure, the return value of the text stream will be
--    an empty byte stream followed by its own return value.  In all cases you must
--    deal with the fact that it is a /ByteString producer/ that is returned, even if
--    it can be thrown away with @Control.Monad.void@
--
-- >   void (Bytes.stdin ^. utf8) :: Producer Text IO ()
--
--    The @eof@ lens permits you to pattern match: if there is a Right value,
--    it is the leftover bytestring producer, if there is a Right value, it
--    is the return value of the original bytestring producer:
--
-- >   Bytes.stdin ^. utf8 . eof :: Producer Text IO (Either (Producer ByteString IO IO) ())
--
--    Thus for the stream of un-decodable bytes mentioned above,
--
-- >   view (utf8 . eof) bad_bytestream
--
--    will be the same as
--
-- >   return (Left bad_bytestream)
--
--    @zoom utf8@ converts a Text parser into a ByteString parser:
--
-- >   zoom utf8 drawChar :: Monad m => StateT (Producer ByteString m r) m (Maybe Char)
--
--    or, using the type synonymn from @Pipes.Parse@:
--
-- >   zoom utf8 drawChar :: Monad m => Parser ByteString m (Maybe Char)
--
--    Thus we can define a ByteString parser (in the pipes-parse sense) like this:
--
-- >   charPlusByte :: Parser ByteString m (Maybe Char, Maybe Word8)))
-- >   charPlusByte = do char_ <- zoom utf8 Text.drawChar
-- >                     byte_ <- Bytes.peekByte
-- >                     return (char_, byte_)
--
--     Though @charPlusByte@ is partly defined with a Text parser 'drawChar';
--     but it is a ByteString parser; it will return the first valid utf8-encoded
--     Char in a ByteString, /whatever its byte-length/,
--     and the first byte following, if both exist. Because
--     we \'draw\' one and \'peek\' at the other, the parser as a whole only
--     advances one Char's length along the bytestring, whatever that length may be.
--     See the slightly more complex example \'decode.hs\' in the
--     <http://www.haskellforall.com/2014/02/pipes-parse-30-lens-based-parsing.html#batteries-included haskellforall blog>
--     discussion of this type of byte stream parsing.

type Lens' a b = forall f. Functor f => (b -> f b) -> (a -> f a)

type Codec =
  forall m r.
  Monad m =>
  Lens'
    (Producer ByteString m r)
    (Producer Text m (Producer ByteString m r))

-- | @decode@ is just the ordinary @view@ or @(^.)@ of the lens libraries;
--   exported here under a name appropriate to the material.
--   Thus given a bytestring producer called @bytes@ we have
--
-- >    decode utf8 bytes :: Producer Text IO (Producer ByteString IO ())
--
--    All of these are thus the same:
--
-- >    decode utf8 bytes
-- >    view utf8 bytes
-- >    bytes ^. utf8
-- >    decodeUtf8 bytes
decode :: ((b -> Constant b b) -> (a -> Constant b a)) -> a -> b
decode :: ((b -> Constant b b) -> a -> Constant b a) -> a -> b
decode (b -> Constant b b) -> a -> Constant b a
codec a
a = Constant b a -> b
forall a k (b :: k). Constant a b -> a
getConstant ((b -> Constant b b) -> a -> Constant b a
codec b -> Constant b b
forall k a (b :: k). a -> Constant a b
Constant a
a)

-- | @eof@ tells you explicitly when decoding stops due to bad bytes or
--    instead reaches end-of-file happily. (Without it one just makes an explicit
--    test for emptiness of the resulting bytestring production using next) Thus
--
-- >    decode (utf8 . eof) bytes :: Producer T.Text IO (Either (Producer B.ByteString IO ()) ())
--
--    If we hit undecodable bytes, the remaining bytestring producer will be
--    returned as a Left value; in the happy case, a Right value is returned
--    with the anticipated return value for the original bytestring producer.
--
--    Given a bytestring producer called @bytes@ all of these will be the same:
--
-- >    decode (utf8 . eof) bytes
-- >    view (utf8 . eof) bytes
-- >    bytes^.utf8.eof
eof ::
  (Monad m, Monad (t m), MonadTrans t) =>
  Lens'
    (t m (Producer ByteString m r))
    (t m (Either (Producer ByteString m r) r))
eof :: Lens'
  (t m (Producer ByteString m r))
  (t m (Either (Producer ByteString m r) r))
eof t m (Either (Producer ByteString m r) r)
-> f (t m (Either (Producer ByteString m r) r))
k t m (Producer ByteString m r)
p0 = (t m (Either (Producer ByteString m r) r)
 -> t m (Producer ByteString m r))
-> f (t m (Either (Producer ByteString m r) r))
-> f (t m (Producer ByteString m r))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap t m (Either (Producer ByteString m r) r)
-> t m (Producer ByteString m r)
forall a.
t m (Either (Proxy X () () ByteString m a) a)
-> t m (Proxy X () () ByteString m a)
fromEither (t m (Either (Producer ByteString m r) r)
-> f (t m (Either (Producer ByteString m r) r))
k (t m (Producer ByteString m r)
-> t m (Either (Producer ByteString m r) r)
forall (t :: (* -> *) -> * -> *) (m :: * -> *) b.
(MonadTrans t, Monad m, Monad (t m)) =>
t m (Producer ByteString m b)
-> t m (Either (Producer ByteString m b) b)
toEither t m (Producer ByteString m r)
p0))
  where
    fromEither :: t m (Either (Proxy X () () ByteString m a) a)
-> t m (Proxy X () () ByteString m a)
fromEither = (Either (Proxy X () () ByteString m a) a
 -> Proxy X () () ByteString m a)
-> t m (Either (Proxy X () () ByteString m a) a)
-> t m (Proxy X () () ByteString m a)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ((Proxy X () () ByteString m a -> Proxy X () () ByteString m a)
-> (a -> Proxy X () () ByteString m a)
-> Either (Proxy X () () ByteString m a) a
-> Proxy X () () ByteString m a
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either Proxy X () () ByteString m a -> Proxy X () () ByteString m a
forall a. a -> a
id a -> Proxy X () () ByteString m a
forall (m :: * -> *) a. Monad m => a -> m a
return)

    toEither :: t m (Producer ByteString m b)
-> t m (Either (Producer ByteString m b) b)
toEither t m (Producer ByteString m b)
pp = do
      Producer ByteString m b
p <- t m (Producer ByteString m b)
pp
      Producer ByteString m b -> t m (Either (Producer ByteString m b) b)
forall (t :: (* -> *) -> * -> *) (m :: * -> *) b.
(MonadTrans t, Monad m, Monad (t m)) =>
Producer ByteString m b -> t m (Either (Producer ByteString m b) b)
check Producer ByteString m b
p

    check :: Producer ByteString m b -> t m (Either (Producer ByteString m b) b)
check Producer ByteString m b
p = do
      Either b (ByteString, Producer ByteString m b)
e <- m (Either b (ByteString, Producer ByteString m b))
-> t m (Either b (ByteString, Producer ByteString m b))
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (Producer ByteString m b
-> m (Either b (ByteString, Producer ByteString m b))
forall (m :: * -> *) a r.
Monad m =>
Producer a m r -> m (Either r (a, Producer a m r))
next Producer ByteString m b
p)
      case Either b (ByteString, Producer ByteString m b)
e of
        Left b
r -> Either (Producer ByteString m b) b
-> t m (Either (Producer ByteString m b) b)
forall (m :: * -> *) a. Monad m => a -> m a
return (b -> Either (Producer ByteString m b) b
forall a b. b -> Either a b
Right b
r)
        Right (ByteString
bs, Producer ByteString m b
pb) ->
          if ByteString -> Bool
B.null ByteString
bs
            then Producer ByteString m b -> t m (Either (Producer ByteString m b) b)
check Producer ByteString m b
pb
            else
              Either (Producer ByteString m b) b
-> t m (Either (Producer ByteString m b) b)
forall (m :: * -> *) a. Monad m => a -> m a
return
                ( Producer ByteString m b -> Either (Producer ByteString m b) b
forall a b. a -> Either a b
Left
                    ( do
                        ByteString -> Proxy X () () ByteString m ()
forall (m :: * -> *) a x' x. Functor m => a -> Proxy x' x () a m ()
yield ByteString
bs
                        Producer ByteString m b
pb
                    )
                )

utf8 :: Codec
utf8 :: Lens'
  (Producer ByteString m r)
  (Producer Text m (Producer ByteString m r))
utf8 = (forall r (m :: * -> *).
 Monad m =>
 Producer ByteString m r
 -> Producer Text m (Producer ByteString m r))
-> (Text -> ByteString) -> Codec
mkCodec forall r (m :: * -> *).
Monad m =>
Producer ByteString m r
-> Producer Text m (Producer ByteString m r)
forall (m :: * -> *) r.
Monad m =>
Producer ByteString m r
-> Producer Text m (Producer ByteString m r)
decodeUtf8 Text -> ByteString
TE.encodeUtf8

utf8Pure :: Codec
utf8Pure :: Lens'
  (Producer ByteString m r)
  (Producer Text m (Producer ByteString m r))
utf8Pure = (forall r (m :: * -> *).
 Monad m =>
 Producer ByteString m r
 -> Producer Text m (Producer ByteString m r))
-> (Text -> ByteString) -> Codec
mkCodec forall r (m :: * -> *).
Monad m =>
Producer ByteString m r
-> Producer Text m (Producer ByteString m r)
forall (m :: * -> *) r.
Monad m =>
Producer ByteString m r
-> Producer Text m (Producer ByteString m r)
decodeUtf8Pure Text -> ByteString
TE.encodeUtf8

utf16LE :: Codec
utf16LE :: Lens'
  (Producer ByteString m r)
  (Producer Text m (Producer ByteString m r))
utf16LE = (forall r (m :: * -> *).
 Monad m =>
 Producer ByteString m r
 -> Producer Text m (Producer ByteString m r))
-> (Text -> ByteString) -> Codec
mkCodec forall r (m :: * -> *).
Monad m =>
Producer ByteString m r
-> Producer Text m (Producer ByteString m r)
forall (m :: * -> *) r.
Monad m =>
Producer ByteString m r
-> Producer Text m (Producer ByteString m r)
decodeUtf16LE Text -> ByteString
TE.encodeUtf16LE

utf16BE :: Codec
utf16BE :: Lens'
  (Producer ByteString m r)
  (Producer Text m (Producer ByteString m r))
utf16BE = (forall r (m :: * -> *).
 Monad m =>
 Producer ByteString m r
 -> Producer Text m (Producer ByteString m r))
-> (Text -> ByteString) -> Codec
mkCodec forall r (m :: * -> *).
Monad m =>
Producer ByteString m r
-> Producer Text m (Producer ByteString m r)
forall (m :: * -> *) r.
Monad m =>
Producer ByteString m r
-> Producer Text m (Producer ByteString m r)
decodeUtf16BE Text -> ByteString
TE.encodeUtf16BE

utf32LE :: Codec
utf32LE :: Lens'
  (Producer ByteString m r)
  (Producer Text m (Producer ByteString m r))
utf32LE = (forall r (m :: * -> *).
 Monad m =>
 Producer ByteString m r
 -> Producer Text m (Producer ByteString m r))
-> (Text -> ByteString) -> Codec
mkCodec forall r (m :: * -> *).
Monad m =>
Producer ByteString m r
-> Producer Text m (Producer ByteString m r)
forall (m :: * -> *) r.
Monad m =>
Producer ByteString m r
-> Producer Text m (Producer ByteString m r)
decodeUtf32LE Text -> ByteString
TE.encodeUtf32LE

utf32BE :: Codec
utf32BE :: Lens'
  (Producer ByteString m r)
  (Producer Text m (Producer ByteString m r))
utf32BE = (forall r (m :: * -> *).
 Monad m =>
 Producer ByteString m r
 -> Producer Text m (Producer ByteString m r))
-> (Text -> ByteString) -> Codec
mkCodec forall r (m :: * -> *).
Monad m =>
Producer ByteString m r
-> Producer Text m (Producer ByteString m r)
forall (m :: * -> *) r.
Monad m =>
Producer ByteString m r
-> Producer Text m (Producer ByteString m r)
decodeUtf32BE Text -> ByteString
TE.encodeUtf32BE

decodeStream ::
  Monad m =>
  (B.ByteString -> DecodeResult) ->
  Producer ByteString m r ->
  Producer Text m (Producer ByteString m r)
decodeStream :: (ByteString -> DecodeResult)
-> Producer ByteString m r
-> Producer Text m (Producer ByteString m r)
decodeStream = (ByteString -> DecodeResult)
-> Producer ByteString m r
-> Producer Text m (Producer ByteString m r)
forall (m :: * -> *) b x' x.
Monad m =>
(ByteString -> DecodeResult)
-> Producer ByteString m b
-> Proxy x' x () Text m (Producer ByteString m b)
loop
  where
    loop :: (ByteString -> DecodeResult)
-> Producer ByteString m b
-> Proxy x' x () Text m (Producer ByteString m b)
loop ByteString -> DecodeResult
dec0 Producer ByteString m b
p =
      do
        Either b (ByteString, Producer ByteString m b)
x <- m (Either b (ByteString, Producer ByteString m b))
-> Proxy
     x' x () Text m (Either b (ByteString, Producer ByteString m b))
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (Producer ByteString m b
-> m (Either b (ByteString, Producer ByteString m b))
forall (m :: * -> *) a r.
Monad m =>
Producer a m r -> m (Either r (a, Producer a m r))
next Producer ByteString m b
p)
        case Either b (ByteString, Producer ByteString m b)
x of
          Left b
r -> Producer ByteString m b
-> Proxy x' x () Text m (Producer ByteString m b)
forall (m :: * -> *) a. Monad m => a -> m a
return (b -> Producer ByteString m b
forall (m :: * -> *) a. Monad m => a -> m a
return b
r)
          Right (ByteString
chunk, Producer ByteString m b
p') -> case ByteString -> DecodeResult
dec0 ByteString
chunk of
            DecodeResultSuccess Text
text ByteString -> DecodeResult
dec -> do
              Text -> Proxy x' x () Text m ()
forall (m :: * -> *) a x' x. Functor m => a -> Proxy x' x () a m ()
yield Text
text
              (ByteString -> DecodeResult)
-> Producer ByteString m b
-> Proxy x' x () Text m (Producer ByteString m b)
loop ByteString -> DecodeResult
dec Producer ByteString m b
p'
            DecodeResultFailure Text
text ByteString
bs -> do
              Text -> Proxy x' x () Text m ()
forall (m :: * -> *) a x' x. Functor m => a -> Proxy x' x () a m ()
yield Text
text
              Producer ByteString m b
-> Proxy x' x () Text m (Producer ByteString m b)
forall (m :: * -> *) a. Monad m => a -> m a
return
                ( do
                    ByteString -> Proxy X () () ByteString m ()
forall (m :: * -> *) a x' x. Functor m => a -> Proxy x' x () a m ()
yield ByteString
bs
                    Producer ByteString m b
p'
                )
{-# INLINEABLE decodeStream #-}

-- $decoders
--   These are functions with the simple type:
--
-- >   decodeUtf8 :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
--
--   Thus in general
--
-- >     decodeUtf8 = view utf8
-- >     decodeUtf16LE = view utf16LE
--
--   and so forth, but these forms
--   may be more convenient (and give better type errors!) where lenses are
--   not desired.

decodeUtf8 :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
decodeUtf8 :: Producer ByteString m r
-> Producer Text m (Producer ByteString m r)
decodeUtf8 = (ByteString -> DecodeResult)
-> Producer ByteString m r
-> Producer Text m (Producer ByteString m r)
forall (m :: * -> *) r.
Monad m =>
(ByteString -> DecodeResult)
-> Producer ByteString m r
-> Producer Text m (Producer ByteString m r)
decodeStream ByteString -> DecodeResult
Stream.decodeUtf8
{-# INLINE decodeUtf8 #-}

decodeUtf8Pure :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
decodeUtf8Pure :: Producer ByteString m r
-> Producer Text m (Producer ByteString m r)
decodeUtf8Pure = (ByteString -> DecodeResult)
-> Producer ByteString m r
-> Producer Text m (Producer ByteString m r)
forall (m :: * -> *) r.
Monad m =>
(ByteString -> DecodeResult)
-> Producer ByteString m r
-> Producer Text m (Producer ByteString m r)
decodeStream ByteString -> DecodeResult
Stream.decodeUtf8Pure
{-# INLINE decodeUtf8Pure #-}

decodeUtf16LE :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
decodeUtf16LE :: Producer ByteString m r
-> Producer Text m (Producer ByteString m r)
decodeUtf16LE = (ByteString -> DecodeResult)
-> Producer ByteString m r
-> Producer Text m (Producer ByteString m r)
forall (m :: * -> *) r.
Monad m =>
(ByteString -> DecodeResult)
-> Producer ByteString m r
-> Producer Text m (Producer ByteString m r)
decodeStream ByteString -> DecodeResult
Stream.decodeUtf16LE
{-# INLINE decodeUtf16LE #-}

decodeUtf16BE :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
decodeUtf16BE :: Producer ByteString m r
-> Producer Text m (Producer ByteString m r)
decodeUtf16BE = (ByteString -> DecodeResult)
-> Producer ByteString m r
-> Producer Text m (Producer ByteString m r)
forall (m :: * -> *) r.
Monad m =>
(ByteString -> DecodeResult)
-> Producer ByteString m r
-> Producer Text m (Producer ByteString m r)
decodeStream ByteString -> DecodeResult
Stream.decodeUtf16BE
{-# INLINE decodeUtf16BE #-}

decodeUtf32LE :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
decodeUtf32LE :: Producer ByteString m r
-> Producer Text m (Producer ByteString m r)
decodeUtf32LE = (ByteString -> DecodeResult)
-> Producer ByteString m r
-> Producer Text m (Producer ByteString m r)
forall (m :: * -> *) r.
Monad m =>
(ByteString -> DecodeResult)
-> Producer ByteString m r
-> Producer Text m (Producer ByteString m r)
decodeStream ByteString -> DecodeResult
Stream.decodeUtf32LE
{-# INLINE decodeUtf32LE #-}

decodeUtf32BE :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
decodeUtf32BE :: Producer ByteString m r
-> Producer Text m (Producer ByteString m r)
decodeUtf32BE = (ByteString -> DecodeResult)
-> Producer ByteString m r
-> Producer Text m (Producer ByteString m r)
forall (m :: * -> *) r.
Monad m =>
(ByteString -> DecodeResult)
-> Producer ByteString m r
-> Producer Text m (Producer ByteString m r)
decodeStream ByteString -> DecodeResult
Stream.decodeUtf32BE
{-# INLINE decodeUtf32BE #-}

-- $encoders
--   These are simply defined
--
-- >      encodeUtf8 = yield . TE.encodeUtf8
--
--   They are intended for use with 'for'
--
-- >      for Text.stdin encodeUtf8 :: Producer ByteString IO ()
--
--   which would have the effect of
--
-- >      Text.stdin >-> Pipes.Prelude.map (TE.encodeUtf8)
--
--   using the encoding functions from Data.Text.Encoding

encodeUtf8 :: Monad m => Text -> Producer' ByteString m ()
encodeUtf8 :: Text -> Producer' ByteString m ()
encodeUtf8 Text
str = ByteString -> Proxy x' x () ByteString m ()
forall (m :: * -> *) a x' x. Functor m => a -> Proxy x' x () a m ()
yield (ByteString -> Proxy x' x () ByteString m ())
-> ByteString -> Proxy x' x () ByteString m ()
forall a b. (a -> b) -> a -> b
$ Text -> ByteString
TE.encodeUtf8 Text
str

encodeUtf16LE :: Monad m => Text -> Producer' ByteString m ()
encodeUtf16LE :: Text -> Producer' ByteString m ()
encodeUtf16LE Text
str = ByteString -> Proxy x' x () ByteString m ()
forall (m :: * -> *) a x' x. Functor m => a -> Proxy x' x () a m ()
yield (ByteString -> Proxy x' x () ByteString m ())
-> ByteString -> Proxy x' x () ByteString m ()
forall a b. (a -> b) -> a -> b
$ Text -> ByteString
TE.encodeUtf16LE Text
str

encodeUtf16BE :: Monad m => Text -> Producer' ByteString m ()
encodeUtf16BE :: Text -> Producer' ByteString m ()
encodeUtf16BE Text
str = ByteString -> Proxy x' x () ByteString m ()
forall (m :: * -> *) a x' x. Functor m => a -> Proxy x' x () a m ()
yield (ByteString -> Proxy x' x () ByteString m ())
-> ByteString -> Proxy x' x () ByteString m ()
forall a b. (a -> b) -> a -> b
$ Text -> ByteString
TE.encodeUtf16BE Text
str

encodeUtf32LE :: Monad m => Text -> Producer' ByteString m ()
encodeUtf32LE :: Text -> Producer' ByteString m ()
encodeUtf32LE Text
str = ByteString -> Proxy x' x () ByteString m ()
forall (m :: * -> *) a x' x. Functor m => a -> Proxy x' x () a m ()
yield (ByteString -> Proxy x' x () ByteString m ())
-> ByteString -> Proxy x' x () ByteString m ()
forall a b. (a -> b) -> a -> b
$ Text -> ByteString
TE.encodeUtf32LE Text
str

encodeUtf32BE :: Monad m => Text -> Producer' ByteString m ()
encodeUtf32BE :: Text -> Producer' ByteString m ()
encodeUtf32BE Text
str = ByteString -> Proxy x' x () ByteString m ()
forall (m :: * -> *) a x' x. Functor m => a -> Proxy x' x () a m ()
yield (ByteString -> Proxy x' x () ByteString m ())
-> ByteString -> Proxy x' x () ByteString m ()
forall a b. (a -> b) -> a -> b
$ Text -> ByteString
TE.encodeUtf32BE Text
str

mkCodec ::
  ( forall r m.
    Monad m =>
    Producer ByteString m r ->
    Producer Text m (Producer ByteString m r)
  ) ->
  (Text -> ByteString) ->
  Codec
mkCodec :: (forall r (m :: * -> *).
 Monad m =>
 Producer ByteString m r
 -> Producer Text m (Producer ByteString m r))
-> (Text -> ByteString) -> Codec
mkCodec forall r (m :: * -> *).
Monad m =>
Producer ByteString m r
-> Producer Text m (Producer ByteString m r)
dec Text -> ByteString
enc Producer Text m (Producer ByteString m r)
-> f (Producer Text m (Producer ByteString m r))
k Producer ByteString m r
p0 = (Producer Text m (Producer ByteString m r)
 -> Producer ByteString m r)
-> f (Producer Text m (Producer ByteString m r))
-> f (Producer ByteString m r)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (\Producer Text m (Producer ByteString m r)
p -> Proxy X () () ByteString m (Producer ByteString m r)
-> Producer ByteString m r
forall (m :: * -> *) a. Monad m => m (m a) -> m a
join (Producer Text m (Producer ByteString m r)
-> (Text -> Proxy X () () ByteString m ())
-> Proxy X () () ByteString m (Producer ByteString m r)
forall (m :: * -> *) x' x b' b a' c' c.
Functor m =>
Proxy x' x b' b m a'
-> (b -> Proxy x' x c' c m b') -> Proxy x' x c' c m a'
for Producer Text m (Producer ByteString m r)
p (ByteString -> Proxy X () () ByteString m ()
forall (m :: * -> *) a x' x. Functor m => a -> Proxy x' x () a m ()
yield (ByteString -> Proxy X () () ByteString m ())
-> (Text -> ByteString) -> Text -> Proxy X () () ByteString m ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> ByteString
enc))) (Producer Text m (Producer ByteString m r)
-> f (Producer Text m (Producer ByteString m r))
k (Producer ByteString m r
-> Producer Text m (Producer ByteString m r)
forall r (m :: * -> *).
Monad m =>
Producer ByteString m r
-> Producer Text m (Producer ByteString m r)
dec Producer ByteString m r
p0))

-- $ascii
--   ascii and latin encodings only use a small number of the characters 'Text'
--     recognizes; thus we cannot use the pipes @Lens@ style to work with them.
--     Rather we simply define functions each way.

-- | 'encodeAscii' reduces as much of your stream of 'Text' actually is ascii to a byte stream,
--   returning the rest of the 'Text' at the first non-ascii 'Char'
encodeAscii :: Monad m => Producer Text m r -> Producer ByteString m (Producer Text m r)
encodeAscii :: Producer Text m r -> Producer ByteString m (Producer Text m r)
encodeAscii = Producer Text m r -> Producer ByteString m (Producer Text m r)
forall (m :: * -> *) b x' x.
Monad m =>
Producer Text m b -> Proxy x' x () ByteString m (Producer Text m b)
go
  where
    go :: Producer Text m b -> Proxy x' x () ByteString m (Producer Text m b)
go Producer Text m b
p = do
      Either b (Text, Producer Text m b)
e <- m (Either b (Text, Producer Text m b))
-> Proxy x' x () ByteString m (Either b (Text, Producer Text m b))
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (Producer Text m b -> m (Either b (Text, Producer Text m b))
forall (m :: * -> *) a r.
Monad m =>
Producer a m r -> m (Either r (a, Producer a m r))
next Producer Text m b
p)
      case Either b (Text, Producer Text m b)
e of
        Left b
r -> Producer Text m b -> Proxy x' x () ByteString m (Producer Text m b)
forall (m :: * -> *) a. Monad m => a -> m a
return (b -> Producer Text m b
forall (m :: * -> *) a. Monad m => a -> m a
return b
r)
        Right (Text
chunk, Producer Text m b
p') ->
          if Text -> Bool
T.null Text
chunk
            then Producer Text m b -> Proxy x' x () ByteString m (Producer Text m b)
go Producer Text m b
p'
            else
              let (Text
safe, Text
unsafe) = (Char -> Bool) -> Text -> (Text, Text)
T.span (\Char
c -> Char -> Int
ord Char
c Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0x7F) Text
chunk
               in do
                    ByteString -> Proxy x' x () ByteString m ()
forall (m :: * -> *) a x' x. Functor m => a -> Proxy x' x () a m ()
yield (String -> ByteString
B8.pack (Text -> String
T.unpack Text
safe))
                    if Text -> Bool
T.null Text
unsafe
                      then Producer Text m b -> Proxy x' x () ByteString m (Producer Text m b)
go Producer Text m b
p'
                      else Producer Text m b -> Proxy x' x () ByteString m (Producer Text m b)
forall (m :: * -> *) a. Monad m => a -> m a
return (Producer Text m b
 -> Proxy x' x () ByteString m (Producer Text m b))
-> Producer Text m b
-> Proxy x' x () ByteString m (Producer Text m b)
forall a b. (a -> b) -> a -> b
$ do
                        Text -> Proxy X () () Text m ()
forall (m :: * -> *) a x' x. Functor m => a -> Proxy x' x () a m ()
yield Text
unsafe
                        Producer Text m b
p'

-- | Reduce as much of your stream of 'Text' actually is iso8859 or latin1 to a byte stream,
--     returning the rest of the 'Text' upon hitting any non-latin 'Char'
encodeIso8859_1 :: Monad m => Producer Text m r -> Producer ByteString m (Producer Text m r)
encodeIso8859_1 :: Producer Text m r -> Producer ByteString m (Producer Text m r)
encodeIso8859_1 = Producer Text m r -> Producer ByteString m (Producer Text m r)
forall (m :: * -> *) b x' x.
Monad m =>
Producer Text m b -> Proxy x' x () ByteString m (Producer Text m b)
go
  where
    go :: Producer Text m b -> Proxy x' x () ByteString m (Producer Text m b)
go Producer Text m b
p = do
      Either b (Text, Producer Text m b)
e <- m (Either b (Text, Producer Text m b))
-> Proxy x' x () ByteString m (Either b (Text, Producer Text m b))
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (Producer Text m b -> m (Either b (Text, Producer Text m b))
forall (m :: * -> *) a r.
Monad m =>
Producer a m r -> m (Either r (a, Producer a m r))
next Producer Text m b
p)
      case Either b (Text, Producer Text m b)
e of
        Left b
r -> Producer Text m b -> Proxy x' x () ByteString m (Producer Text m b)
forall (m :: * -> *) a. Monad m => a -> m a
return (b -> Producer Text m b
forall (m :: * -> *) a. Monad m => a -> m a
return b
r)
        Right (Text
txt, Producer Text m b
p') ->
          if Text -> Bool
T.null Text
txt
            then Producer Text m b -> Proxy x' x () ByteString m (Producer Text m b)
go Producer Text m b
p'
            else
              let (Text
safe, Text
unsafe) = (Char -> Bool) -> Text -> (Text, Text)
T.span (\Char
c -> Char -> Int
ord Char
c Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0xFF) Text
txt
               in do
                    ByteString -> Proxy x' x () ByteString m ()
forall (m :: * -> *) a x' x. Functor m => a -> Proxy x' x () a m ()
yield (String -> ByteString
B8.pack (Text -> String
T.unpack Text
safe))
                    if Text -> Bool
T.null Text
unsafe
                      then Producer Text m b -> Proxy x' x () ByteString m (Producer Text m b)
go Producer Text m b
p'
                      else Producer Text m b -> Proxy x' x () ByteString m (Producer Text m b)
forall (m :: * -> *) a. Monad m => a -> m a
return (Producer Text m b
 -> Proxy x' x () ByteString m (Producer Text m b))
-> Producer Text m b
-> Proxy x' x () ByteString m (Producer Text m b)
forall a b. (a -> b) -> a -> b
$ do
                        Text -> Proxy X () () Text m ()
forall (m :: * -> *) a x' x. Functor m => a -> Proxy x' x () a m ()
yield Text
unsafe
                        Producer Text m b
p'

-- | Reduce a byte stream to a corresponding stream of ascii chars, returning the
--     unused 'ByteString' upon hitting an un-ascii byte.
decodeAscii :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
decodeAscii :: Producer ByteString m r
-> Producer Text m (Producer ByteString m r)
decodeAscii = Producer ByteString m r
-> Producer Text m (Producer ByteString m r)
forall (m :: * -> *) b x' x.
Monad m =>
Producer ByteString m b
-> Proxy x' x () Text m (Producer ByteString m b)
go
  where
    go :: Producer ByteString m b
-> Proxy x' x () Text m (Producer ByteString m b)
go Producer ByteString m b
p = do
      Either b (ByteString, Producer ByteString m b)
e <- m (Either b (ByteString, Producer ByteString m b))
-> Proxy
     x' x () Text m (Either b (ByteString, Producer ByteString m b))
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (Producer ByteString m b
-> m (Either b (ByteString, Producer ByteString m b))
forall (m :: * -> *) a r.
Monad m =>
Producer a m r -> m (Either r (a, Producer a m r))
next Producer ByteString m b
p)
      case Either b (ByteString, Producer ByteString m b)
e of
        Left b
r -> Producer ByteString m b
-> Proxy x' x () Text m (Producer ByteString m b)
forall (m :: * -> *) a. Monad m => a -> m a
return (b -> Producer ByteString m b
forall (m :: * -> *) a. Monad m => a -> m a
return b
r)
        Right (ByteString
chunk, Producer ByteString m b
p') ->
          if ByteString -> Bool
B.null ByteString
chunk
            then Producer ByteString m b
-> Proxy x' x () Text m (Producer ByteString m b)
go Producer ByteString m b
p'
            else
              let (ByteString
safe, ByteString
unsafe) = (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
B.span (Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
0x7F) ByteString
chunk
               in do
                    Text -> Proxy x' x () Text m ()
forall (m :: * -> *) a x' x. Functor m => a -> Proxy x' x () a m ()
yield (String -> Text
T.pack (ByteString -> String
B8.unpack ByteString
safe))
                    if ByteString -> Bool
B.null ByteString
unsafe
                      then Producer ByteString m b
-> Proxy x' x () Text m (Producer ByteString m b)
go Producer ByteString m b
p'
                      else
                        Producer ByteString m b
-> Proxy x' x () Text m (Producer ByteString m b)
forall (m :: * -> *) a. Monad m => a -> m a
return
                          ( do
                              ByteString -> Proxy X () () ByteString m ()
forall (m :: * -> *) a x' x. Functor m => a -> Proxy x' x () a m ()
yield ByteString
unsafe
                              Producer ByteString m b
p'
                          )

-- | Reduce a byte stream to a corresponding stream of ascii chars, returning the
--     unused 'ByteString' upon hitting the rare un-latinizable byte.
decodeIso8859_1 :: Monad m => Producer ByteString m r -> Producer Text m (Producer ByteString m r)
decodeIso8859_1 :: Producer ByteString m r
-> Producer Text m (Producer ByteString m r)
decodeIso8859_1 = Producer ByteString m r
-> Producer Text m (Producer ByteString m r)
forall (m :: * -> *) b x' x.
Monad m =>
Producer ByteString m b
-> Proxy x' x () Text m (Producer ByteString m b)
go
  where
    go :: Producer ByteString m b
-> Proxy x' x () Text m (Producer ByteString m b)
go Producer ByteString m b
p = do
      Either b (ByteString, Producer ByteString m b)
e <- m (Either b (ByteString, Producer ByteString m b))
-> Proxy
     x' x () Text m (Either b (ByteString, Producer ByteString m b))
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (Producer ByteString m b
-> m (Either b (ByteString, Producer ByteString m b))
forall (m :: * -> *) a r.
Monad m =>
Producer a m r -> m (Either r (a, Producer a m r))
next Producer ByteString m b
p)
      case Either b (ByteString, Producer ByteString m b)
e of
        Left b
r -> Producer ByteString m b
-> Proxy x' x () Text m (Producer ByteString m b)
forall (m :: * -> *) a. Monad m => a -> m a
return (b -> Producer ByteString m b
forall (m :: * -> *) a. Monad m => a -> m a
return b
r)
        Right (ByteString
chunk, Producer ByteString m b
p') ->
          if ByteString -> Bool
B.null ByteString
chunk
            then Producer ByteString m b
-> Proxy x' x () Text m (Producer ByteString m b)
go Producer ByteString m b
p'
            else do
              let (ByteString
safe, ByteString
unsafe) = (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
B.span (Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
0xFF) ByteString
chunk
              Text -> Proxy x' x () Text m ()
forall (m :: * -> *) a x' x. Functor m => a -> Proxy x' x () a m ()
yield (String -> Text
T.pack (ByteString -> String
B8.unpack ByteString
safe))
              if ByteString -> Bool
B.null ByteString
unsafe
                then Producer ByteString m b
-> Proxy x' x () Text m (Producer ByteString m b)
go Producer ByteString m b
p'
                else
                  Producer ByteString m b
-> Proxy x' x () Text m (Producer ByteString m b)
forall (m :: * -> *) a. Monad m => a -> m a
return
                    ( do
                        ByteString -> Proxy X () () ByteString m ()
forall (m :: * -> *) a x' x. Functor m => a -> Proxy x' x () a m ()
yield ByteString
unsafe
                        Producer ByteString m b
p'
                    )