{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
module System.IO.Streams.Text
(
decodeUtf8
, decodeUtf8With
, encodeUtf8
) where
import Control.Monad (when)
import Control.Monad.IO.Class (MonadIO (..))
import Data.ByteString (ByteString)
import qualified Data.ByteString as S
import qualified Data.ByteString.Unsafe as S
#if !MIN_VERSION_base(4,8,0)
import Data.Monoid (mappend)
#endif
import Data.Text (Text)
import qualified Data.Text.Encoding as T
import Data.Text.Encoding.Error (OnDecodeError)
import Data.Word (Word8)
import qualified System.IO.Streams.Combinators as Streams
import System.IO.Streams.Internal (InputStream, OutputStream)
import qualified System.IO.Streams.Internal as Streams
encodeUtf8 :: OutputStream ByteString -> IO (OutputStream Text)
encodeUtf8 :: OutputStream ByteString -> IO (OutputStream Text)
encodeUtf8 = (Text -> ByteString)
-> OutputStream ByteString -> IO (OutputStream Text)
forall a b. (a -> b) -> OutputStream b -> IO (OutputStream a)
Streams.contramap Text -> ByteString
T.encodeUtf8
decodeUtf8 :: InputStream ByteString -> IO (InputStream Text)
decodeUtf8 :: InputStream ByteString -> IO (InputStream Text)
decodeUtf8 = (ByteString -> Text)
-> InputStream ByteString -> IO (InputStream Text)
decode ByteString -> Text
T.decodeUtf8
{-# INLINE decodeUtf8 #-}
decodeUtf8With :: OnDecodeError
-> InputStream ByteString
-> IO (InputStream Text)
decodeUtf8With :: OnDecodeError -> InputStream ByteString -> IO (InputStream Text)
decodeUtf8With OnDecodeError
e = (ByteString -> Text)
-> InputStream ByteString -> IO (InputStream Text)
decode (OnDecodeError -> ByteString -> Text
T.decodeUtf8With OnDecodeError
e)
{-# INLINE decodeUtf8With #-}
decode :: (ByteString -> Text)
-> InputStream ByteString
-> IO (InputStream Text)
decode :: (ByteString -> Text)
-> InputStream ByteString -> IO (InputStream Text)
decode ByteString -> Text
decodeFunc InputStream ByteString
input = Generator Text () -> IO (InputStream Text)
forall r a. Generator r a -> IO (InputStream r)
Streams.fromGenerator (Generator Text () -> IO (InputStream Text))
-> Generator Text () -> IO (InputStream Text)
forall a b. (a -> b) -> a -> b
$ Maybe ByteString -> Generator Text ()
go Maybe ByteString
forall a. Maybe a
Nothing
where
go :: Maybe ByteString -> Generator Text ()
go !Maybe ByteString
soFar = IO (Maybe ByteString) -> Generator Text (Maybe ByteString)
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (InputStream ByteString -> IO (Maybe ByteString)
forall a. InputStream a -> IO (Maybe a)
Streams.read InputStream ByteString
input) Generator Text (Maybe ByteString)
-> (Maybe ByteString -> Generator Text ()) -> Generator Text ()
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>=
Generator Text ()
-> (ByteString -> Generator Text ())
-> Maybe ByteString
-> Generator Text ()
forall b a. b -> (a -> b) -> Maybe a -> b
maybe (Maybe ByteString -> Generator Text ()
finish Maybe ByteString
soFar) (Maybe ByteString -> ByteString -> Generator Text ()
chunk Maybe ByteString
soFar)
finish :: Maybe ByteString -> Generator Text ()
finish Maybe ByteString
Nothing = () -> Generator Text ()
forall (m :: * -> *) a. Monad m => a -> m a
return (() -> Generator Text ()) -> () -> Generator Text ()
forall a b. (a -> b) -> a -> b
$! ()
finish (Just ByteString
x) = Text -> Generator Text ()
forall r. r -> Generator r ()
Streams.yield (Text -> Generator Text ()) -> Text -> Generator Text ()
forall a b. (a -> b) -> a -> b
$! ByteString -> Text
decodeFunc ByteString
x
chunk :: Maybe ByteString -> ByteString -> Generator Text ()
chunk Maybe ByteString
Nothing ByteString
s = ByteString -> Generator Text ()
process ByteString
s
chunk (Just ByteString
a) ByteString
b = ByteString -> Generator Text ()
process (ByteString -> Generator Text ())
-> ByteString -> Generator Text ()
forall a b. (a -> b) -> a -> b
$ ByteString
a ByteString -> ByteString -> ByteString
forall a. Monoid a => a -> a -> a
`mappend` ByteString
b
process :: ByteString -> Generator Text ()
process !ByteString
s =
case ByteString -> FindOutput
findLastFullCode ByteString
s of
LastCodeIsComplete ByteString
x -> (Text -> Generator Text ()
forall r. r -> Generator r ()
Streams.yield (Text -> Generator Text ()) -> Text -> Generator Text ()
forall a b. (a -> b) -> a -> b
$! ByteString -> Text
decodeFunc ByteString
x) Generator Text () -> Generator Text () -> Generator Text ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Maybe ByteString -> Generator Text ()
go Maybe ByteString
forall a. Maybe a
Nothing
Split ByteString
a ByteString
b -> do
Bool -> Generator Text () -> Generator Text ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Bool -> Bool
not (Bool -> Bool) -> Bool -> Bool
forall a b. (a -> b) -> a -> b
$ ByteString -> Bool
S.null ByteString
a) (Generator Text () -> Generator Text ())
-> Generator Text () -> Generator Text ()
forall a b. (a -> b) -> a -> b
$
Text -> Generator Text ()
forall r. r -> Generator r ()
Streams.yield (Text -> Generator Text ()) -> Text -> Generator Text ()
forall a b. (a -> b) -> a -> b
$! ByteString -> Text
decodeFunc ByteString
a
Maybe ByteString -> Generator Text ()
go (ByteString -> Maybe ByteString
forall a. a -> Maybe a
Just ByteString
b)
NoCodesAreComplete ByteString
x -> Maybe ByteString -> Generator Text ()
go (ByteString -> Maybe ByteString
forall a. a -> Maybe a
Just ByteString
x)
data ByteType = Regular
| Continuation
| Start !Int
between :: Word8 -> Word8 -> Word8 -> Bool
between :: Word8 -> Word8 -> Word8 -> Bool
between Word8
x Word8
y Word8
z = Word8
x Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
>= Word8
y Bool -> Bool -> Bool
&& Word8
x Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
z
{-# INLINE between #-}
characterizeByte :: Word8 -> ByteType
characterizeByte :: Word8 -> ByteType
characterizeByte Word8
c | Word8 -> Word8 -> Word8 -> Bool
between Word8
c Word8
0 Word8
0x7F = ByteType
Regular
| Word8 -> Word8 -> Word8 -> Bool
between Word8
c Word8
0x80 Word8
0xBF = ByteType
Continuation
| Word8 -> Word8 -> Word8 -> Bool
between Word8
c Word8
0xC0 Word8
0xDF = Int -> ByteType
Start Int
1
| Word8 -> Word8 -> Word8 -> Bool
between Word8
c Word8
0xE0 Word8
0xEF = Int -> ByteType
Start Int
2
| Bool
otherwise = Int -> ByteType
Start Int
3
data FindOutput = LastCodeIsComplete !ByteString
| Split !ByteString !ByteString
| NoCodesAreComplete !ByteString
findLastFullCode :: ByteString -> FindOutput
findLastFullCode :: ByteString -> FindOutput
findLastFullCode ByteString
b | Int
len Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 = ByteString -> FindOutput
LastCodeIsComplete ByteString
b
| Bool
otherwise = FindOutput
go
where
len :: Int
len = ByteString -> Int
S.length ByteString
b
go :: FindOutput
go = let !idx :: Int
idx = Int
len Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1
!c :: Word8
c = ByteString -> Int -> Word8
S.unsafeIndex ByteString
b Int
idx
in case Word8 -> ByteType
characterizeByte Word8
c of
ByteType
Regular -> ByteString -> FindOutput
LastCodeIsComplete ByteString
b
ByteType
Continuation -> Int -> FindOutput
cont (Int
len Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
2)
ByteType
_ -> ByteString -> ByteString -> FindOutput
Split (Int -> ByteString -> ByteString
S.unsafeTake Int
idx ByteString
b) (Int -> ByteString -> ByteString
S.unsafeDrop Int
idx ByteString
b)
cont :: Int -> FindOutput
cont !Int
idx | Int
idx Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0 = ByteString -> FindOutput
NoCodesAreComplete ByteString
b
| Bool
otherwise =
let !c :: Word8
c = ByteString -> Int -> Word8
S.unsafeIndex ByteString
b Int
idx
in case Word8 -> ByteType
characterizeByte Word8
c of
ByteType
Regular -> ByteString -> FindOutput
LastCodeIsComplete ByteString
b
ByteType
Continuation -> Int -> FindOutput
cont (Int
idx Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
Start Int
n -> if Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
idx Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
len Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1
then ByteString -> FindOutput
LastCodeIsComplete ByteString
b
else ByteString -> ByteString -> FindOutput
Split (Int -> ByteString -> ByteString
S.unsafeTake Int
idx ByteString
b)
(Int -> ByteString -> ByteString
S.unsafeDrop Int
idx ByteString
b)
{-# INLINE findLastFullCode #-}