{-# LANGUAGE DataKinds #-}
--{-# LANGUAGE KindSignatures #-}
{-# LANGUAGE PolyKinds #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE FlexibleContexts #-}
--{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE PartialTypeSignatures #-}
--{-# LANGUAGE TypeApplications #-}

-- | UTF-8 encoding with the additional assumption of conforming to Unicode.D76.
--
-- @"r-UTF8"@ basically defines the restriction on @ByteString@ that is needed for
-- the conversion to @Text@ to work.
--
-- @since 0.1.0.0
module Data.TypedEncoding.Instances.Restriction.UTF8 (
     module Data.TypedEncoding.Instances.Restriction.UTF8
     -- * reexported for backward compatibility, will be removed in the future
     , implVerifyR 
   ) where

import           Data.TypedEncoding.Instances.Support

import           Data.Proxy

import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as BL
import qualified Data.Text.Encoding as TE 
import qualified Data.Text.Lazy.Encoding as TEL 
import           Data.Either


-- $setup
-- >>> :set -XScopedTypeVariables -XKindSignatures -XMultiParamTypeClasses -XDataKinds -XPolyKinds -XPartialTypeSignatures -XFlexibleInstances -XTypeApplications
-- >>> import Test.QuickCheck
-- >>> import Test.QuickCheck.Instances.Text()
-- >>> import Test.QuickCheck.Instances.ByteString()
-- >>> import Data.TypedEncoding
-- >>> let emptyUTF8B = unsafeSetPayload () "" ::  Enc '["r-UTF8"] () B.ByteString 
-- >>> :{  
-- instance Arbitrary (Enc '["r-UTF8"] () B.ByteString) where 
--      arbitrary =  fmap (fromRight emptyUTF8B) 
--                   . flip suchThat isRight 
--                   . fmap (encodeFAll @'["r-UTF8"] @(Either EncodeEx) @(). toEncoding ()) $ arbitrary 
-- :}



-----------------
-- Encodings  --
-----------------

-- | Type-level proxy for the @"r-UTF8"@ encoding name.
prxyUtf8 :: Proxy "r-UTF8"
prxyUtf8 = Proxy


-- | UTF8 encodings are defined for ByteString only as that would not make much sense for Text
--
-- >>> _runEncodings encodings . toEncoding () $ "\xc3\xb1" :: Either EncodeEx (Enc '["r-UTF8"] () B.ByteString)
-- Right (UnsafeMkEnc Proxy () "\195\177")
--
-- >>> _runEncodings encodings . toEncoding () $ "\xc3\x28" :: Either EncodeEx (Enc '["r-UTF8"] () B.ByteString)
-- Left (EncodeEx "r-UTF8" (Cannot decode byte '\xc3': ...
--
-- The following test uses the 'verEncoding' helper, which checks that bytes are encoded as Right iff they are valid UTF8 bytes
--
-- >>> :{ 
-- quickCheck $ \(b :: B.ByteString) -> verEncoding b $ fmap (
--          fromEncoding 
--          . decodeAll @'["r-UTF8"]
--          ) . encodeFAll @'["r-UTF8"] @(Either EncodeEx)
--          . toEncoding () $ b
-- :}
-- +++ OK, passed 100 tests.

instance Encode (Either EncodeEx) "r-UTF8" "r-UTF8" c B.ByteString where
    -- Strict ByteString payloads are checked by 'encUTF8B'.
    encoding = encUTF8B
  

instance Encode (Either EncodeEx) "r-UTF8" "r-UTF8" c BL.ByteString where
    -- Lazy ByteString payloads are checked by 'encUTF8BL'. Its declared type
    -- already matches the method type, so no inline annotation is needed
    -- (this keeps the instance parallel to the strict ByteString one).
    encoding = encUTF8BL


-- using lazy decoding to detect errors seems to be the fastest option that is not super hard to code

-- | @"r-UTF8"@ encoding for strict 'B.ByteString'.
--
-- The bytes are converted to a lazy ByteString and run through the lazy
-- 'TEL.decodeUtf8'' purely as a validity check. 'implVerifyR' has type
-- @(a -> Either err b) -> a -> Either err a@, so the decoded text is not part
-- of the result; only the success or failure of decoding matters.
-- A decoding failure is reported in 'Either' 'EncodeEx' by '_implEncodingEx'.
encUTF8B :: Encoding (Either EncodeEx) "r-UTF8" "r-UTF8" c B.ByteString
encUTF8B = _implEncodingEx (implVerifyR decodeStrict)
  where
    -- Lazy decoding of the strict input; used for error detection only.
    decodeStrict = TEL.decodeUtf8' . BL.fromStrict


-- | @"r-UTF8"@ encoding for lazy 'BL.ByteString'.
--
-- The bytes are run through the lazy 'TEL.decodeUtf8'' purely as a validity
-- check. 'implVerifyR' has type @(a -> Either err b) -> a -> Either err a@,
-- so the decoded text is not part of the result.
-- A decoding failure is reported in 'Either' 'EncodeEx' by '_implEncodingEx'.
encUTF8BL :: Encoding (Either EncodeEx) "r-UTF8" "r-UTF8" c BL.ByteString
encUTF8BL = _implEncodingEx (implVerifyR TEL.decodeUtf8')

-- * Decoding

instance (Applicative f) => Decode f "r-UTF8" "r-UTF8" c str where
    -- 'decAnyR' is the generic decoding available for any 'Restriction'
    -- encoding; presumably it leaves the payload untouched (confirm in
    -- "Data.TypedEncoding.Instances.Support").
    decoding = decAnyR

instance (RecreateErr f, Applicative f) => Validate f "r-UTF8" "r-UTF8" c B.ByteString where
    -- Validation is derived from the strict ByteString encoding via 'validR'.
    validation = validR encUTF8B

instance (RecreateErr f, Applicative f) => Validate f "r-UTF8" "r-UTF8" c BL.ByteString where
    -- Validation is derived from the lazy ByteString encoding via 'validR'.
    validation = validR encUTF8BL


--- Utilities ---

-- | Helper that checks an encoding result against direct UTF-8 decoding of the
-- original bytes:
--
-- * if the result is 'Left', the bytes must not be UTF-8 decodable;
-- * if the result is 'Right', the bytes must be UTF-8 decodable.
--
-- Only the constructor of the second argument is inspected; its contents are
-- ignored.
verEncoding :: B.ByteString -> Either err B.ByteString -> Bool
verEncoding bs encoded = isRight encoded == isRight (TE.decodeUtf8' bs)