{-# LANGUAGE DataKinds #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE KindSignatures #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TypeApplications #-}
-- {-# LANGUAGE PartialTypeSignatures #-}
-- {-# OPTIONS_GHC -Wno-partial-type-signatures #-}

-- | Examples or moving between type annotated encodings
--
-- Haskell programs typically make these imports to do String, ByteString, and Text conversions:
--
-- @
-- import qualified Data.Text as T (pack, unpack)
-- import qualified Data.ByteString.Char8 as B8 (pack, unpack)
-- import           Data.Text.Encoding (decodeUtf8, encodeUtf8)
-- @
--
-- or corresponding @Lazy@ imports (not shown).
--
-- Enc-specific equivalents can be found in:
--
-- @
-- import qualified Data.TypedEncoding.Conv.Text as EncT (pack, unpack)
-- import qualified Data.TypedEncoding.Conv.ByteString.Char8 as EncB8 (pack, unpack)
-- import           Data.TypedEncoding.Conv.Text.Encoding (decodeUtf8, encodeUtf8)
-- @    
--
-- Conversions aim at providing type safety when moving between encoded string-like types.
--
-- __The assumption__ made by `typed-encoding` is that @"enc-"@ encodings work in an equivalent way independently of the payload type.
-- For example, if the following instances exist:
--
-- @
-- EncodeF SomeErr (Enc xs () String) (Enc ("enc-B64" ': xs) () String)    
-- EncodeF SomeErr (Enc xs () Text) (Enc ("enc-B64" ': xs) () Text)    
-- @
-- 
-- Then /typed-encoding/ expects @pack@ @encodeF@ to commute (if encoding instances exist):
-- 
-- @
--  str     -- EncT.pack -->   txt
--   |                          |
--  encodeF                  encodeF
--   |                          | 
--   v                          v
--  estr -- fmap EncT.pack --> etxt
-- @
--
-- (@unpack@ and $decode$ are expected to satisfy similar diagrams, not shown)
--
-- Basically, it should not matter which type we run the encoding (or decoding) on (other than performance cost).
--
-- Note that, as a consequence, multi-byte encodings (such as @enc-UTF8@ - available in /typed-encoding-encoding/ package) 
-- that encode a Unicode characters into several bytes cannot be decoded
-- in @ByteString@ as this would violate @EncB8.pack@ and @EncB8.unpack@ consistency.
--
-- Also note that this requirement is concerned about @"enc-"@ encodings, @"r-"@ encodings are much simpler to reason about 
-- in conversions.
--
-- This module also discusses concepts of __Superset__ (for @"r-"@ encodings), __leniency__, and __flattening__. 
module Examples.TypedEncoding.Conversions where

import           Data.TypedEncoding
import           Data.TypedEncoding.Instances.Enc.Base64 ()
import           Data.TypedEncoding.Instances.Restriction.ASCII ()
import           Data.TypedEncoding.Instances.Restriction.UTF8 ()
import           Data.TypedEncoding.Instances.Restriction.D76 ()
import           Data.TypedEncoding.Instances.Restriction.ByteRep ()

import qualified Data.TypedEncoding.Conv.Text as EncT
import qualified Data.TypedEncoding.Conv.Text.Encoding as EncTe -- (decodeUtf8)

import qualified Data.Text as T
import qualified Data.ByteString as B
import           GHC.TypeLits

import qualified Data.TypedEncoding.Conv.ByteString.Char8 as EncB8
import           Data.TypedEncoding.Instances.Restriction.BoundedAlphaNums ()

-- $setup
-- >>> :set -XDataKinds -XMultiParamTypeClasses -XKindSignatures -XFlexibleInstances -XFlexibleContexts -XOverloadedStrings -XTypeApplications -XScopedTypeVariables
-- >>> import qualified Data.TypedEncoding.Instances.Enc.Base64 as EnB64 (acceptLenientS)
-- >>> import qualified Data.TypedEncoding.Conv.Text as EncT (pack, utf8Promote, utf8Demote)
-- >>> import qualified Data.TypedEncoding.Conv.ByteString.Char8 as EncB8 (pack, unpack)
-- >>> import qualified Data.TypedEncoding.Conv.Text.Encoding as EncTe (decodeUtf8, encodeUtf8)
-- >>> import           Data.Proxy
--
-- This module contains some ghci friendly values to play with.
--
-- Each value is documented in a doctest style by including an equivalent ghci ready expression.
-- These documents generate a test suite for this library as well.


-- * Moving between Text and ByteString

eHelloAsciiB :: Either EncodeEx (Enc '["r-ASCII"] () B.ByteString)
eHelloAsciiB = encodeFAll . toEncoding () $ "HeLlo world"
-- ^ Example value to play with
--
-- >>>  encodeFAll . toEncoding () $ "HeLlo world" :: Either EncodeEx (Enc '["r-ASCII"] () B.ByteString) 
-- Right (UnsafeMkEnc Proxy () "HeLlo world")

Right helloAsciiB = eHelloAsciiB
-- ^ above with either removed

helloAsciiT :: Enc '["r-ASCII"] () T.Text
helloAsciiT = EncTe.decodeUtf8 helloAsciiB
-- ^ 
-- We use a tween function of the popular 'Data.Text.Encoding.decodeUtf8' 
-- from the /text/ package.
--
-- Notice the encoding annotation is preserved.
--
-- >>> displ $ EncTe.decodeUtf8 helloAsciiB
-- "Enc '[r-ASCII] () (Text HeLlo world)"


-- * @pack@ from String

helloZero :: Enc ('[] :: [Symbol]) () String
helloZero = toEncoding () "Hello"
-- ^ Consider 0-encoding of a 'String',  to move it to @Enc '[] () ByteString@ one could try:
--
-- >>> EncB8.pack helloZero
-- ...
-- ... error: 
-- ... Empty list, no last element
-- ...
--
-- this does not compile.  And it should not. @pack@ from "Data.ByteString.Char8" is error prone.
-- It is not an injection as it only considers first 8 bits of information from each 'Char'.  
-- I doubt that there are any code examples of its intentional use on a String that has chars @> \'\255\'@.
--
-- @EncB8.pack@ will not compile unless the encoding has "r-CHAR8" as its superset.
-- This works:
-- 
-- >>> fmap (displ . EncB8.pack) . encodeFAll @'["r-ASCII"] @(Either EncodeEx) $ helloZero
-- Right "Enc '[r-ASCII] () (ByteString Hello)"
--
-- And the result is a @ByteString@ with bonus annotation describing its content.
--
-- Similar game is played for @Text@:
--
-- >>> fmap (displ . EncT.d76Demote . EncT.pack) . encodeFAll @'["r-UNICODE.D76"] @(Either EncodeEx) $ helloZero
-- Right "Enc '[] () (Text Hello)"
--
-- See "Data.TypedEncoding.Conv" for more information on this.


helloRestricted :: Either EncodeEx (Enc '["r-ban:zzzzz"] () B.ByteString)
helloRestricted = fmap EncB8.pack . _runEncodings encodings $ toEncoding () "Hello"
-- ^ more interestingly @EncB8.pack@ works fine on "r-" encodings that are subsets of "r-ASCII"
-- this example @"r-ban:zzzzz"@ restricts to 5 alpha-numeric charters all @< \'z\'@
-- 
-- >>> displ <$> helloRestricted
-- Right "Enc '[r-ban:zzzzz] () (ByteString Hello)"
--
-- Adding @"r-ASCII"@ annotation on this ByteString would have been redundant since @"r-ban:zzzzz"@ is more
-- restrictive (see Supersets below).
--
-- @unpack@, as expected will put us back in a String keeping the annotation
--
-- >>> fmap (displ . EncB8.unpack) helloRestricted
-- Right "Enc '[r-ban:zzzzz] () (String Hello)"
-- 

byteRep :: Either EncodeEx (Enc '["r-ByteRep"] () B.ByteString)
byteRep = fmap EncB8.pack . _runEncodings encodings $ toEncoding () "\254"
-- ^ For low level use of @Char@ instead of @Word8@, "r-ByteRep" represents anything under @256@.

-- * More complex rules

helloUtf8B64B :: Enc '["enc-B64", "r-UTF8"] () B.ByteString
helloUtf8B64B = encodePart @'["enc-B64"] helloUtf8B
-- ^ We Base64 encode a ByteString which adheres to UTF8 layout
--
-- >>> displ $ encodePart @'["enc-B64"] helloUtf8B
-- "Enc '[enc-B64,r-UTF8] () (ByteString SGVMbG8gd29ybGQ=)"

helloUtf8B64T :: Enc '["enc-B64"] () T.Text
helloUtf8B64T = EncT.utf8Demote . EncTe.decodeUtf8 $ helloUtf8B64B
-- ^ .. and copy it over to Text.
--
-- >>> displ $ EncTe.decodeUtf8 helloUtf8B64B
-- "Enc '[enc-B64,r-UTF8] () (Text SGVMbG8gd29ybGQ=)"
--
-- but UTF8 would be redundant in Text so the "r-UTF8" can be dropped:
--
-- >>> displ . EncT.utf8Demote . EncTe.decodeUtf8 $ helloUtf8B64B
-- "Enc '[enc-B64] () (Text SGVMbG8gd29ybGQ=)"
--
-- Conversely moving back to ByteString we need to recover the annotation
-- 
-- >>> :t EncTe.encodeUtf8 helloUtf8B64T
-- ...
-- ... Couldn't match type ‘IsSupersetOpen
-- ... "r-UTF8" "enc-B64" ...
-- ...
--
-- This is not allowed! We need to add the redundant "r-UTF8" back:
--
-- >>> displ .  EncTe.encodeUtf8 . EncT.utf8Promote $ helloUtf8B64T
-- "Enc '[enc-B64,r-UTF8] () (ByteString SGVMbG8gd29ybGQ=)"
--
-- To achieve type safety, our @encodeUtf8@ and @decodeUtf8@ require "r-UTF8" annotation. 
-- But since @Text@ values can always emit @UTF8@ layout, we can simply add and remove
-- these annotations on @Text@ encodings.  This approach gives us type level safety over UTF8 encoding/decoding errors.

notTextB :: Enc '["enc-B64"] () B.ByteString
notTextB = encodeAll . toEncoding () $ "\195\177"
-- ^ 'notTextB' a binary, one that does not even represent a valid UTF8.
-- 
-- >>> encodeAll . toEncoding () $ "\195\177" :: Enc '["enc-B64"] () B.ByteString
-- UnsafeMkEnc Proxy () "w7E="
--
-- Decoding it to Text is prevented by the compiler
--
-- >>> :t EncTe.decodeUtf8 notTextB
-- ...
-- ... error:
-- ... Couldn't match type ...
-- ... "r-UTF8" "enc-B64" ...
-- ...
--
-- This is good because having the payload inside of @Enc '["enc-B64"] () Text@ would allow us
-- to try to decode it to Text (causing runtime errors).
-- 
-- We can move it to Text but to do that we will need to forget the "enc-B64" annotation.
-- This can be done, for example, using flattening (see below).


-- * Supersets

helloUtf8B :: Enc '["r-UTF8"] () B.ByteString
helloUtf8B = injectInto helloAsciiB
-- ^ To claim UTF8 on @helloAsciiB@, instead encoding again: 
--
-- >>> encodeFAll . toEncoding () $ "HeLlo world" :: Either EncodeEx (Enc '["r-UTF8"] () B.ByteString)
-- Right (UnsafeMkEnc Proxy () "HeLlo world")
-- 
-- We should be able to convert the ASCII annotation directly.
--
-- This is done using 'IsSuperset' type family.
--
-- @injectInto@ method accepts proxy to specify superset to use.
--
-- >>> displ $ injectInto @ "r-UTF8" helloAsciiB
-- "Enc '[r-UTF8] () (ByteString HeLlo world)"
--
-- Superset is intended for @"r-"@ annotations only, should not be used
-- with general encodings like @"enc-B64"@, it assumes that decoding in the superset
-- can replace the decoding from injected subset.


notTextBB64Ascii :: Enc '["r-ASCII", "enc-B64"] () B.ByteString
notTextBB64Ascii =  _encodesInto notTextB
-- ^ /Base64/ encoding represents binary data in an ASCII string format.
-- 
-- In Haskell, we should be able to express this in types.
--
-- 'EncodingSuperset' class is what specifies this.
--
-- We can use it with '_encodesInto' combinator. 
-- 'EncodingSuperset' should not be used directly at the calling site. 
--
-- >>>  displ (_encodesInto @"r-ASCII" $ notTextB)
-- "Enc '[r-ASCII,enc-B64] () (ByteString w7E=)"
--
-- '_encodesInto' can be used with a superset of the encoding 
-- character set as well making it more backward compatible 
-- (the definition of @EncodingSuperset "enc-B64" could be made more precise without breaking the code).
--
-- >>>  displ (_encodesInto @"r-UTF8" $ notTextB)
-- "Enc '[r-UTF8,enc-B64] () (ByteString w7E=)"
--



-- * Lenient recovery

lenientSomething :: Enc '["enc-B64-len"] () B.ByteString
lenientSomething = recreateAll . toEncoding () $ "abc==CB"
-- ^ 
-- >>> recreateAll . toEncoding () $ "abc==CB" :: Enc '["enc-B64-len"] () B.ByteString
-- UnsafeMkEnc Proxy () "abc==CB"
--
-- The rest of Haskell does lenient decoding, type safety allows this library to use it for recovery.
-- lenient algorithms are not partial and automatically fix invalid input:
--
-- >>> recreateFAll . toEncoding () $ "abc==CB" :: Either RecreateEx (Enc '["enc-B64"] () B.ByteString)
-- Left (RecreateEx "enc-B64" ("invalid padding"))
--
-- This library allows to recover to "enc-B64-len" which is different than "enc-B64"
--
-- 'EnB64.acceptLenientS' allows to convert "enc-B64-len" to "enc-B64"
--
-- >>> displ $ EnB64.acceptLenientS lenientSomething
-- "Enc '[enc-B64] () (ByteString abc=)"
--
-- This is now properly encoded data
--
-- >>> recreateFAll . toEncoding () $ "abc=" :: Either RecreateEx (Enc '["enc-B64"] () B.ByteString)
-- Right (UnsafeMkEnc Proxy () "abc=")
--
-- Except the content could be surprising
--
-- >>> decodeAll $ EnB64.acceptLenientS lenientSomething
-- UnsafeMkEnc Proxy () "i\183"


-- * Flattening

b64IsAscii :: Enc '["r-ASCII"] () B.ByteString
b64IsAscii = flattenAs helloUtf8B64B
-- ^ Base 64 encodes binary data as ASCII text. 
-- thus, we should be able to treat "enc-B64" as "r-ASCII" losing some information.
-- this is done using 'FlattenAs' type class
--
-- >>> :t flattenAs @ "r-ASCII" helloUtf8B64B
-- flattenAs @ "r-ASCII" helloUtf8B64B
-- ... :: Enc '["r-ASCII"] () B.ByteString