{-# LANGUAGE DataKinds #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TypeApplications #-}

-- | Examples of moving between type annotated encodings
--
-- Modules that define encoding and decoding instances also provide conversion functions.
--
-- Currently, these are separate functions; generalization of conversions seems hard.
--
-- These examples discuss handling of __subsets__ (for character sets), __leniency__, and __flattening__.
module Examples.TypedEncoding.Conversions where

import           Data.TypedEncoding
import qualified Data.TypedEncoding.Instances.Enc.Base64 as EnB64
import qualified Data.TypedEncoding.Instances.Restriction.ASCII as EnASCII
-- import qualified Data.TypedEncoding.Instances.Restriction.UTF8 as EnUTF8

import qualified Data.Text as T
import qualified Data.ByteString as B

-- $setup
-- >>> :set -XOverloadedStrings -XMultiParamTypeClasses -XDataKinds -XTypeApplications
-- >>> import Data.TypedEncoding.Instances.Restriction.UTF8 ()
-- >>> import Data.Proxy
--
-- This module contains some ghci friendly values to play with.
--
-- Each value is documented in a doctest style by including an equivalent ghci ready expression.
-- These documents generate a test suite for this library as well.


-- * Moving between Text and ByteString

eHelloAsciiB :: Either EncodeEx (Enc '["r-ASCII"] () B.ByteString)
eHelloAsciiB = encodeFAll . toEncoding () $ "HeLlo world"
-- ^ Example value to play with
--
-- >>> encodeFAll . toEncoding () $ "HeLlo world" :: Either EncodeEx (Enc '["r-ASCII"] () B.ByteString)
-- Right (MkEnc Proxy () "HeLlo world")

helloAsciiB :: Enc '["r-ASCII"] () B.ByteString
helloAsciiB = either (error . ("helloAsciiB: unexpected encoding failure: " ++) . show) id eHelloAsciiB
-- ^ 'eHelloAsciiB' with the 'Either' removed.
--
-- The literal is plain ASCII, so the 'Left' case is not expected to occur;
-- if it ever does, evaluation stops with a message that includes the 'EncodeEx'.

helloAsciiT :: Enc '["r-ASCII"] () T.Text
helloAsciiT = EnASCII.byteString2TextS helloAsciiB
-- ^ When converted to Text the annotation is preserved.
--
-- Currently a separate function is defined for each allowed conversion.
--
-- >>> displ $ EnASCII.byteString2TextS helloAsciiB
-- "MkEnc '[r-ASCII] () (Text HeLlo world)"


-- * Subsets

helloUtf8B :: Enc '["r-UTF8"] () B.ByteString
helloUtf8B = inject helloAsciiB
-- ^ To get UTF8 annotation, instead of doing this:
--
-- >>> encodeFAll . toEncoding () $ "HeLlo world" :: Either EncodeEx (Enc '["r-UTF8"] () B.ByteString)
-- Right (MkEnc Proxy () "HeLlo world")
--
-- We should be able to convert the ASCII version.
--
-- This is done using 'Superset' typeclass.
--
-- The superset to @inject@ into is selected by the result type, given either
-- by a type signature (as in this definition) or by a type application:
--
-- >>> displ $ inject @ "r-UTF8" helloAsciiB
-- "MkEnc '[r-UTF8] () (ByteString HeLlo world)"


-- * More complex rules

helloUtf8B64B :: Enc '["enc-B64", "r-UTF8"] () B.ByteString
helloUtf8B64B = encodePart @'["enc-B64"] helloUtf8B
-- ^ We put Base64 on the UTF8 ByteString
--
-- >>> displ $ encodePart_ (Proxy :: Proxy '["enc-B64"]) helloUtf8B
-- "MkEnc '[enc-B64,r-UTF8] () (ByteString SGVMbG8gd29ybGQ=)"

helloUtf8B64T :: Enc '["enc-B64"] () T.Text
helloUtf8B64T = EnB64.byteString2TextS helloUtf8B64B
-- ^ .. and copy it over to Text.
-- but UTF8 would be redundant in Text so the "r-UTF8" is dropped
--
-- >>> :t EnB64.byteString2TextS helloUtf8B64B
-- EnB64.byteString2TextS helloUtf8B64B :: Enc '["enc-B64"] () T.Text
--
-- Conversely moving back to ByteString recovers the annotation.
-- (there could be a choice of a UTF annotation to recover in the future)
--
-- >>> :t EnB64.text2ByteStringS helloUtf8B64T
-- EnB64.text2ByteStringS helloUtf8B64T
-- ... :: Enc '["enc-B64", "r-UTF8"] () B.ByteString

notTextB :: Enc '["enc-B64"] () B.ByteString
notTextB = encodeAll . toEncoding () $ "\195\177"
-- ^ 'notTextB' is a binary payload: it carries no "r-UTF8" annotation,
-- so its type gives no guarantee that the decoded bytes are valid UTF8 text.
--
-- >>> encodeAll . toEncoding () $ "\195\177" :: Enc '["enc-B64"] () B.ByteString
-- MkEnc Proxy () "w7E="
--
-- 'EnB64.byteString2TextS'' is a function that allows converting a Base 64 ByteString
-- that is not annotated as UTF8.
--
-- >>> :t EnB64.byteString2TextS' notTextB
-- EnB64.byteString2TextS' notTextB
-- ... :: Enc '["enc-B64-nontext"] () T.Text
--
-- The result is annotated as "enc-B64-nontext" which prevents decoding it within 'T.Text' type.
-- We can only move it back to ByteString as "enc-B64".


-- * Lenient recovery

lenientSomething :: Enc '["enc-B64-len"] () B.ByteString
lenientSomething = recreateAll . toEncoding () $ "abc==CB"
-- ^ Example of recovering input under the lenient "enc-B64-len" annotation.
--
-- >>> recreateAll . toEncoding () $ "abc==CB" :: Enc '["enc-B64-len"] () B.ByteString
-- MkEnc Proxy () "abc==CB"
--
-- The rest of Haskell does lenient decoding, type safety allows this library to use it for recovery.
-- Lenient algorithms are not partial and automatically fix invalid input,
-- whereas strict recovery of the same input fails:
--
-- >>> recreateFAll . toEncoding () $ "abc==CB" :: Either RecreateEx (Enc '["enc-B64"] () B.ByteString)
-- Left (RecreateEx "enc-B64" ("invalid padding"))
--
-- This library allows to recover to "enc-B64-len" which is different than "enc-B64"
--
-- 'EnB64.acceptLenientS' allows to convert "enc-B64-len" to "enc-B64"
--
-- >>> displ $ EnB64.acceptLenientS lenientSomething
-- "MkEnc '[enc-B64] () (ByteString abc=)"
--
-- This is now properly encoded data
--
-- >>> recreateFAll . toEncoding () $ "abc=" :: Either RecreateEx (Enc '["enc-B64"] () B.ByteString)
-- Right (MkEnc Proxy () "abc=")
--
-- Except the content could be surprising
--
-- >>> decodeAll $ EnB64.acceptLenientS lenientSomething
-- MkEnc Proxy () "i\183"


-- * Flattening

b64IsAscii :: Enc '["r-ASCII"] () B.ByteString
b64IsAscii = flattenAs helloUtf8B64B
-- ^ Base 64 encodes binary data as ASCII text.
-- thus, we should be able to treat "enc-B64" as "r-ASCII" losing some information.
-- this is done using 'FlattenAs' type class
--
-- >>> :t flattenAs @ "r-ASCII" helloUtf8B64B
-- flattenAs @ "r-ASCII" helloUtf8B64B
-- ... :: Enc '["r-ASCII"] () B.ByteString