{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE OverloadedStrings #-}
-- |
-- Module       : Data.ByteString.Base64
-- Copyright    : (c) 2019-2023 Emily Pillmore
-- License      : BSD-style
--
-- Maintainer   : Emily Pillmore <emilypi@cohomolo.gy>
-- Stability    : stable
-- Portability  : non-portable
--
-- This module contains 'Data.ByteString.ByteString'-valued combinators for
-- implementing the RFC 4648 specification of the Base64
-- encoding format. This includes lenient decoding variants, as well as
-- internal and external validation for canonicity.
--
module Data.ByteString.Base64
( -- * Encoding
  encodeBase64
, encodeBase64'
  -- * Decoding
, decodeBase64
, decodeBase64Untyped
, decodeBase64Lenient
  -- * Validation
, isBase64
, isValidBase64
) where

import Data.Base64.Types

import Data.ByteString.Internal (ByteString(..))
import Data.ByteString.Base64.Internal
import Data.ByteString.Base64.Internal.Head
import Data.ByteString.Base64.Internal.Tables
import Data.Either (isRight)
import Data.Text (Text)
import qualified Data.Text.Encoding as T

import System.IO.Unsafe

-- $setup
--
-- >>> import Data.Base64.Types
-- >>> :set -XOverloadedStrings
-- >>> :set -XTypeApplications
-- >>> :set -XDataKinds
--

-- | Encode a 'ByteString' value as Base64 'Text' with padding.
--
-- This is 'encodeBase64'' with the encoded bytes converted to 'Text'
-- (via 'T.decodeUtf8') underneath the 'Base64' wrapper.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-4 RFC-4648 section 4>
--
-- === __Examples__:
--
-- >>> encodeBase64 "Sun"
-- "U3Vu"
--
encodeBase64 :: ByteString -> Base64 'StdPadded Text
-- NOTE(review): 'T.decodeUtf8' throws on invalid UTF-8; presumably this
-- cannot happen here because Base64 output is ASCII-only -- confirm against
-- the encoding table used by 'encodeBase64''.
encodeBase64 = fmap T.decodeUtf8 . encodeBase64'
{-# inline encodeBase64 #-}

-- | Encode a 'ByteString' value as a Base64 'ByteString' value with padding.
--
-- The raw bytes are encoded with 'encodeBase64_' using 'base64Table', and
-- the result is tagged as @'StdPadded@ with 'assertBase64'.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-4 RFC-4648 section 4>
--
-- === __Examples__:
--
-- >>> encodeBase64' "Sun"
-- "U3Vu"
--
encodeBase64' :: ByteString -> Base64 'StdPadded ByteString
encodeBase64' = assertBase64 . encodeBase64_ base64Table
{-# inline encodeBase64' #-}

-- | Decode a padded Base64-encoded 'ByteString' value.
--
-- The input must carry a 'Base64' tag whose alphabet satisfies
-- 'StdAlphabet'; the result is a plain 'ByteString' rather than an
-- 'Either'. Decoding is delegated to 'decodeBase64Typed_' with
-- 'decodeB64Table'.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-4 RFC-4648 section 4>
--
-- === __Examples__:
--
-- >>> decodeBase64 $ assertBase64 @'StdPadded "U3Vu"
-- "Sun"
--
decodeBase64 :: StdAlphabet k => Base64 k ByteString -> ByteString
decodeBase64 = decodeBase64Typed_ decodeB64Table
{-# inline decodeBase64 #-}

-- | Decode a padded untyped Base64-encoded 'ByteString' value.
--
-- The input length is checked before any decoding takes place:
--
-- * empty input decodes to the empty 'ByteString';
-- * a length that leaves remainder 1 modulo 4 is rejected as an invalid size;
-- * any other non-zero remainder is rejected as missing padding;
-- * otherwise the input is handed to 'decodeBase64_' with 'decodeB64Table',
--   which may itself return a 'Left' (see the last example below).
--
-- See: <https://tools.ietf.org/html/rfc4648#section-4 RFC-4648 section 4>
--
-- === __Examples__:
--
-- >>> decodeBase64Untyped "U3Vu"
-- Right "Sun"
--
-- >>> decodeBase64Untyped "U3V"
-- Left "Base64-encoded bytestring requires padding"
--
-- >>> decodeBase64Untyped "U3V="
-- Left "non-canonical encoding detected at offset: 2"
--
decodeBase64Untyped :: ByteString -> Either Text ByteString
decodeBase64Untyped bs@(PS _ _ !l)
    | l == 0 = Right mempty
    | r == 1 = Left "Base64-encoded bytestring has invalid size"
    | r /= 0 = Left "Base64-encoded bytestring requires padding"
      -- NOTE(review): 'decodeBase64_' runs in IO; this presumably only reads
      -- the input buffer and fills a freshly allocated one, which is what
      -- would make the dupable unsafe escape sound -- confirm against its
      -- definition in the Internal modules.
    | otherwise = unsafeDupablePerformIO $ decodeBase64_ decodeB64Table bs
  where
    -- Remainder of the input length modulo 4, forced eagerly.
    !r = l `rem` 4
{-# inline decodeBase64Untyped #-}

-- | Leniently decode an untyped Base64-encoded 'ByteString' value. This function
-- will not generate parse errors. If input data contains padding chars,
-- then the input will be parsed up until the first pad character.
--
-- Decoding is delegated to 'decodeBase64Lenient_' with 'decodeB64Table'.
--
-- __Note:__ This is not RFC 4648-compliant.
--
-- === __Examples__:
--
-- >>> decodeBase64Lenient "U3Vu"
-- "Sun"
--
-- >>> decodeBase64Lenient "U3V"
-- "Su"
--
-- >>> decodeBase64Lenient "U3V="
-- "Su"
--
decodeBase64Lenient :: ByteString -> ByteString
decodeBase64Lenient = decodeBase64Lenient_ decodeB64Table
{-# inline decodeBase64Lenient #-}

-- | Tell whether a 'ByteString' value is base64 encoded.
--
-- A value passes when it is accepted by 'isValidBase64' and
-- 'decodeBase64Untyped' returns a 'Right' for it.
--
-- This function will also detect non-canonical encodings such as @ZE==@, which are
-- externally valid Base64-encoded values, but are internally inconsistent "impossible"
-- values.
--
-- === __Examples__:
--
-- >>> isBase64 "U3Vu"
-- True
--
-- >>> isBase64 "U3V"
-- False
--
-- >>> isBase64 "U3V="
-- False
--
isBase64 :: ByteString -> Bool
isBase64 bs
  = isValidBase64 bs
  && isRight (decodeBase64Untyped bs)
{-# inline isBase64 #-}

-- | Tell whether a 'ByteString' value is a valid Base64 format.
--
-- This will not tell you whether or not this is a correct Base64 representation,
-- only that it conforms to the correct shape. To check whether it is a true
-- Base64 encoded 'ByteString' value, use 'isBase64'.
--
-- The check is delegated to 'validateBase64', supplied with the standard
-- 64-character alphabet (@A-Z@, @a-z@, @0-9@, @+@, @/@).
--
-- === __Examples__:
--
-- >>> isValidBase64 "U3Vu"
-- True
--
-- >>> isValidBase64 "U3V"
-- True
--
-- >>> isValidBase64 "U3V="
-- True
--
-- >>> isValidBase64 "%"
-- False
--
isValidBase64 :: ByteString -> Bool
isValidBase64 = validateBase64 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
{-# inline isValidBase64 #-}