{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE Trustworthy #-}
-- |
-- Module       : Data.ByteString.Base64.URL
-- Copyright    : (c) 2019-2020 Emily Pillmore
-- License      : BSD-style
--
-- Maintainer   : Emily Pillmore <emilypi@cohomolo.gy>
-- Stability    : stable
-- Portability  : non-portable
--
-- This module contains 'Data.ByteString.ByteString'-valued combinators for
-- implementing the RFC 4648 specification of the Base64url
-- encoding format. This includes strictly padded/unpadded and lenient decoding
-- variants, as well as internal and external validation for canonicity.
--
module Data.ByteString.Base64.URL
( -- * Encoding
  encodeBase64
, encodeBase64'
, encodeBase64Unpadded
, encodeBase64Unpadded'
  -- * Decoding
, decodeBase64
, decodeBase64Unpadded
, decodeBase64Padded
, decodeBase64Lenient
  -- * Validation
, isBase64Url
, isValidBase64Url
) where


import qualified Data.ByteString as BS
import Data.ByteString.Internal (ByteString(..))
import Data.ByteString.Base64.Internal
import Data.ByteString.Base64.Internal.Head
import Data.ByteString.Base64.Internal.Tables
import Data.Either (isRight)
import Data.Text (Text)
import qualified Data.Text.Encoding as T

import System.IO.Unsafe


-- | Encode a 'ByteString' value as a Base64url 'Text' value with padding.
--
-- This is 'encodeBase64'' with its output bytes converted to 'Text'
-- via @T.decodeUtf8@.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-5 RFC-4648 section 5>
--
-- === __Examples__:
--
-- >>> encodeBase64 "<<?>>"
-- "PDw_Pj4="
--
encodeBase64 :: ByteString -> Text
encodeBase64 = T.decodeUtf8 . encodeBase64'
{-# INLINE encodeBase64 #-}

-- | Encode a 'ByteString' as a Base64url 'ByteString' value with padding.
--
-- Delegates to the internal encoder, parameterised by the Base64url
-- encoding table.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-5 RFC-4648 section 5>
--
-- === __Examples__:
--
-- >>> encodeBase64' "<<?>>"
-- "PDw_Pj4="
--
encodeBase64' :: ByteString -> ByteString
encodeBase64' = encodeBase64_ base64UrlTable

-- | Decode a padded or unpadded Base64url encoded 'ByteString' value. If its
-- length is not a multiple of 4, then padding chars will be added to fill out
-- the input to a multiple of 4 for safe decoding, as Base64url-encoded values
-- are optionally padded.
--
-- The empty input decodes to itself. An input whose length is congruent to
-- 1 modulo 4 can never be valid and is rejected with an invalid-size error.
--
-- For decoders that insist on one form, use 'decodeBase64Padded' (padding
-- required) or 'decodeBase64Unpadded' (padding forbidden).
--
-- See: <https://tools.ietf.org/html/rfc4648#section-4 RFC-4648 section 4>
--
-- === __Examples__:
--
-- >>> decodeBase64 "PDw_Pj4="
-- Right "<<?>>"
--
-- >>> decodeBase64 "PDw_Pj4"
-- Right "<<?>>"
--
-- >>> decodeBase64 "PDw-Pg="
-- Left "Base64-encoded bytestring has invalid padding"
--
-- >>> decodeBase64 "PDw-Pg"
-- Right "<<>>"
--
decodeBase64 :: ByteString -> Either Text ByteString
decodeBase64 bs@(PS _ _ !l)
    | l == 0 = Right bs
    -- Already a whole number of 4-char groups: decode as-is.
    | r == 0 = unsafeDupablePerformIO $ decodeBase64_ dlen decodeB64UrlTable bs
    -- Two chars short of a full group: supply the two missing pad chars.
    | r == 2 = unsafeDupablePerformIO $ decodeBase64_ dlen decodeB64UrlTable (BS.append bs "==")
    -- One char short: the original (unpadded) input is handed to
    -- 'validateLastPad' together with the decode action on the padded input.
    | r == 3 = validateLastPad bs $ decodeBase64_ dlen decodeB64UrlTable (BS.append bs "=")
    -- r == 1: no valid encoding has this length.
    | otherwise = Left "Base64-encoded bytestring has invalid size"
  where
    !q = l `quot` 4
    !r = l `rem` 4
    -- NOTE(review): dlen is derived from the length of the *original* input,
    -- while the r == 2 / r == 3 branches pass a longer, padded input to
    -- 'decodeBase64_'. If 'decodeBase64_' uses dlen as the output allocation
    -- size, those branches may need (q + 1) * 3 -- confirm against the
    -- internal module.
    !dlen = q * 3
{-# INLINE decodeBase64 #-}

-- | Encode a 'ByteString' value as Base64url 'Text' without padding. Note that for Base64url,
-- padding is optional. If you call this function, you will simply be encoding
-- as Base64url and stripping padding chars from the output.
--
-- This is 'encodeBase64Unpadded'' with its output bytes converted to 'Text'
-- via @T.decodeUtf8@.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-3.2 RFC-4648 section 3.2>
--
-- === __Examples__:
--
-- >>> encodeBase64Unpadded "<<?>>"
-- "PDw_Pj4"
--
encodeBase64Unpadded :: ByteString -> Text
encodeBase64Unpadded = T.decodeUtf8 . encodeBase64Unpadded'
{-# INLINE encodeBase64Unpadded #-}

-- | Encode a 'ByteString' value as Base64url without padding. Note that for Base64url,
-- padding is optional. If you call this function, you will simply be encoding
-- as Base64url and stripping padding chars from the output.
--
-- Delegates to the internal no-padding encoder, parameterised by the
-- Base64url encoding table.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-3.2 RFC-4648 section 3.2>
--
-- === __Examples__:
--
-- >>> encodeBase64Unpadded' "<<?>>"
-- "PDw_Pj4"
--
encodeBase64Unpadded' :: ByteString -> ByteString
encodeBase64Unpadded' = encodeBase64Nopad_ base64UrlTable

-- | Decode an unpadded Base64url-encoded 'ByteString' value. Input strings are
-- required to be unpadded, and will undergo validation prior to decoding to
-- confirm.
--
-- The empty input decodes to itself. An input whose length is congruent to
-- 1 modulo 4 is rejected with an invalid-size error. Every other input is
-- passed through 'validateLastPad' before the decoder runs; missing pad
-- chars are appended internally so the decoder sees whole 4-char groups.
--
-- In general, unless unpadded Base64url is explicitly required, it is
-- safer to call 'decodeBase64'.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-4 RFC-4648 section 4>
--
-- === __Examples__:
--
-- >>> decodeBase64Unpadded "PDw_Pj4"
-- Right "<<?>>"
--
-- >>> decodeBase64Unpadded "PDw_Pj4="
-- Left "Base64-encoded bytestring has invalid padding"
--
decodeBase64Unpadded :: ByteString -> Either Text ByteString
decodeBase64Unpadded bs@(PS _ _ !l)
    | l == 0 = Right bs
    -- In every decoding branch the *original* input is what gets validated;
    -- the (possibly padded) copy is only what the decoder consumes.
    | r == 0 = validateLastPad bs $ decodeBase64_ dlen decodeB64UrlTable bs
    | r == 2 = validateLastPad bs $ decodeBase64_ dlen decodeB64UrlTable (BS.append bs "==")
    | r == 3 = validateLastPad bs $ decodeBase64_ dlen decodeB64UrlTable (BS.append bs "=")
    -- r == 1: no valid encoding has this length.
    | otherwise = Left "Base64-encoded bytestring has invalid size"
  where
    !q = l `quot` 4
    !r = l `rem` 4
    -- NOTE(review): dlen is derived from the length of the *original* input,
    -- while the r == 2 / r == 3 branches pass a longer, padded input to
    -- 'decodeBase64_'. If 'decodeBase64_' uses dlen as the output allocation
    -- size, those branches may need (q + 1) * 3 -- confirm against the
    -- internal module.
    !dlen = q * 3
{-# INLINE decodeBase64Unpadded #-}

-- | Decode a padded Base64url-encoded 'ByteString' value. Input strings are
-- required to be correctly padded, and will be validated prior to decoding
-- to confirm.
--
-- The empty input decodes to itself. An input whose length is congruent to
-- 1 modulo 4 is rejected with an invalid-size error; any other length that
-- is not a multiple of 4 is rejected as requiring padding. Only inputs made
-- of whole 4-char groups reach the decoder.
--
-- In general, unless padded Base64url is explicitly required, it is
-- safer to call 'decodeBase64'.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-4 RFC-4648 section 4>
--
-- === __Examples__:
--
-- >>> decodeBase64Padded "PDw_Pj4="
-- Right "<<?>>"
--
-- >>> decodeBase64Padded "PDw_Pj4"
-- Left "Base64-encoded bytestring requires padding"
--
decodeBase64Padded :: ByteString -> Either Text ByteString
decodeBase64Padded bs@(PS _ _ !l)
    | l == 0 = Right bs
    -- Checked before the generic "requires padding" case so that lengths
    -- which no amount of padding could fix get the more precise error.
    | r == 1 = Left "Base64-encoded bytestring has invalid size"
    | r /= 0 = Left "Base64-encoded bytestring requires padding"
    | otherwise = unsafeDupablePerformIO $ decodeBase64_ dlen decodeB64UrlTable bs
  where
    !q = l `quot` 4
    !r = l `rem` 4
    !dlen = q * 3
{-# INLINE decodeBase64Padded #-}

-- | Leniently decode an unpadded Base64url-encoded 'ByteString'. This function
-- will not generate parse errors. If input data contains padding chars,
-- then the input will be parsed up until the first pad character.
--
-- Delegates to the internal lenient decoder, parameterised by the Base64url
-- decoding table.
--
-- __Note:__ This is not RFC 4648-compliant.
--
-- === __Examples__:
--
-- >>> decodeBase64Lenient "PDw_Pj4="
-- "<<?>>"
--
-- >>> decodeBase64Lenient "PDw_%%%$}Pj4"
-- "<<?>>"
--
decodeBase64Lenient :: ByteString -> ByteString
decodeBase64Lenient = decodeBase64Lenient_ decodeB64UrlTable
{-# INLINE decodeBase64Lenient #-}

-- | Tell whether a 'ByteString' is encoded in padded /or/ unpadded Base64url format.
--
-- The input must both pass the shape check 'isValidBase64Url' and decode
-- successfully with 'decodeBase64'.
--
-- This function will also detect non-canonical encodings such as @ZE==@, which are
-- externally valid Base64url-encoded values, but are internally inconsistent "impossible"
-- values.
--
-- === __Examples__:
--
-- >>> isBase64Url "PDw_Pj4="
-- True
--
-- >>> isBase64Url "PDw_Pj4"
-- True
--
-- >>> isBase64Url "PDw_Pj"
-- False
--
isBase64Url :: ByteString -> Bool
isBase64Url bs = isValidBase64Url bs && isRight (decodeBase64 bs)
{-# INLINE isBase64Url #-}

-- | Tell whether a 'ByteString' is a valid Base64url format.
--
-- This will not tell you whether or not this is a correct Base64url representation,
-- only that it conforms to the correct shape. To check whether it is a true
-- Base64 encoded 'ByteString' value, use 'isBase64Url'.
--
-- The check is delegated to the internal validator, which is given the
-- 64-character Base64url alphabet (@A-Z@, @a-z@, @0-9@, @-@, @_@).
--
-- === __Examples__:
--
-- >>> isValidBase64Url "PDw_Pj4="
-- True
--
-- >>> isValidBase64Url "PDw_Pj"
-- True
--
-- >>> isValidBase64Url "%"
-- False
--
isValidBase64Url :: ByteString -> Bool
isValidBase64Url = validateBase64Url "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"
{-# INLINE isValidBase64Url #-}