{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE GADTSyntax #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE UnboxedTuples #-}

{- | This module provides functions for encoding fixed-width words
using the base-62 encoding scheme. The encoding functions in this
module produce byte sequences that are ASCII-compatible text
encodings (e.g. ISO-8859-1 and UTF-8). Similarly, the decoding
functions only decode byte sequences that are an ASCII-compatible
text encoding of characters in the class @[A-Za-Z0-9]@. Other
encodings (notably UTF-16) are not supported but would be
accepted in a pull request.
-}
module Data.Word.Base62
  ( -- * 64-bit Word
    encode64
  , builder64
  , decode64

    -- * 128-bit Word
  , encode128
  , shortText128
  , text128
  , builder128
  , decode128
  ) where

import Data.ByteString.Short.Internal (ShortByteString (SBS))
import Data.Bytes.Builder.Bounded.Unsafe (Builder (..))
import Data.Bytes.Types (Bytes (Bytes))
import Data.Char (ord)
import Data.Primitive (ByteArray (..), MutableByteArray (MutableByteArray), readByteArray, writeByteArray)
import Data.Text (Text)
import Data.Text.Short (ShortText)
import Data.WideWord.Word128 (Word128 (Word128))
import GHC.Exts (ByteArray#, Char (C#), Int (I#), Int#, Word64#, Word8#, indexCharArray#, isTrue#, quotRemWord#, word64ToWord#, wordToWord64#, (+#), (-#), (>#))
import GHC.ST (ST (ST))
import GHC.Word (Word64 (W64#), Word8 (W8#))

import qualified Arithmetic.Nat as Nat
import qualified Data.Bytes as Bytes
import qualified Data.Bytes.Builder.Bounded as Builder
import qualified Data.Text.Short as TS
import qualified Data.Text.Short.Unsafe as TS
import qualified GHC.Exts as Exts

{- | Base62 encode a 64-bit word. Leading zero bits are suppressed.
Note that this will encode the number 0 as the character '0' rather
than as the empty byte array.
-}
encode64 :: Word64 -> ByteArray
encode64 = Builder.run Nat.constant . builder64

-- | Base62 encode a 64-bit word as a builder.
builder64 :: Word64 -> Builder 11
{-# INLINE builder64 #-}
builder64 (W64# w) = builder64# w

-- | Base62 encode a 128-bit word. Leading zero bits are suppressed.
encode128 :: Word128 -> ByteArray
{-# INLINE encode128 #-}
encode128 = Builder.run Nat.constant . builder128

-- | Base62 encode a 128-bit word as @ShortText@.
shortText128 :: Word128 -> ShortText
{-# INLINE shortText128 #-}
shortText128 !w = case encode128 w of
  ByteArray x -> TS.fromShortByteStringUnsafe (SBS x)

-- | Base62 encode a 128-bit word as @Text@.
text128 :: Word128 -> Text
{-# INLINE text128 #-}
text128 = TS.toText . shortText128

-- | Base62 encode a 128-bit word as a builder.
builder128 :: Word128 -> Builder 22
{-# INLINE builder128 #-}
builder128 (Word128 (W64# a) (W64# b)) = builder128# a b

builder64# :: Word64# -> Builder 11
{-# NOINLINE builder64# #-}
builder64# w0 =
  Builder
    ( \marr off0 s0 -> case word64ToWord# w0 of
        0## -> case Exts.writeWord8Array# marr off0 (Exts.wordToWord8# 48##) s0 of
          s1 -> (# s1, off0 +# 1# #)
        _ ->
          let go ix w s1 = case word64ToWord# w of
                0## -> case reverseBytes (MutableByteArray marr) (I# off0) (I# (ix -# 1#)) of
                  ST f -> case f s1 of
                    (# s2, (_ :: ()) #) -> (# s2, ix #)
                _ ->
                  let !(# q0, r0 #) = quotRemWord# (word64ToWord# w) 62##
                      !q = wordToWord64# q0
                      !r = wordToWord64# r0
                   in case Exts.writeWord8Array# marr ix (unW8 (encodeByte (W64# r))) s1 of
                        s2 -> go (ix +# 1#) q s2
           in go off0 w0 s0
    )

-- Always outputs exactly ten digits. They do not need to be reversed.
builder62pow10# :: Word64# -> Builder 10
{-# NOINLINE builder62pow10# #-}
builder62pow10# w0 =
  Builder
    ( \marr off0 s0 ->
        let go ix d w s1 = case d of
              0# -> (# s1, ix +# 11# #)
              _ ->
                let !(# q0, r0 #) = quotRemWord# (word64ToWord# w) 62##
                    !q = wordToWord64# q0
                    !r = wordToWord64# r0
                 in case Exts.writeWord8Array# marr ix (unW8 (encodeByte (W64# r))) s1 of
                      s2 -> go (ix -# 1#) (d -# 1#) q s2
         in go (off0 +# 9#) 10# w0 s0
    )

builder128# :: Word64# -> Word64# -> Builder 22
{-# NOINLINE builder128# #-}
builder128# wa wb =
  Builder
    ( \marr off0 s0 -> case word64ToWord# wa of
        0## -> case builder64# wb of Builder f -> f marr off0 s0
        _ -> case quotRem (Word128 (W64# wa) (W64# wb)) (Word128 0 n62pow10) of
          (upper@(Word128 upperHi (W64# upperLo)), (Word128 shouldBeZeroA (W64# lower))) -> case shouldBeZeroA of
            0 -> case upperHi of
              0 -> case builder64# upperLo `Builder.append` builder62pow10# lower of Builder f -> f marr off0 s0
              _ -> case quotRem upper (Word128 0 n62pow10) of
                (Word128 shouldBeZeroB (W64# x), Word128 shouldBeZeroC (W64# y)) -> case shouldBeZeroB of
                  0 -> case shouldBeZeroC of
                    0 -> case builder64# x `Builder.append` (builder62pow10# y `Builder.append` builder62pow10# lower) of Builder f -> f marr off0 s0
                    _ -> errorWithoutStackTrace "Data.Word.Base62: logical error c"
                  _ -> errorWithoutStackTrace "Data.Word.Base62: logical error b"
            _ -> errorWithoutStackTrace "Data.Word.Base62: logical error a"
    )

-- Reverse the bytes in the designated slice. This takes
-- an inclusive start offset and an inclusive end offset.
reverseBytes :: MutableByteArray s -> Int -> Int -> ST s ()
{-# INLINE reverseBytes #-}
reverseBytes arr begin end = go begin end
 where
  go ixA ixB =
    if ixA < ixB
      then do
        a :: Word8 <- readByteArray arr ixA
        b :: Word8 <- readByteArray arr ixB
        writeByteArray arr ixA b
        writeByteArray arr ixB a
        go (ixA + 1) (ixB - 1)
      else pure ()

-- Precondition: argument is less than 62.
encodeByte :: Word64 -> Word8
encodeByte w
  | w < 10 = unsafeW8 (c2w '0' + w)
  | w < 36 = unsafeW8 ((c2w 'A' - 10) + w)
  | otherwise = unsafeW8 ((c2w 'a' - 36) + w)

-- We use Char here since it produces more readable Core.
-- Performance is not impacted in any way.
decodeByte :: Char -> Maybe Word64
decodeByte w
  | w >= '0' && w <= '9' = Just (c2w w - c2w '0')
  | w >= 'A' && w <= 'Z' = Just (c2w w - (c2w 'A' - 10))
  | w >= 'a' && w <= 'z' = Just (c2w w - (c2w 'a' - 36))
  | otherwise = Nothing

c2w :: Char -> Word64
c2w = fromIntegral . ord

-- Precondition: the argument is less than 256
unsafeW8 :: Word64 -> Word8
unsafeW8 (W64# w) = W8# (Exts.wordToWord8# (word64ToWord# w))

unW8 :: Word8 -> Word8#
unW8 (W8# w) = w

{- | Decode a base62-encoded 64-bit word. This rejects the empty
string rather than decoding it as zero. This also rejects encoded
numbers greater than or equal to @2^64@.
-}
decode64 :: Bytes -> Maybe Word64
{-# INLINE decode64 #-}
decode64 b@(Bytes _ _ len) = case len of
  0 -> Nothing
  _ -> case decode64# 0 b of
    (# (# #) | #) -> Nothing
    (# | w #) -> Just (W64# w)

-- Worker-wrapper will turn this into good code as long as
-- we do not put a noinline pragma on it. It is recursive,
-- so it cannot inline anywhere.
decode64# :: Word64 -> Bytes -> (# (# #) | Word64# #)
decode64# !acc@(W64# acc#) b@(Bytes arr off len) = case len of
  0 -> (# | acc# #)
  _ -> case decodeByte (indexAsciiArray arr off) of
    Nothing -> (# (# #) | #)
    Just w ->
      -- If we overflow, the accumulator will shrink. We
      -- return Nothing in this case.
      let (overflow, acc') = unsignedPushBase62 acc w
       in if overflow
            then (# (# #) | #)
            else decode64# acc' (Bytes.unsafeDrop 1 b)

n62pow10 :: Word64
n62pow10 = 62 * 62 * 62 * 62 * 62 * 62 * 62 * 62 * 62 * 62

n62pow20 :: Word128
n62pow20 = Word128 0 n62pow10 * Word128 0 n62pow10

-- Precondition: there are at least 10 bytes starting at the offset.
-- The result is in [0,62^10) Notice that we do not need to check
-- for overflow in this function. While @off@ tracks the actual
-- position in the bytearray, @d@ tracks the number of digits
-- consumed by this particular function. The caller should always
-- set @d@ to 0.
unsafeDecode62pow10# :: Word64 -> ByteArray -> Int -> Int -> (# (# #) | Word64# #)
unsafeDecode62pow10# !acc@(W64# acc#) !arr !off !d =
  if d < 10
    then case decodeByte (indexAsciiArray arr off) of
      Nothing -> (# (# #) | #)
      Just w ->
        let acc' = acc * 62 + w
         in unsafeDecode62pow10# acc' arr (off + 1) (d + 1)
    else (# | acc# #)

{- | Decode a base62-encoded 128-bit word. This rejects the empty
string rather than decoding it as zero. This also rejects encoded
numbers greater than or equal to @2^128@.
-}
decode128 :: Bytes -> Maybe Word128
{-# INLINE decode128 #-}
decode128 (Bytes (ByteArray arr) (I# off) (I# len)) =
  case decode128# arr off len of
    (# (# #) | #) -> Nothing
    (# | (# a, b #) #) -> Just (Word128 (W64# a) (W64# b))

decode128# :: ByteArray# -> Int# -> Int# -> (# (# #) | (# Word64#, Word64# #) #)
{-# NOINLINE decode128# #-}
decode128# arr off len
  | isTrue# (len ># 22#) = (# (# #) | #) -- always overflows
  | isTrue# (len ># 20#) =
      case unsafeDecode62pow10# 0 (ByteArray arr) (I# (off +# len -# 10#)) 0 of
        (# (# #) | #) -> (# (# #) | #)
        (# | c #) -> case unsafeDecode62pow10# 0 (ByteArray arr) (I# (off +# len -# 20#)) 0 of
          (# (# #) | #) -> (# (# #) | #)
          (# | b #) -> case decode64# 0 (Bytes (ByteArray arr) (I# off) (I# (len -# 20#))) of
            (# (# #) | #) -> (# (# #) | #)
            (# | a #) ->
              if W64# a < 484
                then
                  let r0 = Word128 0 (W64# c) + (Word128 0 n62pow10 * (Word128 0 (W64# b)))
                      !r1@(Word128 (W64# r1x) (W64# r1y)) = n62pow20 * Word128 0 (W64# a) + r0
                   in if r1 >= r0
                        then (# | (# r1x, r1y #) #)
                        else (# (# #) | #)
                else (# (# #) | #)
  | isTrue# (len ># 10#) =
      case unsafeDecode62pow10# 0 (ByteArray arr) (I# (off +# len -# 10#)) 0 of
        (# (# #) | #) -> (# (# #) | #)
        (# | b #) -> case decode64# 0 (Bytes (ByteArray arr) (I# off) (I# (len -# 10#))) of
          (# (# #) | #) -> (# (# #) | #)
          (# | a #) -> case Word128 0 (W64# b) + (fromIntegral n62pow10 * Word128 0 (W64# a)) of
            Word128 (W64# x) (W64# y) -> (# | (# x, y #) #)
  | otherwise = case decode64# 0 (Bytes (ByteArray arr) (I# off) (I# len)) of
      (# (# #) | #) -> (# (# #) | #)
      (# | w #) -> (# | (# wordToWord64# 0##, w #) #)

indexAsciiArray :: ByteArray -> Int -> Char
indexAsciiArray (ByteArray arr) (I# i) = C# (indexCharArray# arr i)

unsignedPushBase62 :: Word64 -> Word64 -> (Bool, Word64)
{-# INLINE unsignedPushBase62 #-}
unsignedPushBase62 (W64# a) (W64# b) =
  let !(# ca, r0 #) = Exts.timesWord2# (word64ToWord# a) 62##
   in case ca of
        0## ->
          let !r0' = wordToWord64# r0
              !r1 = Exts.plusWord64# r0' b
           in case Exts.ltWord64# r1 r0' of
                1# -> (True, 0)
                _ -> (False, W64# r1)
        _ -> (True, 0)