{-# language BangPatterns #-}
{-# language BinaryLiterals #-}
{-# language TypeApplications #-}

-- | Parse numbers that have been encoded with <https://en.wikipedia.org/wiki/LEB128 LEB-128>.
-- LEB-128 allows arbitrarily large numbers to be encoded. Parsers in this
-- module will fail if the number they attempt to parse is outside the
-- range of what their target type can handle. The parsers for signed
-- numbers assume that the numbers have been
-- <https://developers.google.com/protocol-buffers/docs/encoding zigzig encoded>.
module Data.Bytes.Parser.Leb128
  ( -- * Unsigned
    word16
  , word32
  , word64
    -- * Signed (Zig-zag)
  , int16
  , int32
  , int64
  ) where

import Data.Bits (testBit,(.&.),unsafeShiftR,xor,complement)
import Data.Bits (unsafeShiftL,(.|.))
import Data.Bytes.Parser (Parser)
import Data.Int (Int16,Int32,Int64)
import Data.Word (Word8,Word16,Word32,Word64)

import qualified Data.Bytes.Parser as P

-- | Parse a LEB-128-encoded number. If the number is larger
-- than @0xFFFF@, fails with the provided error.
word16 :: e -> Parser e s Word16
word16 :: forall e s. e -> Parser e s Word16
word16 e
e = do
  Word64
w <- forall e s. e -> Int -> Word64 -> Int -> Parser e s Word64
stepBoundedWord e
e Int
16 Word64
0 Int
0
  forall (f :: * -> *) a. Applicative f => a -> f a
pure (forall a b. (Integral a, Num b) => a -> b
fromIntegral @Word64 @Word16 Word64
w)

-- | Parse a LEB-128-encoded number. If the number is larger
-- than @0xFFFFFFFF@, fails with the provided error.
word32 :: e -> Parser e s Word32
word32 :: forall e s. e -> Parser e s Word32
word32 e
e = do
  Word64
w <- forall e s. e -> Int -> Word64 -> Int -> Parser e s Word64
stepBoundedWord e
e Int
32 Word64
0 Int
0
  forall (f :: * -> *) a. Applicative f => a -> f a
pure (forall a b. (Integral a, Num b) => a -> b
fromIntegral @Word64 @Word32 Word64
w)

-- | Parse a LEB-128-encoded number. If the number is larger
-- than @0xFFFFFFFFFFFFFFFF@, fails with the provided error.
word64 :: e -> Parser e s Word64
word64 :: forall e s. e -> Parser e s Word64
word64 e
e = forall e s. e -> Int -> Word64 -> Int -> Parser e s Word64
stepBoundedWord e
e Int
64 Word64
0 Int
0

-- | Parse a LEB-128-zigzag-encoded signed number. If the encoded
-- number is outside the range @[-32768,32767]@, this fails with
-- the provided error.
int16 :: e -> Parser e s Int16
int16 :: forall e s. e -> Parser e s Int16
int16 = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Word16 -> Int16
zigzagDecode16 forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall e s. e -> Parser e s Word16
word16

-- | Parse a LEB-128-zigzag-encoded signed number. If the encoded
-- number is outside the range @[-2147483648,2147483647]@, this
-- fails with the provided error.
int32 :: e -> Parser e s Int32
int32 :: forall e s. e -> Parser e s Int32
int32 = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Word32 -> Int32
zigzagDecode32 forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall e s. e -> Parser e s Word32
word32

-- | Parse a LEB-128-zigzag-encoded signed number. If the encoded
-- number is outside the range @[-9223372036854775808,9223372036854775807]@,
-- this fails with the provided error.
int64 :: e -> Parser e s Int64
int64 :: forall e s. e -> Parser e s Int64
int64 = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Word64 -> Int64
zigzagDecode64 forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall e s. e -> Parser e s Word64
word64

-- What these parameters are:
--
-- bitLimit: number of bits in the target word size
-- accShift: shift amount, increases by 7 at a time
stepBoundedWord :: e -> Int -> Word64 -> Int -> Parser e s Word64
stepBoundedWord :: forall e s. e -> Int -> Word64 -> Int -> Parser e s Word64
stepBoundedWord e
e !Int
bitLimit !Word64
acc0 !Int
accShift = do
  Word8
raw <- forall e s. e -> Parser e s Word8
P.any e
e
  let number :: Word8
number = Word8
raw forall a. Bits a => a -> a -> a
.&. Word8
0x7F
      acc1 :: Word64
acc1 = Word64
acc0 forall a. Bits a => a -> a -> a
.|.
        forall a. Bits a => a -> Int -> a
unsafeShiftL (forall a b. (Integral a, Num b) => a -> b
fromIntegral @Word8 @Word64 Word8
number) Int
accShift
      accShift' :: Int
accShift' = Int
accShift forall a. Num a => a -> a -> a
+ Int
7
  if Int
accShift' forall a. Ord a => a -> a -> Bool
<= Int
bitLimit
    then if forall a. Bits a => a -> Int -> Bool
testBit Word8
raw Int
7
      then forall e s. e -> Int -> Word64 -> Int -> Parser e s Word64
stepBoundedWord e
e Int
bitLimit Word64
acc1 Int
accShift'
      else forall (f :: * -> *) a. Applicative f => a -> f a
pure Word64
acc1
    else if forall a b. (Integral a, Num b) => a -> b
fromIntegral @Word8 @Word Word8
raw forall a. Ord a => a -> a -> Bool
< Int -> Word
twoExp (Int
bitLimit forall a. Num a => a -> a -> a
- Int
accShift)
      then forall (f :: * -> *) a. Applicative f => a -> f a
pure Word64
acc1 -- TODO: no need to mask upper bit in number
      else forall e s a. e -> Parser e s a
P.fail e
e

twoExp :: Int -> Word
twoExp :: Int -> Word
twoExp Int
x = forall a. Bits a => a -> Int -> a
unsafeShiftL Word
1 Int
x

-- Zigzag decode strategy taken from https://stackoverflow.com/a/2211086/1405768
-- The accepted answer is a little bit, so an answer further down was used:
--
-- > zigzag_decode(value) = ( value >> 1 ) ^ ( ~( value & 1 ) + 1 )
zigzagDecode16 :: Word16 -> Int16
zigzagDecode16 :: Word16 -> Int16
zigzagDecode16 Word16
n =
  forall a b. (Integral a, Num b) => a -> b
fromIntegral ((forall a. Bits a => a -> Int -> a
unsafeShiftR Word16
n Int
1) forall a. Bits a => a -> a -> a
`xor` (forall a. Bits a => a -> a
complement (Word16
n forall a. Bits a => a -> a -> a
.&. Word16
1) forall a. Num a => a -> a -> a
+ Word16
1))

zigzagDecode32 :: Word32 -> Int32
zigzagDecode32 :: Word32 -> Int32
zigzagDecode32 Word32
n =
  forall a b. (Integral a, Num b) => a -> b
fromIntegral ((forall a. Bits a => a -> Int -> a
unsafeShiftR Word32
n Int
1) forall a. Bits a => a -> a -> a
`xor` (forall a. Bits a => a -> a
complement (Word32
n forall a. Bits a => a -> a -> a
.&. Word32
1) forall a. Num a => a -> a -> a
+ Word32
1))

zigzagDecode64 :: Word64 -> Int64
zigzagDecode64 :: Word64 -> Int64
zigzagDecode64 Word64
n =
  forall a b. (Integral a, Num b) => a -> b
fromIntegral ((forall a. Bits a => a -> Int -> a
unsafeShiftR Word64
n Int
1) forall a. Bits a => a -> a -> a
`xor` (forall a. Bits a => a -> a
complement (Word64
n forall a. Bits a => a -> a -> a
.&. Word64
1) forall a. Num a => a -> a -> a
+ Word64
1))