{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE BinaryLiterals #-}
{-# LANGUAGE TypeApplications #-}

{- | Parse numbers that have been encoded with <https://en.wikipedia.org/wiki/LEB128 LEB-128>.
LEB-128 allows arbitrarily large numbers to be encoded. Parsers in this
module will fail if the number they attempt to parse is outside the
range of what their target type can handle. The parsers for signed
numbers assume that the numbers have been
<https://developers.google.com/protocol-buffers/docs/encoding zigzig encoded>.
-}
module Data.Bytes.Parser.Leb128
  ( -- * Unsigned
    word16
  , word32
  , word64

    -- * Signed (Zig-zag)
  , int16
  , int32
  , int64
  ) where

import Data.Bits (complement, testBit, unsafeShiftL, unsafeShiftR, xor, (.&.), (.|.))
import Data.Bytes.Parser (Parser)
import Data.Int (Int16, Int32, Int64)
import Data.Word (Word16, Word32, Word64, Word8)

import qualified Data.Bytes.Parser as P

{- | Parse a LEB-128-encoded number. If the number is larger
than @0xFFFF@, fails with the provided error.
-}
word16 :: e -> Parser e s Word16
word16 :: forall e s. e -> Parser e s Word16
word16 e
e = do
  Word64
w <- e -> Int -> Word64 -> Int -> Parser e s Word64
forall e s. e -> Int -> Word64 -> Int -> Parser e s Word64
stepBoundedWord e
e Int
16 Word64
0 Int
0
  Word16 -> Parser e s Word16
forall a. a -> Parser e s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (forall a b. (Integral a, Num b) => a -> b
fromIntegral @Word64 @Word16 Word64
w)

{- | Parse a LEB-128-encoded number. If the number is larger
than @0xFFFFFFFF@, fails with the provided error.
-}
word32 :: e -> Parser e s Word32
word32 :: forall e s. e -> Parser e s Word32
word32 e
e = do
  Word64
w <- e -> Int -> Word64 -> Int -> Parser e s Word64
forall e s. e -> Int -> Word64 -> Int -> Parser e s Word64
stepBoundedWord e
e Int
32 Word64
0 Int
0
  Word32 -> Parser e s Word32
forall a. a -> Parser e s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (forall a b. (Integral a, Num b) => a -> b
fromIntegral @Word64 @Word32 Word64
w)

{- | Parse a LEB-128-encoded number. If the number is larger
than @0xFFFFFFFFFFFFFFFF@, fails with the provided error.
-}
word64 :: e -> Parser e s Word64
word64 :: forall e s. e -> Parser e s Word64
word64 e
e = e -> Int -> Word64 -> Int -> Parser e s Word64
forall e s. e -> Int -> Word64 -> Int -> Parser e s Word64
stepBoundedWord e
e Int
64 Word64
0 Int
0

{- | Parse a LEB-128-zigzag-encoded signed number. If the encoded
number is outside the range @[-32768,32767]@, this fails with
the provided error.
-}
int16 :: e -> Parser e s Int16
int16 :: forall e s. e -> Parser e s Int16
int16 = (Word16 -> Int16) -> Parser e s Word16 -> Parser e s Int16
forall a b. (a -> b) -> Parser e s a -> Parser e s b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Word16 -> Int16
zigzagDecode16 (Parser e s Word16 -> Parser e s Int16)
-> (e -> Parser e s Word16) -> e -> Parser e s Int16
forall b c a. (b -> c) -> (a -> b) -> a -> c
. e -> Parser e s Word16
forall e s. e -> Parser e s Word16
word16

{- | Parse a LEB-128-zigzag-encoded signed number. If the encoded
number is outside the range @[-2147483648,2147483647]@, this
fails with the provided error.
-}
int32 :: e -> Parser e s Int32
int32 :: forall e s. e -> Parser e s Int32
int32 = (Word32 -> Int32) -> Parser e s Word32 -> Parser e s Int32
forall a b. (a -> b) -> Parser e s a -> Parser e s b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Word32 -> Int32
zigzagDecode32 (Parser e s Word32 -> Parser e s Int32)
-> (e -> Parser e s Word32) -> e -> Parser e s Int32
forall b c a. (b -> c) -> (a -> b) -> a -> c
. e -> Parser e s Word32
forall e s. e -> Parser e s Word32
word32

{- | Parse a LEB-128-zigzag-encoded signed number. If the encoded
number is outside the range @[-9223372036854775808,9223372036854775807]@,
this fails with the provided error.
-}
int64 :: e -> Parser e s Int64
int64 :: forall e s. e -> Parser e s Int64
int64 = (Word64 -> Int64) -> Parser e s Word64 -> Parser e s Int64
forall a b. (a -> b) -> Parser e s a -> Parser e s b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Word64 -> Int64
zigzagDecode64 (Parser e s Word64 -> Parser e s Int64)
-> (e -> Parser e s Word64) -> e -> Parser e s Int64
forall b c a. (b -> c) -> (a -> b) -> a -> c
. e -> Parser e s Word64
forall e s. e -> Parser e s Word64
word64

-- What these parameters are:
--
-- bitLimit: number of bits in the target word size
-- accShift: shift amount, increases by 7 at a time
stepBoundedWord :: e -> Int -> Word64 -> Int -> Parser e s Word64
stepBoundedWord :: forall e s. e -> Int -> Word64 -> Int -> Parser e s Word64
stepBoundedWord e
e !Int
bitLimit !Word64
acc0 !Int
accShift = do
  Word8
raw <- e -> Parser e s Word8
forall e s. e -> Parser e s Word8
P.any e
e
  let number :: Word8
number = Word8
raw Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x7F
      acc1 :: Word64
acc1 =
        Word64
acc0
          Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.|. Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
unsafeShiftL (forall a b. (Integral a, Num b) => a -> b
fromIntegral @Word8 @Word64 Word8
number) Int
accShift
      accShift' :: Int
accShift' = Int
accShift Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
7
  if Int
accShift' Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
bitLimit
    then
      if Word8 -> Int -> Bool
forall a. Bits a => a -> Int -> Bool
testBit Word8
raw Int
7
        then e -> Int -> Word64 -> Int -> Parser e s Word64
forall e s. e -> Int -> Word64 -> Int -> Parser e s Word64
stepBoundedWord e
e Int
bitLimit Word64
acc1 Int
accShift'
        else Word64 -> Parser e s Word64
forall a. a -> Parser e s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Word64
acc1
    else
      if forall a b. (Integral a, Num b) => a -> b
fromIntegral @Word8 @Word Word8
raw Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
< Int -> Word
twoExp (Int
bitLimit Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
accShift)
        then Word64 -> Parser e s Word64
forall a. a -> Parser e s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Word64
acc1 -- TODO: no need to mask upper bit in number
        else e -> Parser e s Word64
forall e s a. e -> Parser e s a
P.fail e
e

twoExp :: Int -> Word
twoExp :: Int -> Word
twoExp Int
x = Word -> Int -> Word
forall a. Bits a => a -> Int -> a
unsafeShiftL Word
1 Int
x

-- Zigzag decode strategy taken from https://stackoverflow.com/a/2211086/1405768
-- The accepted answer is a little bit, so an answer further down was used:
--
-- > zigzag_decode(value) = ( value >> 1 ) ^ ( ~( value & 1 ) + 1 )
zigzagDecode16 :: Word16 -> Int16
zigzagDecode16 :: Word16 -> Int16
zigzagDecode16 Word16
n =
  Word16 -> Int16
forall a b. (Integral a, Num b) => a -> b
fromIntegral ((Word16 -> Int -> Word16
forall a. Bits a => a -> Int -> a
unsafeShiftR Word16
n Int
1) Word16 -> Word16 -> Word16
forall a. Bits a => a -> a -> a
`xor` (Word16 -> Word16
forall a. Bits a => a -> a
complement (Word16
n Word16 -> Word16 -> Word16
forall a. Bits a => a -> a -> a
.&. Word16
1) Word16 -> Word16 -> Word16
forall a. Num a => a -> a -> a
+ Word16
1))

zigzagDecode32 :: Word32 -> Int32
zigzagDecode32 :: Word32 -> Int32
zigzagDecode32 Word32
n =
  Word32 -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral ((Word32 -> Int -> Word32
forall a. Bits a => a -> Int -> a
unsafeShiftR Word32
n Int
1) Word32 -> Word32 -> Word32
forall a. Bits a => a -> a -> a
`xor` (Word32 -> Word32
forall a. Bits a => a -> a
complement (Word32
n Word32 -> Word32 -> Word32
forall a. Bits a => a -> a -> a
.&. Word32
1) Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
+ Word32
1))

zigzagDecode64 :: Word64 -> Int64
zigzagDecode64 :: Word64 -> Int64
zigzagDecode64 Word64
n =
  Word64 -> Int64
forall a b. (Integral a, Num b) => a -> b
fromIntegral ((Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
unsafeShiftR Word64
n Int
1) Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
`xor` (Word64 -> Word64
forall a. Bits a => a -> a
complement (Word64
n Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.&. Word64
1) Word64 -> Word64 -> Word64
forall a. Num a => a -> a -> a
+ Word64
1))