{-# LANGUAGE CPP #-} #include -- | __ZigZag LEB128 codec__. This codec encodes the [ZigZag] -- (https://en.wikipedia.org/wiki/Variable-length_quantity#Zigzag_encoding) -- representation of a signed number through -- [ULEB128](https://en.wikipedia.org/wiki/LEB128#Unsigned_LEB128). -- -- Any /getXXX/ decoder can decode bytes generated using any of the /putXXX/ -- encoders, provided the encoded number fits in the target type. -- -- __WARNING__: This is not compatible with the /Unsigned LEB128/ codec at -- "Data.Binary.ULEB128" nor with the /Signed LEB128/ codec at -- "Data.Binary.SLEB128". module Data.Binary.ZLEB128 {--} ( ZLEB128(..) -- * Put , putInteger , putInt64 , putInt32 , putInt16 , putInt8 , putInt , putNatural , putWord64 , putWord32 , putWord16 , putWord8 , putWord -- * Get , getInteger , getInt64 , getInt32 , getInt16 , getInt8 , getInt , getNatural , getWord64 , getWord32 , getWord16 , getWord8 , getWord ) --} where import Data.Binary qualified as Bin import Data.Binary.Get qualified as Bin import Data.Bits import GHC.Num.BigNat import GHC.Num.Integer import GHC.Num.Natural import GHC.Int import GHC.Word import GHC.Exts import Data.Binary.ULEB128 qualified as U -------------------------------------------------------------------------------- -- | Newtype wrapper for 'Bin.Binary' encoding and decoding @x@ using the -- /ZigZag LEB128/ codec. Useful in conjunction with @DerivingVia@. newtype ZLEB128 x = ZLEB128 x -- | Note: Maximum allowed number of input bytes is restricted to 1024. -- Use 'putNatural' if you want a greater limit. instance Bin.Binary (ZLEB128 Integer) where put = coerce putInteger {-# INLINE put #-} get = coerce (getInteger 1024) {-# INLINE get #-} -- | Note: Maximum allowed number of input bytes is restricted to 1024. -- Use 'putNatural' if you want a greater limit. instance Bin.Binary (ZLEB128 Natural) where put = coerce putNatural {-# INLINE put #-} get = coerce (getNatural 1024) {-# INLINE get #-} instance Bin.Binary (ZLEB128 Int) where put = coerce putInt {-# INLINE put #-} get = coerce getInt {-# INLINE get #-} instance Bin.Binary (ZLEB128 Word) where put = coerce putWord {-# INLINE put #-} get = coerce getWord {-# INLINE get #-} instance Bin.Binary (ZLEB128 Int8) where put = coerce putInt8 {-# INLINE put #-} get = coerce getInt8 {-# INLINE get #-} instance Bin.Binary (ZLEB128 Word8) where put = coerce putWord8 {-# INLINE put #-} get = coerce getWord8 {-# INLINE get #-} instance Bin.Binary (ZLEB128 Int16) where put = coerce putInt16 {-# INLINE put #-} get = coerce getInt16 {-# INLINE get #-} instance Bin.Binary (ZLEB128 Word16) where put = coerce putWord16 {-# INLINE put #-} get = coerce getWord16 {-# INLINE get #-} instance Bin.Binary (ZLEB128 Int32) where put = coerce putInt32 {-# INLINE put #-} get = coerce getInt32 {-# INLINE get #-} instance Bin.Binary (ZLEB128 Word32) where put = coerce putWord32 {-# INLINE put #-} get = coerce getWord32 {-# INLINE get #-} instance Bin.Binary (ZLEB128 Int64) where put = coerce putInt64 {-# INLINE put #-} get = coerce getInt64 {-# INLINE get #-} instance Bin.Binary (ZLEB128 Word64) where put = coerce putWord64 {-# INLINE put #-} get = coerce getWord64 {-# INLINE get #-} -------------------------------------------------------------------------------- putInteger :: Integer -> Bin.Put -- putInteger = U.putNatural . _zigZagInteger putInteger = \case IS x | y <- zigZagInt (I# x) -> U.putWord y IP x -> U.putNatural (NB (bigNatShiftL# x 1##)) IN x -> U.putNatural (NB (bigNatShiftL# x 1## `bigNatSubWordUnsafe#` 1##)) {-# INLINE putInteger #-} putNatural :: Natural -> Bin.Put putNatural = \n -> U.putNatural (unsafeShiftL n 1) {-# INLINE putNatural #-} putWord8 :: Word8 -> Bin.Put putWord8 = putInt16 . fromIntegral {-# INLINE putWord8 #-} putWord16 :: Word16 -> Bin.Put putWord16 = putInt32 . fromIntegral {-# INLINE putWord16 #-} putWord32 :: Word32 -> Bin.Put putWord32 = putInt64 . fromIntegral {-# INLINE putWord32 #-} putWord64 :: Word64 -> Bin.Put putWord64 = putInteger . fromIntegral {-# INLINE putWord64 #-} putWord :: Word -> Bin.Put putWord = putInteger . fromIntegral {-# INLINE putWord #-} putInt8 :: Int8 -> Bin.Put putInt8 = U.putWord8 . zigZagInt8 {-# INLINE putInt8 #-} putInt16 :: Int16 -> Bin.Put putInt16 = U.putWord16 . zigZagInt16 {-# INLINE putInt16 #-} putInt32 :: Int32 -> Bin.Put putInt32 = U.putWord32 . zigZagInt32 {-# INLINE putInt32 #-} putInt64 :: Int64 -> Bin.Put putInt64 = U.putWord64 . zigZagInt64 {-# INLINE putInt64 #-} putInt :: Int -> Bin.Put putInt = U.putWord . zigZagInt {-# INLINE putInt #-} -------------------------------------------------------------------------------- getInteger :: Int -- ^ /Maximum/ number of bytes to consume. If the 'Integer' number can be -- determined before consuming this number of bytes, it will be. If @0@, -- parsing fails. -- -- Each ULEB128 byte encodes at most 7 bits of data. That is, -- \(length(encoded) == \lceil\frac{length(data)}{7}\rceil\). -> Bin.Get Integer getInteger = fmap zagZigInteger . U.getNatural {-# INLINE getInteger #-} getNatural :: Int -- ^ /Maximum/ number of bytes to consume. If the 'Integer' number can be -- determined before consuming this number of bytes, it will be. If @0@, -- parsing fails. -- -- Each ULEB128 byte encodes at most 7 bits of data. That is, -- \(length(encoded) == \lceil\frac{length(data)}{7}\rceil\). -> Bin.Get Natural getNatural = \m -> do i <- getInteger m Bin.label "ZLEB128" $ naturalFromInteger i {-# INLINE getNatural #-} getBoundedIntegral :: forall s u . (Bits s, Integral s, Bits u, Integral u) => Bin.Get s -> Bin.Get u getBoundedIntegral = \gs -> do s <- gs Bin.label "ZLEB128" $ case toIntegralSized s of Just u -> pure u Nothing -> fail "underflow or overflow" {-# INLINE getBoundedIntegral #-} getInt8 :: Bin.Get Int8 getInt8 = zagZigInt8 <$> U.getWord8 {-# INLINE getInt8 #-} getInt16 :: Bin.Get Int16 getInt16 = zagZigInt16 <$> U.getWord16 {-# INLINE getInt16 #-} getInt32 :: Bin.Get Int32 getInt32 = zagZigInt32 <$> U.getWord32 {-# INLINE getInt32 #-} getInt64 :: Bin.Get Int64 getInt64 = zagZigInt64 <$> U.getWord64 {-# INLINE getInt64 #-} getInt :: Bin.Get Int getInt = zagZigInt <$> U.getWord {-# INLINE getInt #-} getWord8 :: Bin.Get Word8 getWord8 = getBoundedIntegral getInt16 {-# INLINE getWord8 #-} getWord16 :: Bin.Get Word16 getWord16 = getBoundedIntegral getInt32 {-# INLINE getWord16 #-} getWord32 :: Bin.Get Word32 getWord32 = getBoundedIntegral getInt64 {-# INLINE getWord32 #-} getWord64 :: Bin.Get Word64 getWord64 = getBoundedIntegral (getInteger 10) {-# INLINE getWord64 #-} getWord :: Bin.Get Word getWord = getBoundedIntegral (getInteger 10) {-# INLINE getWord #-} -------------------------------------------------------------------------------- -- | OK, but not used. See putInteger. {-# INLINE _zigZagInteger #-} _zigZagInteger :: Integer -> Natural _zigZagInteger = \case IS x | W# y <- zigZagInt (I# x) -> NS y IP x -> NB (bigNatShiftL# x 1##) IN x -> NB (bigNatShiftL# x 1## `bigNatSubWordUnsafe#` 1##) {-# INLINE zagZigInteger #-} zagZigInteger :: Natural -> Integer zagZigInteger = \case NS x | I# y <- zagZigInt (W# x) -> IS y NB x -- Unnecessary check because of Natural invariant: -- | bigNatIsZero x -> IS 0# | 0## <- and# 1## (indexWordArray# x 0#) -> IP (bigNatShiftR# x 1##) | otherwise -> IN (bigNatShiftR# (bigNatAddWord# x 1##) 1##) -- | @s@ is expected to be the signed version of @u@. This is not checked. {-# INLINE unsafeZigZagFixed #-} unsafeZigZagFixed :: forall s u. (FiniteBits s, FiniteBits u, Integral s, Integral u) => s -> u unsafeZigZagFixed = let !n = finiteBitSize (undefined :: s) - 1 in \s -> fromIntegral $! xor (unsafeShiftL s 1) (unsafeShiftR s n) -- | @u@ is expected to be the unsigned version of @s@. This is not checked. {-# INLINE unsafeZagZigFixed #-} unsafeZagZigFixed :: forall u s. (FiniteBits u, FiniteBits s, Integral u, Integral s) => u -> s unsafeZagZigFixed = \u -> fromIntegral $! xor (unsafeShiftR u 1) (negate (u .&. 1)) {-# INLINE zigZagInt8 #-} zigZagInt8 :: Int8 -> Word8 zigZagInt8 = unsafeZigZagFixed {-# INLINE zagZigInt8 #-} zagZigInt8 :: Word8 -> Int8 zagZigInt8 = unsafeZagZigFixed {-# INLINE zigZagInt16 #-} zigZagInt16 :: Int16 -> Word16 zigZagInt16 = unsafeZigZagFixed {-# INLINE zagZigInt16 #-} zagZigInt16 :: Word16 -> Int16 zagZigInt16 = unsafeZagZigFixed {-# INLINE zigZagInt32 #-} zigZagInt32 :: Int32 -> Word32 zigZagInt32 = unsafeZigZagFixed {-# INLINE zagZigInt32 #-} zagZigInt32 :: Word32 -> Int32 zagZigInt32 = unsafeZagZigFixed {-# INLINE zigZagInt64 #-} zigZagInt64 :: Int64 -> Word64 zigZagInt64 = unsafeZigZagFixed {-# INLINE zagZigInt64 #-} zagZigInt64 :: Word64 -> Int64 zagZigInt64 = unsafeZagZigFixed {-# INLINE zigZagInt #-} zigZagInt :: Int -> Word zigZagInt = unsafeZigZagFixed {-# INLINE zagZigInt #-} zagZigInt :: Word -> Int zagZigInt = unsafeZagZigFixed {-# INLINE naturalFromInteger #-} naturalFromInteger :: MonadFail m => Integer -> m Natural naturalFromInteger = \case IS x | isTrue# (0# <=# x) -> pure $ naturalFromWord# (int2Word# x) IP x -> pure $ naturalFromBigNat# x _ -> fail "underflow"