{-# LANGUAGE CPP #-} {-# LANGUAGE UndecidableInstances #-} #include -- | __Unsigned LEB128 codec__. This codec encodes an unsigned number -- [as described here](https://en.wikipedia.org/wiki/LEB128#Unsigned_LEB128). -- -- Any /getXXX/ decoder can decode bytes generated using any of the /putXXX/ -- encoders, provided the encoded number fits in the target type. -- -- __WARNING__: This is not compatible with the /Signed LEB128/ codec at -- "Data.Binary.SLEB128" nor with the /ZigZag LEB128/ codec at -- "Data.Binary.ZLEB128". module Data.Binary.ULEB128 {--} ( ULEB128 (..) -- * Put , putNatural , putWord64 , putWord32 , putWord16 , putWord8 , putWord -- * Get , getNatural , getWord64 , getWord32 , getWord16 , getWord8 , getWord , getInteger , getInt64 , getInt32 , getInt16 , getInt8 , getInt -- * ByteString , putByteString , getByteString -- ** Lazy , putLazyByteString , getLazyByteString -- ** Short , putShortByteString , getShortByteString ) -- } where import Data.Binary qualified as Bin import Data.Binary.Get qualified as Bin import Data.Binary.Put qualified as Bin import Data.Bits import Data.ByteString qualified as B import Data.ByteString.Builder.Prim qualified as BB import Data.ByteString.Builder.Prim.Internal qualified as BB import Data.ByteString.Lazy qualified as BL import Data.ByteString.Short qualified as BS import Data.Int import Foreign.Ptr import Foreign.Storable import GHC.Exts import GHC.TypeLits qualified as GHC import GHC.Num.Natural import GHC.Word -------------------------------------------------------------------------------- -- | Newtype wrapper for 'Bin.Binary' encoding and decoding @x@ using the -- /Unsigned LEB128/ codec. Useful in conjunction with @DerivingVia@. newtype ULEB128 x = ULEB128 x -- | Note: Maximum allowed number of input bytes is restricted to 1024. -- Use 'putNatural' if you want a greater limit. instance Bin.Binary (ULEB128 Natural) where put = coerce putNatural {-# INLINE put #-} get = coerce (getNatural 1024) {-# INLINE get #-} instance Bin.Binary (ULEB128 Word) where put = coerce putWord {-# INLINE put #-} get = coerce getWord {-# INLINE get #-} instance Bin.Binary (ULEB128 Word8) where put = coerce putWord8 {-# INLINE put #-} get = coerce getWord8 {-# INLINE get #-} instance Bin.Binary (ULEB128 Word16) where put = coerce putWord16 {-# INLINE put #-} get = coerce getWord16 {-# INLINE get #-} instance Bin.Binary (ULEB128 Word32) where put = coerce putWord32 {-# INLINE put #-} get = coerce getWord32 {-# INLINE get #-} instance Bin.Binary (ULEB128 Word64) where put = coerce putWord64 {-# INLINE put #-} get = coerce getWord64 {-# INLINE get #-} instance DecodeOnly "getInt8" => Bin.Binary (ULEB128 Int8) where put = undefined get = undefined instance DecodeOnly "getInt16" => Bin.Binary (ULEB128 Int16) where put = undefined get = undefined instance DecodeOnly "getInt32" => Bin.Binary (ULEB128 Int32) where put = undefined get = undefined instance DecodeOnly "getInt64" => Bin.Binary (ULEB128 Int64) where put = undefined get = undefined instance DecodeOnly "getInt" => Bin.Binary (ULEB128 Int) where put = undefined get = undefined instance DecodeOnly "getInteger" => Bin.Binary (ULEB128 Integer) where put = undefined get = undefined type family DecodeOnly s where DecodeOnly s = GHC.TypeError ( 'GHC.Text "ULEB128 can't encode signed numbers, " 'GHC.:<>: 'GHC.Text "use SLEB128 or ZLEB128 instead." 'GHC.:$$: 'GHC.Text "To decode, use “ULEB128." 'GHC.:<>: 'GHC.Text s 'GHC.:<>: 'GHC.Text "”.") -------------------------------------------------------------------------------- putNatural :: Natural -> Bin.Put putNatural (NS w#) = putWord (W# w#) putNatural a = let b = fromIntegral a :: Word8 in case unsafeShiftR a 7 of c | c /= 0 -> Bin.putWord8 (b .|. 0x80) >> putNatural c | otherwise -> Bin.putWord8 b {-# INLINE putNatural #-} putWord8 :: Word8 -> Bin.Put putWord8 = Bin.putBuilder . BB.primBounded (BB.boundedPrim 2 unsafePokeUnsigned) {-# INLINE putWord8 #-} putWord16 :: Word16 -> Bin.Put putWord16 = Bin.putBuilder . BB.primBounded (BB.boundedPrim 3 unsafePokeUnsigned) {-# INLINE putWord16 #-} putWord32 :: Word32 -> Bin.Put putWord32 = Bin.putBuilder . BB.primBounded (BB.boundedPrim 5 unsafePokeUnsigned) {-# INLINE putWord32 #-} putWord64 :: Word64 -> Bin.Put putWord64 = Bin.putBuilder . BB.primBounded (BB.boundedPrim 10 unsafePokeUnsigned) {-# INLINE putWord64 #-} putWord :: Word -> Bin.Put putWord = #if WORD_SIZE_IN_BITS == 64 Bin.putBuilder . BB.primBounded (BB.boundedPrim 10 unsafePokeUnsigned) #elif WORD_SIZE_IN_BITS == 32 Bin.putBuilder . BB.primBounded (BB.boundedPrim 5 unsafePokeUnsigned) #endif {-# INLINE putWord #-} -------------------------------------------------------------------------------- getNatural :: Int -- ^ /Maximum/ number of bytes to consume. If the 'Natural' number can be -- determined before consuming this number of bytes, it will be. If @0@, -- parsing fails. -- -- Each ULEB128 byte encodes at most 7 bits of data. That is, -- \(length(encoded) == \lceil\frac{length(data)}{7}\rceil\). -> Bin.Get Natural getNatural = unsafeGetUnsigned word8ToNatural {-# INLINE getNatural #-} -- | Like 'getNatural', except it's offered here so that other parsers can use -- this specilized to types other than 'Natural'. This is unsafe because it -- only works for unsigned numbers whose ULEB128 representation is at most as -- long as the specified 'Int', but none of that is checked by this parser. {-# INLINE unsafeGetUnsigned #-} unsafeGetUnsigned :: forall a. (Bits a, Num a) => (Word8 -> a) -> Int -> Bin.Get a unsafeGetUnsigned fromWord8 = \m -> Bin.label "ULEB128" (go m 0 0) where {-# INLINE go #-} go :: Int -> Int -> a -> Bin.Get a go m i o | i < m = do w <- Bin.getWord8 if w >= 0x80 then go m (i + 1) $! o .|. unsafeShiftL (fromWord8 (w .&. 0x7f)) (7 * i) else pure $! o .|. unsafeShiftL (fromWord8 w) (7 * i) go _ _ _ = fail "input exceeds maximum allowed bytes" getInteger :: Int -- ^ /Maximum/ number of bytes to consume. If the 'Integer' number can be -- determined before consuming this number of bytes, it will be. If @0@, -- parsing fails. -- -- Each ULEB128 byte encodes at most 7 bits of data. That is, -- \(length(encoded) == \lceil\frac{length(data)}{7}\rceil\). -> Bin.Get Integer getInteger = fmap toInteger . getNatural {-# INLINE getInteger #-} getBoundedIntegral :: forall s u . (Bits s, Integral s, Bits u, Integral u) => Bin.Get s -> Bin.Get u getBoundedIntegral = \gs -> do s <- gs Bin.label "ULEB128" $ case toIntegralSized s of Just u -> pure u Nothing -> fail "underflow or overflow" {-# INLINE getBoundedIntegral #-} getWord8 :: Bin.Get Word8 getWord8 = unsafeGetUnsigned id 2 {-# INLINE getWord8 #-} getWord16 :: Bin.Get Word16 getWord16 = unsafeGetUnsigned fromIntegral 3 {-# INLINE getWord16 #-} getWord32 :: Bin.Get Word32 getWord32 = unsafeGetUnsigned fromIntegral 5 {-# INLINE getWord32 #-} getWord64 :: Bin.Get Word64 getWord64 = unsafeGetUnsigned fromIntegral 10 {-# INLINE getWord64 #-} getWord :: Bin.Get Word getWord = #if WORD_SIZE_IN_BITS == 64 unsafeGetUnsigned fromIntegral 10 #elif WORD_SIZE_IN_BITS == 32 unsafeGetUnsigned fromIntegral 5 #endif {-# INLINE getWord #-} getInt8 :: Bin.Get Int8 getInt8 = getBoundedIntegral (unsafeGetUnsigned @Word8 fromIntegral 1) {-# INLINE getInt8 #-} getInt16 :: Bin.Get Int16 getInt16 = getBoundedIntegral getWord16 {-# INLINE getInt16 #-} getInt32 :: Bin.Get Int32 getInt32 = getBoundedIntegral getWord32 {-# INLINE getInt32 #-} getInt64 :: Bin.Get Int64 getInt64 = getBoundedIntegral getWord64 {-# INLINE getInt64 #-} getInt :: Bin.Get Int getInt = getBoundedIntegral getWord {-# INLINE getInt #-} -------------------------------------------------------------------------------- -- | Puts a strict 'B.ByteString' with its ULEB128-encoded length as prefix. -- -- See 'getByteString'. putByteString :: B.ByteString -> Bin.Put putByteString = \a -> do putWord (fromIntegral (B.length a :: Int)) Bin.putByteString a {-# INLINE putByteString #-} -- | Gets a strict 'B.ByteString' with its ULEB128-encoded length as prefix. -- -- See 'putByteString'. getByteString :: Bin.Get B.ByteString getByteString = Bin.getByteString =<< getInt {-# INLINE getByteString #-} -- | Puts a lazy 'B.ByteString' with its ULEB128-encoded length as prefix. -- -- See 'getLazyByteString'. putLazyByteString :: BL.ByteString -> Bin.Put putLazyByteString = \a -> do putWord64 (fromIntegral (BL.length a :: Int64)) Bin.putLazyByteString a {-# INLINE putLazyByteString #-} -- | Gets a lazy 'BL.ByteString' with its ULEB128-encoded length as prefix. -- -- See 'putLazyByteString'. getLazyByteString :: Bin.Get BL.ByteString getLazyByteString = Bin.getLazyByteString =<< getInt64 {-# INLINE getLazyByteString #-} -- | Puts a 'BS.ShortByteString' with its ULEB128-encoded length as prefix. -- -- See 'getShortByteString'. putShortByteString :: BS.ShortByteString -> Bin.Put putShortByteString = \a -> do putWord (fromIntegral (BS.length a :: Int)) Bin.putShortByteString a {-# INLINE putShortByteString #-} -- | Gets a 'BS.ShortByteString' with its ULEB128-encoded length as prefix. -- -- See 'putShortByteString'. getShortByteString :: Bin.Get BS.ShortByteString getShortByteString = fmap BS.toShort (Bin.getByteString =<< getInt) {-# INLINE getShortByteString #-} -------------------------------------------------------------------------------- -- | ULEB128-encodes @a@ and writes it into 'Ptr'. Returns one past the last -- written address. Only works with unsigned types. None of this is not checked. unsafePokeUnsigned :: (Bits a, Integral a) => a -> Ptr Word8 -> IO (Ptr Word8) unsafePokeUnsigned = \ !a !p -> case unsafeShiftR a 7 of b | b /= 0 -> do poke p $! 0x80 .|. fromIntegral a unsafePokeUnsigned b $! plusPtr p 1 | otherwise -> do poke p $! fromIntegral a pure $! plusPtr p 1 {-# INLINE unsafePokeUnsigned #-} -- | This is faster than 'fromIntegral', which goes through 'Integer'. word8ToNatural :: Word8 -> Natural word8ToNatural (W8# a) = NS (word8ToWord# a) {-# INLINE word8ToNatural #-}