{-# LANGUAGE CPP #-} {-# OPTIONS_GHC -ddump-simpl -ddump-to-file #-} #include -- | __Signed LEB128 codec__. This codec encodes the two's complement -- of a signed number -- [as described here](https://en.wikipedia.org/wiki/LEB128#Signed_LEB128). -- -- Any /getXXX/ decoder can decode bytes generated using any of the /putXXX/ -- encoders, provided the encoded number fits in the target type. -- -- __WARNING__: This is not compatible with the /Unsigned LEB128/ codec at -- "Data.Binary.ULEB128" nor with the /ZigZag LEB128/ codec at -- "Data.Binary.ZLEB128". module Data.Binary.SLEB128 ( SLEB128(..) -- * Put , putInteger , putInt64 , putInt32 , putInt16 , putInt8 , putInt , putNatural , putWord64 , putWord32 , putWord16 , putWord8 , putWord -- * Get , getInteger , getInt64 , getInt32 , getInt16 , getInt8 , getInt , getNatural , getWord64 , getWord32 , getWord16 , getWord8 , getWord -- * Extras -- ** Scientific , putScientific , getScientific -- ** Rational , putRational , getRational -- ** Fixed , putFixed , getFixed ) where import Data.Binary qualified as Bin import Data.Binary.Get qualified as Bin import Data.Binary.Put qualified as Bin import Data.ByteString.Builder.Prim qualified as BB import Data.ByteString.Builder.Prim.Internal qualified as BB import Data.Scientific qualified as S import Data.Bits import Data.Coerce import Data.Fixed import Data.Proxy import GHC.Real import GHC.Exts import GHC.Int import GHC.Word import GHC.Num.BigNat import GHC.Num.Natural import GHC.Num.Integer import Foreign.Ptr import Foreign.Storable import Math.NumberTheory.Logarithms (integerLog10) import Data.Binary.ULEB128 qualified as U -------------------------------------------------------------------------------- -- | Newtype wrapper for 'Bin.Binary' encoding and decoding @x@ using the -- /Signed LEB128/ codec. Useful in conjunction with @DerivingVia@. newtype SLEB128 x = SLEB128 x -- | Note: Maximum allowed number of input bytes is restricted to 1000. -- Use 'putNatural' if you want a greater limit. instance Bin.Binary (SLEB128 Integer) where put = coerce putInteger {-# INLINE put #-} get = coerce (getInteger 1000) {-# INLINE get #-} -- | Note: Maximum allowed number of input bytes is restricted to 1000. -- Use 'putNatural' if you want a greater limit. instance Bin.Binary (SLEB128 Natural) where put = coerce putNatural {-# INLINE put #-} get = coerce (getNatural 1000) {-# INLINE get #-} instance Bin.Binary (SLEB128 Int) where put = coerce putInt {-# INLINE put #-} get = coerce getInt {-# INLINE get #-} instance Bin.Binary (SLEB128 Word) where put = coerce putWord {-# INLINE put #-} get = coerce getWord {-# INLINE get #-} instance Bin.Binary (SLEB128 Int8) where put = coerce putInt8 {-# INLINE put #-} get = coerce getInt8 {-# INLINE get #-} instance Bin.Binary (SLEB128 Word8) where put = coerce putWord8 {-# INLINE put #-} get = coerce getWord8 {-# INLINE get #-} instance Bin.Binary (SLEB128 Int16) where put = coerce putInt16 {-# INLINE put #-} get = coerce getInt16 {-# INLINE get #-} instance Bin.Binary (SLEB128 Word16) where put = coerce putWord16 {-# INLINE put #-} get = coerce getWord16 {-# INLINE get #-} instance Bin.Binary (SLEB128 Int32) where put = coerce putInt32 {-# INLINE put #-} get = coerce getInt32 {-# INLINE get #-} instance Bin.Binary (SLEB128 Word32) where put = coerce putWord32 {-# INLINE put #-} get = coerce getWord32 {-# INLINE get #-} instance Bin.Binary (SLEB128 Int64) where put = coerce putInt64 {-# INLINE put #-} get = coerce getInt64 {-# INLINE get #-} instance Bin.Binary (SLEB128 Word64) where put = coerce putWord64 {-# INLINE put #-} get = coerce getWord64 {-# INLINE get #-} -------------------------------------------------------------------------------- {-# INLINE putInteger #-} putInteger :: Integer -> Bin.Put putInteger = \case IS x -> putInt (I# x) IP x -> putIP x $ fromIntegral (bigNatSizeInBase 2 x) IN x -> putIN x where {-# INLINE putIP #-} putIP :: BigNat# -> Int -> Bin.Put putIP !a !m = do Bin.putWord8 (W8# (wordToWord8# (or# (bigNatIndex# a 0#) 0x80##))) let b = bigNatShiftR# a 7## :: BigNat# n = m - 7 if n > WORD_SIZE_IN_BITS - 1 then putIP b n else putInt (I# (word2Int# (bigNatIndex# b 0#))) -- TODO: Faster 'putIN' implementation, similar to 'putIP' {-# INLINE putIN #-} putIN :: BigNat# -> Bin.Put putIN !a = do let b = unsafeShiftR (IN a) 7 :: Integer c = fromIntegral (IN a .&. 0x7f) :: Word8 d = c .&. 0x40 if d /= 0 && b == -1 then Bin.putWord8 c else do Bin.putWord8 $! c .|. 0x80 putInteger b putNatural :: Natural -> Bin.Put putNatural = putInteger . fromIntegral {-# INLINE putNatural #-} putInt8 :: Int8 -> Bin.Put putInt8 = Bin.putBuilder . BB.primBounded (BB.boundedPrim 2 unsafePoke) {-# INLINE putInt8 #-} putInt16 :: Int16 -> Bin.Put putInt16 = Bin.putBuilder . BB.primBounded (BB.boundedPrim 3 unsafePoke) {-# INLINE putInt16 #-} putInt32 :: Int32 -> Bin.Put putInt32 = Bin.putBuilder . BB.primBounded (BB.boundedPrim 5 unsafePoke) {-# INLINE putInt32 #-} putInt64 :: Int64 -> Bin.Put putInt64 = Bin.putBuilder . BB.primBounded (BB.boundedPrim 10 unsafePoke) {-# INLINE putInt64 #-} putInt :: Int -> Bin.Put putInt = #if WORD_SIZE_IN_BITS == 64 Bin.putBuilder . BB.primBounded (BB.boundedPrim 10 unsafePoke) #elif WORD_SIZE_IN_BITS == 32 Bin.putBuilder . BB.primBounded (BB.boundedPrim 5 unsafePoke) #endif {-# INLINE putInt #-} putWord8 :: Word8 -> Bin.Put putWord8 = Bin.putBuilder . BB.primBounded (BB.boundedPrim 2 unsafePoke) {-# INLINE putWord8 #-} putWord16 :: Word16 -> Bin.Put putWord16 = Bin.putBuilder . BB.primBounded (BB.boundedPrim 3 unsafePoke) {-# INLINE putWord16 #-} putWord32 :: Word32 -> Bin.Put putWord32 = Bin.putBuilder . BB.primBounded (BB.boundedPrim 5 unsafePoke) {-# INLINE putWord32 #-} putWord64 :: Word64 -> Bin.Put putWord64 = Bin.putBuilder . BB.primBounded (BB.boundedPrim 10 unsafePoke) {-# INLINE putWord64 #-} putWord :: Word -> Bin.Put putWord = #if WORD_SIZE_IN_BITS == 64 Bin.putBuilder . BB.primBounded (BB.boundedPrim 10 unsafePoke) #elif WORD_SIZE_IN_BITS == 32 Bin.putBuilder . BB.primBounded (BB.boundedPrim 5 unsafePoke) #endif {-# INLINE putWord #-} -------------------------------------------------------------------------------- getInteger :: Int -- ^ /Maximum/ number of bytes to consume. If the 'Integer' number can be -- determined before consuming this number of bytes, it will be. If @0@, -- parsing fails. -- -- Each SLEB128 byte encodes at most 7 bits of data. That is, -- \(length(encoded) == \lceil\frac{length(data)}{7}\rceil\). -> Bin.Get Integer getInteger = unsafeGetSigned toInteger {-# INLINE getInteger #-} -- | Like 'getInteger', except it's offered here so that other parsers can use -- this specilized to types other than 'Integer'. This is unsafe because it -- only works for signed numbers whose SLEB128 representation is at most as -- long as the specified 'Int', but none of that is checked by this parser. {-# INLINE unsafeGetSigned #-} unsafeGetSigned :: forall a. (Bits a, Num a) => (Word8 -> a) -> Int -> Bin.Get a unsafeGetSigned fromWord8 = \m -> Bin.label "SLEB128" (go m 0 0) where {-# INLINE go #-} go :: Int -> Int -> a -> Bin.Get a go m i o | i < m = do w <- Bin.getWord8 let !a = o .|. unsafeShiftL (fromWord8 (w .&. 0x7f)) (i * 7) if w >= 0x80 then go m (i + 1) a else pure $! a - bit ((i + 1) * 7) * fromWord8 (unsafeShiftR (w .&. 0x40) 6) go _ _ _ = fail "input exceeds maximum allowed bytes" getNatural :: Int -- ^ /Maximum/ number of bytes to consume. If the 'Natural' number can be -- determined before consuming this number of bytes, it will be. If @0@, -- parsing fails. -- -- Each SLEB128 byte encodes at most 7 bits of data. That is, -- \(length(encoded) == \lceil\frac{length(data)}{7}\rceil\). -> Bin.Get Natural getNatural = \m -> do i <- getInteger m Bin.label "SLEB128" (naturalFromInteger i) {-# INLINE getNatural #-} getBoundedIntegral :: forall a b . (Bits a, Integral a, Bits b, Integral b) => Bin.Get a -> Bin.Get b getBoundedIntegral = \ga -> do a <- ga maybe (fail "underflow or overflow") pure (toIntegralSized a) {-# INLINE getBoundedIntegral #-} getInt8 :: Bin.Get Int8 getInt8 = unsafeGetSigned fromIntegral 2 {-# INLINE getInt8 #-} getInt16 :: Bin.Get Int16 getInt16 = unsafeGetSigned fromIntegral 3 {-# INLINE getInt16 #-} getInt32 :: Bin.Get Int32 getInt32 = unsafeGetSigned fromIntegral 5 {-# INLINE getInt32 #-} getInt64 :: Bin.Get Int64 getInt64 = unsafeGetSigned fromIntegral 10 {-# INLINE getInt64 #-} getInt :: Bin.Get Int getInt = #if WORD_SIZE_IN_BITS == 64 unsafeGetSigned fromIntegral 10 #elif WORD_SIZE_IN_BITS == 32 unsafeGetSigned fromIntegral 5 #endif {-# INLINE getInt #-} getWord8 :: Bin.Get Word8 getWord8 = getBoundedIntegral (unsafeGetSigned @Int16 fromIntegral 2) {-# INLINE getWord8 #-} getWord16 :: Bin.Get Word16 getWord16 = getBoundedIntegral (unsafeGetSigned @Int32 fromIntegral 3) {-# INLINE getWord16 #-} getWord32 :: Bin.Get Word32 getWord32 = getBoundedIntegral (unsafeGetSigned @Int64 fromIntegral 5) {-# INLINE getWord32 #-} getWord64 :: Bin.Get Word64 getWord64 = getBoundedIntegral (getInteger 10) {-# INLINE getWord64 #-} getWord :: Bin.Get Word getWord = #if WORD_SIZE_IN_BITS == 64 getBoundedIntegral (getInteger 10) #elif WORD_SIZE_IN_BITS == 32 getBoundedIntegral (unsafeGetSigned @Int64 fromIntegral 5) #endif {-# INLINE getWord #-} -------------------------------------------------------------------------------- -- | SLEB128-encodes @a@ and writes it into 'Ptr'. Returns one past the last -- written address. None of this is not checked. {-# INLINE unsafePoke #-} unsafePoke :: forall a. (Bits a, Integral a) => a -> Ptr Word8 -> IO (Ptr Word8) unsafePoke = \a p -> -- We split neg and pos so that their internal 'if' checks for less things. if a < 0 then neg a p else pos a p where {-# INLINE neg #-} neg :: a -> Ptr Word8 -> IO (Ptr Word8) neg = \ !a !p -> do let b = unsafeShiftR a 7 :: a c = fromIntegral (a .&. 0x7f) :: Word8 d = c .&. 0x40 :: Word8 if d == 0 || b /= -1 then do poke p $! c .|. 0x80 neg b $! plusPtr p 1 else do poke p $! c .|. 0x40 pure $! plusPtr p 1 {-# INLINE pos #-} pos :: a -> Ptr Word8 -> IO (Ptr Word8) pos = \ !a !p -> do let b = unsafeShiftR a 7 :: a c = fromIntegral a :: Word8 d = c .&. 0x40 :: Word8 if d /= 0 || b /= 0 then do poke p $! c .|. 0x80 pos b $! plusPtr p 1 else do poke p $! c pure $! plusPtr p 1 {-# INLINE naturalFromInteger #-} naturalFromInteger :: MonadFail m => Integer -> m Natural naturalFromInteger = \case IS x | isTrue# (0# <=# x) -> pure $ naturalFromWord# (int2Word# x) IP x -> pure $ naturalFromBigNat# x _ -> fail "underflow" -------------------------------------------------------------------------------- -- | Compact 'S.Scientific' encoding. Internally, it uses both ULEB128 and -- SLEB128. 0 is encoded as @\\x00@, other numbers take at least two bytes. -- -- Compatible decoders are 'getFixed' and 'getScientific'. putScientific :: S.Scientific -> Bin.Put putScientific = \a -> case compare (S.coefficient a) 0 of LT -> do -- We store the coefficient sign alongside the base10Exponent so as to -- increase the chances that the coefficient fits in one less byte. The -- base10Exponent is usually much smaller than the coefficient in bit -- size, so this is likely to happen. let b = S.normalize a U.putNatural $ fromInteger $ negate $ S.coefficient b putInteger $ setBit (toInteger (S.base10Exponent b) `unsafeShiftL` 1) 0 GT -> do let b = S.normalize a U.putNatural $ fromInteger $ S.coefficient b putInteger $ toInteger (S.base10Exponent b) `unsafeShiftL` 1 EQ -> Bin.putWord8 0 -- | Decode a 'S.Scientific' encoded with 'putScientific' or 'putFixed'. getScientific :: Int -- ^ /Maximum/ number of ULEB128 bytes to consume for the 'S.coefficient' -- part. See 'U.getNatural'. -> Bin.Get S.Scientific getScientific n = Bin.label "getScientific" $ do c <- U.getInteger n if c /= 0 then do e1 <- getInteger 10 -- Valid e1 can't be longer than 10. case toIntegralSized (e1 `unsafeShiftR` 1) of Just (e0 :: Int) | testBit e1 0 -> pure $ S.scientific (negate c) e0 | otherwise -> pure $ S.scientific c e0 Nothing -> fail "Exponent too large" else pure 0 -------------------------------------------------------------------------------- -- | Compact 'Rational' encoding. Internally, it uses both ULEB128 and SLEB128. -- Decode with 'getRational'. 0 is encoded as @\\x00@, other numbers take at -- least four bytes. putRational :: Rational -> Bin.Put putRational = \(unsafeReduceRational -> n :% d) -> if n /= 0 then do -- ns: The coefficient is often larger than the non-negative exponent, -- so we store the coefficient sign with the exponent in order to -- improve the chances that the coefficient fit in one less byte. let ns = S.normalize (fromInteger n) :: S.Scientific U.putNatural $ fromInteger $ abs $ S.coefficient ns putInt $ if n < 0 then complement (S.base10Exponent ns) else S.base10Exponent ns -- ds: The coefficient is more than 0, the exponent is at least 0. -- We decrease the coefficient by one to improve the chances it fits -- in one less byte. let ds = S.normalize (fromInteger d) :: S.Scientific U.putNatural $ fromInteger $ S.coefficient ds - 1 U.putWord $ fromIntegral $ S.base10Exponent ds else Bin.putWord8 0 where unsafeReduceRational :: Rational -> Rational unsafeReduceRational = \(n :% d) -> n % d -- Fails with 'error' if the denominator is 0. That's OK, that kind of -- 'Rational' is not supposed to exist, anyway. -- | Decode a 'Rational' encoded with 'putRational'. getRational :: Int -- ^ /Maximum/ number of bytes to consume for each of the numerator and -- denominator parts. See 'U.getNatural'. -> Bin.Get Rational getRational m = Bin.label "getRational" $ do ncs0 <- U.getInteger m if ncs0 /= 0 then do nes0 <- getInt let ns = if nes0 >= 0 then S.scientific ncs0 nes0 else S.scientific (negate ncs0) (complement nes0) case S.floatingOrInteger ns of Right n -> do dcs <- (1 +) <$> U.getInteger m des <- U.getInt case S.floatingOrInteger (S.scientific dcs des) of Right d -> pure (n % d) Left (_ :: Double) -> fail "Non-integer denominator" Left (_ :: Double) -> fail "Non-integer numerator" else pure 0 -------------------------------------------------------------------------------- -- | Same encoding as 'putScientific'. -- -- Compatible decoders are 'getFixed' and 'getScientific'. putFixed :: HasResolution r => Fixed r -> Bin.Put putFixed = putScientific . fixedToScientific {-# INLINE putFixed #-} -- | Decode a 'Fixed' encoded with 'putFixed' or 'putScientific'. getFixed :: (HasResolution r) => Int -- ^ /Maximum/ number of ULEB128 bytes to consume for the 'S.coefficient' -- part. See 'U.getNatural'. -> Bin.Get (Fixed r) getFixed n = Bin.label "getFixed" $ do s <- getScientific n either fail pure $ fixedFromScientific s fixedToScientific :: forall r. (HasResolution r) => Fixed r -> S.Scientific fixedToScientific = \(MkFixed i) -> S.scientific i e where e :: Int e = negate $ integerLog10 $ resolution $ Proxy @r fixedFromScientific :: forall r . (HasResolution r) => S.Scientific -> Either String (Fixed r) fixedFromScientific = \s0 -> if | s0e <- S.base10Exponent s0, s0e >= e -> Right (MkFixed (S.coefficient s0 * 10 ^ (s0e - e))) | s1 <- S.normalize s0, s1e <- S.base10Exponent s1, s1e >= e -> Right (MkFixed (S.coefficient s1 * 10 ^ (s1e - e))) | otherwise -> Left "Too small" where e :: Int e = negate $ integerLog10 $ resolution $ Proxy @r