{-# LANGUAGE FlexibleInstances #-} -- | -- Module : Data.Serialize.RLP -- License : LGPL-3 (see LICENSE) -- -- Maintainer : Javier Sagredo <jasataco@gmail.com> -- Stability : stable -- -- An implementation of the Recursive Length Prefix method -- as described in the Yellow Paper <https://ethereum.github.io/yellowpaper/paper.pdf>. -- -- To actually use this module, the type that is going to -- be encoded has to be instance of RLPSerialize defining -- 'toRLP' and 'fromRLP'. -------------------------------------------------------------------------------- module Data.Serialize.RLP ( -- * The RLP Type RLPT(..), -- ** Subtleties -- $subtleties -- * Helper Int functions toBigEndian, toBigEndianS, fromBigEndian, fromBigEndianS, -- * Helper String functions toByteString, toByteStringS, fromByteString, fromByteStringS, -- * The RLPSerialize class RLPSerialize(..) -- * Example -- $example -- * Errors and special cases -- $failexample ) where import Data.Serialize.RLP.Internal import qualified Data.ByteString as DBS import qualified Data.ByteString.Lazy as DBSL import qualified Data.ByteString.Char8 as DBSC -------------------------------------------------------------------------------- -- $subtleties -- The idea of transforming a custom type into RLPT is to -- preserve the original structure as far as possible. For example, -- suppose we have a data structure: -- -- > data Name = (String, String) -- represents the first and last name of a Person -- > data Person = Person Name Int -- represents the whole name of a Person and its age -- -- Then the desired output of the transformation of a Person value to RLPT should be (pseudocode): -- -- > RLPL [ RLPL [ RLPB, RLPB ], RLPB ] -- -- This way the structure is clearly preserved. Eventhough this does not have -- to be true as the transformation to RLPL is defined by the user and a custom -- process can be implemented, it is advised to follow this guideline for better -- understanding of the generated code. -- -- It is important to remark that although it can't be imposed, it doesn't make sense to try -- to transform to RLP types with more than one constructor that don't difer in structure. -- The transformation should encode -- a way to find out which of the constructors belongs to the data so not only data is being -- encoded in the result, also information about the structure futher than the actual length -- prefixes. That's why it only makes sense to transform to RLP types with just one constructor. -- On the other hand, it's perfectly viable to encode types with more than one constructor if -- the structure of each of them is different as it can be adjusted via pattern matching -- strategies. -------------------------------------------------------------------------------- -- | The 'RLPSerialize' class provides functions for transforming values to RLPT structures. -- For encoding and decoding values with the RLP protocol, 'toRLP' and 'fromRLP' have to -- be implemented. -- -- Instances of RLPSerialize are expected to satisfy the following property: -- -- > fromRLP . toRLP == id -- -- In such case, it can be assured with the default definition that: -- -- > rlpDecode . rlpEncode == id -- -- RLPSerialize makes use of the Get and Put classes together with a set of -- custom serializations for encoding and decoding RLPT data. class RLPSerialize a where -- | Transform a value to the 'RLPT' structure that best fits its internal structure toRLP :: a -> RLPT -- | Transform an 'RLPT' structure back into the value it represents. -- Its return type is 'Maybe a' because it can fail fromRLP :: RLPT -> Maybe a -- | Transform a value to an 'RLPT' structure and then encode it following the -- RLP standard. rlpEncode :: a -> DBSL.ByteString rlpEncode = rlpEncodeI . toRLP -- | Transform a ByteString to an 'RLPT' structure following the RLP standard and -- then transform it to the original type. It returns 'Left s' when failing on the -- decoding of the transforming from RLPT into the required type, and 'Right v' on -- success. rlpDecode :: DBSL.ByteString -> Either String a rlpDecode x = case rlpDecodeI x :: Either String RLPT of Left m -> Left m Right v -> case fromRLP v of Nothing -> Left "RLPT value couldn't ve transformed into the required type" Just v' -> Right v' {-# MINIMAL toRLP, fromRLP #-} -- RLPT values don't have to be transformed as they already are RLPT instance RLPSerialize RLPT where toRLP = id fromRLP = Just . id -- ByteStrings just have to be encapsulated -- Also, it only makes sense to disencapsulate from a ByteString instance RLPSerialize DBS.ByteString where toRLP = RLPB fromRLP (RLPB b) = Just b fromRLP _ = Nothing -- Ints have to be transformed into its Big-endian form -- and then they are treated as ByteStrings. -- The same applies for the inverse transformation. They -- are treated as ByteStrings and then interpreted as a -- Big-endian encoded Int. instance RLPSerialize Int where toRLP = toRLP . toBigEndianS fromRLP = maybe Nothing (\s -> if DBSC.head s == '\NUL' then Nothing else case fromBigEndianS s of Left _ -> Nothing Right v -> Just v ) . (fromRLP :: RLPT -> Maybe DBS.ByteString) -- Serializing lists implies making a list with the serialization -- of each element instance {-# OVERLAPPABLE #-} RLPSerialize a => RLPSerialize [a] where toRLP = RLPL . map toRLP fromRLP (RLPL x) = if any (\a -> case a of Nothing -> True _ -> False) r then Nothing else Just $ map unJust r where r = map fromRLP x fromRLP _ = Nothing -- Bools are serialized as [0] or [1] in a ByteArray -- THIS IS AN ASUMPTION considering Bool equivalent to -- integers in the range 0..1 instance RLPSerialize Bool where toRLP True = RLPB $ toByteStringS "\SOH" toRLP False = RLPB $ toByteStringS "\NUL" fromRLP x | x == toRLP True = Just True | x == toRLP False = Just False | otherwise = Nothing -- Strings are serialized as ByteStrings instance {-# OVERLAPPING #-} RLPSerialize String where toRLP = RLPB . toByteStringS fromRLP (RLPB x) = Just $ fromByteStringS x fromRLP _ = Nothing -- Chars are just length-one strings instance RLPSerialize Char where toRLP = RLPB . toByteStringS . (: []) fromRLP (RLPB x) = Just $ head $ fromByteStringS x fromRLP _ = Nothing -- Tuples are transformed into Lists instance (RLPSerialize a, RLPSerialize b) => RLPSerialize (a, b) where toRLP (x, y) = RLPL [toRLP x, toRLP y] fromRLP (RLPL [x, y]) = maybe Nothing (\x' -> maybe Nothing (\y' -> Just (x', y')) (fromRLP y)) (fromRLP x) fromRLP _ = Nothing instance (RLPSerialize a, RLPSerialize b, RLPSerialize c) => RLPSerialize (a, b, c) where toRLP (x, y, z) = RLPL [toRLP x, toRLP y, toRLP z] fromRLP (RLPL [x, y, z]) = maybe Nothing (\x' -> maybe Nothing (\y' -> maybe Nothing (\z' -> Just (x', y', z')) (fromRLP z)) (fromRLP y)) (fromRLP x) fromRLP _ = Nothing instance (RLPSerialize a, RLPSerialize b, RLPSerialize c, RLPSerialize d) => RLPSerialize (a, b, c, d) where toRLP (a1, a2, a3, a4) = RLPL [toRLP a1, toRLP a2, toRLP a3, toRLP a4] fromRLP (RLPL [a1, a2, a3, a4]) = maybe Nothing (\a1' -> maybe Nothing (\a2' -> maybe Nothing (\a3' -> maybe Nothing (\a4' -> Just (a1', a2', a3', a4')) (fromRLP a4)) (fromRLP a3)) (fromRLP a2)) (fromRLP a1) fromRLP _ = Nothing instance (RLPSerialize a, RLPSerialize b, RLPSerialize c, RLPSerialize d, RLPSerialize e) => RLPSerialize (a, b, c, d, e) where toRLP (a1, a2, a3, a4, a5) = RLPL [toRLP a1, toRLP a2, toRLP a3, toRLP a4, toRLP a5] fromRLP (RLPL [a1, a2, a3, a4, a5]) = maybe Nothing (\a1' -> maybe Nothing (\a2' -> maybe Nothing (\a3' -> maybe Nothing (\a4' -> maybe Nothing (\a5' -> Just (a1', a2', a3', a4', a5')) (fromRLP a5)) (fromRLP a4)) (fromRLP a3)) (fromRLP a2)) (fromRLP a1) fromRLP _ = Nothing instance (RLPSerialize a, RLPSerialize b, RLPSerialize c, RLPSerialize d, RLPSerialize e, RLPSerialize f) => RLPSerialize (a, b, c, d, e, f) where toRLP (a1, a2, a3, a4, a5, a6) = RLPL [toRLP a1, toRLP a2, toRLP a3, toRLP a4, toRLP a5, toRLP a6] fromRLP (RLPL [a1, a2, a3, a4, a5, a6]) = maybe Nothing (\a1' -> maybe Nothing (\a2' -> maybe Nothing (\a3' -> maybe Nothing (\a4' -> maybe Nothing (\a5' -> maybe Nothing (\a6' -> Just (a1', a2', a3', a4', a5', a6')) (fromRLP a6)) (fromRLP a5)) (fromRLP a4)) (fromRLP a3)) (fromRLP a2)) (fromRLP a1) fromRLP _ = Nothing -------------------------------------------------------------------------------- -- $example -- For a full example, we reproduce the implementation of the Person type as in the -- subtleties section. -- -- First of all, we define the type: -- -- > type Name = (String, String) -- > data Person = Person { -- > name :: Name, -- > age :: Int -- > } deriving (Show) -- -- Then we have to make it an instance of RLPSerialize: -- -- >instance RLPSerialize Person where -- > toRLP p = RLPL [ -- > RLPL [ -- > toRLP . toByteStringS . fst . name $ p, -- > toRLP . toByteStringS . snd . name $ p -- > ], -- > toRLP . age $ p] -- > -- > fromRLP (RLPL [ RLPL [ RLPB a, RLPB b ], RLPB c ]) = -- > case fromBigEndianS c of -- > Right v -> Just $ Person (fromByteStringS a, fromByteStringS b) v -- > _ -> Nothing -- > fromRLP _ = Nothing -- -- This way, if the decoding gives rise to other structure than the expected, a the -- resulting value would be 'Nothing'. We can now use our decoder and encoder -- with our custom type: -- -- > p = Person ("John", "Snow") 33 -- > e = rlpEncode p -- > -- "\204\202\132John\132Snow!" ~ [204,202,132,74,111,104,110,132,83,110,111,119,33] -- > rlpDecode e :: Maybe Person -- > -- Right (Person {name = ("John","Snow"), age = 33}) -- -------------------------------------------------------------------------------- -- $failexample -- In case we run into an error situation, depending whether the RLPT structure is not well -- formed or the generated structure couldn't be transformed into the expected type, an error -- is returned in the form of a 'Left' value. -- -- Just to see this as an example, if we chop the resulting ByteString, there's no way to -- generate a correct RLPT structure so an error is thrown: -- -- > rlpDecode $ DBSL.take 6 $ rlpEncode $ RLPL [ RLPB $ toByteStringS "John", RLPB $ toByteStringS "Snow" ] :: Either String RLPT -- > -- Left "not enough bytes" -- -- On the other hand, if we try to transform an incorrect value from the decoded RLPT we -- generate a new error: -- -- > rlpDecode $ rlpEncode $ RLPB $ toByteStringS "\STX" :: Either String Bool -- > -- Left "RLPT value couldn't ve transformed into the required type" -- -- If a ByteString is the result of the concatenation of more than one serialized RLPT structure, -- only the first one would be decoded. This isn't quite specified in the Yellow Paper although it is possible that an error should be thrown when finding trailing bytes: -- -- > a = rlpEncode $ RLPL [ RLPB $ toByteStringS "John", RLPB $ toByteStringS "Snow" ] -- > b = DBSL.append a a -- > -- "\202\132John\132Snow\202\132John\132Snow" -- > rlpDecode b :: Either String RLPT -- > -- Right (RLPL [RLPB "John",RLPB "Snow"])