module Blip.Marshal
( readPyc, writePyc, PycFile (..), PyObject (..), CodeObjectFlagMask
, co_optimized, co_newlocals, co_varargs, co_varkeywords
, co_nested, co_generator, co_nofree )
where
import Blip.MarshalDouble (bytesToDouble, doubleToBytes)
import Blip.Bytecode (decode, BytecodeSeq (..))
import Blip.Pretty (Pretty (..), prettyList, prettyTuple)
import Control.Applicative ((<$>), (<*>))
import Data.Map as Map hiding (map, size, empty)
import Data.Word (Word8, Word16, Word32)
import Control.Monad.Error (ErrorT (..), lift, replicateM)
import System.IO (Handle)
import qualified Data.ByteString.Lazy as B
(ByteString, hGetContents, unpack, hPutStr, length)
import Data.ByteString.Lazy.UTF8 as UTF8 (toString, fromString)
import Data.Binary.Get (Get, runGet, getLazyByteString, getWord32le, getWord8, getWord16le)
import Data.Binary.Put (PutM, putWord32le, putLazyByteString, runPutM, putWord8, putWord16le)
import Data.Bits ((.&.))
import Data.Int (Int64, Int32)
import Data.Char (chr, ord)
import Text.PrettyPrint
(text, (<+>), ($$), (<>), Doc , vcat, int, equals, doubleQuotes, hsep, empty)
data PycFile =
PycFile
{ magic :: Word32
, modified_time :: Word32
, size :: Word32
, object :: PyObject
}
deriving Show
instance Pretty PycFile where
pretty pycFile =
text "magic =" <+> pretty (magic pycFile) $$
text "modified time =" <+> pretty (modified_time pycFile) $$
text "size =" <+> pretty (size pycFile) $$
text "object =" <+> pretty (object pycFile)
data PyObject
= Code
{ argcount :: !Word32
, kwonlyargcount :: !Word32
, nlocals :: !Word32
, stacksize :: !Word32
, flags :: !Word32
, code :: !PyObject
, consts :: !PyObject
, names :: !PyObject
, varnames :: !PyObject
, freevars :: !PyObject
, cellvars :: !PyObject
, filename :: !PyObject
, name :: !PyObject
, firstlineno :: !Word32
, lnotab :: !PyObject
}
| String { string :: !B.ByteString }
| Tuple { elements :: ![PyObject] }
| Int { int_value :: !Word32 }
| Float { float_value :: !Double }
| None
| Ellipsis
| Unicode { unicode :: !String }
| TrueObj
| FalseObj
| Complex { real :: !Double, imaginary :: !Double }
| Long { long_value :: !Integer }
deriving (Eq, Ord, Show)
instance Pretty PyObject where
pretty (String {..}) = doubleQuotes $ pretty string
pretty (Tuple {..}) = prettyTuple $ map pretty elements
pretty (Int {..}) = pretty int_value
pretty (Long {..}) = pretty long_value
pretty (Float {..}) = pretty float_value
pretty None = text "None"
pretty Ellipsis = text "..."
pretty TrueObj = text "True"
pretty FalseObj = text "False"
pretty (Unicode {..}) = doubleQuotes $ text unicode
pretty (Code {..}) =
text "argcount =" <+> pretty argcount $$
text "kwonlyargcount =" <+> pretty kwonlyargcount $$
text "nlocals =" <+> pretty nlocals $$
text "stacksize =" <+> pretty stacksize $$
text "flags =" <+> prettyFlags flags $$
text "varnames =" <+> pretty varnames $$
text "freevars =" <+> pretty freevars $$
text "cellvars =" <+> pretty cellvars $$
text "filename =" <+> pretty filename $$
text "name =" <+> pretty name $$
text "firstlineno =" <+> pretty firstlineno $$
text "lnotab =" <+> prettyLnotab lnotab $$
text "names =" <+> pretty names $$
prettyConsts consts $$
text "code =" <+> pretty (BytecodeSeq $ decode $ string code)
pretty (Complex {..}) = pretty real <+> text "+" <+> pretty imaginary <> text "j"
prettyConsts :: PyObject -> Doc
prettyConsts obj =
case obj of
Tuple {..} ->
vcat $ map prettyConst $ zip [0..] elements
_other -> text ("consts not a tuple: " ++ show obj)
where
prettyConst :: (Int, PyObject) -> Doc
prettyConst (i, obj) = text "const" <+> int i <+> equals <+> pretty obj
prettyLnotab :: PyObject -> Doc
prettyLnotab obj =
case obj of
String {..} -> prettyList $ map pretty $ pairs $ B.unpack string
_other -> text ("lnotab not a string: " ++ show obj)
where
pairs :: [Word8] -> [(Word8, Word8)]
pairs [] = []
pairs [_] = error $ "Odd numbered linenotab"
pairs (nextCode:nextLine:rest) = (nextCode, nextLine) : pairs rest
readPyc :: Handle -> IO PycFile
readPyc handle = do
bytes <- B.hGetContents handle
runGetDataCheck getPycFile bytes
writePyc :: Handle -> PycFile -> IO ()
writePyc handle pycFile = do
bytes <- runPutDataCheck $ putPycFile pycFile
B.hPutStr handle bytes
getPycFile :: GetData PycFile
getPycFile = PycFile <$> getU32 <*> getU32 <*> getU32 <*> readObject
putPycFile :: PycFile -> PutData
putPycFile pycFile = do
putU32 $ magic pycFile
putU32 $ modified_time pycFile
putU32 $ size pycFile
writeObject $ object pycFile
readObject :: GetData PyObject
readObject = do
object_type <- decodeObjectType <$> getU8
case object_type of
CODE -> readCodeObject
STRING -> readStringObject
TUPLE -> readTupleObject
INT -> readIntObject
NONE -> return None
ELLIPSIS -> return Ellipsis
TRUE -> return TrueObj
FALSE -> return FalseObj
UNICODE -> readUnicodeObject
BINARY_FLOAT -> readFloatObject
BINARY_COMPLEX -> readComplexObject
LONG -> readLongObject
_other -> error ("readObject: unsupported object type: " ++ show object_type)
writeObject :: PyObject -> PutData
writeObject object =
case object of
Code {..} -> writeCodeObject object
String {..} -> writeStringObject object
Tuple {..} -> writeTupleObject object
Int {..} -> writeIntObject object
None -> putU8 $ encodeObjectType NONE
Ellipsis -> putU8 $ encodeObjectType ELLIPSIS
Unicode {..} -> writeUnicodeObject object
TrueObj -> putU8 $ encodeObjectType TRUE
FalseObj -> putU8 $ encodeObjectType FALSE
Float {..} -> writeFloatObject object
Complex {..} -> writeComplexObject object
Long {..} -> writeLongObject object
writeObjectType :: ObjectType -> PutData
writeObjectType = putU8 . encodeObjectType
readCodeObject :: GetData PyObject
readCodeObject =
Code <$> getU32 <*> getU32 <*> getU32 <*> getU32 <*> getU32 <*>
readObject <*> readObject <*> readObject <*> readObject <*>
readObject <*> readObject <*> readObject <*> readObject <*>
getU32 <*> readObject
writeCodeObject :: PyObject -> PutData
writeCodeObject (Code {..}) =
writeObjectType CODE >>
mapM_ putU32 [argcount, kwonlyargcount, nlocals, stacksize, flags] >>
mapM_ writeObject [code, consts, names, varnames, freevars, cellvars,
filename, name] >>
putU32 firstlineno >>
writeObject lnotab
writeCodeObject other = error $ "writeCodeObject called on non code object: " ++ show other
readStringObject :: GetData PyObject
readStringObject = do
len <- getU32
String <$> (getBS $ fromIntegral len)
writeStringObject :: PyObject -> PutData
writeStringObject (String {..}) =
writeObjectType STRING >>
putU32 (fromIntegral $ B.length string) >>
putBS string
writeStringObject other = error $ "writStringObject called on non string object: " ++ show other
readTupleObject :: GetData PyObject
readTupleObject = do
len <- getU32
Tuple <$> replicateM (fromIntegral len) readObject
writeTupleObject :: PyObject -> PutData
writeTupleObject (Tuple {..}) =
writeObjectType TUPLE >>
putU32 (fromIntegral $ length elements) >>
mapM_ writeObject elements
writeTupleObject other = error $ "writeTupleObject called on non tuple object: " ++ show other
readIntObject :: GetData PyObject
readIntObject = Int <$> getU32
writeIntObject :: PyObject -> PutData
writeIntObject (Int {..}) =
writeObjectType INT >> putU32 int_value
writeIntObject other = error $ "writeIntObject called on non int object: " ++ show other
readFloatObject :: GetData PyObject
readFloatObject = Float <$> getDouble
readComplexObject :: GetData PyObject
readComplexObject = Complex <$> getDouble <*> getDouble
writeFloatObject :: PyObject -> PutData
writeFloatObject (Float {..}) =
writeObjectType BINARY_FLOAT >> putDouble float_value
writeFloatObject other = error $ "writeFloatObject called on non float object: " ++ show other
writeComplexObject :: PyObject -> PutData
writeComplexObject (Complex {..}) =
writeObjectType BINARY_COMPLEX >> putDouble real >> putDouble imaginary
writeComplexObject other = error $ "writeComplexObject called on non complex object: " ++ show other
readUnicodeObject :: GetData PyObject
readUnicodeObject = do
len <- getU32
bs <- getBS $ fromIntegral len
return $ Unicode $ UTF8.toString bs
writeUnicodeObject :: PyObject -> PutData
writeUnicodeObject (Unicode {..}) = do
writeObjectType UNICODE
let uc = UTF8.fromString unicode
putU32 (fromIntegral $ B.length uc)
putBS uc
writeUnicodeObject other = error $ "writeUnicodeObject called on non unicode object: " ++ show other
longDigitBase :: Integer
longDigitBase = 2^(15::Integer)
readLongObject :: GetData PyObject
readLongObject = do
len <- getI32
if len == 0
then return $ Long 0
else do
base15digits <- replicateM (fromIntegral (abs len)) getU16
let digitsExponents = zip (map fromIntegral base15digits) [(0::Integer) ..]
val = sum [(longDigitBase ^ exp) * digit | (digit, exp) <- digitsExponents]
if len < 0
then return $! Long $! negate val
else return $! Long val
writeLongObject :: PyObject -> PutData
writeLongObject (Long {..}) = do
writeObjectType LONG
case compare long_value 0 of
EQ -> putI32 0
GT -> do
putI32 numDigits
mapM_ putU16 digits
LT -> do
putI32 $ negate numDigits
mapM_ putU16 digits
where
digits :: [Word16]
digits = getDigits (abs long_value) longDigitBase
numDigits :: Int32
numDigits = fromIntegral $ length digits
getDigits :: Integer -> Integer -> [Word16]
getDigits 0 _base = []
getDigits n base = (fromIntegral (n `mod` base)) : getDigits (n `div` base) base
writeLongObject other = error $ "writeLongObject called on non long object: " ++ show other
data ObjectType
= NULL
| NONE
| FALSE
| TRUE
| STOPITER
| ELLIPSIS
| INT
| INT64
| FLOAT
| BINARY_FLOAT
| COMPLEX
| BINARY_COMPLEX
| LONG
| STRING
| TUPLE
| LIST
| DICT
| CODE
| UNICODE
| UNKNOWN
| SET
| FROZENSET
deriving (Eq, Ord, Show)
charToObjectType :: Map.Map Char ObjectType
charToObjectType = Map.fromList objectTypeList
objectTypeToChar :: Map.Map ObjectType Char
objectTypeToChar = Map.fromList [ (y, x) | (x, y) <- objectTypeList ]
objectTypeList :: [(Char, ObjectType)]
objectTypeList = [
('0', NULL),
('N', NONE),
('F', FALSE),
('T', TRUE),
('S', STOPITER),
('.', ELLIPSIS),
('i', INT),
('I', INT64),
('f', FLOAT),
('g', BINARY_FLOAT),
('x', COMPLEX),
('y', BINARY_COMPLEX),
('l', LONG),
('s', STRING),
('(', TUPLE),
('[', LIST),
('{', DICT),
('c', CODE),
('u', UNICODE),
('?', UNKNOWN),
('<', SET),
('>', FROZENSET) ]
encodeObjectType :: ObjectType -> Word8
encodeObjectType objectType =
case Map.lookup objectType objectTypeToChar of
Nothing -> error $ "bad object type: " ++ show objectType
Just chr -> fromIntegral $ ord chr
decodeObjectType :: Word8 -> ObjectType
decodeObjectType byte =
case Map.lookup byteChar charToObjectType of
Nothing -> error $ "bad object type: " ++ show byteChar
Just t -> t
where
byteChar = chr $ fromIntegral byte
type GetData a = ErrorT String Get a
getDouble :: GetData Double
getDouble = do
bs <- replicateM 8 getU8
return $ bytesToDouble bs
getBS :: Int64 -> GetData B.ByteString
getBS = lift . getLazyByteString
getU8 :: GetData Word8
getU8 = lift getWord8
getU32 :: GetData Word32
getU32 = lift getWord32le
getI32 :: GetData Int32
getI32 = fromIntegral `fmap` lift getWord32le
getU16 :: GetData Word16
getU16 = lift getWord16le
runGetData :: GetData a -> B.ByteString -> Either String a
runGetData = runGet . runErrorT
runGetDataCheck :: GetData a -> B.ByteString -> IO a
runGetDataCheck g b =
case runGetData g b of
Left e -> fail e
Right v -> return v
type PutData = ErrorT String PutM ()
putDouble :: Double -> PutData
putDouble d = mapM_ putU8 $ doubleToBytes d
putBS :: B.ByteString -> PutData
putBS = lift . putLazyByteString
putU8 :: Word8 -> PutData
putU8 = lift . putWord8
putU16 :: Word16 -> PutData
putU16 = lift . putWord16le
putU32 :: Word32 -> PutData
putU32 = lift . putWord32le
putI32 :: Int32 -> PutData
putI32 = putU32 . fromIntegral
runPutData :: PutData -> Either String B.ByteString
runPutData comp =
case runPutM (runErrorT comp) of
(Left err, _) -> Left err
(Right (), bs) -> Right bs
runPutDataCheck :: PutData -> IO B.ByteString
runPutDataCheck comp =
case runPutData comp of
Left e -> fail e
Right bs -> return bs
type CodeObjectFlagMask = Word32
co_optimized :: CodeObjectFlagMask
co_optimized = 0x0001
co_newlocals :: CodeObjectFlagMask
co_newlocals = 0x0002
co_varargs :: CodeObjectFlagMask
co_varargs = 0x0004
co_varkeywords :: CodeObjectFlagMask
co_varkeywords = 0x0008
co_nested :: CodeObjectFlagMask
co_nested = 0x0010
co_generator :: CodeObjectFlagMask
co_generator = 0x0020
co_nofree :: CodeObjectFlagMask
co_nofree = 0x0040
prettyFlags :: Word32 -> Doc
prettyFlags bits =
hsep $ map (uncurry showFlag) masks
where
checkFlag :: CodeObjectFlagMask -> Bool
checkFlag mask = (bits .&. mask) /= 0
showFlag :: CodeObjectFlagMask -> String -> Doc
showFlag mask name
| checkFlag mask = text name
| otherwise = empty
masks = [ (co_optimized, "CO_OPTIMIZED")
, (co_newlocals, "CO_NEWLOCALS")
, (co_varargs, "CO_VARARGS")
, (co_varkeywords, "CO_VARKEYWORDS")
, (co_nested, "CO_NESTED")
, (co_generator, "CO_GENERATOR")
, (co_nofree, "CO_NOFREE") ]