module JVM.ClassFile
(
Attribute (..),
FieldType (..),
FieldSignature, MethodSignature (..), ReturnSignature (..),
ArgumentSignature (..),
File, Direct,
Pool, Link,
Method (..), Field (..), Class (..),
Constant (..),
AccessFlag (..), AccessFlags,
Attributes (..),
defaultClass,
HasSignature (..), HasAttributes (..),
NameType (..),
fieldNameType, methodNameType,
lookupField, lookupMethod,
long,
toString,
className,
apsize, arsize, arlist
)
where
import Control.Monad
import Control.Monad.Trans (lift)
import Control.Applicative
import qualified Control.Monad.State as St
import Data.Binary
import Data.Binary.IEEE754
import Data.Binary.Get
import Data.Binary.Put
import Data.Char
import Data.List
import Data.Default
import qualified Data.Set as S
import qualified Data.Map as M
import qualified Data.ByteString.Lazy as B
import Codec.Binary.UTF8.String hiding (encode, decode)
getChar8 :: Get Char
getChar8 = do
x <- getWord8
return $ chr (fromIntegral x)
toString :: B.ByteString -> String
toString bstr = decodeString $ map (chr . fromIntegral) $ B.unpack bstr
data File = File
data Direct = Direct
type family Link stage a
type instance Link File a = Word16
type instance Link Direct a = a
type family AccessFlags stage
type instance AccessFlags File = Word16
type instance AccessFlags Direct = S.Set AccessFlag
data family Attributes stage
data instance Attributes File = AP {attributesList :: [Attribute]}
deriving (Eq, Show)
instance Default (Attributes File) where
def = AP []
data instance Attributes Direct = AR (M.Map B.ByteString B.ByteString)
deriving (Eq, Show)
instance Default (Attributes Direct) where
def = AR M.empty
arsize :: Attributes Direct -> Int
arsize (AR m) = M.size m
arlist :: Attributes Direct -> [(B.ByteString, B.ByteString)]
arlist (AR m) = M.assocs m
apsize :: Attributes File -> Int
apsize (AP list) = length list
data AccessFlag =
ACC_PUBLIC
| ACC_PRIVATE
| ACC_PROTECTED
| ACC_STATIC
| ACC_FINAL
| ACC_SYNCHRONIZED
| ACC_VOLATILE
| ACC_TRANSIENT
| ACC_NATIVE
| ACC_INTERFACE
| ACC_ABSTRACT
deriving (Eq, Show, Ord, Enum)
class (Binary (Signature a), Show (Signature a), Eq (Signature a))
=> HasSignature a where
type Signature a
instance HasSignature (Field Direct) where
type Signature (Field Direct) = FieldSignature
instance HasSignature (Method Direct) where
type Signature (Method Direct) = MethodSignature
data NameType a = NameType {
ntName :: B.ByteString,
ntSignature :: Signature a }
instance (HasSignature a) => Show (NameType a) where
show (NameType n t) = toString n ++ ": " ++ show t
deriving instance HasSignature a => Eq (NameType a)
instance HasSignature a => Binary (NameType a) where
put (NameType n t) = putLazyByteString n >> put t
get = NameType <$> get <*> get
data Constant stage =
CClass (Link stage B.ByteString)
| CField (Link stage B.ByteString) (Link stage (NameType (Field stage)))
| CMethod (Link stage B.ByteString) (Link stage (NameType (Method stage)))
| CIfaceMethod (Link stage B.ByteString) (Link stage (NameType (Method stage)))
| CString (Link stage B.ByteString)
| CInteger Word32
| CFloat Float
| CLong Word64
| CDouble Double
| CNameType (Link stage B.ByteString) (Link stage B.ByteString)
| CUTF8 {getString :: B.ByteString}
| CUnicode {getString :: B.ByteString}
className :: Constant Direct -> B.ByteString
className (CClass s) = s
className x = error $ "Not a class: " ++ show x
instance Show (Constant Direct) where
show (CClass name) = "class " ++ toString name
show (CField cls nt) = "field " ++ toString cls ++ "." ++ show nt
show (CMethod cls nt) = "method " ++ toString cls ++ "." ++ show nt
show (CIfaceMethod cls nt) = "interface method " ++ toString cls ++ "." ++ show nt
show (CString s) = "String \"" ++ toString s ++ "\""
show (CInteger x) = show x
show (CFloat x) = show x
show (CLong x) = show x
show (CDouble x) = show x
show (CNameType name tp) = toString name ++ ": " ++ toString tp
show (CUTF8 s) = "UTF8 \"" ++ toString s ++ "\""
show (CUnicode s) = "Unicode \"" ++ toString s ++ "\""
type Pool stage = M.Map Word16 (Constant stage)
data Class stage = Class {
magic :: Word32,
minorVersion :: Word16,
majorVersion :: Word16,
constsPoolSize :: Word16,
constsPool :: Pool stage,
accessFlags :: AccessFlags stage,
thisClass :: Link stage B.ByteString,
superClass :: Link stage B.ByteString,
interfacesCount :: Word16,
interfaces :: [Link stage B.ByteString],
classFieldsCount :: Word16,
classFields :: [Field stage],
classMethodsCount :: Word16,
classMethods :: [Method stage],
classAttributesCount :: Word16,
classAttributes :: Attributes stage
}
deriving instance Eq (Class File)
deriving instance Eq (Class Direct)
deriving instance Show (Class File)
deriving instance Show (Class Direct)
deriving instance Eq (Constant File)
deriving instance Eq (Constant Direct)
deriving instance Show (Constant File)
defaultClass :: (Default (AccessFlags stage), Default (Link stage B.ByteString), Default (Attributes stage))
=> Class stage
defaultClass = Class {
magic = 0xCAFEBABE,
minorVersion = 0,
majorVersion = 50,
constsPoolSize = 0,
constsPool = def,
accessFlags = def,
thisClass = def,
superClass = def,
interfacesCount = 0,
interfaces = [],
classFieldsCount = 0,
classFields = [],
classMethodsCount = 0,
classMethods = [],
classAttributesCount = 0,
classAttributes = def }
instance Binary (Class File) where
put (Class {..}) = do
put magic
put minorVersion
put majorVersion
putPool constsPool
put accessFlags
put thisClass
put superClass
put interfacesCount
forM_ interfaces put
put classFieldsCount
forM_ classFields put
put classMethodsCount
forM_ classMethods put
put classAttributesCount
forM_ (attributesList classAttributes) put
get = do
magic <- get
when (magic /= 0xCAFEBABE) $
fail $ "Invalid .class file MAGIC value: " ++ show magic
minor <- get
major <- get
when (major > 50) $
fail $ "Too new .class file format: " ++ show major
poolsize <- getWord16be
pool <- getPool (poolsize 1)
af <- get
this <- get
super <- get
interfacesCount <- get
ifaces <- replicateM (fromIntegral interfacesCount) get
classFieldsCount <- getWord16be
classFields <- replicateM (fromIntegral classFieldsCount) get
classMethodsCount <- get
classMethods <- replicateM (fromIntegral classMethodsCount) get
asCount <- get
as <- replicateM (fromIntegral $ asCount) get
return $ Class magic minor major poolsize pool af this super
interfacesCount ifaces classFieldsCount classFields
classMethodsCount classMethods asCount (AP as)
data FieldType =
SignedByte
| CharByte
| DoubleType
| FloatType
| IntType
| LongInt
| ShortInt
| BoolType
| ObjectType String
| Array (Maybe Int) FieldType
deriving (Eq, Ord)
instance Show FieldType where
show SignedByte = "byte"
show CharByte = "char"
show DoubleType = "double"
show FloatType = "float"
show IntType = "int"
show LongInt = "long"
show ShortInt = "short"
show BoolType = "bool"
show (ObjectType s) = "Object " ++ s
show (Array Nothing t) = show t ++ "[]"
show (Array (Just n) t) = show t ++ "[" ++ show n ++ "]"
type FieldSignature = FieldType
getInt :: Get (Maybe Int)
getInt = do
s <- getDigits
if null s
then return Nothing
else return $ Just (read s)
where
getDigits :: Get [Char]
getDigits = do
c <- lookAhead getChar8
if isDigit c
then do
skip 1
next <- getDigits
return (c: next)
else return []
putString :: String -> Put
putString str = forM_ str put
instance Binary FieldType where
put SignedByte = put 'B'
put CharByte = put 'C'
put DoubleType = put 'D'
put FloatType = put 'F'
put IntType = put 'I'
put LongInt = put 'J'
put ShortInt = put 'S'
put BoolType = put 'Z'
put (ObjectType name) = put 'L' >> putString name >> put ';'
put (Array Nothing sig) = put '[' >> put sig
put (Array (Just n) sig) = put '[' >> put (show n) >> put sig
get = do
b <- getChar8
case b of
'B' -> return SignedByte
'C' -> return CharByte
'D' -> return DoubleType
'F' -> return FloatType
'I' -> return IntType
'J' -> return LongInt
'S' -> return ShortInt
'Z' -> return BoolType
'L' -> do
name <- getToSemicolon
return (ObjectType name)
'[' -> do
mbSize <- getInt
sig <- get
return (Array mbSize sig)
_ -> fail $ "Unknown signature opening symbol: " ++ [b]
getToSemicolon :: Get String
getToSemicolon = do
x <- get
if x == ';'
then return []
else do
next <- getToSemicolon
return (x: next)
data ReturnSignature =
Returns FieldType
| ReturnsVoid
deriving (Eq, Ord)
instance Show ReturnSignature where
show (Returns t) = show t
show ReturnsVoid = "Void"
instance Binary ReturnSignature where
put (Returns sig) = put sig
put ReturnsVoid = put 'V'
get = do
x <- lookAhead getChar8
case x of
'V' -> skip 1 >> return ReturnsVoid
_ -> Returns <$> get
type ArgumentSignature = FieldType
data MethodSignature =
MethodSignature [ArgumentSignature] ReturnSignature
deriving (Eq, Ord)
instance Show MethodSignature where
show (MethodSignature args ret) = "(" ++ intercalate ", " (map show args) ++ ") returns " ++ show ret
instance Binary MethodSignature where
put (MethodSignature args ret) = do
put '('
forM_ args put
put ')'
put ret
get = do
x <- getChar8
when (x /= '(') $
fail "Cannot parse method signature: no starting `(' !"
args <- getArgs
y <- getChar8
when (y /= ')') $
fail "Internal error: method signature without `)' !?"
ret <- get
return (MethodSignature args ret)
getArgs :: Get [ArgumentSignature]
getArgs = whileJust getArg
where
getArg :: Get (Maybe ArgumentSignature)
getArg = do
x <- lookAhead getChar8
if x == ')'
then return Nothing
else Just <$> get
whileJust :: (Monad m) => m (Maybe a) -> m [a]
whileJust m = do
r <- m
case r of
Just x -> do
next <- whileJust m
return (x: next)
Nothing -> return []
long :: Constant stage -> Bool
long (CLong _) = True
long (CDouble _) = True
long _ = False
putPool :: Pool File -> Put
putPool pool = do
let list = M.elems pool
d = length $ filter long list
putWord16be $ fromIntegral (M.size pool + d + 1)
forM_ list putC
where
putC (CClass i) = putWord8 7 >> put i
putC (CField i j) = putWord8 9 >> put i >> put j
putC (CMethod i j) = putWord8 10 >> put i >> put j
putC (CIfaceMethod i j) = putWord8 11 >> put i >> put j
putC (CString i) = putWord8 8 >> put i
putC (CInteger x) = putWord8 3 >> put x
putC (CFloat x) = putWord8 4 >> putFloat32be x
putC (CLong x) = putWord8 5 >> put x
putC (CDouble x) = putWord8 6 >> putFloat64be x
putC (CNameType i j) = putWord8 12 >> put i >> put j
putC (CUTF8 bs) = do
putWord8 1
put (fromIntegral (B.length bs) :: Word16)
putLazyByteString bs
putC (CUnicode bs) = do
putWord8 2
put (fromIntegral (B.length bs) :: Word16)
putLazyByteString bs
getPool :: Word16 -> Get (Pool File)
getPool n = do
items <- St.evalStateT go 1
return $ M.fromList items
where
go :: St.StateT Word16 Get [(Word16, Constant File)]
go = do
i <- St.get
if i > n
then return []
else do
c <- lift getC
let i' = if long c
then i+2
else i+1
St.put i'
next <- go
return $ (i,c): next
getC = do
!offset <- bytesRead
tag <- getWord8
case tag of
1 -> do
l <- get
bs <- getLazyByteString (fromIntegral (l :: Word16))
return $ CUTF8 bs
2 -> do
l <- get
bs <- getLazyByteString (fromIntegral (l :: Word16))
return $ CUnicode bs
3 -> CInteger <$> get
4 -> CFloat <$> getFloat32be
5 -> CLong <$> get
6 -> CDouble <$> getFloat64be
7 -> CClass <$> get
8 -> CString <$> get
9 -> CField <$> get <*> get
10 -> CMethod <$> get <*> get
11 -> CIfaceMethod <$> get <*> get
12 -> CNameType <$> get <*> get
_ -> fail $ "Unknown constants pool entry tag: " ++ show tag
data Field stage = Field {
fieldAccessFlags :: AccessFlags stage,
fieldName :: Link stage B.ByteString,
fieldSignature :: Link stage FieldSignature,
fieldAttributesCount :: Word16,
fieldAttributes :: Attributes stage }
deriving instance Eq (Field File)
deriving instance Eq (Field Direct)
deriving instance Show (Field File)
deriving instance Show (Field Direct)
lookupField :: B.ByteString -> Class Direct -> Maybe (Field Direct)
lookupField name cls = look (classFields cls)
where
look [] = Nothing
look (f:fs)
| fieldName f == name = Just f
| otherwise = look fs
fieldNameType :: Field Direct -> NameType (Field Direct)
fieldNameType f = NameType (fieldName f) (fieldSignature f)
instance Binary (Field File) where
put (Field {..}) = do
put fieldAccessFlags
put fieldName
put fieldSignature
put fieldAttributesCount
forM_ (attributesList fieldAttributes) put
get = do
af <- get
ni <- getWord16be
si <- get
n <- getWord16be
as <- replicateM (fromIntegral n) get
return $ Field af ni si n (AP as)
data Method stage = Method {
methodAccessFlags :: AccessFlags stage,
methodName :: Link stage B.ByteString,
methodSignature :: Link stage MethodSignature,
methodAttributesCount :: Word16,
methodAttributes :: Attributes stage }
deriving instance Eq (Method File)
deriving instance Eq (Method Direct)
deriving instance Show (Method File)
deriving instance Show (Method Direct)
methodNameType :: Method Direct -> NameType (Method Direct)
methodNameType m = NameType (methodName m) (methodSignature m)
lookupMethod :: B.ByteString -> Class Direct -> Maybe (Method Direct)
lookupMethod name cls = look (classMethods cls)
where
look [] = Nothing
look (f:fs)
| methodName f == name = Just f
| otherwise = look fs
instance Binary (Method File) where
put (Method {..}) = do
put methodAccessFlags
put methodName
put methodSignature
put methodAttributesCount
forM_ (attributesList methodAttributes) put
get = do
offset <- bytesRead
af <- get
ni <- get
si <- get
n <- get
as <- replicateM (fromIntegral n) get
return $ Method {
methodAccessFlags = af,
methodName = ni,
methodSignature = si,
methodAttributesCount = n,
methodAttributes = AP as }
data Attribute = Attribute {
attributeName :: Word16,
attributeLength :: Word32,
attributeValue :: B.ByteString }
deriving (Eq, Show)
instance Binary Attribute where
put (Attribute {..}) = do
put attributeName
putWord32be attributeLength
putLazyByteString attributeValue
get = do
offset <- bytesRead
name <- getWord16be
len <- getWord32be
value <- getLazyByteString (fromIntegral len)
return $ Attribute name len value
class HasAttributes a where
attributes :: a stage -> Attributes stage
instance HasAttributes Class where
attributes = classAttributes
instance HasAttributes Field where
attributes = fieldAttributes
instance HasAttributes Method where
attributes = methodAttributes