{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE PatternGuards #-}
module Data.LLVM.BitCode.Bitstream (
Bitstream(..)
, Entry(..)
, AbbrevIdWidth, AbbrevId(..)
, Block(..), BlockId
, RecordId
, UnabbrevRecord(..)
, DefineAbbrev(..), AbbrevOp(..)
, AbbrevRecord(..), Field(..)
, getBitstream, parseBitstream
, getBitCodeBitstream, parseBitCodeBitstream, parseBitCodeBitstreamLazy
, parseMetadataStringLengths
) where
import Data.LLVM.BitCode.BitString as BS
import Data.LLVM.BitCode.GetBits
import Control.Applicative ((<$>))
import Control.Monad (unless,replicateM,guard)
import Data.Monoid (Monoid(..))
import Data.Word (Word8,Word16,Word32)
import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
import qualified Data.Map as Map
import qualified Data.Serialize as C
boolean :: GetBits Bool
boolean = do
bs <- fixed 1
return (bsData bs == 1)
numeric :: Num a => Int -> GetBits a
numeric n = do
bs <- fixed n
return (fromIntegral (bsData bs))
vbr :: Int -> GetBits BitString
vbr n = loop mempty
where
len = n - 1
loop acc = acc `seq` do
chunk <- fixed len
cont <- boolean
let acc' = acc `mappend` chunk
if cont
then loop acc'
else return acc'
vbrNum :: Num a => Int -> GetBits a
vbrNum n = fromBitString <$> vbr n
char6 :: GetBits Char
char6 = do
word <- numeric 6
case word of
n | 0 <= n && n <= 25 -> return (toEnum (n + 97))
| 26 <= n && n <= 51 -> return (toEnum (n + 39))
| 52 <= n && n <= 61 -> return (toEnum (n - 4))
62 -> return '.'
63 -> return '_'
_ -> fail "invalid char6"
data Bitstream = Bitstream
{ bsAppMagic :: !Word16
, bsEntries :: [Entry]
} deriving (Show)
parseBitstream :: S.ByteString -> Either String Bitstream
parseBitstream = C.runGet (runGetBits getBitstream)
parseBitCodeBitstream :: S.ByteString -> Either String Bitstream
parseBitCodeBitstream = C.runGet (runGetBits getBitCodeBitstream)
parseBitCodeBitstreamLazy :: L.ByteString -> Either String Bitstream
parseBitCodeBitstreamLazy = C.runGetLazy (runGetBits getBitCodeBitstream)
bcMagicConst :: BitString
bcMagicConst = BitString 8 0x42 `mappend` BitString 8 0x43
getBitCodeBitstream :: GetBits Bitstream
getBitCodeBitstream = label "llvm-bitstream" $ do
mb <- try guardWrapperMagic
case mb of
Nothing -> getBitstream
Just () -> do
skip 32
off <- fixed 32
unless (fromBitString off == (20 :: Int))
(fail ("invalid offset value: " ++ show off))
size <- fixed 32
skip 32
isolate (fromBitString size `div` 4) getBitstream
bcWrapperMagicConst :: BitString
bcWrapperMagicConst = mconcat [byte 0xDE, byte 0xC0, byte 0x17, byte 0x0B]
where
byte = BitString 8
guardWrapperMagic :: GetBits ()
guardWrapperMagic = do
magic <- fixed 32
guard (magic == bcWrapperMagicConst)
getBitstream :: GetBits Bitstream
getBitstream = label "bitstream" $ do
bc <- fixed 16
unless (bc == bcMagicConst) (fail "Invalid magic number")
appMagic <- numeric 16
entries <- getTopLevelEntries
return Bitstream
{ bsAppMagic = appMagic
, bsEntries = entries
}
data Entry
= EntryBlock Block
| EntryUnabbrevRecord UnabbrevRecord
| EntryDefineAbbrev DefineAbbrev
| EntryAbbrevRecord AbbrevRecord
deriving (Show)
getTopLevelEntries :: GetBits [Entry]
getTopLevelEntries = fst <$> getEntries 2 Map.empty emptyAbbrevMap True
getEntries :: AbbrevIdWidth -> BlockInfoMap -> AbbrevMap -> Bool
-> GetBits ([Entry],BlockInfoMap)
getEntries aw bim0 am0 endBlocksFail = loop bim0 am0
where
loop bim am = do
let finish = return ([],bim)
mb <- try (getAbbrevId aw)
case mb of
Nothing -> finish
Just aid -> do
let continue bim' am' k = do
(rest,bim'') <- loop bim' am'
return (k rest,bim'')
label (show aid) $ case aid of
END_BLOCK | endBlocksFail -> fail "unexpected END_BLOCK"
| otherwise -> align32bits >> finish
ENTER_SUBBLOCK -> do
(block,bim') <- getBlock bim
continue bim' am (EntryBlock block :)
DEFINE_ABBREV -> do
def <- getDefineAbbrev
continue bim (insertAbbrev def am) (EntryDefineAbbrev def :)
UNABBREV_RECORD -> do
record <- getUnabbrevRecord
continue bim am (EntryUnabbrevRecord record :)
ABBREV_RECORD rid -> case lookupAbbrev rid am of
Nothing -> fail ("unknown abbrev id: " ++ show rid)
Just def -> do
record <- getAbbrevRecord aw rid def
continue bim am (EntryAbbrevRecord record :)
type AbbrevIdWidth = Int
data AbbrevId
= END_BLOCK
| ENTER_SUBBLOCK
| DEFINE_ABBREV
| UNABBREV_RECORD
| ABBREV_RECORD !RecordId
deriving (Ord,Eq,Show)
getAbbrevId :: AbbrevIdWidth -> GetBits AbbrevId
getAbbrevId aw = label "abbreviation" $ do
aid <- numeric aw
case aid of
0 -> return END_BLOCK
1 -> return ENTER_SUBBLOCK
2 -> return DEFINE_ABBREV
3 -> return UNABBREV_RECORD
_ -> return (ABBREV_RECORD aid)
data DefineAbbrev = DefineAbbrev
{ defineOps :: [AbbrevOp]
} deriving (Show)
getDefineAbbrev :: GetBits DefineAbbrev
getDefineAbbrev =
label "define abbrev" (DefineAbbrev `fmap` (getAbbrevOps =<< vbrNum 5))
data AbbrevOp
= OpLiteral !BitString
| OpFixed !Int
| OpVBR !Int
| OpArray AbbrevOp
| OpChar6
| OpBlob
deriving (Show)
getAbbrevOps :: Int -> GetBits [AbbrevOp]
getAbbrevOps 0 = return []
getAbbrevOps n = do
(op,consumed) <- getAbbrevOp
rest <- getAbbrevOps (n - consumed)
return (op:rest)
getAbbrevOp :: GetBits (AbbrevOp,Int)
getAbbrevOp = label "abbrevop" $ do
let one = fmap (\x -> (x,1))
isLiteral <- boolean
if isLiteral
then one (OpLiteral <$> vbr 8)
else do
enc <- numeric 3
case enc :: Word8 of
1 -> one (OpFixed <$> vbrNum 5)
2 -> one (OpVBR <$> vbrNum 5)
3 -> do
(op,n) <- getAbbrevOp
return (OpArray op, n+1)
4 -> one (return OpChar6)
5 -> one (return OpBlob)
_ -> fail ("invalid encoding: " ++ show enc)
data AbbrevMap = AbbrevMap
{ amNextId :: !RecordId
, amDefines :: Map.Map RecordId DefineAbbrev
} deriving (Show)
emptyAbbrevMap :: AbbrevMap
emptyAbbrevMap = AbbrevMap
{ amNextId = 4
, amDefines = Map.empty
}
insertAbbrev :: DefineAbbrev -> AbbrevMap -> AbbrevMap
insertAbbrev def m = m
{ amNextId = amNextId m + 1
, amDefines = Map.insert (amNextId m) def (amDefines m)
}
lookupAbbrev :: RecordId -> AbbrevMap -> Maybe DefineAbbrev
lookupAbbrev aid = Map.lookup aid . amDefines
type BlockId = Word32
data Block = Block
{ blockId :: !BlockId
, blockNewAbbrevLen :: !AbbrevIdWidth
, blockLength :: !Int
, blockEntries :: [Entry]
} deriving (Show)
getBlock :: BlockInfoMap -> GetBits (Block, BlockInfoMap)
getBlock bim = do
(block,bim') <- getGenericBlock bim
case blockId block of
0 -> do
bim'' <- processBlockInfo block bim'
return (block,bim'')
_ -> return (block, bim')
getGenericBlock :: BlockInfoMap -> GetBits (Block,BlockInfoMap)
getGenericBlock bim = label "block " $ do
blockid <- vbrNum 8
newabbrevlen <- vbrNum 4
align32bits
blocklen <- numeric 32
let am = lookupAbbrevMap blockid bim
(entries,bim') <- isolate blocklen (getEntries newabbrevlen bim am False)
let block = Block
{ blockId = blockid
, blockNewAbbrevLen = newabbrevlen
, blockLength = blocklen
, blockEntries = entries
}
return (block,bim')
data BlockInfo = BlockInfo
{ infoId :: !BlockId
, infoAbbrevs :: AbbrevMap
} deriving (Show)
addAbbrev :: DefineAbbrev -> BlockInfo -> BlockInfo
addAbbrev a bi = bi { infoAbbrevs = insertAbbrev a (infoAbbrevs bi) }
type BlockInfoMap = Map.Map BlockId BlockInfo
insertBlockInfo :: BlockInfo -> BlockInfoMap -> BlockInfoMap
insertBlockInfo bi bim = Map.insert (infoId bi) bi bim
lookupAbbrevMap :: BlockId -> BlockInfoMap -> AbbrevMap
lookupAbbrevMap bid bim = maybe emptyAbbrevMap infoAbbrevs (Map.lookup bid bim)
processBlockInfo :: Block -> BlockInfoMap -> GetBits BlockInfoMap
processBlockInfo bl bim0 = case blockEntries bl of
EntryUnabbrevRecord record:es
| Just bi0 <- unabbrevSetBid record -> loop bim0 bi0 es
_ -> fail "invalid BLOCKINFO block"
where
closeInfo bi bim = insertBlockInfo bi bim
loop bim bi [] = return (closeInfo bi bim)
loop bim bi (e:es) = case e of
EntryDefineAbbrev def -> loop bim (addAbbrev def bi) es
EntryUnabbrevRecord record
| Just bi' <- unabbrevSetBid record -> loop (closeInfo bi bim) bi' es
_ -> loop bim bi es
type RecordId = Int
data UnabbrevRecord = UnabbrevRecord
{ unabbrevCode :: !RecordId
, unabbrevOps :: [BitString]
} deriving (Show)
getUnabbrevRecord :: GetBits UnabbrevRecord
getUnabbrevRecord = label "unabbreviated record" $ do
code <- vbrNum 6
numops <- vbrNum 6
ops <- replicateM numops (vbr 6)
return UnabbrevRecord
{ unabbrevCode = code
, unabbrevOps = ops
}
unabbrevSetBid :: UnabbrevRecord -> Maybe BlockInfo
unabbrevSetBid record = do
guard (unabbrevCode record == 1)
guard (not (null (unabbrevOps record)))
return BlockInfo
{ infoId = fromBitString (unabbrevOps record !! 0)
, infoAbbrevs = emptyAbbrevMap
}
data AbbrevRecord = AbbrevRecord
{ abbrevIdWidth :: !AbbrevIdWidth
, abbrevId :: !RecordId
, abbrevFields :: [Field]
} deriving (Show)
getAbbrevRecord :: AbbrevIdWidth -> RecordId -> DefineAbbrev
-> GetBits AbbrevRecord
getAbbrevRecord aw aid def =
label "abbreviated record" (AbbrevRecord aw aid <$> getFields def)
data Field
= FieldLiteral !BitString
| FieldFixed !BitString
| FieldVBR !BitString
| FieldArray [Field]
| FieldChar6 !Char
| FieldBlob !S.ByteString
deriving Show
getFields :: DefineAbbrev -> GetBits [Field]
getFields def = mapM interpAbbrevOp (defineOps def)
interpAbbrevOp :: AbbrevOp -> GetBits Field
interpAbbrevOp op = label (show op) $ case op of
OpLiteral val -> return (FieldLiteral val)
OpFixed width -> FieldFixed <$> fixed width
OpVBR width -> FieldVBR <$> vbr width
OpArray ty -> do
len <- vbrNum 6
FieldArray <$> replicateM len (interpAbbrevOp ty)
OpChar6 -> FieldChar6 <$> char6
OpBlob -> do
len <- vbrNum 6
bytes <- bytestring len
return (FieldBlob bytes)
parseMetadataStringLengths :: Int -> S.ByteString -> Either String [Int]
parseMetadataStringLengths n = C.runGet (runGetBits (replicateM n (vbrNum 6)))