module Data.Dwarf
( Endianess(..), TargetSize(..)
, Sections(..)
, parseInfo
, DieID, dieID, DIE(..), (!?)
, DIERefs(..), DIEMap
, Reader(..)
, parseAranges
, parsePubnames
, parsePubtypes
, Range(..), parseRanges, parseLoc
, DW_CFA(..)
, DW_MACINFO(..), parseMacInfo
, DW_CIEFDE(..), parseFrame
, DW_OP(..), parseDW_OP
, DW_TAG(..)
, DW_AT(..)
, DW_ATVAL(..)
, DW_LNE(..), parseLNE
, DW_ATE(..), dw_ate
, DW_DS(..), dw_ds
, DW_END(..), dw_end
, DW_ACCESS(..), dw_access
, DW_VIS(..), dw_vis
, DW_VIRTUALITY(..), dw_virtuality
, DW_LANG(..), dw_lang
, DW_ID(..), dw_id
, DW_INL(..), dw_inl
, DW_CC(..), dw_cc
, DW_ORD(..), dw_ord
, DW_DSC(..), dw_dsc
) where
import Control.Applicative (Applicative(..), (<$>), (<$))
import Control.Arrow ((&&&), (***))
import Control.Monad ((<=<))
import Control.Monad.Trans.Class (lift)
import Control.Monad.Trans.Writer (WriterT(..))
import qualified Control.Monad.Trans.Writer as Writer
import Data.Binary (Get)
import Data.Binary.Get (getWord8, getByteString)
import qualified Data.Binary.Get as Get
import qualified Data.ByteString as B
import Data.Dwarf.AT
import Data.Dwarf.ATE
import Data.Dwarf.CFA
import Data.Dwarf.Form
import Data.Dwarf.LNI
import Data.Dwarf.OP
import Data.Dwarf.Reader
import Data.Dwarf.TAG
import Data.Dwarf.Types
import Data.Dwarf.Utils
import Data.Int (Int64)
import qualified Data.Map as M
import Data.Maybe (listToMaybe)
import Data.String (IsString(..))
import Data.Text (Text)
import qualified Data.Text as Text
import Data.Traversable (traverse)
import Data.Word (Word64)
import GHC.Generics (Generic)
import Numeric (showHex)
import TextShow (TextShow(..))
import TextShow.Generic (genericShowbPrec)
newtype CUOffset = CUOffset Word64
deriving (Eq, Ord, Read, Show, Generic)
instance TextShow CUOffset where showbPrec = genericShowbPrec
dieID :: DieID -> Word64
dieID (DieID x) = x
inCU :: Integral a => CUOffset -> a -> DieID
inCU (CUOffset base) x = DieID $ base + fromIntegral x
data Sections = Sections
{ dsInfoSection :: B.ByteString
, dsAbbrevSection :: B.ByteString
, dsStrSection :: B.ByteString
}
data CUContext = CUContext
{ cuOffset :: CUOffset
, cuAbbrevMap :: M.Map AbbrevId DW_ABBREV
, cuReader :: Reader
, cuSections :: Sections
}
newtype AbbrevId = AbbrevId Word64
deriving (Eq, Ord, Read, Show, Generic)
instance TextShow AbbrevId where showbPrec = genericShowbPrec
data DW_ABBREV = DW_ABBREV
{ abbrevId :: AbbrevId
, abbrevTag :: DW_TAG
, abbrevChildren :: Bool
, abbrevAttrForms :: [(DW_AT, DW_FORM)]
}
getMAbbrevId :: Get (Maybe AbbrevId)
getMAbbrevId = do
i <- getULEB128
pure $
if i == 0
then Nothing
else Just $ AbbrevId i
getAbbrevList :: Get [DW_ABBREV]
getAbbrevList =
whileJust $ traverse getAbbrev =<< getMAbbrevId
where
getAbbrev abbrev = do
tag <- getDW_TAG
children <- (== 1) <$> getWord8
attrForms <- getAttrFormList
pure $ DW_ABBREV abbrev tag children attrForms
getAttrFormList =
(fmap . map) (dw_at *** dw_form) . whileM (/= (0,0)) $
(,) <$> getULEB128 <*> getULEB128
(!?) :: DIE -> DW_AT -> [DW_ATVAL]
(!?) die at = map snd $ filter ((== at) . fst) $ dieAttributes die
getNonZeroOffset :: Reader -> Get (Maybe Word64)
getNonZeroOffset dr = do
offset <- drGetOffset dr
pure $ if offset == 0 then Nothing else Just offset
getNameLookupEntries :: Reader -> CUOffset -> Get [(Text, [DieID])]
getNameLookupEntries dr cu_offset =
whileJust $ traverse getEntry =<< getNonZeroOffset dr
where
getEntry die_offset = do
name <- getUTF8Str0
pure (name, [inCU cu_offset die_offset])
getTableHeader :: TargetSize -> EndianReader -> Get (Reader, CUOffset)
getTableHeader target64 der = do
(desr, _) <- getUnitLength der
let dr = reader target64 desr
_version <- drGetW16 dr
cu_offset <- drGetOffset dr
return (dr, CUOffset cu_offset)
getNameLookupTable :: TargetSize -> EndianReader -> Get [M.Map Text [DieID]]
getNameLookupTable target64 der = getWhileNotEmpty $ do
(dr, cu_offset) <- getTableHeader target64 der
_debug_info_length <- drGetOffset dr
M.fromListWith (++) <$> getNameLookupEntries dr cu_offset
parsePubSection :: Endianess -> TargetSize -> B.ByteString -> M.Map Text [DieID]
parsePubSection endianess target64 section =
M.unionsWith (++) $ strictGet (getNameLookupTable target64 der) section
where
der = endianReader endianess
parsePubnames :: Endianess -> TargetSize -> B.ByteString -> M.Map Text [DieID]
parsePubnames = parsePubSection
parsePubtypes :: Endianess -> TargetSize -> B.ByteString -> M.Map Text [DieID]
parsePubtypes = parsePubSection
align :: Integral a => a -> Get ()
align alignment = do
pos <- Get.bytesRead
Get.skip . fromIntegral $ (pos) `mod` fromIntegral alignment
data Range = Range
{ rangeBegin :: !Word64
, rangeEnd :: !Word64
} deriving (Eq, Ord, Read, Show, Generic)
instance TextShow Range where showbPrec = genericShowbPrec
getAddressRangeTable :: TargetSize -> EndianReader -> Get [([Range], CUOffset)]
getAddressRangeTable target64 der = getWhileNotEmpty $ do
(dr, cu_offset) <- getTableHeader target64 der
address_size <- getWord8
let
readAddress =
case address_size of
4 -> fromIntegral <$> drGetW32 dr
8 -> drGetW64 dr
n -> fail $ "Unrecognized address size " ++ show n ++ " in .debug_aranges section."
_segment_size <- getWord8
align $ 2 * address_size
address_ranges <- whileM (/= Range 0 0) $ Range <$> readAddress <*> readAddress
pure (address_ranges, cu_offset)
parseAranges ::
Endianess -> TargetSize -> B.ByteString -> [([Range], CUOffset)]
parseAranges endianess target64 aranges_section =
let dr = endianReader endianess
in strictGet (getAddressRangeTable target64 dr) aranges_section
data DW_MACINFO
= DW_MACINFO_define Word64 Text
| DW_MACINFO_undef Word64 Text
| DW_MACINFO_start_file Word64 Word64
| DW_MACINFO_end_file
| DW_MACINFO_vendor_ext Word64 Text
deriving (Eq, Ord, Read, Show, Generic)
instance TextShow DW_MACINFO where showbPrec = genericShowbPrec
parseMacInfo :: B.ByteString -> [DW_MACINFO]
parseMacInfo = strictGet getMacInfo
getMacInfo :: Get [DW_MACINFO]
getMacInfo = do
x <- getWord8
case x of
0x00 -> pure []
0x01 -> pure (:) <*> (pure DW_MACINFO_define <*> getULEB128 <*> getUTF8Str0) <*> getMacInfo
0x02 -> pure (:) <*> (pure DW_MACINFO_undef <*> getULEB128 <*> getUTF8Str0) <*> getMacInfo
0x03 -> pure (:) <*> (pure DW_MACINFO_start_file <*> getULEB128 <*> getULEB128) <*> getMacInfo
0x04 -> pure (:) <*> pure DW_MACINFO_end_file <*> getMacInfo
0xff -> pure (:) <*> (pure DW_MACINFO_vendor_ext <*> getULEB128 <*> getUTF8Str0) <*> getMacInfo
_ -> fail $ "Invalid MACINFO id: " ++ show x
data DW_CIEFDE
= DW_CIE
{ cieAugmentation :: Text
, cieCodeAlignmentFactor :: Word64
, cieDataAlignmentFactor :: Int64
, cieReturnAddressRegister :: Word64
, cieInitialInstructions :: [DW_CFA]
}
| DW_FDE
{ fdeCiePointer :: Word64
, fdeInitialLocation :: Word64
, fdeAddressRange :: Word64
, fdeInstructions :: [DW_CFA]
}
deriving (Eq, Ord, Read, Show, Generic)
instance TextShow DW_CIEFDE where showbPrec = genericShowbPrec
getCIEFDE :: Endianess -> TargetSize -> Get DW_CIEFDE
getCIEFDE endianess target64 = do
let der = endianReader endianess
(desr, endPos) <- getUnitLength der
let dr = reader target64 desr
cie_id <- drGetOffset dr
if cie_id == drLargestOffset dr then do
version <- getWord8
augmentation <- getUTF8Str0
code_alignment_factor <- getULEB128
data_alignment_factor <- getSLEB128
return_address_register <- case version of
1 -> fromIntegral <$> getWord8
3 -> getULEB128
n -> fail $ "Unrecognized CIE version " ++ show n
curPos <- fromIntegral <$> Get.bytesRead
raw_instructions <- getByteString $ fromIntegral (endPos curPos)
let initial_instructions = strictGet (getWhileNotEmpty (getDW_CFA dr)) raw_instructions
pure $ DW_CIE augmentation code_alignment_factor data_alignment_factor return_address_register initial_instructions
else do
initial_location <- drGetTargetAddress dr
address_range <- drGetTargetAddress dr
curPos <- fromIntegral <$> Get.bytesRead
raw_instructions <- getByteString $ fromIntegral (endPos curPos)
let instructions = strictGet (getWhileNotEmpty (getDW_CFA dr)) raw_instructions
pure $ DW_FDE cie_id initial_location address_range instructions
parseFrame ::
Endianess -> TargetSize
-> B.ByteString
-> [DW_CIEFDE]
parseFrame endianess target64 =
strictGet . getWhileNotEmpty $ getCIEFDE endianess target64
newtype RangeEnd = RangeEnd Word64
parseRanges :: Reader -> B.ByteString -> [Either RangeEnd Range]
parseRanges = strictGet . getRanges
getMRange :: Reader -> Get (Maybe (Either RangeEnd Range))
getMRange dr = do
begin <- drGetTargetAddress dr
end <- drGetTargetAddress dr
pure $
if begin == 0 && end == 0
then Nothing
else Just $
if begin == drLargestTargetAddress dr
then Left $ RangeEnd end
else Right $ Range begin end
getRanges :: Reader -> Get [Either RangeEnd Range]
getRanges dr = whileJust $ getMRange dr
parseLoc :: Reader -> B.ByteString -> [Either RangeEnd (Range, B.ByteString)]
parseLoc dr = strictGet (getLoc dr)
getLoc :: Reader -> Get [Either RangeEnd (Range, B.ByteString)]
getLoc dr = whileJust $ traverse mkRange =<< getMRange dr
where
mkRange (Left end) = pure $ Left end
mkRange (Right range) =
Right . (,) range <$> getByteStringLen (drGetW16 dr)
data DIERefs = DIERefs
{ dieRefsParent :: Maybe DieID
, dieRefsSiblingLeft :: Maybe DieID
, dieRefsSiblingRight :: Maybe DieID
, dieRefsDIE :: DIE
} deriving (Show, Generic)
type DIEMap = M.Map DieID DIERefs
type DIECollector = WriterT DIEMap
data DIE = DIE
{ dieId :: DieID
, dieTag :: DW_TAG
, dieAttributes :: [(DW_AT, DW_ATVAL)]
, dieChildren :: [DIE]
, dieReader :: Reader
}
instance Show DIE where show = Text.unpack . showt
instance TextShow DIE where
showb (DIE (DieID i) tag attrs children _) =
mconcat $ mconcat
[ [ "DIE@", fromString (showHex i ""), "{", showb tag, " (", showb (length children), " children)"]
, mconcat
[ [" ", showb attr, "=(", showb val, ")"]
| (attr, val) <- attrs
]
, [ "}" ]
]
addRefs :: Maybe DieID -> [DIE] -> [DIERefs]
addRefs mParent = go Nothing
where
go _lSibling [] = []
go lSibling (die : xs) =
DIERefs mParent lSibling (dieId <$> listToMaybe xs) die :
go (Just (dieId die)) xs
withToldRefs :: (Applicative m, Monad m) => Maybe DieID -> [DIE] -> DIECollector m [DIE]
withToldRefs mParent dies =
dies <$
(Writer.tell . M.fromList . map (dieId . dieRefsDIE &&& id) . addRefs mParent) dies
getDieAndSiblings :: DieID -> CUContext -> DIECollector Get [DIE]
getDieAndSiblings parent cuContext =
withToldRefs (Just parent) =<< (whileJust . getDIEAndDescendants) cuContext
getForm :: CUContext -> DW_FORM -> Get DW_ATVAL
getForm
cuContext@CUContext { cuReader = dr, cuOffset = cu, cuSections = dc }
form
= case form of
DW_FORM_addr -> DW_ATVAL_UINT <$> drGetTargetAddress dr
DW_FORM_block1 -> DW_ATVAL_BLOB <$> getByteStringLen getWord8
DW_FORM_block2 -> DW_ATVAL_BLOB <$> getByteStringLen (drGetW16 dr)
DW_FORM_block4 -> DW_ATVAL_BLOB <$> getByteStringLen (drGetW32 dr)
DW_FORM_block -> DW_ATVAL_BLOB <$> getByteStringLen getULEB128
DW_FORM_data1 -> DW_ATVAL_UINT . fromIntegral <$> getWord8
DW_FORM_data2 -> DW_ATVAL_UINT . fromIntegral <$> drGetW16 dr
DW_FORM_data4 -> DW_ATVAL_UINT . fromIntegral <$> drGetW32 dr
DW_FORM_data8 -> DW_ATVAL_UINT <$> drGetW64 dr
DW_FORM_udata -> DW_ATVAL_UINT <$> getULEB128
DW_FORM_sdata -> DW_ATVAL_INT <$> getSLEB128
DW_FORM_flag -> DW_ATVAL_BOOL . (/= 0) <$> getWord8
DW_FORM_string -> DW_ATVAL_STRING <$> getUTF8Str0
DW_FORM_ref1 -> DW_ATVAL_REF . inCU cu <$> getWord8
DW_FORM_ref2 -> DW_ATVAL_REF . inCU cu <$> drGetW16 dr
DW_FORM_ref4 -> DW_ATVAL_REF . inCU cu <$> drGetW32 dr
DW_FORM_ref8 -> DW_ATVAL_REF . inCU cu <$> drGetW64 dr
DW_FORM_ref_udata -> DW_ATVAL_REF . inCU cu <$> getULEB128
DW_FORM_ref_addr -> DW_ATVAL_UINT <$> drGetOffset dr
DW_FORM_sec_offset -> DW_ATVAL_UINT <$> drGetOffset dr
DW_FORM_exprloc -> DW_ATVAL_BLOB <$> getByteStringLen getULEB128
DW_FORM_flag_present -> pure $ DW_ATVAL_BOOL True
DW_FORM_ref_sig8 -> DW_ATVAL_UINT <$> drGetW64 dr
DW_FORM_indirect -> getForm cuContext . dw_form =<< getULEB128
DW_FORM_strp -> do
offset <- drGetOffset dr
pure . DW_ATVAL_STRING .
getAt getUTF8Str0 offset $ dsStrSection dc
getDIEAndDescendants :: CUContext -> DIECollector Get (Maybe DIE)
getDIEAndDescendants cuContext = do
offset <- lift $ DieID . fromIntegral <$> Get.bytesRead
let
go abbrid = do
let
abbrev = cuAbbrevMap cuContext M.! abbrid
tag = abbrevTag abbrev
(attrs, forms) = unzip $ abbrevAttrForms abbrev
values <- lift $ mapM (getForm cuContext) forms
children <-
if abbrevChildren abbrev
then getDieAndSiblings offset cuContext
else pure []
pure $ DIE offset tag (zip attrs values) children dr
traverse go =<< lift getMAbbrevId
where
dr = cuReader cuContext
getCUHeader ::
EndianReader -> Sections ->
Get (CUOffset, M.Map AbbrevId DW_ABBREV, Reader)
getCUHeader der dwarfSections = do
cu_offset <- CUOffset . fromIntegral <$> Get.bytesRead
(desr, _) <- getUnitLength der
_version <- desrGetW16 desr
abbrev_offset <- desrGetOffset desr
let abbrev_map = M.fromList . map (abbrevId &&& id) .
getAt getAbbrevList abbrev_offset $
dsAbbrevSection dwarfSections
addr_size <- getWord8
dr <- case addr_size of
4 -> pure $ reader TargetSize32 desr
8 -> pure $ reader TargetSize64 desr
_ -> fail $ "Invalid address size: " ++ show addr_size
return (cu_offset, abbrev_map, dr)
getDieCus :: EndianReader -> Sections -> DIECollector Get [DIE]
getDieCus der dwarfSections =
withToldRefs Nothing <=<
whileJust . condAct (lift Get.isEmpty) $ do
(cu_offset, abbrev_map, dr) <- lift $ getCUHeader der dwarfSections
maybe (fail "Compilation Unit must have a DIE") return =<<
getDIEAndDescendants CUContext
{ cuReader = dr
, cuAbbrevMap = abbrev_map
, cuOffset = cu_offset
, cuSections = dwarfSections
}
parseInfo :: Endianess -> Sections -> ([DIE], DIEMap)
parseInfo endianess dwarfSections =
strictGet act $ dsInfoSection dwarfSections
where
act = runWriterT $ getDieCus dr dwarfSections
dr = endianReader endianess