-- -----------------------------------------------------------------------------
-- | GHC LLVM Mangler
--
-- This script processes the assembly produced by LLVM, rewriting all symbols
-- of type @function to @object. This keeps them from going through the PLT,
-- which would be bad due to tables-next-to-code. On x86_64,
-- it also rewrites AVX instructions that require alignment to their
-- unaligned counterparts, since the stack is only 16-byte aligned but these
-- instructions require 32-byte alignment.
--

module LlvmMangler ( llvmFixupAsm ) where

import GhcPrelude

import DynFlags ( DynFlags, targetPlatform )
import GHC.Platform ( platformArch, Arch(..) )
import ErrUtils ( withTiming )
import Outputable ( text )

import Control.Exception
import qualified Data.ByteString.Char8 as B
import System.IO

-- | Read in assembly file and process
llvmFixupAsm :: DynFlags -> FilePath -> FilePath -> IO ()
llvmFixupAsm :: DynFlags -> FilePath -> FilePath -> IO ()
llvmFixupAsm DynFlags
dflags FilePath
f1 FilePath
f2 = {-# SCC "llvm_mangler" #-}
    DynFlags -> SDoc -> (() -> ()) -> IO () -> IO ()
forall (m :: * -> *) a.
MonadIO m =>
DynFlags -> SDoc -> (a -> ()) -> m a -> m a
withTiming DynFlags
dflags (FilePath -> SDoc
text FilePath
"LLVM Mangler") () -> ()
forall a. a -> a
id (IO () -> IO ()) -> IO () -> IO ()
forall a b. (a -> b) -> a -> b
$
    FilePath -> IOMode -> (Handle -> IO ()) -> IO ()
forall r. FilePath -> IOMode -> (Handle -> IO r) -> IO r
withBinaryFile FilePath
f1 IOMode
ReadMode ((Handle -> IO ()) -> IO ()) -> (Handle -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \Handle
r -> FilePath -> IOMode -> (Handle -> IO ()) -> IO ()
forall r. FilePath -> IOMode -> (Handle -> IO r) -> IO r
withBinaryFile FilePath
f2 IOMode
WriteMode ((Handle -> IO ()) -> IO ()) -> (Handle -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \Handle
w -> do
        Handle -> Handle -> IO ()
go Handle
r Handle
w
        Handle -> IO ()
hClose Handle
r
        Handle -> IO ()
hClose Handle
w
        () -> IO ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
  where
    go :: Handle -> Handle -> IO ()
    go :: Handle -> Handle -> IO ()
go Handle
r Handle
w = do
      Either IOError ByteString
e_l <- IO ByteString -> IO (Either IOError ByteString)
forall e a. Exception e => IO a -> IO (Either e a)
try (IO ByteString -> IO (Either IOError ByteString))
-> IO ByteString -> IO (Either IOError ByteString)
forall a b. (a -> b) -> a -> b
$ Handle -> IO ByteString
B.hGetLine Handle
r ::IO (Either IOError B.ByteString)
      let writeline :: ByteString -> IO ()
writeline ByteString
a = Handle -> ByteString -> IO ()
B.hPutStrLn Handle
w (DynFlags -> [Rewrite] -> ByteString -> ByteString
rewriteLine DynFlags
dflags [Rewrite]
rewrites ByteString
a) IO () -> IO () -> IO ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Handle -> Handle -> IO ()
go Handle
r Handle
w
      case Either IOError ByteString
e_l of
        Right ByteString
l -> ByteString -> IO ()
writeline ByteString
l
        Left IOError
_  -> () -> IO ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()

-- | These are the rewrites that the mangler will perform
rewrites :: [Rewrite]
rewrites :: [Rewrite]
rewrites = [Rewrite
rewriteSymType, Rewrite
rewriteAVX]

type Rewrite = DynFlags -> B.ByteString -> Maybe B.ByteString

-- | Rewrite a line of assembly source with the given rewrites,
-- taking the first rewrite that applies.
rewriteLine :: DynFlags -> [Rewrite] -> B.ByteString -> B.ByteString
rewriteLine :: DynFlags -> [Rewrite] -> ByteString -> ByteString
rewriteLine DynFlags
dflags [Rewrite]
rewrites ByteString
l
  -- We disable .subsections_via_symbols on darwin and ios, as the llvm code
  -- gen uses prefix data for the info table.  This however does not prevent
  -- llvm from generating .subsections_via_symbols, which in turn with
  -- -dead_strip, strips the info tables, and therefore breaks ghc.
  | ByteString -> Bool
isSubsectionsViaSymbols ByteString
l =
    (FilePath -> ByteString
B.pack FilePath
"## no .subsection_via_symbols for ghc. We need our info tables!")
  | Bool
otherwise =
    case [Maybe ByteString] -> Maybe ByteString
forall a. [Maybe a] -> Maybe a
firstJust ([Maybe ByteString] -> Maybe ByteString)
-> [Maybe ByteString] -> Maybe ByteString
forall a b. (a -> b) -> a -> b
$ (Rewrite -> Maybe ByteString) -> [Rewrite] -> [Maybe ByteString]
forall a b. (a -> b) -> [a] -> [b]
map (\Rewrite
rewrite -> Rewrite
rewrite DynFlags
dflags ByteString
rest) [Rewrite]
rewrites of
      Maybe ByteString
Nothing        -> ByteString
l
      Just ByteString
rewritten -> [ByteString] -> ByteString
B.concat ([ByteString] -> ByteString) -> [ByteString] -> ByteString
forall a b. (a -> b) -> a -> b
$ [ByteString
symbol, FilePath -> ByteString
B.pack FilePath
"\t", ByteString
rewritten]
  where
    isSubsectionsViaSymbols :: ByteString -> Bool
isSubsectionsViaSymbols = ByteString -> ByteString -> Bool
B.isPrefixOf (FilePath -> ByteString
B.pack FilePath
".subsections_via_symbols")

    (ByteString
symbol, ByteString
rest) = ByteString -> (ByteString, ByteString)
splitLine ByteString
l

    firstJust :: [Maybe a] -> Maybe a
    firstJust :: [Maybe a] -> Maybe a
firstJust (Just a
x:[Maybe a]
_) = a -> Maybe a
forall a. a -> Maybe a
Just a
x
    firstJust []         = Maybe a
forall a. Maybe a
Nothing
    firstJust (Maybe a
_:[Maybe a]
rest)   = [Maybe a] -> Maybe a
forall a. [Maybe a] -> Maybe a
firstJust [Maybe a]
rest

-- | This rewrites @.type@ annotations of function symbols to @%object@.
-- This is done as the linker can relocate @%functions@ through the
-- Procedure Linking Table (PLT). This is bad since we expect that the
-- info table will appear directly before the symbol's location. In the
-- case that the PLT is used, this will be not an info table but instead
-- some random PLT garbage.
rewriteSymType :: Rewrite
rewriteSymType :: Rewrite
rewriteSymType DynFlags
_ ByteString
l
  | ByteString -> Bool
isType ByteString
l  = ByteString -> Maybe ByteString
forall a. a -> Maybe a
Just (ByteString -> Maybe ByteString) -> ByteString -> Maybe ByteString
forall a b. (a -> b) -> a -> b
$ Char -> ByteString -> ByteString
rewrite Char
'@' (ByteString -> ByteString) -> ByteString -> ByteString
forall a b. (a -> b) -> a -> b
$ Char -> ByteString -> ByteString
rewrite Char
'%' ByteString
l
  | Bool
otherwise = Maybe ByteString
forall a. Maybe a
Nothing
  where
    isType :: ByteString -> Bool
isType = ByteString -> ByteString -> Bool
B.isPrefixOf (FilePath -> ByteString
B.pack FilePath
".type")

    rewrite :: Char -> B.ByteString -> B.ByteString
    rewrite :: Char -> ByteString -> ByteString
rewrite Char
prefix = ByteString -> ByteString -> ByteString -> ByteString
replaceOnce ByteString
funcType ByteString
objType
      where
        funcType :: ByteString
funcType = Char
prefix Char -> ByteString -> ByteString
`B.cons` FilePath -> ByteString
B.pack FilePath
"function"
        objType :: ByteString
objType  = Char
prefix Char -> ByteString -> ByteString
`B.cons` FilePath -> ByteString
B.pack FilePath
"object"

-- | This rewrites aligned AVX instructions to their unaligned counterparts on
-- x86-64. This is necessary because the stack is not adequately aligned for
-- aligned AVX spills, so LLVM would emit code that adjusts the stack pointer
-- and disable tail call optimization. Both would be catastrophic here so GHC
-- tells LLVM that the stack is 32-byte aligned (even though it isn't) and then
-- rewrites the instructions in the mangler.
rewriteAVX :: Rewrite
rewriteAVX :: Rewrite
rewriteAVX DynFlags
dflags ByteString
s
  | Bool -> Bool
not Bool
isX86_64 = Maybe ByteString
forall a. Maybe a
Nothing
  | ByteString -> Bool
isVmovdqa ByteString
s  = ByteString -> Maybe ByteString
forall a. a -> Maybe a
Just (ByteString -> Maybe ByteString) -> ByteString -> Maybe ByteString
forall a b. (a -> b) -> a -> b
$ ByteString -> ByteString -> ByteString -> ByteString
replaceOnce (FilePath -> ByteString
B.pack FilePath
"vmovdqa") (FilePath -> ByteString
B.pack FilePath
"vmovdqu") ByteString
s
  | ByteString -> Bool
isVmovap ByteString
s   = ByteString -> Maybe ByteString
forall a. a -> Maybe a
Just (ByteString -> Maybe ByteString) -> ByteString -> Maybe ByteString
forall a b. (a -> b) -> a -> b
$ ByteString -> ByteString -> ByteString -> ByteString
replaceOnce (FilePath -> ByteString
B.pack FilePath
"vmovap") (FilePath -> ByteString
B.pack FilePath
"vmovup") ByteString
s
  | Bool
otherwise    = Maybe ByteString
forall a. Maybe a
Nothing
  where
    isX86_64 :: Bool
isX86_64 = Platform -> Arch
platformArch (DynFlags -> Platform
targetPlatform DynFlags
dflags) Arch -> Arch -> Bool
forall a. Eq a => a -> a -> Bool
== Arch
ArchX86_64
    isVmovdqa :: ByteString -> Bool
isVmovdqa = ByteString -> ByteString -> Bool
B.isPrefixOf (FilePath -> ByteString
B.pack FilePath
"vmovdqa")
    isVmovap :: ByteString -> Bool
isVmovap = ByteString -> ByteString -> Bool
B.isPrefixOf (FilePath -> ByteString
B.pack FilePath
"vmovap")

-- | @replaceOnce match replace bs@ replaces the first occurrence of the
-- substring @match@ in @bs@ with @replace@.
replaceOnce :: B.ByteString -> B.ByteString -> B.ByteString -> B.ByteString
replaceOnce :: ByteString -> ByteString -> ByteString -> ByteString
replaceOnce ByteString
matchBS ByteString
replaceOnceBS = ByteString -> ByteString
loop
  where
    loop :: B.ByteString -> B.ByteString
    loop :: ByteString -> ByteString
loop ByteString
cts =
        case ByteString -> ByteString -> (ByteString, ByteString)
B.breakSubstring ByteString
matchBS ByteString
cts of
          (ByteString
hd,ByteString
tl) | ByteString -> Bool
B.null ByteString
tl -> ByteString
hd
                  | Bool
otherwise -> ByteString
hd ByteString -> ByteString -> ByteString
`B.append` ByteString
replaceOnceBS ByteString -> ByteString -> ByteString
`B.append`
                                 Int -> ByteString -> ByteString
B.drop (ByteString -> Int
B.length ByteString
matchBS) ByteString
tl

-- | This function splits a line of assembly code into the label and the
-- rest of the code.
splitLine :: B.ByteString -> (B.ByteString, B.ByteString)
splitLine :: ByteString -> (ByteString, ByteString)
splitLine ByteString
l = (ByteString
symbol, (Char -> Bool) -> ByteString -> ByteString
B.dropWhile Char -> Bool
isSpace ByteString
rest)
  where
    isSpace :: Char -> Bool
isSpace Char
' ' = Bool
True
    isSpace Char
'\t' = Bool
True
    isSpace Char
_ = Bool
False
    (ByteString
symbol, ByteString
rest) = (Char -> Bool) -> ByteString -> (ByteString, ByteString)
B.span (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
isSpace) ByteString
l