-- -----------------------------------------------------------------------------
-- | GHC LLVM Mangler
--
-- This script processes the assembly produced by LLVM, rewriting all symbols
-- of type @function to @object. This keeps them from going through the PLT,
-- which would be bad due to tables-next-to-code. On x86_64,
-- it also rewrites AVX instructions that require alignment to their
-- unaligned counterparts, since the stack is only 16-byte aligned but these
-- instructions require 32-byte alignment.
--

module GHC.CmmToLlvm.Mangler ( llvmFixupAsm ) where

import GHC.Prelude

import GHC.Platform ( Platform, platformArch, Arch(..) )
import GHC.Utils.Exception (try)

import qualified Data.ByteString.Char8 as B
import System.IO

-- | Read in assembly file and process
llvmFixupAsm :: Platform -> FilePath -> FilePath -> IO ()
llvmFixupAsm :: Platform -> FilePath -> FilePath -> IO ()
llvmFixupAsm Platform
platform FilePath
f1 FilePath
f2 = {-# SCC "llvm_mangler" #-}
  FilePath -> IOMode -> (Handle -> IO ()) -> IO ()
forall r. FilePath -> IOMode -> (Handle -> IO r) -> IO r
withBinaryFile FilePath
f1 IOMode
ReadMode ((Handle -> IO ()) -> IO ()) -> (Handle -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \Handle
r -> FilePath -> IOMode -> (Handle -> IO ()) -> IO ()
forall r. FilePath -> IOMode -> (Handle -> IO r) -> IO r
withBinaryFile FilePath
f2 IOMode
WriteMode ((Handle -> IO ()) -> IO ()) -> (Handle -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \Handle
w -> do
      Handle -> Handle -> IO ()
go Handle
r Handle
w
      Handle -> IO ()
hClose Handle
r
      Handle -> IO ()
hClose Handle
w
      () -> IO ()
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return ()
  where
    go :: Handle -> Handle -> IO ()
    go :: Handle -> Handle -> IO ()
go Handle
r Handle
w = do
      Either IOError ByteString
e_l <- IO ByteString -> IO (Either IOError ByteString)
forall e a. Exception e => IO a -> IO (Either e a)
try (IO ByteString -> IO (Either IOError ByteString))
-> IO ByteString -> IO (Either IOError ByteString)
forall a b. (a -> b) -> a -> b
$ Handle -> IO ByteString
B.hGetLine Handle
r ::IO (Either IOError B.ByteString)
      let writeline :: ByteString -> IO ()
writeline ByteString
a = Handle -> ByteString -> IO ()
B.hPutStrLn Handle
w (Platform -> [Rewrite] -> ByteString -> ByteString
rewriteLine Platform
platform [Rewrite]
rewrites ByteString
a) IO () -> IO () -> IO ()
forall a b. IO a -> IO b -> IO b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Handle -> Handle -> IO ()
go Handle
r Handle
w
      case Either IOError ByteString
e_l of
        Right ByteString
l -> ByteString -> IO ()
writeline ByteString
l
        Left IOError
_  -> () -> IO ()
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return ()

-- | These are the rewrites that the mangler will perform
rewrites :: [Rewrite]
rewrites :: [Rewrite]
rewrites = [Rewrite
rewriteSymType, Rewrite
rewriteAVX, Rewrite
rewriteCall]

type Rewrite = Platform -> B.ByteString -> Maybe B.ByteString

-- | Rewrite a line of assembly source with the given rewrites,
-- taking the first rewrite that applies.
rewriteLine :: Platform -> [Rewrite] -> B.ByteString -> B.ByteString
rewriteLine :: Platform -> [Rewrite] -> ByteString -> ByteString
rewriteLine Platform
platform [Rewrite]
rewrites ByteString
l
  -- We disable .subsections_via_symbols on darwin and ios, as the llvm code
  -- gen uses prefix data for the info table.  This however does not prevent
  -- llvm from generating .subsections_via_symbols, which in turn with
  -- -dead_strip, strips the info tables, and therefore breaks ghc.
  | ByteString -> Bool
isSubsectionsViaSymbols ByteString
l =
    (FilePath -> ByteString
B.pack FilePath
"## no .subsection_via_symbols for ghc. We need our info tables!")
  | Bool
otherwise =
    case [Maybe ByteString] -> Maybe ByteString
forall a. [Maybe a] -> Maybe a
firstJust ([Maybe ByteString] -> Maybe ByteString)
-> [Maybe ByteString] -> Maybe ByteString
forall a b. (a -> b) -> a -> b
$ (Rewrite -> Maybe ByteString) -> [Rewrite] -> [Maybe ByteString]
forall a b. (a -> b) -> [a] -> [b]
map (\Rewrite
rewrite -> Rewrite
rewrite Platform
platform ByteString
rest) [Rewrite]
rewrites of
      Maybe ByteString
Nothing        -> ByteString
l
      Just ByteString
rewritten -> [ByteString] -> ByteString
B.concat ([ByteString] -> ByteString) -> [ByteString] -> ByteString
forall a b. (a -> b) -> a -> b
$ [ByteString
symbol, FilePath -> ByteString
B.pack FilePath
"\t", ByteString
rewritten]
  where
    isSubsectionsViaSymbols :: ByteString -> Bool
isSubsectionsViaSymbols = ByteString -> ByteString -> Bool
B.isPrefixOf (FilePath -> ByteString
B.pack FilePath
".subsections_via_symbols")

    (ByteString
symbol, ByteString
rest) = ByteString -> (ByteString, ByteString)
splitLine ByteString
l

    firstJust :: [Maybe a] -> Maybe a
    firstJust :: forall a. [Maybe a] -> Maybe a
firstJust (Just a
x:[Maybe a]
_) = a -> Maybe a
forall a. a -> Maybe a
Just a
x
    firstJust []         = Maybe a
forall a. Maybe a
Nothing
    firstJust (Maybe a
_:[Maybe a]
rest)   = [Maybe a] -> Maybe a
forall a. [Maybe a] -> Maybe a
firstJust [Maybe a]
rest

-- | This rewrites @.type@ annotations of function symbols to @%object@.
-- This is done as the linker can relocate @%functions@ through the
-- Procedure Linking Table (PLT). This is bad since we expect that the
-- info table will appear directly before the symbol's location. In the
-- case that the PLT is used, this will be not an info table but instead
-- some random PLT garbage.
rewriteSymType :: Rewrite
rewriteSymType :: Rewrite
rewriteSymType Platform
_ ByteString
l
  | ByteString -> Bool
isType ByteString
l  = ByteString -> Maybe ByteString
forall a. a -> Maybe a
Just (ByteString -> Maybe ByteString) -> ByteString -> Maybe ByteString
forall a b. (a -> b) -> a -> b
$ Char -> ByteString -> ByteString
rewrite Char
'@' (ByteString -> ByteString) -> ByteString -> ByteString
forall a b. (a -> b) -> a -> b
$ Char -> ByteString -> ByteString
rewrite Char
'%' ByteString
l
  | Bool
otherwise = Maybe ByteString
forall a. Maybe a
Nothing
  where
    isType :: ByteString -> Bool
isType = ByteString -> ByteString -> Bool
B.isPrefixOf (FilePath -> ByteString
B.pack FilePath
".type")

    rewrite :: Char -> B.ByteString -> B.ByteString
    rewrite :: Char -> ByteString -> ByteString
rewrite Char
prefix = ByteString -> ByteString -> ByteString -> ByteString
replaceOnce ByteString
funcType ByteString
objType
      where
        funcType :: ByteString
funcType = Char
prefix Char -> ByteString -> ByteString
`B.cons` FilePath -> ByteString
B.pack FilePath
"function"
        objType :: ByteString
objType  = Char
prefix Char -> ByteString -> ByteString
`B.cons` FilePath -> ByteString
B.pack FilePath
"object"

-- | This rewrites aligned AVX instructions to their unaligned counterparts on
-- x86-64. This is necessary because the stack is not adequately aligned for
-- aligned AVX spills, so LLVM would emit code that adjusts the stack pointer
-- and disable tail call optimization. Both would be catastrophic here so GHC
-- tells LLVM that the stack is 32-byte aligned (even though it isn't) and then
-- rewrites the instructions in the mangler.
rewriteAVX :: Rewrite
rewriteAVX :: Rewrite
rewriteAVX Platform
platform ByteString
s
  | Bool -> Bool
not Bool
isX86_64 = Maybe ByteString
forall a. Maybe a
Nothing
  | ByteString -> Bool
isVmovdqa ByteString
s  = ByteString -> Maybe ByteString
forall a. a -> Maybe a
Just (ByteString -> Maybe ByteString) -> ByteString -> Maybe ByteString
forall a b. (a -> b) -> a -> b
$ ByteString -> ByteString -> ByteString -> ByteString
replaceOnce (FilePath -> ByteString
B.pack FilePath
"vmovdqa") (FilePath -> ByteString
B.pack FilePath
"vmovdqu") ByteString
s
  | ByteString -> Bool
isVmovap ByteString
s   = ByteString -> Maybe ByteString
forall a. a -> Maybe a
Just (ByteString -> Maybe ByteString) -> ByteString -> Maybe ByteString
forall a b. (a -> b) -> a -> b
$ ByteString -> ByteString -> ByteString -> ByteString
replaceOnce (FilePath -> ByteString
B.pack FilePath
"vmovap") (FilePath -> ByteString
B.pack FilePath
"vmovup") ByteString
s
  | Bool
otherwise    = Maybe ByteString
forall a. Maybe a
Nothing
  where
    isX86_64 :: Bool
isX86_64 = Platform -> Arch
platformArch Platform
platform Arch -> Arch -> Bool
forall a. Eq a => a -> a -> Bool
== Arch
ArchX86_64
    isVmovdqa :: ByteString -> Bool
isVmovdqa = ByteString -> ByteString -> Bool
B.isPrefixOf (FilePath -> ByteString
B.pack FilePath
"vmovdqa")
    isVmovap :: ByteString -> Bool
isVmovap = ByteString -> ByteString -> Bool
B.isPrefixOf (FilePath -> ByteString
B.pack FilePath
"vmovap")

-- | This rewrites (tail) calls to avoid creating PLT entries for
-- functions on riscv64. The replacement will load the address from the
-- GOT, which is resolved to point to the real address of the function.
rewriteCall :: Rewrite
rewriteCall :: Rewrite
rewriteCall Platform
platform ByteString
l
  | Bool -> Bool
not Bool
isRISCV64 = Maybe ByteString
forall a. Maybe a
Nothing
  | ByteString -> Bool
isCall ByteString
l      = ByteString -> Maybe ByteString
forall a. a -> Maybe a
Just (ByteString -> Maybe ByteString) -> ByteString -> Maybe ByteString
forall a b. (a -> b) -> a -> b
$ FilePath -> FilePath -> FilePath -> ByteString -> ByteString
replaceCall FilePath
"call" FilePath
"jalr" FilePath
"ra" ByteString
l
  | ByteString -> Bool
isTail ByteString
l      = ByteString -> Maybe ByteString
forall a. a -> Maybe a
Just (ByteString -> Maybe ByteString) -> ByteString -> Maybe ByteString
forall a b. (a -> b) -> a -> b
$ FilePath -> FilePath -> FilePath -> ByteString -> ByteString
replaceCall FilePath
"tail" FilePath
"jr" FilePath
"t1" ByteString
l
  | Bool
otherwise     = Maybe ByteString
forall a. Maybe a
Nothing
  where
    isRISCV64 :: Bool
isRISCV64 = Platform -> Arch
platformArch Platform
platform Arch -> Arch -> Bool
forall a. Eq a => a -> a -> Bool
== Arch
ArchRISCV64
    isCall :: ByteString -> Bool
isCall = ByteString -> ByteString -> Bool
B.isPrefixOf (FilePath -> ByteString
B.pack FilePath
"call\t")
    isTail :: ByteString -> Bool
isTail = ByteString -> ByteString -> Bool
B.isPrefixOf (FilePath -> ByteString
B.pack FilePath
"tail\t")

    replaceCall :: FilePath -> FilePath -> FilePath -> ByteString -> ByteString
replaceCall FilePath
call FilePath
jump FilePath
reg ByteString
l =
        FilePath -> ByteString -> ByteString
appendInsn (FilePath
jump FilePath -> FilePath -> FilePath
forall a. [a] -> [a] -> [a]
++ FilePath
"\t" FilePath -> FilePath -> FilePath
forall a. [a] -> [a] -> [a]
++ FilePath
reg) (ByteString -> ByteString) -> ByteString -> ByteString
forall a b. (a -> b) -> a -> b
$ ByteString -> ByteString
removePlt (ByteString -> ByteString) -> ByteString -> ByteString
forall a b. (a -> b) -> a -> b
$
        ByteString -> ByteString -> ByteString -> ByteString
replaceOnce (FilePath -> ByteString
B.pack FilePath
call) (FilePath -> ByteString
B.pack (FilePath
"la\t" FilePath -> FilePath -> FilePath
forall a. [a] -> [a] -> [a]
++ FilePath
reg FilePath -> FilePath -> FilePath
forall a. [a] -> [a] -> [a]
++ FilePath
",")) ByteString
l
      where
        removePlt :: ByteString -> ByteString
removePlt = ByteString -> ByteString -> ByteString -> ByteString
replaceOnce (FilePath -> ByteString
B.pack FilePath
"@plt") (FilePath -> ByteString
B.pack FilePath
"")
        appendInsn :: FilePath -> ByteString -> ByteString
appendInsn FilePath
i = (ByteString -> ByteString -> ByteString
`B.append` FilePath -> ByteString
B.pack (FilePath
"\n\t" FilePath -> FilePath -> FilePath
forall a. [a] -> [a] -> [a]
++ FilePath
i))

-- | @replaceOnce match replace bs@ replaces the first occurrence of the
-- substring @match@ in @bs@ with @replace@.
replaceOnce :: B.ByteString -> B.ByteString -> B.ByteString -> B.ByteString
replaceOnce :: ByteString -> ByteString -> ByteString -> ByteString
replaceOnce ByteString
matchBS ByteString
replaceOnceBS = ByteString -> ByteString
loop
  where
    loop :: B.ByteString -> B.ByteString
    loop :: ByteString -> ByteString
loop ByteString
cts =
        case ByteString -> ByteString -> (ByteString, ByteString)
B.breakSubstring ByteString
matchBS ByteString
cts of
          (ByteString
hd,ByteString
tl) | ByteString -> Bool
B.null ByteString
tl -> ByteString
hd
                  | Bool
otherwise -> ByteString
hd ByteString -> ByteString -> ByteString
`B.append` ByteString
replaceOnceBS ByteString -> ByteString -> ByteString
`B.append`
                                 Int -> ByteString -> ByteString
B.drop (ByteString -> Int
B.length ByteString
matchBS) ByteString
tl

-- | This function splits a line of assembly code into the label and the
-- rest of the code.
splitLine :: B.ByteString -> (B.ByteString, B.ByteString)
splitLine :: ByteString -> (ByteString, ByteString)
splitLine ByteString
l = (ByteString
symbol, (Char -> Bool) -> ByteString -> ByteString
B.dropWhile Char -> Bool
isSpace ByteString
rest)
  where
    isSpace :: Char -> Bool
isSpace Char
' ' = Bool
True
    isSpace Char
'\t' = Bool
True
    isSpace Char
_ = Bool
False
    (ByteString
symbol, ByteString
rest) = (Char -> Bool) -> ByteString -> (ByteString, ByteString)
B.span (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
isSpace) ByteString
l