{-# LANGUAGE LambdaCase #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE QuasiQuotes #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeApplications #-} module Main where import Data.LLVM.BitCode (parseBitCodeLazyFromFile,Error(..),formatError) import qualified Text.LLVM.AST as AST import Text.LLVM.PP ( ppLLVM, ppLLVM35, ppLLVM36, ppLLVM37, ppLLVM38, llvmPP ) import qualified Control.Exception as EX import Control.Lens ( (^?), _Right ) import Control.Monad ( foldM, unless, when ) import qualified Control.Monad.Catch as X import Control.Monad.IO.Class ( MonadIO, liftIO ) import Control.Monad.Trans.State import Data.Bifunctor (first) import qualified Data.ByteString.Lazy as L import Data.Char (ord,isLetter,isSpace,chr) import Data.Generics (everywhere, mkT) -- SYB import Data.List ( find, isInfixOf, isPrefixOf, isSuffixOf, nub, sort ) import Data.Map ( (!), (!?) ) import qualified Data.Map as Map import Data.Maybe ( fromMaybe ) import Data.Proxy ( Proxy(..) ) import Data.String.Interpolate import qualified Data.Text as T import Data.Typeable (Typeable) import Data.Versions (Versioning, versioning, prettyV, major, minor) import qualified GHC.IO.Exception as GE import qualified Options.Applicative as OA import qualified Prettyprinter as PP import qualified Prettyprinter.Util as PPU import qualified System.Console.Terminal.Size as Term import System.Directory ( doesFileExist, getTemporaryDirectory , listDirectory , removeFile ) import System.Exit (ExitCode(..), exitFailure, exitSuccess) import System.FilePath ( (), (<.>) ) import System.IO (openBinaryTempFile,hClose,openTempFile,hPutStrLn) import qualified System.IO as IO (stderr) import qualified System.Process as Proc import Test.Tasty import Test.Tasty.ExpectedFailure ( ignoreTestBecause , expectFailBecause ) import Test.Tasty.HUnit ( assertFailure, testCase ) import qualified Test.Tasty.Options as TO import qualified Test.Tasty.Runners as TR import qualified Test.Tasty.Sugar as TS import Text.Read (readMaybe) import Text.Show.Pretty (ppShow) descr :: PP.Doc ann descr = PP.vcat $ let block = PPU.reflow in [ block [iii| This test verifies that the llvm-pretty-bc-parser is capable of parsing bitcode properly. This is done by two sets of operations: one which uses the llvm-as assembler to generate bitcode from .ll files (text assembly in LLVM format), and one which uses the clang compiler to generate bitcode from C files; this library should be able to parse both types of generated bitcode files. |] , "" , block [iii| The assembler test method starts with a known .ll file (LLVM text assembly format) and assembles it to LLVM bitcode (via llvm-as from the LLVM tool suite). Then the test will use both llvm-dis (from the LLVM tool suite) and llvm-disasm (from this package, via direct library calls) to convert that bitcode back into the .ll text format, and also (for the latter) into an AST representation. [NOTE: C sources should be kept minimal to be focused in particular; if too large there are too many opportunities for unrelated issues to interfere in successful evaluation.] |] , "" , block [iii| The compiler test method starts with a known .c or .cpp file and uses clang to directly generate a bitcode file. The test then proceeds just as with the assembler test. The only difference therefore is the starting file and first command used on that file, but the compiler method will usually generate more variance in the bitcode files as the compiler version changes. |] , "" , " +-------------[2nd cycle]------------+" , " | |" , " v |" , " .ll --[llvm-as]--> .bc ---[llvm-dis]--> .ll |" , " ^ `-[llvm-disasm]---> .ll" , " | `-> .AST" , " .c/.cpp --[clang]---+ |" , " | [show]" , " .bc -[pre-existing]-+ |" , " v" , " [compare first and second of these:] .txt" , "" , block [iii| The differences between the two .ll text formats are displayed for user information, but differences do not constitute a test failure. The .ll text file obtained from llvm-disasm is then *re-assembled* into another bitcode file, which is again converted back to both .ll and AST formats (repeating the above actions, thus any differences in the .ll files may be displayed twice. Finally, the two AST's are serialized to text and the resulting text is compared for differences. The tests will fail if any of the conversion steps fail, or if there are differences between the first round-trip .ll file and the second round-trip .ll file produced by llvm-pretty-bc-parser. |] , "" , PP.indent 2 $ PP.hang 2 $ block [iii|* If opaque pointers are present, *all* pointers are converted to opaque pointers. This is because the LLVM tools are stricter about opaque pointers, whereas this package is more permissive. Opaque pointers are the standard in LLVM 15. |] , "" , PP.indent 2 $ PP.hang 2 $ block [iii|* Metadata information between the two AST text formats is *not* compared (and is actually discarded before serialization, resetting all indices to zero). This is because metadata indexes are dynamically generated and therefore unstable. |] , "" , PP.indent 2 $ PP.hang 2 $ block [iii|* If the --roundtrip-disabled option is specified on the command line, then only one assembly-disassembly will be performed and the results will not be compared to anything (although any llvm-dis/llvm-disasm differences will still be shown). This mode ensures that the llvm-disasm will not fail, but it does not validate the results. |] ] -- Option Parsing -------------------------------------------------------------- newtype LLVMAs = LLVMAs FilePath defaultLLVMAs :: LLVMAs defaultLLVMAs = LLVMAs "llvm-as" instance TO.IsOption LLVMAs where defaultValue = defaultLLVMAs parseValue = Just . LLVMAs optionName = pure "with-llvm-as" optionHelp = pure "path to llvm-as" showDefaultValue (LLVMAs as) = Just as optionCLParser = TO.mkOptionCLParser $ OA.metavar "FILEPATH" newtype LLVMDis = LLVMDis FilePath instance TO.IsOption LLVMDis where defaultValue = LLVMDis "llvm-dis" parseValue = Just . LLVMDis optionName = pure "with-llvm-dis" optionHelp = pure "path to llvm-dis" showDefaultValue (LLVMDis dis) = Just dis optionCLParser = TO.mkOptionCLParser $ OA.metavar "FILEPATH" newtype Clang = Clang FilePath instance TO.IsOption Clang where defaultValue = Clang "clang" parseValue = Just . Clang optionName = pure "with-clang" optionHelp = pure "path to clang" showDefaultValue (Clang dis) = Just dis optionCLParser = TO.mkOptionCLParser $ OA.metavar "FILEPATH" newtype Roundtrip = Roundtrip Bool instance TO.IsOption Roundtrip where defaultValue = Roundtrip True parseValue = fmap Roundtrip . TO.safeReadBool optionName = pure "roundtrip-disabled" optionHelp = pure "disable roundtrip tests (AST/AST diff)" showDefaultValue (Roundtrip r) = Just $ show r optionCLParser = TO.mkOptionCLParser $ OA.short 'r' newtype Keep = Keep Bool instance TO.IsOption Keep where defaultValue = Keep False parseValue = fmap Keep . TO.safeReadBool optionName = pure "keep" optionHelp = pure "keep all generated files for manual inspection" showDefaultValue (Keep k) = Just $ show k optionCLParser = TO.mkOptionCLParser $ OA.short 'k' newtype Details = Details Bool instance TO.IsOption Details where defaultValue = Details False parseValue = fmap Details . TO.safeReadBool optionName = pure "details" optionHelp = pure "show details of each individual test execution (for debug)" showDefaultValue (Details d) = Just $ show d optionCLParser = TO.mkOptionCLParser $ OA.short 'd' disasmTestIngredients :: VersionCheck -> [TR.Ingredient] disasmTestIngredients llvmver = includingOptions [ TO.Option (Proxy @LLVMAs) , TO.Option (Proxy @LLVMDis) , TO.Option (Proxy @Clang) , TO.Option (Proxy @Roundtrip) , TO.Option (Proxy @Keep) , TO.Option (Proxy @Details) ] : TS.sugarIngredients [ assemblyCube llvmver , cCompilerCube llvmver , ccCompilerCube llvmver ] <> defaultIngredients parseCmdLine :: IO TO.OptionSet parseCmdLine = do TR.installSignalHandlers llvmver <- getLLVMAsVersion defaultLLVMAs let disasmOptDescrs = TO.uniqueOptionDescriptions $ TR.coreOptions ++ TS.sugarOptions ++ TR.ingredientsOptions (disasmTestIngredients llvmver) (disasmOptWarns, disasmOptParser) = TR.optionParser disasmOptDescrs mapM_ (hPutStrLn IO.stderr) disasmOptWarns ts <- maybe 80 Term.width <$> Term.size let pr = OA.prefs $ OA.columns ts OA.customExecParser pr $ OA.info (OA.helper <*> disasmOptParser) ( OA.fullDesc <> OA.header "llvm-pretty-bc-parser disassembly test suite" <> OA.footerDoc (Just $ PP.align descr) ) -- Querying Tool Versions ------------------------------------------------------ -- | Captures the name of the tool and either the error when attempting to get -- the tool version or the actual parsed version self-reported by the tool. Lack -- of a decipherable version is not fatal to running the tests. data VersionCheck = VC String (Either T.Text Versioning) showVC :: VersionCheck -> String showVC (VC nm v) = nm <> " " <> (T.unpack $ either id prettyV v) vcVersioning :: VersionCheck -> Either T.Text Versioning vcVersioning (VC _ v) = v mkVC :: String -> String -> VersionCheck mkVC nm raw = let r = T.pack raw in VC nm $ first (const r) $ versioning r getLLVMAsVersion :: LLVMAs -> IO VersionCheck getLLVMAsVersion (LLVMAs llvmAsPath) = getLLVMToolVersion "llvm-as" llvmAsPath getLLVMDisVersion :: LLVMDis -> IO VersionCheck getLLVMDisVersion (LLVMDis llvmDisPath) = getLLVMToolVersion "llvm-dis" llvmDisPath getClangVersion :: Clang -> IO VersionCheck getClangVersion (Clang clangPath) = getLLVMToolVersion "clang" clangPath -- Determine which version of an LLVM tool will be used for these tests (if -- possible). Uses partial 'head' but this is just tests, and failure is -- captured. getLLVMToolVersion :: String -> FilePath -> IO VersionCheck getLLVMToolVersion toolName toolPath = do let isVerLine l = isInfixOf "LLVM version" l || isInfixOf "clang version" l dropLetter = dropWhile (all isLetter) getVer (Right inp) = -- example inp: "LLVM version 10.0.1" or "clang version 11.1.0" case filter isVerLine $ lines inp of [] -> "NO VERSION IDENTIFIED FOR " <> toolName (l:_) -> case dropLetter $ words l of [] -> toolName <> " VERSION NOT PARSED: " <> l (v:_) -> fst $ break (== '-') v -- remove vendor suffix (e.g. 12.0.1-19ubuntu3) getVer (Left full) = full mkVC toolName . getVer <$> readProcessVersion toolPath -- Runs the tool with a --version argument to have it self-report its version. -- The tool may not even be installed. Returns either an error string or the -- output string from the tool. readProcessVersion :: String -> IO (Either String String) readProcessVersion forTool = X.catches (Right <$> Proc.readProcess forTool [ "--version" ] "") [ X.Handler $ \(e :: EX.IOException) -> if GE.ioe_type e == GE.NoSuchThing then return $ Left "[missing]" -- tool executable not found else do putStrLn $ "Warning: IO error attempting to determine " <> forTool <> " version:" putStrLn $ show e return $ Left "unknown" , X.Handler $ \(e :: X.SomeException) -> do putStrLn $ "Warning: error attempting to determine " <> forTool <> " version:" putStrLn $ show e return $ Left "??" ] -- Test Running ---------------------------------------------------------------- -- | Run all provided tests. main :: IO () main = do -- This is a bit more involved than a typical tasty `main` function. The -- problem is that the number of tests that we generate (via -- `withSugarGroups`) depends on the version of the --llvm-as argument, -- which must be checked in IO. Unfortunately, a typical -- `defaultMainWithIngredients` invocation doesn't allow you to -- generate a dynamic number of tests in IO based on argument values. As a -- result, we have to resort to using more of tasty's internals here. disasmOpts <- parseCmdLine let llvmAs' = TO.lookupOption disasmOpts llvmDis' = TO.lookupOption disasmOpts clang' = TO.lookupOption disasmOpts llvmAsVC <- getLLVMAsVersion llvmAs' llvmDisVC <- getLLVMDisVersion llvmDis' clangVC <- getClangVersion clang' unless (and [ vcVersioning llvmAsVC == vcVersioning llvmDisVC , vcVersioning llvmAsVC == vcVersioning clangVC ]) $ error $ unlines [ "Unexpected version mismatch between clang, llvm-as and llvm-dis" , "* llvm-as version: " ++ showVC llvmAsVC , "* llvm-dis version: " ++ showVC llvmDisVC , "* clang version: " ++ showVC clangVC ] knownBugs <- getKnownBugs sweets1 <- TS.findSugar $ assemblyCube llvmAsVC sweets2 <- TS.findSugar $ cCompilerCube llvmAsVC sweets3 <- TS.findSugar $ ccCompilerCube llvmAsVC sweets4 <- TS.findSugar $ bitcodeCube llvmAsVC atests <- TS.withSugarGroups sweets1 testGroup $ \s _ e -> runAssemblyTest llvmAsVC knownBugs s e ctests <- TS.withSugarGroups sweets2 testGroup $ \s _ e -> runCompileTest llvmAsVC knownBugs s e cctests <- TS.withSugarGroups sweets3 testGroup $ \s _ e -> runCompileTest llvmAsVC knownBugs s e bctests <- TS.withSugarGroups sweets4 testGroup $ \s _ e -> runRawBCTest llvmAsVC knownBugs s e let tests = atests <> ctests case TR.tryIngredients (disasmTestIngredients llvmAsVC) disasmOpts (testGroup "Disassembly tests" [ testGroup ("llvm-as " <> showVC llvmAsVC) atests , testGroup ("C " <> showVC clangVC) ctests , testGroup ("C++ " <> showVC clangVC) cctests , testGroup ("rawBC " <> showVC llvmAsVC) bctests ]) of Nothing -> hPutStrLn IO.stderr "No ingredients agreed to run. Something is wrong either with your ingredient set or the options." Just act -> do ok <- act if ok then exitSuccess else exitFailure defaultMainWithIngredients (disasmTestIngredients llvmAsVC) $ testGroup "Disassembly tests" tests ---------------------------------------------------------------------- -- Assembly/disassembly tests assemblyCube :: VersionCheck -> TS.CUBE assemblyCube llvmver = TS.mkCUBE { TS.inputDirs = ["disasm-test/tests"] , TS.rootName = "*.ll" , TS.separators = "." , TS.validParams = [ ("llvm-range", Just [ "recent-llvm" , "pre-llvm12" , "pre-llvm13" , "pre-llvm14" , "pre-llvm15" ]) ] -- Somewhat unusually for tasty-sugar, we make the expectedSuffix the same -- as the rootName suffix. This is because we are comparing the contents of -- each .ll file against *itself* after parsing it with -- llvm-pretty-bc-parser, pretty-printing it with llvm-pretty, and -- then normalizing it. As such, each .ll file acts as its own golden file. , TS.expectedSuffix = "ll" , TS.sweetAdjuster = \cb -> -- In addition to range matching, this is a round-trip test (assemble + -- disassemble) where the rootname is the same as the expected name. -- Filter out any expectations that don't match the root name. -- (e.g. remove: root=poison.ll with exp=poison.pre-llvm12.ll). let rootExpSame s e = TS.rootFile s == TS.expectedFile e addExpFilter s = s { TS.expected = filter (rootExpSame s) $ TS.expected s } in fmap (fmap addExpFilter) . rangeMatch llvmver cb } rangeMatch :: MonadIO m => VersionCheck -> TS.CUBE -> [TS.Sweets] -> m [TS.Sweets] rangeMatch llvmver cb swts = do -- Perform ranged-matching of the llvm-range parameter against the version of -- llvm (reported by llvm-as) to filter the tasty-sugar expectations. Note -- that there is a built-in expectation here that there is only one llvm -- version available to the test. TS.rangedParamAdjuster "llvm-range" (readMaybe . drop (length ("pre-llvm" :: String))) (<) (vcVersioning llvmver ^? (_Right . major)) cb swts -- | Returns true if this particular test should be skipped, which is signalled -- by the expected file contents starting with "SKIP_TEST". For test cases that -- require a minimum LLVM version, this technique is used to prevent running the -- test on older LLVM versions. skipTest :: TS.Expectation -> IO Bool skipTest expct = ("SKIP_TEST" `L.isPrefixOf`) <$> L.readFile (TS.expectedFile expct) -- | Attempt to compare the assembly generated by llvm-pretty and llvm-dis. runAssemblyTest :: VersionCheck -> KnownBugs -> TS.Sweets -> TS.Expectation -> IO [TestTree] runAssemblyTest llvmVersion knownBugs sweet expct = do shouldSkip <- skipTest expct let tmod = if shouldSkip then ignoreTestBecause "not valid for this LLVM version" else case isKnownBug knownBugs sweet expct llvmVersion of Just (from, why) -> expectFailBecause $ why <> " [see " <> from <> "]" Nothing -> id let pfx = TS.rootBaseName sweet return $ (:[]) $ tmod $ testCaseM llvmVersion pfx $ with2Files (processLL pfx $ TS.rootFile sweet) $ \(parsed1, ast) -> case ast of Nothing -> return () Just ast1 -> -- Re-assemble and re-disassemble with2Files (processLL pfx parsed1) $ \(_, mb'ast2) -> case mb'ast2 of Just ast2 -> diffCmp ast1 ast2 -- Ensure that the ASTs match -- Ensure that the disassembled files match. -- This is usually too strict (and doesn't -- really provide more info). We normalize -- the AST (see below) to ensure that the -- ASTs match modulo metadata numbering, but -- the equivalent isn't possible for the -- assembly: we need llvm-as to be able to -- re-assemble it. -- -- diffCmp parsed1 parsed2 Nothing -> error "Failed processLL" diffCmp :: FilePath -> FilePath -> TestM () diffCmp file1 file2 = do let assertF = liftIO . assertFailure . unlines (code, stdout, stderr) <- liftIO $ Proc.readCreateProcessWithExitCode (Proc.proc "diff" ["-u", file1, file2]) "" case code of ExitFailure _ -> assertF ["diff failed", stdout, stderr] ExitSuccess -> if stdout /= "" || stderr /= "" then assertF ["non-empty diff", stdout, stderr] else do Details det <- gets showDetails when det $ liftIO $ mapM_ putStrLn ["success: empty diff: ", file1, file2] -- Assembles the specified .ll file to bitcode, then disassembles it with -- llvm-dis. Also parses the bitcode with this library (effectively llvm-disasm) -- and prints the difference between the parsed version and the .ll file. -- Returns the library parsed version and the serialized AST from the library. processLL :: FilePath -> FilePath -> TestM (FilePath, Maybe FilePath) processLL pfx f = do Details det <- gets showDetails when det $ liftIO $ putStrLn (showString f ": ") X.handle logError $ withFile (assembleToBitCode pfx f) $ parseBC pfx where logError (ParseError msg) = liftIO $ assertFailure $ unlines $ "failure" : map ("; " ++) (lines (formatError msg)) parseBC :: FilePath -> FilePath -> TestM (FilePath, Maybe FilePath) parseBC pfx bc = do withFile (X.handle (\(_ :: GE.IOException) -> return "LLVM llvm-dis failed to parse this file") (disasmBitCode pfx bc)) $ \ norm -> do (parsed, ast) <- processBitCode pfx bc Details dets <- gets showDetails when dets $ liftIO $ do -- Informationally display if there are differences between the llvm-dis -- and llvm-disasm outputs, but no error if they differ. Note that the -- arguments to this diff are not the same as those supplied to the diffCmp -- function. The diff here is intended to supply additional information to -- the user for diagnostics, and whitespace changes are likely unimportant -- in that context; this diff does not determine the pass/fail status of -- the testing. On the other hand, the diffCmp *does* determine if the -- tests pass or fail, so ignoring whitespace in that determination would -- potentially weaken the testing to an unsatisfactory degree and would -- need more careful evaluation. putStrLn "## Output differences: LLVM's llvm-dis <--> this llvm-disasm" ignore (Proc.callProcess "diff" ["-u", "-b", "-B", "-w", norm, parsed]) putStrLn ("successfully parsed " ++ show pfx ++ " bitcode") return (parsed, ast) ---------------------------------------------------------------------- -- Compiler->Assembly->Disassembly tests -- The compilation tests ensure that the clang version-specific generated .bc -- file can be reasonably parsed by this library. This is a parallel to the -- assemblyCube-driven tests, but starts with a C source file. One distinction -- is that the .ll used for the assemblyCube is typically representative of a -- specific LLVM version, and while it is assembled and disassembled by newer -- versions of LLVM tools, it will never introduce any newer element, whereas the -- clang-generated bitcode will contain version-current output which might have -- newer elements and ordering. -- -- The cCompilerCube uses .c (C source) files as the input and .ll files for the -- expected output. The ccCompilerCube uses .cc (C++ source) files as the input -- and .ll files for the expected output. The assemblyCube uses the .ll file as -- both input and output. The actual testing done is very similar, and the -- assemblyCube always generates a superset of the compilerCube tests (i.e. when -- no .c or .cc file is present). cCompilerCube :: VersionCheck -> TS.CUBE cCompilerCube llvmver = (assemblyCube llvmver) { TS.rootName = "*.(c|cc|cpp)" , TS.sweetAdjuster = rangeMatch llvmver } ccCompilerCube :: VersionCheck -> TS.CUBE ccCompilerCube llvmver = (cCompilerCube llvmver) { TS.rootName = "*.cc"} runCompileTest :: VersionCheck -> KnownBugs -> TS.Sweets -> TS.Expectation -> IO [TestTree] runCompileTest llvmVersion knownBugs sweet expct = do shouldSkip <- skipTest expct let tmod = if shouldSkip then ignoreTestBecause "not valid for this LLVM version" else case isKnownBug knownBugs sweet expct llvmVersion of Just (from, why) -> expectFailBecause $ why <> " [see " <> from <> "]" Nothing -> id let pfx = TS.rootBaseName sweet return $ (:[]) $ tmod $ testCaseM llvmVersion pfx $ withFile (compileToBitCode pfx $ TS.rootFile sweet) $ \bc -> with2Files (parseBC pfx bc) $ \(parsed1, ast) -> case ast of Nothing -> -- No round trip, so this just verifies that the bitcode could be -- parsed without generating an error. return () Just ast1 -> -- Assemble and re-parse the bitcode to make sure it can be -- round-tripped successfully. with2Files (processLL pfx parsed1) $ \(_, mb'ast2) -> case mb'ast2 of Just ast2 -> diffCmp ast1 ast2 Nothing -> error "failed processLL" -- fst is ignored because .ll files are not compared; see -- runAssemblyTest for details. ---------------------------------------------------------------------- -- Pre-existing bitcode tests tests bitcodeCube :: VersionCheck -> TS.CUBE bitcodeCube llvmver = (assemblyCube llvmver) { TS.rootName = "*.bc" , TS.inputDirs = ["disasm-test/bc_src_tests"] , TS.sweetAdjuster = rangeMatch llvmver } runRawBCTest :: VersionCheck -> KnownBugs -> TS.Sweets -> TS.Expectation -> IO [TestTree] runRawBCTest llvmVersion knownBugs sweet expct = do shouldSkip <- skipTest expct let tmod = if shouldSkip then ignoreTestBecause "not valid for this LLVM version" else case isKnownBug knownBugs sweet expct llvmVersion of Just (from, why) -> expectFailBecause $ why <> " [see " <> from <> "]" Nothing -> id let pfx = TS.rootBaseName sweet let bc = TS.rootFile sweet return $ (:[]) $ tmod $ testCaseM llvmVersion pfx $ with2Files (parseBC pfx bc) $ \(parsed1, ast) -> case ast of Nothing -> -- No round trip, so this just verifies that the bitcode could be -- parsed without generating an error. return () Just ast1 -> -- Assemble and re-parse the bitcode to make sure it can be -- round-tripped successfully. with2Files (processLL pfx parsed1) $ \(_, mb'ast2) -> case mb'ast2 of Just ast2 -> diffCmp ast1 ast2 Nothing -> error "Failed processLL" -- fst is ignored because .ll files are not compared; see -- runAssemblyTest for details. ---------------------------------------------------------------------- -- Helpers -- | A test failure. data TestFailure = ParseError Error -- ^ A parser failure deriving (Typeable,Show) instance X.Exception TestFailure -- This structure essentially recapitulates the TestOptions, but in a way that -- they will be accessible in a TestTree (via: StateT TestState IO a). data TestState = TestState { keepTemp :: Keep , rndTrip :: Roundtrip , showDetails :: Details , llvmAs :: LLVMAs , llvmDis :: LLVMDis , clang :: Clang , llvmVer :: VersionCheck } type TestM a = StateT TestState IO a testCaseM :: VersionCheck -> FilePath -> TestM () -> TestTree testCaseM llvmVersion pfx ops = askOption $ \llvmAs' -> askOption $ \llvmDis' -> askOption $ \roundtrip -> askOption $ \keep -> askOption $ \details -> askOption $ \clang' -> testCase pfx $ evalStateT ops (TestState { keepTemp = keep , rndTrip = roundtrip , showDetails = details , llvmAs = llvmAs' , llvmDis = llvmDis' , clang = clang' , llvmVer = llvmVersion }) -- | Assemble some llvm assembly, producing a bitcode file in /tmp. assembleToBitCode :: FilePath -> FilePath -> TestM FilePath assembleToBitCode pfx file = do tmp <- liftIO getTemporaryDirectory LLVMAs asm <- gets llvmAs X.bracketOnError (liftIO $ openBinaryTempFile tmp (pfx <.> "bc")) (rmFile . fst) $ \(bc,h) -> do liftIO $ hClose h callProc asm ["-o", bc, file] return bc -- | Compile a C or C++ source, producing a bitcode file in /tmp. compileToBitCode :: FilePath -> FilePath -> TestM FilePath compileToBitCode pfx file = do tmp <- liftIO getTemporaryDirectory Clang comp' <- gets clang let comp = if ".cc" `isSuffixOf` file then comp' <> "++" else comp' X.bracketOnError (liftIO $ openBinaryTempFile tmp (pfx <.> "bc")) (rmFile . fst) $ \(bc,h) -> do liftIO $ hClose h callProc comp ["-c", "-emit-llvm", "-O0", "-U_FORTIFY_SOURCE", "-g", "-o", bc, file] return bc -- | Use llvm-dis to parse a bitcode file, to obtain a normalized version of the -- llvm assembly. disasmBitCode :: FilePath -> FilePath -> TestM FilePath disasmBitCode pfx file = do tmp <- liftIO $ getTemporaryDirectory LLVMDis dis <- gets llvmDis X.bracketOnError (liftIO $ openTempFile tmp (pfx ++ "llvm-dis" <.> "ll")) (rmFile . fst) $ \(norm,h) -> do liftIO $ hClose h callProc dis ["-o", norm, file] -- stripComments norm return norm -- | Usually, the ASTs aren't "on the nose" identical. -- The big thing is that the metadata numbering differs, so we zero out all -- metadata indices and sort the unnamed metadata list. -- Done with SYB (Scrap Your Boilerplate). normalizeModule :: AST.Module -> AST.Module normalizeModule = sorted . everywhere (mkT zeroValMdRef) . everywhere (mkT zeroNamedMd) where sorted m = m { AST.modUnnamedMd = sort (map (\um -> um { AST.umIndex = 0 }) (AST.modUnnamedMd m)) } -- Zero out all ValMdRefs zeroValMdRef (AST.ValMdRef _) = AST.ValMdRef 0 zeroValMdRef a = (a :: AST.ValMd) -- avoid ambiguous type -- Reduce all named metadata zeroNamedMd (AST.NamedMd x _) = AST.NamedMd x [] -- | Parse a bitcode file using llvm-pretty, failing the test if the parser -- fails. processBitCode :: FilePath -> FilePath -> TestM (FilePath, Maybe FilePath) processBitCode pfx file = do let handler :: X.SomeException -> IO (Either Error AST.Module) handler se = return (Left (Error [] (show se))) printToTempFile sufx stuff = do tmp <- getTemporaryDirectory (parsed,h) <- openTempFile tmp (pfx ++ "llvm-disasm" <.> sufx) hPutStrLn h stuff hClose h return parsed e <- liftIO $ parseBitCodeLazyFromFile file `X.catch` handler case e of Left err -> X.throwM (ParseError err) Right m -> do let m' = AST.fixupOpaquePtrs m postParseTests m' llvmVersion <- gets llvmVer llvmAssembly <- case vcVersioning llvmVersion ^? (_Right . major) of Nothing -> do liftIO $ hPutStrLn IO.stderr ( "warning: unknown LLVM version (" <> showVC llvmVersion <> "), assuming 3.5") return $ ppLLVM35 $ llvmPP m' Just v -> case v of 3 -> case vcVersioning llvmVersion ^? (_Right . minor) of Just 5 -> return $ ppLLVM35 $ llvmPP m' Just 6 -> return $ ppLLVM36 $ llvmPP m' Just 7 -> return $ ppLLVM37 $ llvmPP m' Just 8 -> return $ ppLLVM38 $ llvmPP m' o -> if maybe True (< 5) o then return $ ppLLVM35 $ llvmPP m' else return $ ppLLVM38 $ llvmPP m' _ -> return $ ppLLVM (fromEnum v) $ llvmPP m' parsed <- liftIO $ printToTempFile "ll" $ show llvmAssembly Roundtrip roundtrip <- gets rndTrip -- stripComments parsed Details det <- gets showDetails if roundtrip then do tmp2 <- liftIO $ printToTempFile "ast" (ppShow (normalizeModule m')) when det $ liftIO $ putStrLn $ "## parsed Bitcode to " <> parsed <> " and " <> tmp2 return (parsed, Just tmp2) else do when det $ liftIO $ putStrLn $ "## parsed Bitcode to " <> parsed return (parsed, Nothing) -- | These are common tests that should be run on the AST that is parsed from the -- bitcode file. This tests invariants that are not accessible or testable from -- the serialized formats. postParseTests :: AST.Module -> TestM () postParseTests m = ensureValidMetadataIndices m where -- This test is to ensure that all unnamed metadata instances have a unique -- index value. ensureValidMetadataIndices md = do let idxs = AST.umIndex <$> AST.modUnnamedMd md let uniqIdxs = nub idxs let numDups = length idxs - length uniqIdxs unless (numDups == 0) $ do Details det <- gets showDetails when det $ liftIO $ putStrLn $ "Unnamed metadata (modUnnamedMd) indices: " <> show idxs liftIO $ assertFailure $ show numDups <> " duplicated Unnamed metadata (modUnnamedMd) indices" -- | Remove comments from a .ll file, stripping everything including the -- semi-colon. stripComments :: FilePath -> TestM () stripComments path = do Keep keep <- gets keepTemp bytes <- liftIO $ L.readFile path when (not keep) $ rmFile path mapM_ (writeLine . dropComments) (bsLines bytes) where writeLine bs | L.null bs = return () | otherwise = liftIO $ do L.appendFile path bs L.appendFile path (L.singleton 0x0a) -- | Split a bytestring by its lines. bsLines :: L.ByteString -> [L.ByteString] bsLines = L.split char where char = fromIntegral (ord '\n') -- | Take characters until the llvm comment delimiter is found. dropComments :: L.ByteString -> L.ByteString dropComments = dropTrailingSpace . L.takeWhile (/= char) where char = fromIntegral (ord ';') -- | Drop trailing space from a bytestring. dropTrailingSpace :: L.ByteString -> L.ByteString dropTrailingSpace bs | len <= 0 = L.empty | otherwise = L.take (loop len) bs where len = L.length bs - 1 loop n | isSpace (chr (fromIntegral (L.index bs n))) = loop (n-1) | otherwise = n -- | Ignore a command that fails. ignore :: IO () -> IO () ignore = X.handle f where f :: EX.IOException -> IO () f _ = return () callProc :: String -> [String] -> TestM () callProc p args = do Details dets <- gets showDetails when dets $ liftIO $ putStrLn ("## Running: " ++ p ++ " " ++ unwords args) liftIO $ Proc.callProcess p args withFile :: TestM FilePath -> (FilePath -> TestM r) -> TestM r withFile iofile f = X.bracket iofile rmFile f with2Files :: TestM (FilePath, Maybe FilePath) -> ((FilePath, Maybe FilePath) -> TestM r) -> TestM r with2Files iofiles f = let cleanup (tmp1, mbTmp2) = do rmFile tmp1 traverse rmFile mbTmp2 in X.bracket iofiles cleanup f rmFile :: FilePath -> TestM () rmFile tmp = do Keep keep <- gets keepTemp unless keep $ do do exists <- liftIO $ doesFileExist tmp when exists $ do Details dets <- gets showDetails when dets $ liftIO $ putStrLn $ "## Removing " <> tmp liftIO $ removeFile tmp ---------------------------------------------------------------------- -- Handling Known Bugs -- | A map from a known bug file to the identifiers and identifier values in that -- known bug file. type KnownBugs = Map.Map FilePath (Map.Map String [String]) -- | There is a directory containing files which describe known bugs (one per -- file). Those files contain marker lines (prefixed with "##> ") which specify -- the elements and values that should match a test for this test to be a known -- bug. This function reads in that file and creates a map from file to a map of -- markers and values in that file. This returned value should be passed to the -- isKnownBug function, along with the test information to determine if the test -- is associated with a known bug. getKnownBugs :: IO KnownBugs getKnownBugs = do let kbdir = "disasm-test/known_bugs" known <- listDirectory kbdir let interestingLine = ("##> " `isPrefixOf`) let addInterestingLine l = case words l of (_ : t : ws) -> Map.insertWith (<>) t ws _ -> id let interestingLineMap ls = foldr addInterestingLine mempty $ filter interestingLine ls let addKnownBugInfo mp f = do let fpath = kbdir f X.try (lines <$> readFile fpath) >>= \case Left (_e :: IOError) -> -- Error reading the known bug file: ignore it return mp Right ls -> let buginfo = interestingLineMap ls in return $ if Map.null buginfo then mp else Map.insert fpath buginfo mp foldM addKnownBugInfo mempty known -- | This function checks to see if the current test being defined corresponds to -- one of those known bugs, and if so, returns the name of the known bugs file -- and the summary string from that file. isKnownBug :: KnownBugs -> TS.Sweets -> TS.Expectation -> VersionCheck -> Maybe (FilePath, String) isKnownBug knownBugs sweet _expct llvmver = let matchOf (_,km) = and (uncurry isMatch <$> Map.assocs km) isMatch = \case "rootMatchName:" -> (TS.rootMatchName sweet `elem`) "llvmver:" -> case vcVersioning llvmver ^? (_Right . major) of Just v -> (show v `elem`) Nothing -> const False _ -> const True found = find matchOf $ Map.assocs knownBugs getSummary (f,_) = (f , head (fromMaybe [] (knownBugs ! f !? "summary:") <> ["this is a known bug"]) ) in getSummary <$> found