module Report where import Control.Arrow import Control.Monad.Error import Data.Function import Data.List import Data.List.Split import Data.Maybe import Data.DateTime( formatDateTime, startOfTime ) import qualified Data.Map as Map import qualified Text.Tabular as Tab import System.Directory import System.FilePath import System.IO import Definitions import Graph import Shellish hiding ( run ) import Standard import TabularRST as TR import qualified Data.ByteString.Char8 as BS -- ---------------------------------------------------------------------- -- tables -- ---------------------------------------------------------------------- type BenchmarkTable = Tab.Table String String String tabulateRepo :: Formatter -> RepoTable -> Tab.Table String String String tabulateRepo format repo = Tab.Table rowhdrs colhdrs rows where rowhdrs = Tab.Group Tab.NoLine $ map Tab.Header (rtRows repo) colhdrs = Tab.Group Tab.SingleLine $ map Tab.Header $ concatMap (format myundefined . ColHeader) $ rtColumns repo myundefined = error "Formatting is undefined for column headers" rows = map formatRow $ rtTable repo formatRow (tu, rs) = concatMap (fmt tu) rs fmt tu (Just mt) = format tu (Cell mt) fmt tu Nothing = format tu MissingCell -- ---------------------------------------------------------------------- -- timings files -- ---------------------------------------------------------------------- readAllTimings :: IO [[(Test a, Maybe MemTimeOutput)]] readAllTimings = do rdir <- resultsDir tdirs <- filter isTimingFile `fmap` getDirectoryContents rdir let pstamps = map dropExtension tdirs mapM readTimingsForParams pstamps where isTimingFile f = takeExtension f == ".timings" unknownBinary x = TestBinary { binCommand = x, binVersionString = "unknown" , binDate = startOfTime, binGHC = "unknown", binVCS = VCSDarcs , binContext = BS.empty } -- | Map an sha1 of darcs binary into the original binary description. readInfos :: String -> (String -> TestBinary) readInfos bits = \x -> case lookup x table of Nothing -> unknownBinary x Just b -> b where table' = sortBy order (case reads bits of [] -> [] ((x,_):_) -> x) ids = [ id | (id:_:_) <- group . sort $ map (binCommand . snd) table' ] fixid n id ((sha, bin):rem) | binCommand bin == id = (sha, bin { binCommand = id ++ " " ++ show n }) : fixid (n + 1) id rem | otherwise = (sha, bin) : fixid n id rem fixid _ _ [] = [] table = (foldl (.) id (map (fixid 0) ids)) table' order (_, x) (_, y) = case compare (binVersion x) (binVersion y) of EQ -> compare (binDate x) (binDate y) ord -> ord readTimingsForParams :: String -> IO [(Test a, Maybe MemTimeOutput)] readTimingsForParams pstamp = do rdir <- resultsDir let pdir = rdir pstamp <.> "timings" ifile = rdir pstamp <.> "info" infos <- (readInfos `fmap` readFile ifile) `catch` \_ -> do hPutStrLn stderr $ "WARNING: Could read " ++ ifile return unknownBinary tfiles <- filter notJunk `fmap` getDirectoryContents pdir entries <- concat `fmap` mapM parseTimingsFile (map (pdir ) tfiles) return . map (process infos) . Map.toList . Map.fromListWith (++) . map (second (:[])) $ entries where notJunk = not . (`elem` [".",".."]) process :: (String -> TestBinary) -> ((String, String, String), [MemTime]) -> (Test a, Maybe MemTimeOutput) process infos ((repo, binhash, bm), times) = (key, val) where key = Test (Description bm) (mkTr repo) (infos binhash) val = Just $ mkMemTimeOutput times mkTr n = TestRepo n (guessCoreName n) n Nothing [] [] guessCoreName :: String -> String guessCoreName n = case [ n `chop` (' ':s) | s <- suffixes, s `isSuffixOf` n ] of [] -> n (h:_) -> h where x `chop` s = take (length x - length s) x suffixes = sortBy (compare `on` (negate . length)) -- longest suffixes first $ map vShortName allVariants type TimingsFileEntry = ((String,String,String),MemTime) parseTimingsFile :: FilePath -> IO [TimingsFileEntry] parseTimingsFile tf = do ms <- (map parseLine . lines) `fmap` readFile tf let unknowns = length $ filter isNothing ms when (unknowns > 0) $ hPutStrLn stderr $ "Warning: could not understand " ++ show unknowns ++ " lines in " ++ tf return (catMaybes ms) parseLine :: String -> Maybe TimingsFileEntry parseLine l = case wordsBy (== '\t') l of [ repo, dbin, bm, mem, time ] -> Just ((repo, dbin, bm), memtime time mem) _ -> Nothing where memtime t m = MemTime (toRational (read m :: Float)) (read t) -- ---------------------------------------------------------------------- -- reports -- ---------------------------------------------------------------------- renderMany :: [(Test a, Maybe MemTimeOutput)] -> Command () renderMany results = do echo . unlines $ [ "Benchmark Results" , "=====================================================" , "" , machine_details , "" , "How to read these tables" , "=====================================================" , "" , "NB: times are reported as mean + 1 std deviation" , "" , def "?x" "less than 5 runs used" , def "~x" "less than 20 runs used" , def "sdev" "std deviation" , descriptions_of_variants , "" , binary_details , "" , "Timing Graphs" , "====================================================" , "" , intercalate "\n" (map showG t_graphs) , "Timings" , "====================================================" , "" , intercalate "\n" (map showT t_tables) , "Memory Graphs" , "====================================================" , "" , intercalate "\n" (map showG m_graphs) , "Memory" , "====================================================" , "" , intercalate "\n" (map showT m_tables) ] where tables = repoTables benchmarks results -- machine_details = intercalate "\n" $ map detail [ "Machine description", "Year", "CPU", "Memory", "Hard disk", "Notes" ] detail k = k ++ "\n *Replace Me*" -- descriptions_of_variants = intercalate "\n" $ map (describe . toVariant) [ OptimizePristineVariant ] describe v = def (vSuffix v) (vDescription v ++ " variant") def k v = "* " ++ k ++ " = " ++ v -- repoTuple tabulate repo = (rtRepo repo, tabulate repo) t_tables = map (repoTuple $ tabulateRepo formatTimeResult) tables m_tables = map (repoTuple $ tabulateRepo formatMemoryResult) tables showT (r,t) = intercalate "\n" [ r , replicate (length r) '-' , "" , TR.render id id id t ] -- t_graphs = map (repoTuple graphRepoTime) tables m_graphs = map (repoTuple graphRepoMemory) tables showG (r,gs) = intercalate "\n" $ [ r , replicate (length r) '-' , "" ] ++ (map imgDirective gs) ++ [""] imgDirective = (".. image:: " ++) binaries = map head . group . sort $ [ bin | (Test _ _ bin, _) <- results ] -- binary_details = unlines $ map describe_bin binaries describe_bin bin = padl 12 (binCommand bin ++ ": ") ++ binVersionString bin ++ ",\n" ++ (replicate 12 ' ') ++ (formatDateTime "%Y-%m-%d %H:%M:%S" $ binDate bin) ++ ", GHC " ++ binGHC bin padr n x = x ++ pad n x padl n x = pad n x ++ x pad n x = take (n - length x) (repeat ' ') printCumulativeReport :: Command () printCumulativeReport = do ts <- liftIO readAllTimings mapM_ renderMany ts -- TODO: split this into sections for each param stamp -- | For each repository, the benchmarks that we have enough data for sufficientData :: [(Test a, Maybe MemTimeOutput)] -> [ (String, [String]) ] sufficientData results = map (rtRepo &&& missingPoints) tables where tables = repoTables benchmarks results missingPoints t = [ d | (d,mts) <- zip (rtRows t) (rtTable t), all enough (snd mts) ] -- enough (Just mt) | mtSampleSize mt >= 20 = True -- TODO: avoid duplication with formatTimeResult enough _ = False