{-- Benchmarks.
We re-use the 'Language' generator of thr test suite 
and compare how long 'SimpleParser' and 'Parsec' take 
to parse all words of a language, 
which is written to a file before parsing, 
to ensure that both parsers start from an equally-evaluated input. 
--}
{-# LANGUAGE OverloadedStrings, RankNTypes, FlexibleContexts, TypeApplications #-}
import GenLanguage
import GenCsv
import qualified Test.QuickCheck as Q
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.IO as TIO
import Data.Set (Set)
import qualified Data.Set as Set
import Data.Void (Void)
import qualified Data.Csv as Cassava
import qualified Data.ByteString.Lazy as B
import qualified Data.Foldable as Fold
import Text.Megaparsec
import Text.Megaparsec.Char.Lexer (decimal,signed)
import Text.Megaparsec.Simple 
import Test.Tasty.Bench (Benchmark,bench,bgroup,bcompare,envWithCleanup,nfIO,defaultMain)
import System.Directory (removeFile)
import System.IO.Temp (emptySystemTempFile)
import Control.DeepSeq (NFData(..))
import Control.Monad (mzero)

-- * hard-coded test benchmark parameters 

-- | size parameters passed to the language generator 'randomLanguages' 
useLangSizes :: [Int]
useLangSizes = three `mtimes` (10:[100,120..200]) where
    mtimes :: Monad m => m () -> m a -> m a
    mtimes scalar m = m >>= (\a -> fmap (const a) scalar)
    three = [(),(),()] -- generate three languages of each size

-- | number of rows in randomly generated CSV file
csvNumRows :: Int
csvNumRows = 100000 :: Int

-- | number of randomly generated CSV tabke structures
csvNumStructures :: Int
csvNumStructures = 5

-- * parser benchmark type

data ParserBenchmark a = ParserBenchmark {
    parseFromFile :: FilePath,
    withParsec      :: IO [a],
    withSimpleParse :: IO [a],
    withCassava     :: IO [a]
    }
instance NFData (ParserBenchmark a) where
    rnf = rnf . parseFromFile -- ^ 'envWithCleanup' requires its type parameter to be member of 'NFData' bot IO actions do not have a normal form. It is enough if the 'testFile' is written. 

genBenchmark :: NFData b => (a -> String) -> (a -> IO (ParserBenchmark b)) -> a -> Benchmark
genBenchmark name toBenchmark a = envWithCleanup setUp cleanUp genBench where
    setUp = toBenchmark a
    cleanUp = removeFile . parseFromFile
    genBench pb = bgroup (name a) [ 
        (bench "Parsec" (nfIO (withParsec pb))),
        (bcompare "Parsec"  (bench "SimpleParser" (nfIO (withSimpleParse pb)))),
        (bcompare "Parsec"  (bench "Cassava"      (nfIO (withCassava     pb))))]

-- * benchmark language parsers

writeAllWords :: FilePath -> Language (Set Text) -> IO ()
writeAllWords file = TIO.writeFile file . 
    foldl1 (\txt w -> txt <> "\n" <> w) .
    allWords

-- | read the file written by 'writeAllWords' 
-- and parse using the 'wordsParser', 
-- printing the parsed words to stdout.
-- 
-- @
-- parseAllWordsWith (runParser @Void)
-- parseAllWordsWith simpleParse
-- @
parseAllWordsWith :: (MonadParsec Void Text p) => 
    (forall a. p a -> String -> Text -> result a) -> 
    Language x -> 
    FilePath -> IO (result [Text])
parseAllWordsWith runP lang testFile = fmap 
    (runP (wordsParser lang) testFile) 
    (TIO.readFile testFile)

-- | we keep this paremetric so that 
-- we don't have to mention the megaparsec error type 
-- which changed through library versions. 
ignoreParsecError :: Either err [a] -> [a]
ignoreParsecError = either (const []) id

ignoreNothing :: Maybe [a] -> [a]
ignoreNothing = maybe [] id

-- | generate a parser for words separated by newline characters
wordsParser :: MonadParsec Void Text p => Language x -> p [Text]
wordsParser lang = let langWord = genParser lang in (langWord `sepBy` single '\n') <* eof

simpleParse :: SimpleParser Text a -> String -> Text -> Maybe a
simpleParse p _ s = fmap  fst (runSimpleParser p s)

langDepth :: Language x -> Int
langDepth (Word _) = 0
langDepth (Choice _ x y) = 1 + max (langDepth x) (langDepth y)
langDepth (Concat _ x y) = 1 + langDepth x + langDepth y

benchName :: Language (Set Text) -> String
benchName lang = let ws = allWords lang in 
    "language of depth " <> show (langDepth lang) <> 
        " and size " <> show (Set.size ws) <> 
        " e.g. " <> T.unpack (Set.findMin ws)

-- | Generate as many 'Language's as integers given,
-- where the integer is the size parameter passed to QuickCheck's 
-- 'Q.Gen'erator. 
randomLanguages :: [Int] -> IO [Language (Set Text)]
randomLanguages sizes = Q.generate (traverse (flip Q.resize Q.arbitrary) sizes)

-- | 'writeAllWords' and return two actions that parse the same test file, 
-- to be compared in the benchmark.
genBenchmarkable :: Language (Set Text) -> IO (ParserBenchmark Text)
genBenchmarkable lang = do
    testFile <- emptySystemTempFile "parserBenchmark.txt"
    writeAllWords testFile lang
    return $ ParserBenchmark
        testFile
        (fmap ignoreParsecError (parseAllWordsWith (runParser @Void) lang testFile))
        (fmap ignoreNothing     (parseAllWordsWith simpleParse       lang testFile))
        (fmap (const [])        (return ()))

genBenchmarks :: IO Benchmark
genBenchmarks = do
    langs <- randomLanguages useLangSizes 
    return (bgroup "language parser comparison (cassava not applicable here)" (fmap (genBenchmark benchName genBenchmarkable) langs))


-- * benchmark CSV parsers

data Comma = LeadingComma | TrailingComma

-- | skip over uninteresting fields
skipFields :: MonadParsec Void Text p => Int -> Comma -> p ()
skipFields n comma = if n <= 0 
    then return () 
    else () <$ (count n skipField) where
        co = case comma of
            TrailingComma -> (\p -> p <* single ',')
            LeadingComma -> (\p -> single ',' *> p)
        skipField = co (takeWhileP (Just "some csv field") (\c -> c /= ',' && c /= '\n'))

parsecBool :: MonadParsec Void Text p => p Bool
parsecBool = (True <$ chunk "True") <|> (False <$ chunk "False")

-- | The 'CSVStructure' dictates that in column 'fooCol' an integer 
-- and in column 'barCol' a boolean is to be found. 
parsecCSVrow :: MonadParsec Void Text p => CSVStructure -> p (Int,Bool)
parsecCSVrow c = do
    _ <- skipFields (fooCol c) TrailingComma
    i <- signed (return ()) decimal <* single ','
    _ <- skipFields (barCol c - fooCol c - 1) TrailingComma
    b <- parsecBool
    _ <- skipFields (tableWidth c - 1 - barCol c) LeadingComma
    return (i,b)

-- | We extract and decode two columns from a CSV file of up two 50 columns
parsecCSV :: MonadParsec Void Text p => CSVStructure -> p [(Int,Bool)]
parsecCSV c = takeWhileP (Just "header line") ('\n' /=) *> single '\n' *> 
    (parsecCSVrow c `sepEndBy` single '\n') <* eof

-- analogous to 'parseAllWordsWith'
parseCsvWith :: (MonadParsec Void Text p) => 
    (forall a. p a -> String -> Text -> result a) -> 
    CSVStructure -> 
    FilePath -> IO (result [(Int,Bool)])
parseCsvWith runP struct testFile = fmap 
    (runP (parsecCSV struct) testFile) 
    (TIO.readFile testFile)

csvBenchName :: CSVStructure -> String
csvBenchName c = "CSV file with " <> show (tableWidth c) <> " columns " <> 
    "and Int in column " <> show (fooCol c) <> 
    " and Bool in column " <> show (barCol c)

newtype LitBool = LitBool Bool
instance NFData LitBool where
    rnf (LitBool b) = rnf b
instance Cassava.FromField LitBool where
    parseField s
        | s == "True"  = pure (LitBool True)
        | s == "False" = pure (LitBool False)
        | otherwise = mzero

data IntBool = IntBool !Int !LitBool
instance NFData IntBool where
    rnf (IntBool i b) = rnf (i,b)
instance Cassava.FromNamedRecord IntBool where
    parseNamedRecord r = IntBool <$> r Cassava..: "foo" <*> r Cassava..: "bar"
fromIntBool :: IntBool -> (Int,Bool)
fromIntBool (IntBool i (LitBool b)) = (i,b)

-- decode using cassava.
cassavaIntBool :: FilePath -> IO [(Int,Bool)]
cassavaIntBool testFile = fmap dec (B.readFile testFile) where
    dec = either (const []) (Fold.toList . fmap fromIntBool .snd) . Cassava.decodeByName

-- analogous to 'genBenchmarkable'
csvBenchmarkable :: CSVStructure -> IO (ParserBenchmark (Int,Bool))
csvBenchmarkable c = do
    content <- Q.generate (genCSVFile c csvNumRows)
    testFile <- emptySystemTempFile "parserBenchmark.csv"
    TIO.writeFile testFile content
    return $ ParserBenchmark
        testFile
        (fmap ignoreParsecError (parseCsvWith (runParser @Void) c testFile))
        (fmap ignoreNothing     (parseCsvWith simpleParse       c testFile))
        (                        cassavaIntBool                   testFile)

genCsvBenchmarks :: IO Benchmark
genCsvBenchmarks = do
    structures <- Q.generate (sequence (replicate csvNumStructures genCSVStructure))
    return (bgroup "csv parser comparison" (fmap (genBenchmark csvBenchName csvBenchmarkable) structures))


-- * main

main :: IO ()
main = do
    bench1 <- genBenchmarks
    bench2 <- genCsvBenchmarks
    defaultMain [bench1,bench2]