module Main where

import Control.Monad (when)
import Criterion.Config
import Criterion.Main
import Data.Monoid (Last(..))
import Data.Map (Map)
import qualified Data.Map as Map
import System (getArgs, getProgName)
import System.FilePath ((</>))
import System.IO (stderr, hPutStrLn)
import Toktok (Lexer, mkLexer, mkLexerWithSandhis)

myConfig = defaultConfig {
             cfgSummaryFile = Last $ Just "benchmark.csv"}

main = defaultMainWith myConfig (return ()) $
         map createBGroup [ ("Transducer", mkLexerWithSandhis [])
                          , ("Tries", mkLexer)
                          , ("Baseline", baseline)
                          , ("Dummy", dummy)
                          ]

createBGroup :: (String, [String] -> Lexer) -> Benchmark
createBGroup (name, mkLexer)
   = bgroup name
            [ bench "English" $ nfIO $ mkBenchmark "english" mkLexer
            , bench "French"  $ nfIO $ mkBenchmark "french"  mkLexer
            ]

mkBenchmark :: String -> ([String] -> Lexer) -> IO ()
mkBenchmark dir mklexerf = do
                       lexicon <- readLexicon dir
                       let lexer = mkLexer (" ":filter (not . null) lexicon)
                       sts <- readSentences dir
                       let results = map (not . null . lexer) sts
                       --putStrLn $ unlines $ map show results
                       when (not $ and results) $ error "Problem..."
                       return ()
                       
readLexicon :: String -> IO [String]
readLexicon dir = readLineFiles $ "data" </> dir </> "lexicon.txt"

readSentences :: String -> IO [String]
readSentences dir = readLineFiles $ "data" </> dir </> "sentences.txt"

readLineFiles :: FilePath -> IO [String]
readLineFiles f = do
                  t <- readFile f
                  return $ lines t

-- Lexers for comparaison

-- | This is the standard haskell lexer, 'words'.
-- It just split the string where there is white-spaces.
dummy :: [String] -> Lexer
dummy _ = return . words

-- | this is a lexer based on haskell maps
baseline :: [String] -> Lexer
baseline ss = useMapLexer mapLexer
   where mapLexer = Map.fromList $ map (\x -> (x,True)) ss
         useMapLexer :: Map String Bool -> Lexer
         useMapLexer m = uml 1
            where uml i s | i >= length s = []
                  uml i s = case Map.lookup (take i s) m of
                         Just True -> [take i s:l | l <- uml 1 (drop i s)]
                                      ++ uml (i + 1) s
                         _         -> uml (i + 1) s