{-# Language OverloadedStrings #-}

--------------------------------------------------------------------
-- |
-- Module : Utils.Katt.SourceHandler
--
-- Provides searching of source code files and language identification.
--
-- Language identification is required in order to detect any
-- inconsistencies (e.g. combining Java and C), and to tag submissions.
--
-- Java also requires identifying which file provides the main method.

module Utils.Katt.SourceHandler
(parseFilter, findFiles, determineLanguage, findMainClass, languageKattisName, languageContentType)
where

import           Control.Applicative ((<$>))
import           Control.Arrow ((***))
import           Control.Monad (filterM, mapAndUnzipM, void)
import qualified Data.ByteString.Char8 as B
import           Data.List ((\\), isSuffixOf)
import qualified Data.Set as Set
import           System.Directory (doesDirectoryExist, getDirectoryContents)
import           System.FilePath (takeBaseName, takeExtension)
import           Text.Parsec
import           Text.Parsec.ByteString
import           Utils.Katt.Utils

-- | All supported source file extensions, per language.
supported :: KattisLanguage -> Set.Set FilePath
supported LangCplusplus = Set.fromList [".cc", ".cpp", ".hpp", ".h"]
supported LangC         = Set.fromList [".c", ".h"]
supported LangJava      = Set.fromList [".java"]
supported LangHaskell   = Set.fromList [".hs"]

-- | Parse an argument list from the +file1 -file2 style into
--   two lists of file paths (included and ignored files).
parseFilter :: [String] -> Maybe ([FilePath], [FilePath])
parseFilter input = (filter' *** filter') <$> mapAndUnzipM go input
  where
  go ('+' : file) = Just (file, "")
  go ('-' : file) = Just ("", file)
  go _ = Nothing
  filter' = filter (not . null)

-- | Locate all source files recursively from the current directory.
findFiles :: IO [FilePath]
findFiles = explore "" "."
  where
  explore prefix dir = do
    contents <- (\\ [".", ".."]) <$> getDirectoryContents dir
    let withPrefix = map (prefix++) contents

    dirs <- filterM doesDirectoryExist withPrefix
    let sourceFiles = filter isValidSourceFile (withPrefix \\ dirs)
    nextDepth <- mapM exploreDir dirs
    return $ sourceFiles ++ concat nextDepth

  isValidSourceFile file = any (`isSuffixOf` file)
    (Set.toList . Set.unions $ map supported [LangCplusplus, LangC, LangJava, LangHaskell])
  exploreDir dir = explore (dir ++ "/") dir

-- | Determine source code language by studying file extensions.
--   There is an implicit priority ordering, since C is a subset of C++.
determineLanguage :: [FilePath] -> Maybe KattisLanguage
determineLanguage files
  | is LangC = Just LangC
  | is LangCplusplus = Just LangCplusplus
  | is LangJava = Just LangJava
  | is LangHaskell = Just LangHaskell
  | otherwise = Nothing
  where
  fileSet = Set.fromList $ map takeExtension files
  is lang = fileSet `Set.isSubsetOf` supported lang

-- | Locate main class based on source file contents.
--   C++ and C solutions do not need to be specified, returns an empty string.
--
--   In the Java case all souce code files are parsed.
--   All occurences of a /main/ method defined with /public static void/ are located.
--
--   Will return 'Data.Maybe.Nothing' if result is ambiguous.
findMainClass :: ([FilePath], KattisLanguage) -> IO (Maybe FilePath)
findMainClass ([], _)            = return Nothing
findMainClass (_, LangCplusplus) = return $ Just ""
findMainClass (_, LangC)         = return $ Just ""
findMainClass (_, LangHaskell)   = return $ Just ""
findMainClass (files, LangJava)  = survey <$> filterM containsMain files
  where
  containsMain file = do
    parseResult <- parseFromFile mainParser file
    case parseResult of
      Right _ -> return True
      Left _ -> return False

  mainParser = manyTill
    (lineComment <|> blockComment <|> stringData <|> void anyChar)
    mainFunc
  blockComment = void $ string "/*" >> manyTill anyChar (try $ string "*/")
  lineComment = void . try $ string "//" >> manyTill anyChar newline
  stringData = void $ char '"' >> manyTill anyChar (char '"')
  mainFunc = try $ mapM_ keyWord ["public", "static", "void", "main"]
  keyWord str = void $ string str >> spaces

  survey [singleton] = Just $ takeBaseName singleton
  survey _ = Nothing

-- | Determine content type of submission language.
languageContentType :: KattisLanguage -> B.ByteString
languageContentType LangCplusplus = "text/x-c++src"
languageContentType LangJava = "text/x-c++src"
languageContentType LangC = "text/x-c++src"
languageContentType LangHaskell = "text/x-c++src"

-- | Determine Kattis language string identifier.
languageKattisName :: KattisLanguage -> B.ByteString
languageKattisName LangCplusplus = "C++"
languageKattisName LangJava = "Java"
languageKattisName LangC = "C"
languageKattisName LangHaskell = "Haskell"