-----------------------------------------------------------------------------
-- |
-- Module      :  Lentil.Parse.Source
-- Copyright   :  © 2015 Francesco Ariis
-- License     :  GPLv3 (see the LICENSE file)
--
-- Issues parsing from source.
-----------------------------------------------------------------------------

module Lentil.Parse where

import Lentil.Types

import Text.Parsec

import Control.Applicative hiding ( (<|>), optional, many )

import qualified Data.Char as C
import qualified Data.List as L
import qualified System.FilePath as SF
import qualified System.Directory as D
import qualified System.IO as I
import qualified Data.Either as E


-- TODO: eliminate lookahead? fai il parser davvero da bottom up senza sub
--       parsing [lint] [refactor] ?


-----------
-- TYPES --
-----------

type ParSource = Parsec String ParState

data ParState = ParState { psPath :: FilePath }


---------------
-- PRIMITIVE --
---------------

-- a unix file is defined as many lines (each one ended by '\n')
-- this parser captures the definition and can be used as eof replacement too
eoft :: ParSource ()
eoft = optional (char '\n') *> eof

-- i.e. remove unneeded whitespace
htmlify :: String -> String
htmlify cs = unwords . words $ cs


-- occhio che il primo carattere non è controllato da ED
-- TODO: manyTill1 broken? occhio che al primon on chiama ed! [test]
manyTill1 :: ParSource a -> ParSource b -> ParSource [a]
manyTill1 p ed = (:) <$> p <*> manyTill p ed


----------
-- TAGS --
----------

-- simple tags parsing

-- tag only
tag :: ParSource Tag
tag = Tag <$> (openPar *> tagLabel <* closePar) <?> "tag"

-- anything goes, apart from ' '
tagLabel :: ParSource String
tagLabel = many1 (satisfy sf) <?> "tag label"
    where sf :: Char -> Bool
          sf c | c == closeDel = False
               | C.isSpace c   = False
               | otherwise     = True

openPar, closePar :: ParSource Char
openPar  = char openDel  <?> "open-tag delimiter"
closePar = char closeDel <?> "close-tag delimiter"


------------
-- ISSUES --
------------

-- simple issue parsing

-- parses a number of issues from a given line-of-text
issues :: ParSource [Issue]
issues = many (try $ manyTill anyChar (lookAhead $ try incipit) *> issue)
         <?> "issues"

-- an issue is a flagword, followed by : , followed by some description and
-- ended optionally by some tags (No t/f? End by whiteline or eof or
-- another TODO).
issue :: ParSource Issue
issue = Issue <$> fmap psPath getState
              <*> fmap sourceLine getPosition
              <*> (incipit *> fmap htmlify freeText)
              <*> (optional spaces *> option [] (try tags))
        <?> "issue"


-- flagwords + ": "
incipit :: ParSource ()
incipit = () <$ (choice (map string flagWords) *> string ": ") <?> "incipit"

flagWords :: [String]
flagWords = ["TODO", "FIXME", "NOTE", "XXX"]

tags :: ParSource [Tag]
tags = sepEndBy1 tag spaces <?> "tags"

-- any text. Since tags/fields at the end of the issue are optional, we need
-- a way to delimit this. Delimiters are: eof, tags/fields, new issue
freeText :: ParSource Description
freeText = manyTill anyChar end <?> "free text"
    where vp p      = try . parsecMap (const ()) $ p
          end       = lookAhead $ choice [vp (spaces *> tag),
                                          vp (spaces *> incipit), vp eoft]


------------
-- BLOCKS --
------------

-- blocks parsing, check `block` comment for more info

blocks :: ParSource [Issue]
blocks = concat <$> many1 block

-- a block is some text separated by emptyline(s) (a paragraph). Once you
-- parse it you need to unwrap it and scan for comments. Whitespace
-- is treated like in html.
block :: ParSource [Issue]
block = getPosition                                  >>= \lep ->
        manyTill1 anyChar (try $ emptyline <|> eoft) >>= \rb ->
        getState                                     >>= \ps ->
        case runParser (bi lep) ps "<block-parse>" rb of
            Right r  -> return r
            Left  l  -> parserFail ("block subp: " ++ show l)
        <?> "block"
    where
          bi p = setPosition p >> issues


emptyline :: ParSource ()
emptyline = () <$ (endOfLine *> many (satisfy ws) *> endOfLine) <?> "emptyline"
    where ws c = C.isSpace c && notElem c "\n\r\f\v"
               -- as per definition of isSpace in Data.Char


--------------
-- COMMENTS --
--------------

-- TYPES --

-- StringPos is the comment itself + its indentation level. Conventionally
-- multiline comments will have IL of 0 to sever them from single line
-- comments
type StringPos  = (Int, Int, String) -- Col, Row, String


-- FUNCTIONS --

-- Comment is the same, with just line indication, for comments that go
-- together indentation wise. Check the utility function sps2cr
type CommentRow = (Int, String) -- row, String

sps2cr :: [[StringPos]] -> [CommentRow]
sps2cr sps = map sp2cr sps
    where sp2r (_,r,_) = r
          sp2s (_,_,s) = s

          sp2cr [] = error "empty group in sps2cr"
          sp2cr zs = (sp2r . head $ zs, myUnlines . map sp2s $ zs)

          -- do not add unneeded \n at end
          myUnlines zs = L.intercalate "\n" zs


-- comments: from a source file to something parsable by `blocks`
-- indent level 0, to recognise them from single line
multiLine :: String -> String -> ParSource StringPos
multiLine st ed = getPosition >>= \a ->
                  (,,) <$> pure 0
                       <*> pure (sourceLine a)
                       <*> (string st *> manyTill anyChar (try $ string ed))
                  <?> "multiline comment"

-- returns single line comment and starting column of such comment
singleLine :: String -> ParSource StringPos -- int: starting col
singleLine st = getPosition >>= \a ->
                (string st *> manyTill anyChar (() <$ endOfLine <|> eoft)) >>= \s ->
                return (sourceColumn a, sourceLine a, s)
                <?> "singleline comment"


rawComment :: String -> String -> String -> ParSource StringPos
rawComment st ed slt = try (multiLine st ed) <|> singleLine slt
                       <?> "raw comment"

rawComments :: String -> String -> String ->
               ParSource [StringPos]
rawComments st ed slt =
        E.rights <$> many1 ( try (Right <$> rawComment st ed slt) <|>
                             try (Left  <$> (() <$ codeString))   <|>
                                 (Left  <$> other ) )
                -- TODO: o many? [test]
        <?> "raw comments"
    where
          initP = lookAhead (try (() <$ string st))  <|> -- long comment
                  lookAhead (try (() <$ string slt)) <|> -- line comment
                  lookAhead (() <$ char '"')         <|> -- code string
                  (() <$ char '\n') -- TODO: use eoft [bug]

          -- non comment stuff
          other = () <$ manyTill anyChar initP -- XXX

-- quoted strings, by mauke^
codeString :: ParSource String
codeString = q *> many ((char '\\' *> anyChar) <|> noneOf "\"\\") <* q
             <?> "codestring"
    where q = char '"'

-- single lineblock (which will be parsed) plus its original row position
groupStringPos :: [StringPos] -> [CommentRow]
groupStringPos sps =
            let grp    = L.groupBy sameBlock sps'
                -- sps'': now that they are grouped, go back to sp tuple
                c2s qs = map (\(c,r,s,_) -> (c,r,s)) qs
                sps''  = map c2s grp  -- this is a list of lists, hence
                                      -- the doublemap
            in sps2cr sps''
    where -- how to verify if lines are sequential?
          -- subtract [1..] to line-number and you can use Eq on this!

          -- clever hack, the subtraction is the 4th value of the tuple
          sps' = map (\((c,r,s),k) -> (c,r,s,r-k)) (zip sps [1..])

          -- equality function:
          -- multiline blocks are always to be treated alone
          -- single line blocks are to be grouped only if on same col and
          -- sequential lines
          sameBlock (0,_,_,_ ) _            = False
          sameBlock (c,_,_,il) (c',_,_,il') = c == c' && il == il'


------------
-- SOURCE --
------------

source :: String -> String -> String -> ParSource [Issue]
source st ed slt = fmap groupStringPos (rawComments st ed slt) >>= \cc ->
                   fmap psPath getState                        >>= \fp ->
                   fmap concat (mapM (parseCr fp) cc)
    where
          parseCr :: FilePath -> CommentRow -> ParSource [Issue]
          parseCr fp (r,s) = case runParSource (bpp r) fp s of
                               Right y -> pure y
                               Left  _ -> parserFail "<source>"
                                     -- TODO: more meaningful err mes. [duct]
          bpp r            = getPosition                     >>= \p ->
                             setPosition (setSourceLine p r) >>
                             blocks
          -- TODO: elimina runParser per runParSource [duct]


----------
-- FILE --
----------

-- main function with wich to parse issues; errors on stderr
issueFinder :: [FilePath] -> IO [Issue]
issueFinder fps = concat <$> mapM (uncurry fileParser) (matchPar fps)
    where -- matches each file w/ its parser, filters parserless files
          matchPar :: [FilePath] -> [(FilePath, ParSource [Issue])]
          matchPar fpz = filtNotPar . map (\f -> (f, langParser f)) $ fpz

          -- I can't use == on ParSource
          filtNotPar :: [(FilePath, Maybe (ParSource [Issue]))] ->
                        [(FilePath, ParSource [Issue])]
          filtNotPar []                 = []
          filtNotPar ((f, Just p):tps)  = (f,p) : filtNotPar tps
          filtNotPar ((_, Nothing):tps) = filtNotPar tps


fileParser :: FilePath -> ParSource [Issue] -> IO [Issue]
fileParser fp p = rfe fp >>= \cs ->
                  case runParSource p fp cs of
                      Left l  -> perr (fp ++ " : parse error " ++ show l) >>
                                 return []
                      Right r -> return r
    where rfe :: FilePath -> IO String
          rfe _ = D.doesFileExist fp >>= \fb ->
                  if fb then readFile fp
                        else perr (fp ++ " : no such file") >> return ""

          perr cs = I.hPutStrLn I.stderr cs

runParSource :: ParSource a -> FilePath -> String -> Either ParseError a
runParSource p fp cs = runParser p (ParState fp) fp cs

-- TODO: add more langparsers [feature:intermediate]
langParser :: FilePath -> Maybe (ParSource [Issue])
langParser fp | ext `elem` [".hs", ".lhs"]  = Just haskell
              | ext `elem` [".c", ".h"]     = Just c
              | ext `elem` [".txt"]        = Just blocks
              | otherwise                  = Nothing
    where ext = SF.takeExtension fp

          haskell = source "{-" "-}" "--"
          c       = source "/*" "*/" "//"
            -- Shell: # (Perl: same)
            -- php: as C + Shell
            -- ruby: shell and =begin =end
            -- javascript: C + html
            -- python: # and """
            -- java, objective-c, etc. watch tiobe
            -- html

         -- TODO: do not add more languages but parse every text
         --       file? [design] [u:3]