{-# LANGUAGE BangPatterns               #-}
{-# LANGUAGE LambdaCase                 #-}
{-# LANGUAGE GADTs                      #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE DerivingStrategies         #-}
{-# LANGUAGE NamedFieldPuns             #-}
{-# LANGUAGE OverloadedStrings          #-}
{-# LANGUAGE ScopedTypeVariables        #-}

-- | Parser combinators for vim style tags (ctags)
--
module GhcTags.CTag.Parser
  ( parseTagsFile
  , parseTagLine
  -- * parse a ctag
  , parseTag
  -- * parse a pseudo-ctag
  , parseHeader
  ) where

import           Control.Arrow ((***))
import           Control.Applicative (many, (<|>))
import           Control.Monad (guard)
import           Data.ByteString (ByteString)
import qualified Data.ByteString as BS
import           Data.Attoparsec.ByteString  (Parser, (<?>))
import qualified Data.Attoparsec.ByteString  as AB
import qualified Data.Attoparsec.ByteString.Char8  as AChar
import           Data.Functor (void, ($>))
import           Data.Function (on)
import           Data.Text          (Text)
import qualified Data.Text          as Text
import qualified Data.Text.Encoding as Text
import qualified System.FilePath.ByteString as FilePath

import           GhcTags.Tag
import qualified GhcTags.Utils as Utils
import           GhcTags.CTag.Header
import           GhcTags.CTag.Utils


-- | Parser for a 'CTag' from a single text line.
--
parseTag :: Parser CTag
parseTag =
      (\tagName tagFilePath tagAddr (tagKind, tagFields)
        -> Tag { tagName
               , tagFilePath
               , tagAddr
               , tagKind
               , tagFields
               , tagDefinition = NoTagDefinition
               })
    <$> parseTagName
    <*  separator

    <*> parseTagFileName
    <*  separator

    -- includes an optional ';"' separator
    <*> parseTagAddress

    <*> (  -- kind field followed by list of fields or end of line, e.g.
           -- '(TagField, CTagFields)'.
              ((,) <$> ( separator *> parseKindField )
                   <*> ( separator *> parseFields <* endOfLine
                         <|>
                         endOfLine $> mempty)
                       )

          -- list of fields (kind field might be later, but don't check it, we
          -- always format it as the first field) or end of line.
          <|> curry id NoKind
                <$> ( separator *> parseFields <* endOfLine
                      <|>
                      endOfLine $> mempty
                    )

          -- kind encoded as a single letter, followed by a list
          -- of fields or end of line.
          <|> curry (charToTagKind *** id)
                  <$> ( separator *> AChar.satisfy notTabOrNewLine )
                  <*> ( separator *> parseFields <* endOfLine
                        <|>
                        endOfLine $> mempty
                      )
          <|> endOfLine $> (NoKind, mempty)
        )

  where
    separator :: Parser Char
    separator = AChar.char '\t'

    parseTagName :: Parser TagName
    parseTagName = TagName . Text.decodeUtf8
                    <$> AChar.takeWhile (/= '\t')
                    <?> "parsing tag name failed"

    parseTagFileName :: Parser TagFilePath
    parseTagFileName =
          TagFilePath . Text.decodeUtf8 . FilePath.normalise
      <$> AChar.takeWhile (/= '\t')

    parseExCommand :: Parser ExCommand
    parseExCommand = (\x -> ExCommand $ Text.decodeUtf8 $ BS.take (BS.length x - 1) x)
                 <$> AChar.scan "" go
                 <*  AChar.anyChar
      where
        -- go until either eol or ';"' sequence is found.
        go :: String -> Char -> Maybe String

        go !s c  | -- eol
                   take (length Utils.endOfLine) (c : s)
                     == reverse Utils.endOfLine
                              = Nothing

                 | -- ';"' sequence
                   l == "\";" = Nothing

                 | otherwise  = Just l
          where
            l = take 2 (c : s)

    -- We only parse `TagLine` or `TagCommand`.
    parseTagAddress :: Parser CTagAddress
    parseTagAddress =
          TagLine <$> AChar.decimal <* (endOfLine <|> void (AB.string ";\""))
      <|>
          TagCommand <$> parseExCommand

    parseKindField :: Parser CTagKind
    parseKindField = do
      x <-
        Text.decodeUtf8
          <$> (AB.string "kind:" *> AChar.takeWhile notTabOrNewLine)
      guard (Text.length x == 1)
      pure $ charToTagKind (Text.head x)

    parseFields :: Parser CTagFields
    parseFields = TagFields <$> AChar.sepBy parseField separator


parseField :: Parser TagField
parseField =
         on TagField Text.decodeUtf8
     <$> AChar.takeWhile (\x -> x /= ':' && notTabOrNewLine x)
     <*  AChar.char ':'
     <*> AChar.takeWhile notTabOrNewLine


-- | A vim-style tag file parser.
--
parseTags :: Parser [Either Header CTag]
parseTags = many parseTagLine


-- | Parse either a header line ot a 'CTag'.
--
parseTagLine :: Parser (Either Header CTag)
parseTagLine =
    AChar.eitherP
      (parseHeader <?> "failed parsing tag")
      (parseTag    <?> "failed parsing header")


parseHeader :: Parser Header
parseHeader = do
    e <- AB.string "!_TAG_" $> False
         <|>
         AB.string "!_" $> True
    case e of
      True ->
               flip parsePseudoTagArgs (Text.decodeUtf8 <$> AChar.takeWhile notTabOrNewLine)
             . PseudoTag
             . Text.decodeUtf8
         =<< AChar.takeWhile (\x -> notTabOrNewLine x && x /= '!')
      False -> do
        headerType <-
              AB.string "FILE_ENCODING"     $> SomeHeaderType FileEncoding
          <|> AB.string "FILE_FORMAT"       $> SomeHeaderType FileFormat
          <|> AB.string "FILE_SORTED"       $> SomeHeaderType FileSorted
          <|> AB.string "OUTPUT_MODE"       $> SomeHeaderType OutputMode
          <|> AB.string "KIND_DESCRIPTION"  $> SomeHeaderType KindDescription
          <|> AB.string "KIND_SEPARATOR"    $> SomeHeaderType KindSeparator
          <|> AB.string "PROGRAM_AUTHOR"    $> SomeHeaderType ProgramAuthor
          <|> AB.string "PROGRAM_NAME"      $> SomeHeaderType ProgramName
          <|> AB.string "PROGRAM_URL"       $> SomeHeaderType ProgramUrl
          <|> AB.string "PROGRAM_VERSION"   $> SomeHeaderType ProgramVersion
          <|> AB.string "EXTRA_DESCRIPTION" $> SomeHeaderType ExtraDescription
          <|> AB.string "FIELD_DESCRIPTION" $> SomeHeaderType FieldDescription
        case headerType of
          SomeHeaderType ht@FileEncoding ->
              parsePseudoTagArgs ht (Text.decodeUtf8 <$> AChar.takeWhile notTabOrNewLine)
          SomeHeaderType ht@FileFormat ->
              parsePseudoTagArgs ht AChar.decimal
          SomeHeaderType ht@FileSorted ->
              parsePseudoTagArgs ht AChar.decimal
          SomeHeaderType ht@OutputMode ->
              parsePseudoTagArgs ht (Text.decodeUtf8 <$> AChar.takeWhile notTabOrNewLine)
          SomeHeaderType ht@KindDescription ->
              parsePseudoTagArgs ht (Text.decodeUtf8 <$> AChar.takeWhile notTabOrNewLine)
          SomeHeaderType ht@KindSeparator ->
              parsePseudoTagArgs ht (Text.decodeUtf8 <$> AChar.takeWhile notTabOrNewLine)
          SomeHeaderType ht@ProgramAuthor ->
              parsePseudoTagArgs ht (Text.decodeUtf8 <$> AChar.takeWhile notTabOrNewLine)
          SomeHeaderType ht@ProgramName ->
              parsePseudoTagArgs ht (Text.decodeUtf8 <$> AChar.takeWhile notTabOrNewLine)
          SomeHeaderType ht@ProgramUrl ->
              parsePseudoTagArgs ht (Text.decodeUtf8 <$> AChar.takeWhile notTabOrNewLine)
          SomeHeaderType ht@ProgramVersion ->
              parsePseudoTagArgs ht (Text.decodeUtf8 <$> AChar.takeWhile notTabOrNewLine)
          SomeHeaderType ht@ExtraDescription ->
              parsePseudoTagArgs ht (Text.decodeUtf8 <$> AChar.takeWhile notTabOrNewLine)
          SomeHeaderType ht@FieldDescription ->
              parsePseudoTagArgs ht (Text.decodeUtf8 <$> AChar.takeWhile notTabOrNewLine)
          SomeHeaderType PseudoTag {} ->
              error "parseHeader: impossible happened"

  where
    parsePseudoTagArgs :: Show ty
                       => HeaderType ty
                       -> Parser ty
                       -> Parser Header
    parsePseudoTagArgs ht parseArg =
              Header ht
          <$> ( (Just . Text.decodeUtf8 <$> (AChar.char '!' *> AChar.takeWhile notTabOrNewLine))
                <|> pure Nothing
              )
          <*> (AChar.char '\t' *> parseArg)
          <*> (AChar.char '\t' *> parseComment)

    parseComment :: Parser Text
    parseComment =
         AChar.char '/'
      *> (Text.init . Text.decodeUtf8 <$> AChar.takeWhile notNewLine)
      <* endOfLine



-- | Parse a vim-style tag file.
--
parseTagsFile :: ByteString
              -> IO (Either String [Either Header CTag])
parseTagsFile =
      fmap AChar.eitherResult
    . AChar.parseWith (pure mempty) parseTags


--
-- Utils
--


-- | Unlike 'AChar.endOfLine', it also matches for a single '\r' characters (which
-- marks enf of lines on darwin).
--
endOfLine :: Parser ()
endOfLine = AB.string "\r\n" $> ()
        <|> AChar.char '\r' $> ()
        <|> AChar.char '\n' $> ()


notTabOrNewLine :: Char -> Bool
notTabOrNewLine = \x -> x /= '\t' && notNewLine x

notNewLine :: Char -> Bool
notNewLine = \x -> x /= '\n' && x /= '\r'