{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TypeApplications #-}

{- HLINT ignore structField "Reduce duplication" -}
{- HLINT ignore typeRef "Use <$>" -}

module FlatBuffers.Internal.Compiler.Parser where

import           Control.Monad                            ( when )
import qualified Control.Monad.Combinators.NonEmpty       as NE

import qualified Data.ByteString                          as BS
import           Data.Coerce                              ( coerce )
import           Data.Functor                             ( void )
import           Data.List.NonEmpty                       ( NonEmpty )
import qualified Data.List.NonEmpty                       as NE
import qualified Data.Map.Strict                          as Map
import           Data.Maybe                               ( catMaybes )
import           Data.Text                                ( Text )
import qualified Data.Text                                as T
import qualified Data.Text.Encoding                       as T
import           Data.Void                                ( Void )
import           Data.Word                                ( Word8 )

import           FlatBuffers.Internal.Compiler.SyntaxTree
import           FlatBuffers.Internal.Constants           ( fileIdentifierSize )

import           Text.Megaparsec
import           Text.Megaparsec.Char
import qualified Text.Megaparsec.Char.Lexer               as L


type Parser = Parsec Void String

-- | Roughly based on: https://google.github.io/flatbuffers/flatbuffers_grammar.html.
-- Differences between this parser and the above grammar:
--
--   * Unions members now support aliases.
--   * An enum's underlying type used to be optional (defaulting to @short@), but now it's mandatory.
--   * Attributes can be reffered to either as an identifier or as a string literal (e.g. @attr@ or @"attr"@).
--   * Struct fields can't have default values.
--   * The grammar states that table/struct field defaults can only be scalars (integer/floating point constants),
--     when in reality, it could be also be a boolean or an enum identifier.
--   * The grammar says attribute values can be integers, floats or string literals.
--     Flatc only allows integers and string literals. To make things simpler, we decided to go with flatc's
--     approach and disallow floats.
--   * The grammar says namespaces must include at least one fragment, but an empty namespace
--     (i.e. @namespace ;@) is perfectly valid.
--   * This supports @native_include@ statements
--     (see: https://google.github.io/flatbuffers/flatbuffers_guide_use_cpp.html#flatbuffers_cpp_object_based_api)
schema :: Parser Schema
schema = do
  sc
  includes <- catMaybes <$> many (Just <$> include <|> Nothing <$ nativeInclude)
  decls <- many (decl <|> failOnInclude)
  eof
  pure $ Schema includes (catMaybes decls)
  where
    failOnInclude =
      rword "include" *> fail "\"include\" statements must be at the beginning of the file."
      <|> (rword "native_include" *> fail "\"native_include\" statements must be at the beginning of the file.")

decl :: Parser (Maybe Decl)
decl =
  choice
    [ Just . DeclN <$> namespaceDecl
    , Just . DeclT <$> tableDecl
    , Just . DeclS <$> structDecl
    , Just . DeclE <$> enumDecl
    , Just . DeclU <$> unionDecl
    , Just . DeclR <$> rootDecl
    , Just . DeclFI <$> fileIdentifierDecl
    , Just . DeclA <$> attributeDecl
    , Nothing <$ fileExtensionDecl
    , Nothing <$ jsonObj
    , Nothing <$ rpcDecl
    ]

-- | space consumer - this consumes and ignores any whitespace + comments
sc :: Parser ()
sc = L.space space1 lineCmnt blockCmnt
  where
    lineCmnt  = L.skipLineComment "//"
    blockCmnt = L.skipBlockComment "/*" "*/"

lexeme :: Parser a -> Parser a
lexeme = L.lexeme sc

symbol :: String -> Parser String
symbol = L.symbol sc

rword :: String -> Parser ()
rword w = (lexeme . try) (string w *> notFollowedBy (alphaNumChar <|> char '_'))

curly, square, parens :: Parser a -> Parser a
curly = between (symbol "{") (symbol "}")
square = between (symbol "[") (symbol "]")
parens = between (symbol "(") (symbol ")")


commaSep :: Parser a -> Parser [a]
commaSep p = sepBy p (symbol ",")

commaSep1 :: Parser a -> Parser (NonEmpty a)
commaSep1 p = NE.sepBy1 p (symbol ",")

semi, colon :: Parser ()
semi = void $ symbol ";"
colon = void $ symbol ":"

ident :: Parser Ident
ident = label "identifier" $ (lexeme . try) identifier
  where
    identifier = fmap (Ident . T.pack) $ (:) <$> letterChar <*> many (alphaNumChar <|> char '_')

typ :: Parser Type
typ =
  TInt8 <$ (rword "int8" <|> rword "byte") <|>
  TInt16 <$ (rword "int16" <|> rword "short") <|>
  TInt32 <$ (rword "int32" <|> rword "int") <|>
  TInt64 <$ (rword "int64" <|> rword "long") <|>
  TWord8 <$ (rword "uint8" <|> rword "ubyte") <|>
  TWord16 <$ (rword "uint16" <|> rword "ushort") <|>
  TWord32 <$ (rword "uint32" <|> rword "uint") <|>
  TWord64 <$ (rword "uint64" <|> rword "ulong") <|>

  TFloat <$ (rword "float32" <|> rword "float") <|>
  TDouble <$ (rword "float64" <|> rword "double") <|>

  TBool <$ rword "bool" <|>
  TString <$ rword "string" <|>
  label "type identifier" (TRef <$> typeRef) <|>
  label "vector type" vector
  where
    vector = TVector <$> between
              (symbol "[" *> (notFollowedBy (symbol "[") <|> fail "nested vector types not supported" ))
              (symbol "]")
              typ

typeRef :: Parser TypeRef
typeRef = do
  idents <- many (try (ident <* symbol "."))
  i <- ident
  pure $ TypeRef (Namespace (coerce idents)) i

tableField :: Parser TableField
tableField = do
  i <- ident
  colon
  t <- typ
  def <- optional (symbol "=" *> defaultVal)
  md <- metadata
  semi
  pure $ TableField i t def md

structField :: Parser StructField
structField = do
  i <- ident
  colon
  t <- typ
  md <- metadata
  semi
  pure $ StructField i t md

tableDecl :: Parser TableDecl
tableDecl = do
  rword "table"
  i <- ident
  md <- metadata
  fs <- curly (many tableField)
  pure $ TableDecl i md fs

structDecl :: Parser StructDecl
structDecl = do
  rword "struct"
  i <- ident
  md <- metadata
  fs <- curly (NE.some structField)
  pure $ StructDecl i md fs

enumDecl :: Parser EnumDecl
enumDecl = do
  rword "enum"
  i <- ident
  colon
  t <- typ
  md <- metadata
  v <- curly (commaSep1 enumVal)
  pure $ EnumDecl i t md v

enumVal :: Parser EnumVal
enumVal = EnumVal <$> ident <*> optional (symbol "=" *> intLiteral)

unionDecl :: Parser UnionDecl
unionDecl = do
  rword "union"
  i <- ident
  md <- metadata
  v <- curly (commaSep1 unionVal)
  pure $ UnionDecl i md v

unionVal :: Parser UnionVal
unionVal = UnionVal <$> optional (try (ident <* colon)) <*> typeRef

namespaceDecl :: Parser NamespaceDecl
namespaceDecl =
  NamespaceDecl . Namespace . coerce <$>
    (rword "namespace" *> sepBy ident (symbol ".") <* semi)

stringLiteral :: Parser StringLiteral
stringLiteral =
  label "string literal" $
    fmap (StringLiteral . T.pack) . lexeme $
      char '"' >> manyTill L.charLiteral (char '"')

intLiteral :: Parser IntLiteral
intLiteral =
  label "integer literal" . lexeme $
    L.signed sc L.decimal

attributeVal :: Parser AttributeVal
attributeVal =
  choice
    [ AttrI . unIntLiteral <$> intLiteral
    , AttrS . unStringLiteral <$> stringLiteral
    ]

defaultVal :: Parser DefaultVal
defaultVal =
  choice
    [ DefaultBool True <$ rword "true"
    , DefaultBool False <$ rword "false"
    , DefaultNum <$> label "number literal" (lexeme (L.signed sc L.scientific))
    , DefaultRef <$> ident
    ]

metadata :: Parser Metadata
metadata =
  label "metadata"
    . fmap (Metadata . Map.fromList . maybe [] NE.toList)
    . optional
    . parens
    . commaSep1 $
  (,) <$> attributeName <*> optional (colon *> attributeVal)

include :: Parser Include
include = Include <$> (rword "include" *> stringLiteral <* semi)

-- | See: https://google.github.io/flatbuffers/flatbuffers_guide_use_cpp.html#flatbuffers_cpp_object_based_api
nativeInclude :: Parser ()
nativeInclude = void (rword "native_include" >> stringLiteral >> semi)

rootDecl :: Parser RootDecl
rootDecl = RootDecl <$> (rword "root_type" *> typeRef <* semi)

fileExtensionDecl :: Parser ()
fileExtensionDecl = void (rword "file_extension" *> stringLiteral <* semi)

fileIdentifierDecl :: Parser FileIdentifierDecl
fileIdentifierDecl = do
  rword "file_identifier"
  fi <- coerce stringLiteral

  let byteCount = BS.length (T.encodeUtf8 fi)
  let codePointCount = T.length fi

  when (byteCount /= fileIdentifierSize) $
    if codePointCount == byteCount
      -- if the user is using ASCII characters
      then fail $ "file_identifier must be exactly " <> show (fileIdentifierSize @Word8) <> " characters"
      -- if the user is using multi UTF-8 code unit characters, show a more detailed error message
      else fail $ "file_identifier must be exactly " <> show (fileIdentifierSize @Word8) <> " UTF-8 code units"

  semi
  pure (FileIdentifierDecl fi)

attributeDecl :: Parser AttributeDecl
attributeDecl = AttributeDecl <$> (rword "attribute" *> attributeName <* semi)

attributeName :: Parser Text
attributeName = coerce stringLiteral <|> coerce ident

jsonObj :: Parser ()
jsonObj =
  label "JSON object" (void jobject)
  where
    json = choice [void jstring, void jnumber, jbool, jnull, void jarray, void jobject]
    jnull = rword "null"
    jbool = rword "true" <|> rword "false"
    jstring = stringLiteral
    jnumber = lexeme $ L.signed sc L.scientific
    jarray  = square (commaSep json)
    jobject = curly (commaSep keyValuePair)

    keyValuePair = do
      void stringLiteral <|> void ident
      colon
      json

rpcDecl :: Parser ()
rpcDecl = void $ rword "rpc_service" >> ident >> curly (NE.some rpcMethod)

rpcMethod :: Parser ()
rpcMethod = ident >> parens ident >> colon >> ident >> metadata >> void semi