{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}

module GLua.Lexer where

import GLua.AG.Token (MToken (..), Token (..))

import Data.Char (ord)
import GLua.Position (LineColPos (..), Region (..))
import Text.Parsec
  ( ParseError,
    SourcePos,
    anyChar,
    char,
    digit,
    endOfLine,
    eof,
    getPosition,
    lookAhead,
    many,
    many1,
    manyTill,
    noneOf,
    oneOf,
    option,
    optionMaybe,
    parse,
    satisfy,
    sourceColumn,
    sourceLine,
    space,
    string,
    try,
    (<?>),
    (<|>),
  )
import Text.Parsec.Pos (newPos)
import Text.Parsec.String (Parser)

-- | Region start to SourcePos
rgStart2sp :: Region -> SourcePos
rgStart2sp (Region start _) = lcp2sp start

-- | Region end to SourcePos
rgEnd2sp :: Region -> SourcePos
rgEnd2sp (Region _ end) = lcp2sp end

-- | SourcePos to Region
sp2Rg :: SourcePos -> Region
sp2Rg sp = Region (sp2lcp sp) (sp2lcp sp)

-- | LineColPos to SourcePos
lcp2sp :: LineColPos -> SourcePos
lcp2sp (LineColPos l c _) = newPos "source.lua" (l + 1) (c + 1)

-- | SourcePos to LineColPos
sp2lcp :: SourcePos -> LineColPos
sp2lcp pos = LineColPos (sourceLine pos - 1) (sourceColumn pos - 1) 0

-- | Get the source position
pPos :: Parser LineColPos
pPos = sp2lcp <$> getPosition

-- See luajit's src/lj_char.c and src/lj_char.h
pIdentifierCharacter :: Parser Char
pIdentifierCharacter = satisfy validChar
  where
    validChar :: Char -> Bool
    validChar c =
      cBetween c '0' '9'
        || cBetween c 'A' 'Z'
        || c == '_'
        || cBetween c 'a' 'z'
        || ord c >= 128

pNonDigitIdentifierCharacter :: Parser Char
pNonDigitIdentifierCharacter = satisfy validChar
  where
    validChar :: Char -> Bool
    validChar c =
      cBetween c 'A' 'Z'
        || c == '_'
        || cBetween c 'a' 'z'
        || ord c >= 128

cBetween :: Char -> Char -> Char -> Bool
cBetween c left right = c >= left && c <= right

-- | Parse the string until the end of the line. Used in parseLuaComment among others.
pUntilEnd :: Parser String
pUntilEnd = manyTill anyChar (lookAhead (endOfLine <|> const '\n' <$> eof))

-- | Whitespace parser that requires at least one whitespace character
parseWhitespace :: Parser String
parseWhitespace = many1 space <?> "whitespace"

-- | Whitespace parser that requires 0 or more whitespace characters
parseOptionalWhitespace :: Parser String
parseOptionalWhitespace = many space <?> "whitespace"

-- | An escaped character of a single line string
-- Takes the delimiter of the string as parameter
escapedStringChar :: Char -> Parser String
escapedStringChar delim =
  (\escaped -> '\\' : escaped) <$ char '\\' <*> escapedChar
    <|> (: []) <$> noneOf ['\n', delim]
  where
    escapedChar =
      (:) <$> char 'z' <*> parseOptionalWhitespace
        <|> (: []) <$> anyChar

-- | The start of a nested string
-- Returns the =-signs at the start of the nested string
startNestedString :: Parser String
startNestedString = do
  _ <- char '['
  depth <- many (char '=')
  _ <- char '['
  return depth

-- | Parses a multiline string
-- Takes the =-signs of the string delimiter and returns the contents
nestedString :: String -> Parser String
nestedString depth = do
  let endStr = "]" ++ depth ++ "]"
  manyTill anyChar (try $ string endStr)

-- | Parse Lua style comments
parseLuaComment :: Parser Token
parseLuaComment = do
  _ <- string "--"
  -- When a nested comment is started, it must be finished
  startNested <- optionMaybe (try startNestedString)
  case startNested of
    Nothing -> DashComment <$> pUntilEnd
    Just depth -> do
      contents <- nestedString depth
      return $ DashBlockComment (length depth) contents

-- | Parse C-style comments
parseCComment :: Parser Token
parseCComment = do
  _ <- char '/'
  try (SlashComment <$ char '/' <*> pUntilEnd)
    <|> SlashBlockComment <$ char '*' <*> manyTill anyChar (try (string "*/") <|> const "\n" <$> eof)

-- | Parse any kind of comment.
parseComment :: Parser Token
parseComment = parseLuaComment <|> parseCComment <?> "Comment"

-- | Convert the result of the nestedString parser to a multiline string token
nestedStringToMLString :: String -> String -> Token
nestedStringToMLString depth contents =
  MLString
    ( showChar '['
        . showString depth
        . showChar '['
        . showString contents
        . showChar ']'
        . showString depth
        . showChar ']'
        $ ""
    )

-- | Parse single line strings e.g. "sdf", 'werf'.
parseLineString :: Char -> Parser String
parseLineString c = concat <$ char c <*> many (escapedStringChar c) <* char c

-- | Single and multiline strings.
parseString :: Parser Token
parseString =
  ( SQString <$> parseLineString '\''
      <|> DQString <$> parseLineString '"'
      <|> do
        depth <- startNestedString
        nestedStringToMLString depth <$> nestedString depth
  )
    <?> "String"

-- | Parse any kind of number.
-- Except for numbers that start with a '.'. That's handled by parseDots to solve ambiguity.
parseNumber :: Parser Token
parseNumber =
  TNumber
    <$> ( (++)
            <$> (pHexadecimal <|> pBinary <|> pNumber)
            <*> (pLLULL <|> option "" parseNumberSuffix)
        )
    <?> "Number"
  where
    pNumber :: Parser String
    pNumber = (++) <$> many1 digit <*> option "" pDecimal

    pDecimal :: Parser String
    pDecimal = (:) <$> char '.' <*> many digit

    pHexDecimal :: Parser String
    pHexDecimal =
      (:) <$> char '.'
        <*> many pHex

    pHexadecimal :: Parser String
    pHexadecimal =
      (++)
        <$> (try (string "0x") <|> try (string "0X"))
        <*> ((++) <$> many1 pHex <*> option "" pHexDecimal)

    pHex :: Parser Char
    pHex = digit <|> oneOf ['a', 'b', 'c', 'd', 'e', 'f', 'A', 'B', 'C', 'D', 'E', 'F']

    pBinary :: Parser String
    pBinary =
      (++)
        <$> (try (string "0b") <|> try (string "0B"))
        <*> ((++) <$> many1 pBin <*> option "" pDecimal)

    pBin :: Parser Char
    pBin = oneOf ['0', '1']

    -- LL/ULL suffix of a number, making it signed/unsigned int64 respectively
    -- http://luajit.org/ext_ffi_api.html#literals
    pLLULL :: Parser String
    pLLULL = pULL <|> pLL

    pLL :: Parser String
    pLL = "LL" <$ oneOf ['L', 'l'] <* oneOf ['L', 'l']

    pULL :: Parser String
    pULL = "ULL" <$ oneOf ['U', 'u'] <* pLL

-- | Parse the suffix of a number
parseNumberSuffix :: Parser String
parseNumberSuffix = imaginary <|> extension
  where
    imaginary = (: []) <$> oneOf ['i', 'I']

    extension =
      (\e s d -> e : s ++ d)
        <$> oneOf ['e', 'E', 'p', 'P']
        <*> option "" (string "+" <|> string "-")
        <*> many1 digit

-- | Parse either a keyword or an identifier that starts with that keyword
parseKeyword :: Token -> String -> Parser Token
parseKeyword tok str =
  try
    ( do
        _ <- string str
        (\s -> Identifier (str ++ s)) <$> many1 pIdentifierCharacter
          <|> return tok <?> "Keyword " ++ str
    )

-- | Parse just an identifier.
parseIdentifier :: Parser String
parseIdentifier = (:) <$> pNonDigitIdentifierCharacter <*> many pIdentifierCharacter <?> "Identifier"

-- | Parse a label.
parseLabel :: Parser Token
parseLabel =
  Label
    <$ string "::"
    <*> parseOptionalWhitespace
    <*> parseIdentifier
    <*> parseOptionalWhitespace
    <* string "::"
    <?> "Label"

-- | Parse anything to do with dots: indexing (.), concatenation (..), varargs (...) or numbers that start with a dot
parseDots :: Parser Token
parseDots = do
  _ <- char '.' -- first .
  try
    ( do
        _ <- char '.' -- second .
        try (VarArg <$ char '.') <|> return Concatenate -- third .
    )
    <|>
    -- try to parse a number that starts with a .
    try
      ( do
          nums <- many1 digit
          suffix <- option "" parseNumberSuffix
          return $ TNumber ('.' : (nums ++ suffix))
      )
    <|> return Dot

-- | Parse any kind of token.
parseToken :: Parser Token
parseToken =
  Whitespace <$> parseWhitespace
    <|>
    -- Constants
    try parseString
    <|> parseNumber
    <|> parseKeyword TTrue "true"
    <|> parseKeyword TFalse "false"
    <|> parseKeyword Nil "nil"
    <|> parseKeyword Not "not"
    <|> parseKeyword And "and"
    <|> parseKeyword Or "or"
    <|> parseKeyword Function "function"
    <|> parseKeyword Local "local"
    <|> parseKeyword If "if"
    <|> parseKeyword Then "then"
    <|> parseKeyword Elseif "elseif"
    <|> parseKeyword Else "else"
    <|> parseKeyword For "for"
    <|> parseKeyword In "in"
    <|> parseKeyword Do "do"
    <|> parseKeyword While "while"
    <|> parseKeyword Until "until"
    <|> parseKeyword Repeat "repeat"
    <|> parseKeyword Continue "continue"
    <|> parseKeyword Break "break"
    <|> parseKeyword Return "return"
    <|> parseKeyword End "end"
    <|> Identifier <$> parseIdentifier
    <|> Semicolon <$ char ';'
    <|> parseDots
    <|> try parseComment
    <|>
    -- Operators
    Plus <$ string "+"
    <|> Minus <$ string "-"
    <|> Multiply <$ string "*"
    <|> Divide <$ string "/"
    <|> Modulus <$ string "%"
    <|> Power <$ string "^"
    <|> TEq <$ try (string "==")
    <|> Equals <$ string "="
    <|> TNEq <$ string "~="
    <|> TCNEq <$ try (string "!=")
    <|> CNot <$ string "!"
    <|> TLEQ <$ try (string "<=")
    <|> TLT <$ string "<"
    <|> TGEQ <$ try (string ">=")
    <|> TGT <$ string ">"
    <|> try parseLabel
    <|> Colon <$ string ":"
    <|> Comma <$ string ","
    <|> Hash <$ string "#"
    <|> CAnd <$ string "&&"
    <|> COr <$ string "||"
    <|> LRound <$ char '('
    <|> RRound <$ char ')'
    <|> LCurly <$ char '{'
    <|> RCurly <$ char '}'
    <|> LSquare <$ char '['
    <|> RSquare <$ char ']'

-- | Annotate the result of a parser with the region it was parsed from
annotated :: (Region -> a -> b) -> Parser a -> Parser b
annotated f p = (\s t e -> f (Region s e) t) <$> pPos <*> p <*> pPos

-- | Parse a located MToken
parseMToken :: Parser MToken
parseMToken = do
  start <- pPos
  tok <- parseToken
  end <- pPos
  return $ MToken (Region start end) tok

-- | Parse a list of tokens and turn them into MTokens.
parseTokens :: Parser [MToken]
parseTokens = many parseMToken

-- | Parse the potential #! comment on the first line
-- Lua ignores the first line if it starts with #
parseHashBang :: Parser String
parseHashBang = option "" (char '#' *> pUntilEnd)

-- | Parse a string into MTokens.
execParseTokens :: String -> Either ParseError [MToken]
execParseTokens = parse (parseHashBang *> parseTokens <* eof) "input"
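
-- The example below is not part of the lexer itself; it is a minimal usage
-- sketch of 'execParseTokens', assuming only the definitions in this module.
-- The helper name 'countSignificantTokens' is illustrative, not an existing
-- API.

-- | Example: lex a piece of source code and count the tokens that are not
-- whitespace. The pattern matches mirror how the tokens are built above:
-- every 'MToken' carries a 'Region' and a 'Token', and whitespace is lexed
-- as a 'Whitespace' token holding its text.
countSignificantTokens :: String -> Either ParseError Int
countSignificantTokens source =
  length . filter isSignificant <$> execParseTokens source
  where
    isSignificant :: MToken -> Bool
    isSignificant (MToken _ (Whitespace _)) = False
    isSignificant _ = True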