{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}

-- | JL syntax tokenizer.

module JL.Tokenizer where

import           Control.Monad
import           Data.Char
import           Data.List
import           Data.Text (Text)
import qualified Data.Text as T
import           JL.Types
import           Text.Parsec hiding (anyToken)
import           Text.Parsec.Text
import           Text.Printf

-- | Tokenize a whole input, reporting the given 'FilePath' in errors.
tokenize :: FilePath -> Text -> Either ParseError [(Token, Location)]
tokenize fp t = parse tokensTokenizer fp t

-- | Tokenize until end of input, handing the whitespace consumed
-- before each token to 'tokenTokenizer'.
tokensTokenizer :: Parser [(Token, Location)]
tokensTokenizer =
  manyTill (many space >>= tokenTokenizer) (try (spaces >> eof))
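-- A minimal usage sketch, not part of the original module: run the
-- tokenizer over a source string and count the resulting tokens.  The
-- name 'tokenCountExample' and the "<example>" file name are
-- illustrative assumptions only.
tokenCountExample :: Text -> Either ParseError Int
tokenCountExample input = fmap length (tokenize "<example>" input)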
-- | Tokenize a single token.  The argument is the whitespace that
-- preceded it, which decides whether a newline starts a non-indented
-- line and whether operators are properly spaced.
tokenTokenizer :: [Char] -> Parser (Token, Location)
tokenTokenizer prespaces =
  choice
    [ if isSuffixOf "\n" prespaces
        then do
          pos <- getPosition
          pure
            ( NonIndentedNewline
            , Location
                (sourceLine pos)
                (sourceColumn pos)
                (sourceLine pos)
                (sourceColumn pos))
        else unexpected "indented newline"
    , atomThenSpace If "if"
    , atomThenSpace Then "then"
    , atomThenSpace Else "else"
    , atomThenSpace Case "case"
    , atomThenSpace Of "of"
    , atom RightArrow "->"
    , atom Period "."
    , atom Colon ":"
    , atom Backslash "\\"
    , atom OpenParen "("
    , atom CloseParen ")"
    , atom OpenBrace "{"
    , atom CloseBrace "}"
    , atom OpenBracket "["
    , atom CloseBracket "]"
    , atom Dollar "$"
    , atom Comma ","
    , do tok <-
           parsing
             Operator
             (fmap
                T.pack
                (choice
                   [ string "*"
                   , string "+"
                   , try (string ">=")
                   , try (string "<=")
                   , try (string "/=")
                   , string ">"
                   , string "<"
                   , string "/"
                   , string "="
                   , string "&&"
                   , try (string "||")
                   ]))
             "operator (e.g. *, <, +, =, etc.)"
         when
           (null prespaces)
           (unexpected
              (tokenString tok ++
               ", there should be spaces before and after operators."))
         lookAhead spaces1 <?> ("space after " ++ tokenString tok)
         pure tok
    , atom Bar "|"
    , parsing
        StringToken
        (do _ <- string "\""
            chars <- many (satisfy (\c -> c /= '"'))
            when
              (any (== '\\') chars)
              (unexpected "\\ character, not allowed inside a string.")
            when
              (any (== '\n') chars)
              (unexpected "newline character, not allowed inside a string.")
            _ <- string "\"" <?> "double quotes (\") to close the string"
            pure (T.pack chars))
        "string (e.g. \"hello\", \"123\", etc.)"
    , do (var, loc) <-
           parsing
             VariableToken
             (do variable <-
                   do start <- many1 (satisfy (\c -> c == '_' || isLetter c))
                      end <-
                        many
                          (satisfy (\c -> c == '_' || isLetter c || isDigit c))
                      pure (start ++ end)
                 pure (T.pack variable))
             "variable (e.g. “elephant”, “age”, “t2”, etc.)"
         pure
           ( case var of
               VariableToken "null" -> NullToken
               VariableToken "true" -> TrueToken
               VariableToken "false" -> FalseToken
               _ -> var
           , loc)
    , parseNumbers prespaces
    ]
  where
    spaces1 :: Parser ()
    spaces1 = space >> spaces

-- | Take the first @n@ characters and append an ellipsis, for strings
-- longer than two characters.
ellipsis :: Int -> [Char] -> [Char]
ellipsis n text =
  if length text > 2
    then take n text ++ "…"
    else text

-- | Like 'parsing', but for parsers producing something other than 'Text'.
specialParsing :: (t1 -> t) -> Parser t1 -> String -> Parser (t, Location)
specialParsing constructor parser description = do
  start <- getPosition
  thing <- parser <?> description
  end <- getPosition
  pure
    ( constructor thing
    , Location
        (sourceLine start)
        (sourceColumn start)
        (sourceLine end)
        (sourceColumn end))

-- | Parse a literal string as a token, with its location.
atom :: t -> String -> Parser (t, Location)
atom constructor text = do
  start <- getPosition
  _ <- try (string text) <?> smartQuotes text
  end <- getPosition
  pure
    ( constructor
    , Location
        (sourceLine start)
        (sourceColumn start)
        (sourceLine end)
        (sourceColumn end))

-- | Like 'atom', but require whitespace after the keyword.
atomThenSpace :: t -> String -> Parser (t, Location)
atomThenSpace constructor text = do
  start <- getPosition
  _ <-
    try
      ((string text <?> smartQuotes text) <*
       (lookAhead spaces1 <?> ("space or newline after " ++ smartQuotes text)))
  end <- getPosition
  pure
    ( constructor
    , Location
        (sourceLine start)
        (sourceColumn start)
        (sourceLine end)
        (sourceColumn end))

-- | Run a text-producing parser and wrap its result in a token
-- constructor together with its location.
parsing :: (Text -> t) -> Parser Text -> String -> Parser (t, Location)
parsing constructor parser description = do
  start <- getPosition
  text <- parser <?> description
  end <- getPosition
  pure
    ( constructor text
    , Location
        (sourceLine start)
        (sourceColumn start)
        (sourceLine end)
        (sourceColumn end))

-- | Parse an integer or decimal literal, or a bare @-@ operator.
parseNumbers :: [a] -> Parser (Token, Location)
parseNumbers prespaces = parser <?> "number (e.g. 42, 3.141, etc.)"
  where
    parser = do
      start <- getPosition
      neg <- fmap Just (char '-') <|> pure Nothing
      let operator = do
            end <- getPosition
            pure
              ( Operator "-"
              , Location
                  (sourceLine start)
                  (sourceColumn start)
                  (sourceLine end)
                  (sourceColumn end))
          number :: (forall a. (Num a) => a -> a) -> Parser (Token, Location)
          number f = do
            x <- many1 digit
            (do _ <- char '.'
                y <- many1 digit <?> ("decimal component, e.g. " ++ x ++ ".0")
                end <- getPosition
                pure
                  ( Decimal (f (read (x ++ "." ++ y)))
                  , Location
                      (sourceLine start)
                      (sourceColumn start)
                      (sourceLine end)
                      (sourceColumn end))) <|>
              (do end <- getPosition
                  pure
                    ( Integer (f (read x))
                    , Location
                        (sourceLine start)
                        (sourceColumn start)
                        (sourceLine end)
                        (sourceColumn end)))
      case neg of
        Nothing -> number id
        Just {} -> do
          when
            (null prespaces)
            (unexpected
               (curlyQuotes "-" ++ ", there should be a space before it."))
          (number (* (-1)) <?> "number (e.g. 123)") <|>
            (operator <*
             (space <?> ("space after operator " ++ curlyQuotes "-")))

-- | Wrap a string in curly double quotes for error messages.
smartQuotes :: [Char] -> [Char]
smartQuotes t = "“" <> t <> "”"

-- | Accept exactly the given token, returning its location.
equalToken :: Token -> TokenParser Location
equalToken p = fmap snd (satisfyToken (== p) <?> tokenStr p)

-- | Consume the given predicate from the token stream.
satisfyToken :: (Token -> Bool) -> TokenParser (Token, Location)
satisfyToken p =
  consumeToken
    (\tok ->
       if p tok
         then Just tok
         else Nothing)

-- | The parser @anyToken@ accepts any kind of token. It is for example
-- used to implement 'eof'. Returns the accepted token.
anyToken :: TokenParser (Token, Location)
anyToken = consumeToken Just

-- | Consume the given predicate from the token stream.
consumeToken :: (Token -> Maybe a) -> TokenParser (a, Location)
consumeToken f = do
  u <- getState
  tokenPrim
    tokenString
    tokenPosition
    (\(tok, loc) ->
       if locationStartColumn loc > u
         then fmap (, loc) (f tok)
         else Nothing)
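-- A minimal sketch, not part of the original module, of how these
-- token-level combinators compose: a parser requiring an opening and a
-- closing parenthesis around some inner parser, built from
-- 'equalToken'.  The name 'parensExample' is an illustrative
-- assumption.
parensExample :: TokenParser a -> TokenParser a
parensExample inner = do
  _ <- equalToken OpenParen
  x <- inner
  _ <- equalToken CloseParen
  pure x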
-- | Make a string out of the token, for error message purposes.
tokenString :: (Token, Location) -> [Char]
tokenString = tokenStr . fst

-- | Describe a token for error messages.
tokenStr :: Token -> [Char]
tokenStr tok =
  case tok of
    If -> curlyQuotes "if"
    Then -> curlyQuotes "then"
    RightArrow -> curlyQuotes "->"
    Else -> curlyQuotes "else"
    Case -> curlyQuotes "case"
    Of -> curlyQuotes "of"
    NonIndentedNewline -> "non-indented newline"
    Backslash -> curlyQuotes ("backslash " ++ curlyQuotes "\\")
    OpenParen -> "opening parenthesis " ++ curlyQuotes "("
    CloseParen -> "closing parenthesis " ++ curlyQuotes ")"
    VariableToken t -> "variable " ++ curlyQuotes (T.unpack t)
    StringToken !t -> "string " ++ show t
    Operator !t -> "operator " ++ curlyQuotes (T.unpack t)
    Comma -> curlyQuotes ","
    Integer !i -> "integer " ++ show i
    Decimal !d -> "decimal " ++ printf "%f" d
    Bar -> curlyQuotes "|"
    Dollar -> curlyQuotes "$"
    Period -> curlyQuotes "."
    TrueToken -> curlyQuotes "true"
    FalseToken -> curlyQuotes "false"
    NullToken -> curlyQuotes "null"
    CloseBrace -> curlyQuotes "}"
    OpenBrace -> curlyQuotes "{"
    CloseBracket -> curlyQuotes "]"
    OpenBracket -> curlyQuotes "["
    Colon -> curlyQuotes ":"

-- | Update the position by the token.
tokenPosition :: SourcePos -> (Token, Location) -> t -> SourcePos
tokenPosition pos (_, l) _ = setSourceColumn (setSourceLine pos line) col
  where
    (line, col) = (locationStartLine l, locationStartColumn l)

-- | A parser over a stream of located tokens; the 'Int' user state is
-- the column that tokens must start after (see 'consumeToken').
type TokenParser e = forall s m. Stream s m (Token, Location) => ParsecT s Int m e

-- | @notFollowedBy p@ only succeeds when parser @p@ fails. This parser
-- does not consume any input. This parser can be used to implement the
-- \'longest match\' rule. For example, when recognizing keywords (for
-- example @let@), we want to make sure that a keyword is not followed
-- by a legal identifier character, in which case the keyword is
-- actually an identifier (for example @lets@). We can program this
-- behaviour as follows:
--
-- > keywordLet = try (do{ string "let"
-- >                     ; notFollowedBy alphaNum
-- >                     })
notFollowedBy' :: TokenParser (Token, Location) -> TokenParser ()
notFollowedBy' p =
  try
    ((do c <- try p
         unexpected (tokenString c)) <|>
     return ())

-- | This parser only succeeds at the end of the input. This is not a
-- primitive parser but it is defined using 'notFollowedBy'.
--
-- > eof = notFollowedBy anyToken <?> "end of input"
endOfTokens :: TokenParser ()
endOfTokens = notFollowedBy' anyToken <?> "end of input"

-- | Wrap a string in curly single quotes for error messages.
curlyQuotes :: [Char] -> [Char]
curlyQuotes t = "‘" <> t <> "’"
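-- A minimal end-to-end sketch, not part of the original module:
-- tokenize a source string and then run a token-level parser over the
-- resulting token stream, using 0 as the initial minimum column so the
-- indentation check in 'consumeToken' accepts every token.  The name
-- 'parseWithTokensExample' and the "<example>" source name are
-- illustrative assumptions.
parseWithTokensExample :: TokenParser a -> Text -> Either ParseError a
parseWithTokensExample p input = do
  toks <- tokenize "<example>" input
  runParser p 0 "<example>" toks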