module Language.TL.Lexer
  ( Parser,

    -- * Helper functions
    many,
    many',
    some,
    optional,
    lexeme,
    sepBy1,

    -- * Tokenizing
    string_,
    char_,
    between',
    comment,
    lcIdent',
    ucIdent',
    lcNsIdent,
    ucNsIdent,
    lcFullIdent,
    emptyKw,
    newKw,
    finalKw,
    nat,
  )
where

import Control.Applicative (liftA2)
import Data.Char
import Data.List.NonEmpty (NonEmpty (..))
import Data.Text (Text, cons)
import Data.Void
import Language.TL.Types
import Text.Megaparsec hiding (many, optional, sepBy1, some)
import Text.Megaparsec.Char
import qualified Text.Megaparsec.Char.Lexer as L

-- | Parser type
type Parser = Parsec Void Text

-- | remove whitespaces
sc :: Parser ()
sc =
  L.space
    space1
    (L.skipLineComment "//")
    (L.skipBlockComment "/*" "*/")

-- | 'many'' but removes whitespaces and comments before parsing each segment
many :: Parser a -> Parser [a]
many = many' . lexeme

-- | @some@ that backtracks and removes whitespaces etc.
some :: Parser a -> Parser (NonEmpty a)
some p = (:|) <$> p <*> many p

-- | @many@ that backtracks
many' :: Parser a -> Parser [a]
many' v = many_v
  where
    many_v = try some_v <|> pure []
    some_v = liftA2 (:) v many_v

-- | 'optional'' but removes whitespaces etc.
optional :: Parser a -> Parser (Maybe a)
optional = optional' . lexeme

-- | @optional@ that backtracks
optional' :: Parser a -> Parser (Maybe a)
optional' v = Just <$> try v <|> pure Nothing

-- | Removes whitespaces and comments before parsing
lexeme :: Parser a -> Parser a
lexeme = (sc >>)

-- | String lexeme
string_ :: Text -> Parser Text
string_ = lexeme . string

-- | Character lexeme
char_ :: Char -> Parser Char
char_ = lexeme . char

-- | 'between' but removes whitespaces etc.
between' :: Parser open -> Parser close -> Parser a -> Parser a
between' o c p = between o (lexeme c) (lexeme p)

-- | take til line end
tilLineEnd :: Parser Text
tilLineEnd =
  takeWhileP (Just "Line content") (/= '\n')

-- | parses block comment
blockComment :: Parser Text
blockComment = do
  string "/*"
  blockCommentBody

blockCommentBody :: Parser Text
blockCommentBody = do
  c <- takeWhileP (Just "Block comment content") (/= '*')
  c' <- (string "*/" *> pure "") <|> do
    ch <- char '*'
    bc <- blockCommentBody
    pure (cons ch bc)
  pure (c <> c')

-- | Parses line comment
lineComment :: Parser Text
lineComment = do
  string "//"
  tilLineEnd

-- | Parses a comment block
comment :: Parser Comment
comment = bc <|> lc
  where
    bc = BlockComment <$> blockComment
    lc = LineComment <$> lineComment

identChar :: Char -> Bool
identChar c = isAlphaNum c || c == '_'

lcIdent :: Parser Text
lcIdent = do
  h <- lowerChar
  t <- takeWhileP (Just "ident-char") identChar
  pure $ cons h t

-- | Parses a lower case identfier
lcIdent' :: Parser Ident
lcIdent' = do
  ident <- lcIdent
  pure Unqualified {casing = L, ..}

ucIdent :: Parser Text
ucIdent = do
  h <- upperChar
  t <- takeWhileP (Just "ident-char") identChar
  pure $ cons h t

-- | Parses a upper case identifier
ucIdent' :: Parser Ident
ucIdent' = do
  ident <- ucIdent
  pure Unqualified {casing = U, ..}

nsIdent :: Parser Text
nsIdent = lcIdent

-- | Parses a lower case potentially qualified identifier
lcNsIdent :: Parser Ident
lcNsIdent = try unqualified <|> qualified
  where
    casing = L
    qualified = do
      ns <- nsIdent
      char '.'
      ident <- lcIdent
      pure Qualified {..}
    unqualified = do
      ident <- lcIdent
      pure Unqualified {..}

-- | Parses a upper case potentially qualified identifier
ucNsIdent :: Parser Ident
ucNsIdent = try unqualified <|> qualified
  where
    casing = U
    qualified = do
      ns <- nsIdent
      char '.'
      ident <- ucIdent
      pure Qualified {..}
    unqualified = do
      ident <- ucIdent
      pure Unqualified {..}

-- | Parses a lower case full identifier
lcFullIdent :: Parser FullIdent
lcFullIdent = do
  ident <- lcNsIdent
  name <- optional $ do
    char '#'
    L.hexadecimal
  pure $ FullName ident name

-- | Consumes keyword @Final@
finalKw :: Parser ()
finalKw = do
  string "Final"
  pure ()

-- | Consumes keyword @New@
newKw :: Parser ()
newKw = do
  string "New"
  pure ()

-- | Consumes keyword @Empty@
emptyKw :: Parser ()
emptyKw = do
  string "Empty"
  pure ()

-- | Parses a natural number
nat :: Parser Int
nat = L.decimal

-- | @sepBy1@ that backtracks and removes whitespaces etc.
sepBy1 :: Parser a -> Parser sep -> Parser (NonEmpty a)
sepBy1 p sep = liftA2 (:|) p (many (lexeme sep *> lexeme p))