{-
  Definitions

  * Lexer: `Text -> [Text]`
  * Token: The leaves in your AST, e.g. TextLiteral, Number, etc

  Here, we implement a structurally aware lexer that supports one token type
  (text literals) for convenience.
-}
module Snail.Lexer (
    -- * The parsers you should use
    SnailAst (..),
    sExpression,
    snailAst,

    -- * Exported for testing
    nonQuoteCharacter,
    textLiteral,
) where

import Data.Text (Text)
import Data.Text qualified as Text
import Data.Void
import Snail.Characters
import Text.Megaparsec hiding (token)
import Text.Megaparsec.Char
import Text.Megaparsec.Char.Lexer qualified as L

-- | TODO: 'Void' is the error type but we should use an explicit error type
type Parser = Parsec Void Text

{- | Megaparsec's 'skipLineComment' takes a prefix and skips lines that begin
 with that prefix
-}
skipLineComment :: Parser ()
skipLineComment :: Parser ()
skipLineComment = forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Tokens s -> m ()
L.skipLineComment Tokens Text
"--"

{- | Megaparsec's 'skipBlockComment' takes prefix and suffix and skips anything
 in between
-}
skipBlockComment :: Parser ()
skipBlockComment :: Parser ()
skipBlockComment = forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Tokens s -> Tokens s -> m ()
L.skipBlockCommentNested Tokens Text
"{-" Tokens Text
"-}"

{- | Generate a parser for whitespace in a language with 'skipLineComment' and
 'skipBlockComment'
-}
spaces :: Parser ()
spaces :: Parser ()
spaces = forall e s (m :: * -> *).
MonadParsec e s m =>
m () -> m () -> m () -> m ()
L.space forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m ()
space1 Parser ()
skipLineComment Parser ()
skipBlockComment

-- | Parse a 'Text' verbatim
symbol :: Text -> Parser Text
symbol :: Text -> Parser Text
symbol = forall e s (m :: * -> *).
MonadParsec e s m =>
m () -> Tokens s -> m (Tokens s)
L.symbol Parser ()
spaces

-- | Parse an S-Expression
parens :: Parser a -> Parser a
parens :: forall a. Parser a -> Parser a
parens = forall (m :: * -> *) open close a.
Applicative m =>
m open -> m close -> m a -> m a
between (Text -> Parser Text
symbol Text
"(") (Text -> Parser Text
symbol Text
")")

{- | The list of valid token characters, note that we allow invalid tokens at
 this point
-}
validCharacter :: Parser Char
validCharacter :: Parser Char
validCharacter =
    forall (f :: * -> *) e s (m :: * -> *).
(Foldable f, MonadParsec e s m) =>
f (Token s) -> m (Token s)
oneOf
        ( String
initialCharacter
            forall a. Semigroup a => a -> a -> a
<> String
specialInitialCharacter
            forall a. Semigroup a => a -> a -> a
<> String
digitCharacter
            forall a. Semigroup a => a -> a -> a
<> String
specialSubsequentCharacter
        )

{-
    A possibly empty tree of s-expressions

    Technically,
    @
    Token (SourcePos {..}, "hello")
    @

    isn't a valid s-expression. This is,

    @
    SExpression [Token (SourcePos {..}, "hello")]
    @

    and this is also valid,

    @
    SExpression []
    @

    The 'Data.Tree.Tree' type in containers is non-empty which isn't exactly what we are looking for
-}
data SnailAst
    = Lexeme (SourcePos, Text)
    | TextLiteral (SourcePos, Text)
    | SExpression (Maybe Char) [SnailAst]
    deriving (SnailAst -> SnailAst -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: SnailAst -> SnailAst -> Bool
$c/= :: SnailAst -> SnailAst -> Bool
== :: SnailAst -> SnailAst -> Bool
$c== :: SnailAst -> SnailAst -> Bool
Eq, Int -> SnailAst -> ShowS
[SnailAst] -> ShowS
SnailAst -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [SnailAst] -> ShowS
$cshowList :: [SnailAst] -> ShowS
show :: SnailAst -> String
$cshow :: SnailAst -> String
showsPrec :: Int -> SnailAst -> ShowS
$cshowsPrec :: Int -> SnailAst -> ShowS
Show)

{- | Any 'Text' object that starts with an appropriately valid character. This
 could be an variable or function name. For example, `hello` is a valid
 lexeme in the s-expression below.

 @
 (hello)
 @
-}
lexeme :: Parser SnailAst
lexeme :: Parser SnailAst
lexeme = do
    SourcePos
sourcePosition <- forall s e (m :: * -> *).
(TraversableStream s, MonadParsec e s m) =>
m SourcePos
getSourcePos
    String
lexeme' <- forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
some Parser Char
validCharacter
    forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a b. (a -> b) -> a -> b
$ (SourcePos, Text) -> SnailAst
Lexeme (SourcePos
sourcePosition, String -> Text
Text.pack String
lexeme')

-- | An escaped quote to support nesting `"` inside a 'textLiteral'
escapedQuote :: Parser Text
escapedQuote :: Parser Text
escapedQuote = forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string Tokens Text
"\\\""

-- | Matches any non-quote character
nonQuoteCharacter :: Parser Text
nonQuoteCharacter :: Parser Text
nonQuoteCharacter = do
    Char
character <- forall e s (m :: * -> *).
MonadParsec e s m =>
Token s -> m (Token s)
anySingleBut Char
'\"'
    forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a b. (a -> b) -> a -> b
$ Char -> Text
Text.singleton Char
character

quote :: Parser Char
quote :: Parser Char
quote = forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
'\"'

quotes :: Parser a -> Parser a
quotes :: forall a. Parser a -> Parser a
quotes = forall (m :: * -> *) open close a.
Applicative m =>
m open -> m close -> m a -> m a
between Parser Char
quote Parser Char
quote

{- | Matches a literal text and supports nested quotes, e.g.

 @
 ("hello\"")
 @
-}
textLiteral :: Parser SnailAst
textLiteral :: Parser SnailAst
textLiteral = do
    SourcePos
sourcePosition <- forall s e (m :: * -> *).
(TraversableStream s, MonadParsec e s m) =>
m SourcePos
getSourcePos
    Maybe [Text]
mText <- forall a. Parser a -> Parser a
quotes forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
some forall a b. (a -> b) -> a -> b
$ Parser Text
escapedQuote forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Text
nonQuoteCharacter
    forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m ()
notFollowedBy (Parser Char
validCharacter forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
'\"')
    forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a b. (a -> b) -> a -> b
$ case Maybe [Text]
mText of
        Maybe [Text]
Nothing -> (SourcePos, Text) -> SnailAst
TextLiteral (SourcePos
sourcePosition, Text
"")
        Just [Text]
text -> (SourcePos, Text) -> SnailAst
TextLiteral (SourcePos
sourcePosition, [Text] -> Text
Text.concat [Text]
text)

{- | Parse one of the possible structures in 'SnailAst'. These are parsed
 recursively separated by 'spaces' in 'sExpression'.
-}
leaves :: Parser SnailAst
leaves :: Parser SnailAst
leaves = Parser SnailAst
lexeme forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser SnailAst
textLiteral forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser SnailAst
sExpression

-- | Parse an 'SExpression'
sExpression :: Parser SnailAst
sExpression :: Parser SnailAst
sExpression =
    Maybe Char -> [SnailAst] -> SnailAst
SExpression
        forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional (forall (f :: * -> *) e s (m :: * -> *).
(Foldable f, MonadParsec e s m) =>
f (Token s) -> m (Token s)
oneOf String
parenthesisStartingCharacter)
        forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall a. Parser a -> Parser a
parens (Parser SnailAst
leaves forall (m :: * -> *) a sep. MonadPlus m => m a -> m sep -> m [a]
`sepEndBy` Parser ()
spaces)

-- | Parse a valid snail file
snailAst :: Parser [SnailAst]
snailAst :: ParsecT Void Text Identity [SnailAst]
snailAst = (Parser ()
spaces forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser SnailAst
sExpression forall (m :: * -> *) a sep. MonadPlus m => m a -> m sep -> m [a]
`sepEndBy1` Parser ()
spaces) forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall e s (m :: * -> *). MonadParsec e s m => m ()
eof