{-
  Definitions

  * Lexer: `Text -> [Text]`
  * Token: The leaves in your AST, e.g. TextLiteral, Number, etc

  Here, we implement a structurally aware lexer that supports one token type
  (text literals) for convenience.
-}
module Snail.Lexer (
    -- * The parsers you should use
    SnailAst (..),
    sExpression,
    snailAst,

    -- * Exported for testing
    nonQuoteCharacter,
    textLiteral,
) where

import Data.Text (Text)
import Data.Text qualified as Text
import Data.Void
import Snail.Characters
import Text.Megaparsec hiding (token)
import Text.Megaparsec.Char
import Text.Megaparsec.Char.Lexer qualified as L

-- | TODO: 'Void' is the error type but we should use an explicit error type
type Parser = Parsec Void Text

{- | Megaparsec's 'skipLineComment' takes a prefix and skips lines that begin
 with that prefix
-}
skipLineComment :: Parser ()
skipLineComment :: Parser ()
skipLineComment = Tokens Text -> Parser ()
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Tokens s -> m ()
L.skipLineComment Tokens Text
"--"

{- | Megaparsec's 'skipBlockComment' takes prefix and suffix and skips anything
 in between
-}
skipBlockComment :: Parser ()
skipBlockComment :: Parser ()
skipBlockComment = Tokens Text -> Tokens Text -> Parser ()
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Tokens s -> Tokens s -> m ()
L.skipBlockCommentNested Tokens Text
"{-" Tokens Text
"-}"

{- | Generate a parser for whitespace in a language with 'skipLineComment' and
 'skipBlockComment'
-}
spaces :: Parser ()
spaces :: Parser ()
spaces = Parser () -> Parser () -> Parser () -> Parser ()
forall e s (m :: * -> *).
MonadParsec e s m =>
m () -> m () -> m () -> m ()
L.space Parser ()
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m ()
space1 Parser ()
skipLineComment Parser ()
skipBlockComment

-- | Parse a 'Text' verbatim
symbol :: Text -> Parser Text
symbol :: Text -> Parser Text
symbol = Parser ()
-> Tokens Text -> ParsecT Void Text Identity (Tokens Text)
forall e s (m :: * -> *).
MonadParsec e s m =>
m () -> Tokens s -> m (Tokens s)
L.symbol Parser ()
spaces

-- | Parse an S-Expression
parens :: Parser a -> Parser a
parens :: forall a. Parser a -> Parser a
parens = Parser Text
-> Parser Text
-> ParsecT Void Text Identity a
-> ParsecT Void Text Identity a
forall (m :: * -> *) open close a.
Applicative m =>
m open -> m close -> m a -> m a
between (Text -> Parser Text
symbol Text
"(") (Text -> Parser Text
symbol Text
")")

{- | The list of valid token characters, note that we allow invalid tokens at
 this point
-}
validCharacter :: Parser Char
validCharacter :: Parser Char
validCharacter =
    [Token Text] -> ParsecT Void Text Identity (Token Text)
forall (f :: * -> *) e s (m :: * -> *).
(Foldable f, MonadParsec e s m) =>
f (Token s) -> m (Token s)
oneOf
        ( String
initialCharacter
            String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
specialInitialCharacter
            String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
digitCharacter
            String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
specialSubsequentCharacter
        )

{-
    A possibly empty tree of s-expressions

    Technically,
    @
    Token (SourcePos {..}, "hello")
    @

    isn't a valid s-expression. This is,

    @
    SExpression [Token (SourcePos {..}, "hello")]
    @

    and this is also valid,

    @
    SExpression []
    @

    The 'Data.Tree.Tree' type in containers is non-empty which isn't exactly what we are looking for
-}
data SnailAst
    = Lexeme (SourcePos, Text)
    | TextLiteral (SourcePos, Text)
    | SExpression (Maybe Char) [SnailAst]
    deriving (SnailAst -> SnailAst -> Bool
(SnailAst -> SnailAst -> Bool)
-> (SnailAst -> SnailAst -> Bool) -> Eq SnailAst
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: SnailAst -> SnailAst -> Bool
== :: SnailAst -> SnailAst -> Bool
$c/= :: SnailAst -> SnailAst -> Bool
/= :: SnailAst -> SnailAst -> Bool
Eq, Int -> SnailAst -> String -> String
[SnailAst] -> String -> String
SnailAst -> String
(Int -> SnailAst -> String -> String)
-> (SnailAst -> String)
-> ([SnailAst] -> String -> String)
-> Show SnailAst
forall a.
(Int -> a -> String -> String)
-> (a -> String) -> ([a] -> String -> String) -> Show a
$cshowsPrec :: Int -> SnailAst -> String -> String
showsPrec :: Int -> SnailAst -> String -> String
$cshow :: SnailAst -> String
show :: SnailAst -> String
$cshowList :: [SnailAst] -> String -> String
showList :: [SnailAst] -> String -> String
Show)

{- | Any 'Text' object that starts with an appropriately valid character. This
 could be an variable or function name. For example, `hello` is a valid
 lexeme in the s-expression below.

 @
 (hello)
 @
-}
lexeme :: Parser SnailAst
lexeme :: Parser SnailAst
lexeme = do
    SourcePos
sourcePosition <- ParsecT Void Text Identity SourcePos
forall s e (m :: * -> *).
(TraversableStream s, MonadParsec e s m) =>
m SourcePos
getSourcePos
    String
lexeme' <- Parser Char -> ParsecT Void Text Identity String
forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
some Parser Char
validCharacter
    SnailAst -> Parser SnailAst
forall a. a -> ParsecT Void Text Identity a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (SnailAst -> Parser SnailAst) -> SnailAst -> Parser SnailAst
forall a b. (a -> b) -> a -> b
$ (SourcePos, Text) -> SnailAst
Lexeme (SourcePos
sourcePosition, String -> Text
Text.pack String
lexeme')

-- | An escaped quote to support nesting `"` inside a 'textLiteral'
escapedQuote :: Parser Text
escapedQuote :: Parser Text
escapedQuote = Tokens Text -> ParsecT Void Text Identity (Tokens Text)
forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string Text
Tokens Text
"\\\""

-- | Matches any non-quote character
nonQuoteCharacter :: Parser Text
nonQuoteCharacter :: Parser Text
nonQuoteCharacter = do
    Char
character <- Token Text -> ParsecT Void Text Identity (Token Text)
forall e s (m :: * -> *).
MonadParsec e s m =>
Token s -> m (Token s)
anySingleBut Char
Token Text
'\"'
    Text -> Parser Text
forall a. a -> ParsecT Void Text Identity a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Text -> Parser Text) -> Text -> Parser Text
forall a b. (a -> b) -> a -> b
$ Char -> Text
Text.singleton Char
character

quote :: Parser Char
quote :: Parser Char
quote = Token Text -> ParsecT Void Text Identity (Token Text)
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
Token Text
'\"'

quotes :: Parser a -> Parser a
quotes :: forall a. Parser a -> Parser a
quotes = Parser Char
-> Parser Char
-> ParsecT Void Text Identity a
-> ParsecT Void Text Identity a
forall (m :: * -> *) open close a.
Applicative m =>
m open -> m close -> m a -> m a
between Parser Char
quote Parser Char
quote

{- | Matches a literal text and supports nested quotes, e.g.

 @
 ("hello\"")
 @
-}
textLiteral :: Parser SnailAst
textLiteral :: Parser SnailAst
textLiteral = do
    SourcePos
sourcePosition <- ParsecT Void Text Identity SourcePos
forall s e (m :: * -> *).
(TraversableStream s, MonadParsec e s m) =>
m SourcePos
getSourcePos
    Maybe [Text]
mText <- Parser (Maybe [Text]) -> Parser (Maybe [Text])
forall a. Parser a -> Parser a
quotes (Parser (Maybe [Text]) -> Parser (Maybe [Text]))
-> (ParsecT Void Text Identity [Text] -> Parser (Maybe [Text]))
-> ParsecT Void Text Identity [Text]
-> Parser (Maybe [Text])
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ParsecT Void Text Identity [Text] -> Parser (Maybe [Text])
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional (ParsecT Void Text Identity [Text] -> Parser (Maybe [Text]))
-> ParsecT Void Text Identity [Text] -> Parser (Maybe [Text])
forall a b. (a -> b) -> a -> b
$ Parser Text -> ParsecT Void Text Identity [Text]
forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
some (Parser Text -> ParsecT Void Text Identity [Text])
-> Parser Text -> ParsecT Void Text Identity [Text]
forall a b. (a -> b) -> a -> b
$ Parser Text
escapedQuote Parser Text -> Parser Text -> Parser Text
forall a.
ParsecT Void Text Identity a
-> ParsecT Void Text Identity a -> ParsecT Void Text Identity a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Text
nonQuoteCharacter
    Parser Char -> Parser ()
forall a. ParsecT Void Text Identity a -> Parser ()
forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m ()
notFollowedBy (Parser Char
validCharacter Parser Char -> Parser Char -> Parser Char
forall a.
ParsecT Void Text Identity a
-> ParsecT Void Text Identity a -> ParsecT Void Text Identity a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Token Text -> ParsecT Void Text Identity (Token Text)
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
Token Text
'\"')
    SnailAst -> Parser SnailAst
forall a. a -> ParsecT Void Text Identity a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (SnailAst -> Parser SnailAst) -> SnailAst -> Parser SnailAst
forall a b. (a -> b) -> a -> b
$ case Maybe [Text]
mText of
        Maybe [Text]
Nothing -> (SourcePos, Text) -> SnailAst
TextLiteral (SourcePos
sourcePosition, Text
"")
        Just [Text]
text -> (SourcePos, Text) -> SnailAst
TextLiteral (SourcePos
sourcePosition, [Text] -> Text
Text.concat [Text]
text)

{- | Parse one of the possible structures in 'SnailAst'. These are parsed
 recursively separated by 'spaces' in 'sExpression'.
-}
leaves :: Parser SnailAst
leaves :: Parser SnailAst
leaves = Parser SnailAst
lexeme Parser SnailAst -> Parser SnailAst -> Parser SnailAst
forall a.
ParsecT Void Text Identity a
-> ParsecT Void Text Identity a -> ParsecT Void Text Identity a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser SnailAst
textLiteral Parser SnailAst -> Parser SnailAst -> Parser SnailAst
forall a.
ParsecT Void Text Identity a
-> ParsecT Void Text Identity a -> ParsecT Void Text Identity a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser SnailAst
sExpression

-- | Parse an 'SExpression'
sExpression :: Parser SnailAst
sExpression :: Parser SnailAst
sExpression =
    Maybe Char -> [SnailAst] -> SnailAst
SExpression
        (Maybe Char -> [SnailAst] -> SnailAst)
-> ParsecT Void Text Identity (Maybe Char)
-> ParsecT Void Text Identity ([SnailAst] -> SnailAst)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Char -> ParsecT Void Text Identity (Maybe Char)
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional ([Token Text] -> ParsecT Void Text Identity (Token Text)
forall (f :: * -> *) e s (m :: * -> *).
(Foldable f, MonadParsec e s m) =>
f (Token s) -> m (Token s)
oneOf String
[Token Text]
parenthesisStartingCharacter)
        ParsecT Void Text Identity ([SnailAst] -> SnailAst)
-> ParsecT Void Text Identity [SnailAst] -> Parser SnailAst
forall a b.
ParsecT Void Text Identity (a -> b)
-> ParsecT Void Text Identity a -> ParsecT Void Text Identity b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> ParsecT Void Text Identity [SnailAst]
-> ParsecT Void Text Identity [SnailAst]
forall a. Parser a -> Parser a
parens (Parser SnailAst
leaves Parser SnailAst
-> Parser () -> ParsecT Void Text Identity [SnailAst]
forall (m :: * -> *) a sep. MonadPlus m => m a -> m sep -> m [a]
`sepEndBy` Parser ()
spaces)

-- | Parse a valid snail file
snailAst :: Parser [SnailAst]
snailAst :: ParsecT Void Text Identity [SnailAst]
snailAst = (Parser ()
spaces Parser ()
-> ParsecT Void Text Identity [SnailAst]
-> ParsecT Void Text Identity [SnailAst]
forall a b.
ParsecT Void Text Identity a
-> ParsecT Void Text Identity b -> ParsecT Void Text Identity b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser SnailAst
sExpression Parser SnailAst
-> Parser () -> ParsecT Void Text Identity [SnailAst]
forall (m :: * -> *) a sep. MonadPlus m => m a -> m sep -> m [a]
`sepEndBy1` Parser ()
spaces) ParsecT Void Text Identity [SnailAst]
-> Parser () -> ParsecT Void Text Identity [SnailAst]
forall a b.
ParsecT Void Text Identity a
-> ParsecT Void Text Identity b -> ParsecT Void Text Identity a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
forall e s (m :: * -> *). MonadParsec e s m => m ()
eof