-- Extended Example
-- ================
--
-- This document shows off the features of the
-- [luthor](https://hackage.haskell.org/package/luthor) package. Since this
-- is an overview of every feature, we've decided to focus on a simple
-- example and not go too far in-depth on the possibilities luthor provides.
-- For that, see our other documentation, which goes into each toolset
-- independently. Also, since our example is actually a fairly small parser,
-- the benefits of using `Lex` aren't clear, but it does pay off once your
-- grammar gets perhaps two or three times this size.
--
-- It is highly recommended to read this file with the
-- [API Reference](https://hackage.haskell.org/package/luthor) on hand.
-- We use several functions from those modules but do not re-document
-- them here, so a serious understanding will require a little homework.
--
-- You might be reading this in html/markdown or in a Haskell file. The html
-- is generated directly from the Haskell, so you can be assured that the
-- code works as advertised, and you can also run it yourself.
--
-- First off, let's import some usual stuff...
import System.IO
import System.Environment
import System.Exit
import Data.List

-- and also import the relevant luthor modules.
import Text.Luthor
import Text.Luthor.Syntax
import Text.Luthor.Indent
import Text.Luthor.Lex as Lex
import Data.Functor.Identity (Identity)

-- Let's go ahead and define our abstract syntax right up front to give
-- context to our parsers.
data Lisp = Atom Atom | List [Lisp]
data Atom = ASymbol String | ANumber Rational | AString String

-- Since we're using a scannerful (lexing) parser, we'll need to connect the
-- two processing segments with a data type for tokens.
data Token = Space
           | AtomTok Atom
           | OpenParen | CloseParen
           | Indent | Nextline | Dedent
    deriving (Show)

-- A couple of high-level shortcuts so we don't clutter the rest of the code.
type ParserST = ((), IndentState String () Identity)
type Lexer a = ParsecI String () a
type Lexed = Lex String ParserST Token
type Parser a = Luthor Token ParserST a

parseLisp :: SourceName -> String -> Either ParseError [Lisp]
parseLisp = runLuthor lexer parser ((), startIndent (DontMix " ") wss)

-- And now we can hop right into the lexer.
-- We'll start with simple atoms...
lispSymbol :: Lexed
lispSymbol = lexeme $ AtomTok . ASymbol <$>
    let ident = charClass "a-zA-Z_0-9-" `many1Not` charClass "0-9-"
    in ident `notFollowedBy` canStartAtom

lispInteger :: Lexed
lispInteger = lexeme $ AtomTok . ANumber . fromIntegral <$>
    integer `notFollowedBy` (dot <|> void canStartAtom)

lispDecimal :: Lexed
lispDecimal = lexeme $ AtomTok . ANumber <$>
    scientific `notFollowedBy` canStartAtom

lispString :: Lexed
lispString = lexeme $ AtomTok . AString <$>
    dqString cEscapes `notFollowedBy` canStartAtom

canStartAtom :: Lexer Char
canStartAtom = aChar $ charClass "a-zA-Z_0-9\"+-"

-- and then handle punctuation.
lispPunct :: Lexed
lispPunct = lexeme $ dispatch
    [ (void $ char '(', pure OpenParen)
    , (void $ char ')', pure CloseParen)
    , (indent, pure Indent)
    , (nextline, pure Nextline)
    , (dedent, pure Dedent)
    ]

-- We'll also need to deal with whitespace.
-- It will especially come in handy when configuring the indentation part
-- of the parser to handle blank lines appropriately.
--
-- For our purposes, whitespace includes spaces and tabs (`lws`),
-- line comments (starting with `;`), and line folds (backslash-newline).
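--
-- To make the token layer concrete before we write those whitespace
-- lexers: by my reading of the definitions above (this is a worked
-- illustration, not captured lexer output), an input such as
-- `(double x)` should tokenize roughly as
--
--     [OpenParen, AtomTok (ASymbol "double"), Space,
--      AtomTok (ASymbol "x"), CloseParen]
--
-- with any indentation bookkeeping omitted for clarity.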
wss :: [Lexer ()]
wss = [ void lws, void $ lineComment ";", bsnl ]

ws :: Lexed
ws = lexeme $ Space <$ many1_ (choice wss)

-- Finally, we tie it all together into a token recognizer.
lispAtom :: Lexed
lispAtom = choice [ lispSymbol, lispInteger, lispDecimal, lispString ]

lexer :: Lexed
lexer = choice [ lispAtom, lispPunct, ws ]

-- Now, we can move on to parsing. Normal s-exprs are just an atom or
-- a parenthesized list of s-exprs. Adding indentation-sensitivity,
-- we also allow indented s-exprs separated by newlines.
-- Ah yes, and there's the special nil s-expr, spelled `()`.
--
-- There is one oddity: we want close parens and dedents to be
-- interchangeable. Therefore, a list expression can end with a dedent or
-- with a close paren, regardless of how it began.
--
-- Our first step is to provide a clean way to extract particular payloads
-- from our lexeme stream, the same way the Parsec implementation has to
-- define a way to get at the `Char`s when parsing `String`s.
atom :: Parser Lisp
atom = unlexWith $ \t -> case t of
    AtomTok x -> Just $ Atom x
    _         -> Nothing

openParen :: Parser ()
openParen = unlexWith $ \t -> case t of { OpenParen -> Just (); _ -> Nothing }

openIndent :: Parser ()
openIndent = unlexWith $ \t -> case t of { Indent -> Just (); _ -> Nothing }

close :: Parser ()
close = (endOfLexemes <|>) $ unlexWith $ \t -> case t of
    CloseParen -> Just ()
    Dedent     -> Just ()
    _          -> Nothing

next :: Parser ()
next = unlexWith $ \t -> case t of { Nextline -> Just (); _ -> Nothing }

nil :: Parser Lisp
nil = List [] <$ do
    openParen
    unlexWith $ \t -> case t of { CloseParen -> Just (); _ -> Nothing }

-- Now, we get down to the business of grammar:
bareExpr :: Parser [Lisp]
bareExpr = many1 expr

expr :: Parser Lisp
expr = atom <||> nil <||> parenExpr <||> indentExpr

parenExpr :: Parser Lisp
parenExpr = between openParen close $ List <$> bareExpr

indentExpr :: Parser Lisp
indentExpr = between openIndent close $ do
    inner <- bareExpr `sepBy1` next
    return $ case inner of
        [e] -> List e
        es  -> List (List <$> es)

-- Finally, we filter out extraneous whitespace and parse a file full of
-- s-exprs.
isExtraSpace :: Token -> Bool
isExtraSpace t = case t of { Space -> True; _ -> False }

parser :: Parser [Lisp]
parser = between (ignore isExtraSpace) endOfLexemes $
    (wrap <$> bareExpr) `sepEndBy` next
    where
    wrap [e] = e
    wrap es  = List es

-- ...and we're done with the parser. We've already built our `parseLisp`
-- shortcut, so we can move on to setting up a program that actually uses
-- our new parser. Before we write `main`, though, I'll set up some `Show`
-- instances...
instance Show Atom where
    show (ASymbol name) = name
    show (ANumber n) = show n
    show (AString str) = show str
instance Show Lisp where
    show (Atom a) = show a
    show (List xs) = "(" ++ intercalate " " (map show xs) ++ ")"

-- ...because the main really is just a transpiler from this
-- indentation-sensitive Lisp to a fully-parenthesized Lisp.
main :: IO ()
main = do
    inFile <- getArgs >>= \args -> case args of
        [inFile] -> return inFile
        _ -> hPutStrLn stderr "usage: lisp.hs filename" *> exitFailure
    results <- parseLisp inFile <$> readFile inFile
    case results of
        Right exprs -> mapM_ print exprs
        Left err -> print err *> exitFailure

-- And there you have it. Try this out on some of the example files included
-- in the package (`docs/*.l`), or experiment with your own.
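--
-- If you'd rather poke at the parser before touching files, `parseLisp`
-- can also be exercised straight from GHCi. The input below is just a
-- made-up example of mine, and the exact rendering depends on the `Show`
-- instances above, but for a fully-parenthesized input containing only
-- symbols I'd expect something like:
--
--     >>> parseLisp "<interactive>" "(foo (bar baz))"
--     Right [(foo (bar baz))]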
--
-- Going further, it would be a simple matter to introduce the rest of the
-- familiar Lisp syntax:
--
-- * Add quotation and quasiquotation by adding appropriate token sorts,
--   tokenizers, and parsers (a sketch follows at the end of this file).
-- * Add dotted-expressions with a token sort, tokenizer, and a
--   chainr-based parser.
-- * Comment out s-exprs the same way you might quote an s-expr.
--
-- And of course, it's not hard to build a Lisp interpreter. You could always
-- build a driver that interprets instead of transpiles.
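--
-- As a starting point for the quotation bullet, here is a rough sketch of
-- how that extension might look. It is only an illustration of the shape of
-- the change, not code from the package: it assumes an extra `Quote`
-- constructor on `Token` and a `quote` symbol in the output, and it reuses
-- only combinators that already appear above, so it is left commented out.
--
--     quoteTok :: Lexed
--     quoteTok = lexeme $ Quote <$ char '\''
--
--     quoted :: Parser Lisp
--     quoted = do
--         unlexWith $ \t -> case t of { Quote -> Just (); _ -> Nothing }
--         e <- expr
--         return $ List [Atom (ASymbol "quote"), e]
--
-- With those in place, `quoteTok` would be added to the alternatives in
-- `lexer`, and `quoted` to the alternatives in `expr`.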