{-# LANGUAGE OverloadedStrings #-}

module Pencil.Internal.Parser where

import Text.ParserCombinators.Parsec
import qualified Data.List as DL
import qualified Data.Text as T
import qualified Text.Parsec as P

-- Doctest setup.
--
-- $setup
-- >>> :set -XOverloadedStrings
-- >>> import Data.Either (isLeft)

-- | Pencil's @Page@ AST.
data PNode =
    PText T.Text
  | PVar T.Text
  | PFor T.Text [PNode]
  | PIf T.Text [PNode]
  | PPartial T.Text
  | PPreamble T.Text

  -- Signals a If/For expression in the stack waiting for expressions. So that we
  -- can find the next unused open if/for-statement in nested if/for-statements.
  | PMetaIf T.Text
  | PMetaFor T.Text

  -- A terminating node that represents the end of the program, to help with AST
  -- converstion
  | PMetaEnd
  deriving (Show, Eq)

-- | Pencil's tokens for content.
data Token =
    TokText T.Text
  | TokVar T.Text
  | TokFor T.Text
  | TokIf T.Text
  | TokPartial T.Text
  | TokPreamble T.Text
  | TokEnd
  deriving (Show, Eq)

-- | Convert Tokens to PNode AST.
--
-- >>> transform [TokText "hello", TokText "world"]
-- [PText "hello",PText "world"]
--
-- >>> transform [TokIf "title", TokEnd]
-- [PIf "title" []]
--
-- >>> transform [TokIf "title", TokText "hello", TokText "world", TokEnd]
-- [PIf "title" [PText "hello",PText "world"]]
--
-- > ${if(title)}
-- >   ${for(posts)}
-- >     world
-- >   ${end}
-- > ${end}
--
-- >>> transform [TokIf "title", TokFor "posts", TokText "world", TokEnd, TokEnd]
-- [PIf "title" [PFor "posts" [PText "world"]]]
--
-- > begin
-- > now
-- > ${if(title)}
-- >   hello
-- >   world
-- >   ${if(body)}
-- >     ${body}
-- >     ${someothervar}
-- >     wahh
-- >   ${end}
-- >   final
-- >   thing
-- > ${end}
-- > the
-- > lastline
--
-- >>> transform [TokText "begin", TokText "now", TokIf "title", TokText "hello", TokText "world", TokIf "body", TokVar "body", TokVar "someothervar", TokText "wahh", TokEnd, TokText "final", TokText "thing", TokEnd, TokText "the", TokText "lastline"]
-- [PText "begin",PText "now",PIf "title" [PText "hello",PText "world",PIf "body" [PVar "body",PVar "someothervar",PText "wahh"],PText "final",PText "thing"],PText "the",PText "lastline"]
--
-- > <!--PREAMBLE
-- > foo: bar
-- > do:
-- >   - re
-- >   - me
-- > -->
-- > Hello world ${foo}
--
-- >>> transform [TokPreamble "foo: bar\ndo:\n  - re\n  -me", TokText "Hello world ", TokVar "foo"]
-- [PPreamble "foo: bar\ndo:\n  - re\n  -me",PText "Hello world ",PVar "foo"]
--
transform :: [Token] -> [PNode]
transform toks =
  let stack = ast [] toks
  in reverse stack

-- | Converts Tokens, which is just the raw list of parsed tokens, into PNodes
-- which are the tree-structure expressions (i.e. if/for nesting)
--
-- This function works by using a stack to keep track of where we are for nested
-- expressions such as if and for statements. When a token that starts a nesting
-- is found (like a TokIf), a "meta" expression (PMetaIf) is pushed into the
-- stack. When we finally see an end token (TokEnd), we pop all the expressions
-- off the stack until the first meta tag (e.g PMetaIf) is reached. All the
-- expressions popped off are now known to be nested inside that if statement.
--
ast :: [PNode] -- stack
    -> [Token] -- remaining
    -> [PNode] -- (AST, remaining)
ast stack [] = stack
ast stack (TokText t : toks) = ast (PText t : stack) toks
ast stack (TokVar t : toks)  = ast (PVar t : stack) toks
ast stack (TokPartial fp : toks) = ast (PPartial fp : stack) toks
ast stack (TokPreamble t : toks) = ast (PPreamble t : stack) toks
ast stack (TokIf t : toks)   = ast (PMetaIf t : stack) toks
ast stack (TokFor t : toks)  = ast (PMetaFor t : stack) toks
ast stack (TokEnd : toks) =
  let (node, popped, remaining) = popNodes stack
      -- ^ Find the last unused if/for statement, and grab all the expressions
      -- in-between this TokEnd and the opening if/for keyword.
      n = case node of
            PMetaIf t -> PIf t popped
            PMetaFor t -> PFor t popped
            _ -> PMetaEnd
  -- Push the statement into the stack
  in ast (n : remaining) toks

-- | Pop nodes until we hit a If/For statement.
-- Return pair (constructor found, nodes popped, remaining stack)
popNodes :: [PNode] -> (PNode, [PNode], [PNode])
popNodes = popNodes_ []

-- | Helper for 'popNodes'.
popNodes_ :: [PNode] -> [PNode] -> (PNode, [PNode], [PNode])
popNodes_ popped [] = (PMetaEnd, popped, [])
popNodes_ popped (PMetaIf t : rest) = (PMetaIf t, popped, rest)
popNodes_ popped (PMetaFor t : rest) = (PMetaFor t, popped, rest)
popNodes_ popped (t : rest) = popNodes_ (t : popped) rest

-- | Render nodes as string.
renderNodes :: [PNode] -> T.Text
renderNodes = DL.foldl' (\str n -> (T.append str (renderNode n))) ""

-- | Render node as string.
renderNode :: PNode -> T.Text
renderNode (PText t) = t
renderNode (PVar t) = T.append (T.append "${" t) "}"
renderNode (PFor t nodes) =
  let for = T.append (T.append "${for(" t) ")}"
      body = renderNodes nodes
      end = "${end}"
  in T.append (T.append for body) end
renderNode (PIf t nodes) =
  let for = T.append (T.append "${if(" t) ")}"
      body = renderNodes nodes
      end = "${end}"
  in T.append (T.append for body) end
renderNode (PPartial file) = T.append (T.append "${partial(" file) ")}"
renderNode (PMetaIf v) = renderNode (PIf v [])
renderNode (PMetaFor v) = renderNode (PFor v [])
renderNode PMetaEnd = ""
renderNode (PPreamble _) = "" -- Don't render the PREAMBLE

-- | Render tokens.
renderTokens :: [Token] -> T.Text
renderTokens = DL.foldl' (\str n -> (T.append str (renderToken n))) ""

-- | Render token.
renderToken :: Token -> T.Text
renderToken (TokText t) = t
renderToken (TokVar t) = T.append (T.append "${" t) "}"
renderToken (TokPartial fp) = T.append (T.append "${partial(\"" fp) "\"}"
renderToken (TokFor t) = T.append (T.append "${for(" t) ")}"
renderToken (TokEnd) = "${end}"
renderToken (TokIf t) = T.append (T.append "${if(" t) ")}"
renderToken (TokPreamble _) = "" -- Hide preamble content

-- | Parse text.
parseText :: T.Text -> Either ParseError [PNode]
parseText text = do
  toks <- parse parseEverything (T.unpack "") (T.unpack text)
  return $ transform toks

-- | Parse everything.
--
-- >>> parse parseEverything "" "Hello ${man} and ${woman}."
-- Right [TokText "Hello ",TokVar "man",TokText " and ",TokVar "woman",TokText "."]
--
-- >>> parse parseEverything "" "Hello ${man} and ${if(woman)} text here ${end}."
-- Right [TokText "Hello ",TokVar "man",TokText " and ",TokIf "woman",TokText " text here ",TokEnd,TokText "."]
--
-- >>> parse parseEverything "" "Hi ${for(people)} ${name}, ${end} everyone!"
-- Right [TokText "Hi ",TokFor "people",TokText " ",TokVar "name",TokText ", ",TokEnd,TokText " everyone!"]
--
-- >>> parse parseEverything "" "${realvar} $.get(javascript) $$ $$$ $} $( $45.50 $$escape $${escape2} wonderful life! ${truth}"
-- Right [TokVar "realvar",TokText " $.get(javascript) $$ $$$ $} $( $45.50 $$escape ",TokText "${",TokText "escape2} wonderful life! ",TokVar "truth"]
--
-- >>> parse parseEverything "" "<!--PREAMBLE  \n  foo: bar\ndo:\n  - re\n  -me\n  -->waffle house ${lyfe}"
-- Right [TokPreamble "  \n  foo: bar\ndo:\n  - re\n  -me\n  ",TokText "waffle house ",TokVar "lyfe"]
--
-- >>> parse parseEverything "" "YO ${foo} <!--PREAMBLE  \n  ${foo}: bar\ndo:\n  - re\n  -me\n  -->waffle house ${lyfe}"
-- Right [TokText "YO ",TokVar "foo",TokText " ",TokPreamble "  \n  ${foo}: bar\ndo:\n  - re\n  -me\n  ",TokText "waffle house ",TokVar "lyfe"]
--
-- This is a degenerate case that we will just allow (for now) to go sideways:
-- >>> parse parseEverything "" "<b>this ${var never closes</b> ${realvar}"
-- Right [TokText "<b>this ",TokVar "var never closes</b> ${realvar"]
--
parseEverything :: Parser [Token]
parseEverything =
  -- Note that order matters here. We want "most general" to be last (variable
  -- names).
  many1 (try parsePreamble
     <|> try parseEscape
     <|> try parseContent
     <|> try parseEnd
     <|> try parseFor
     <|> try parseIf
     <|> try parseEnd
     <|> try parsePartial
     <|> parseVar)

-- >>> parse parseVar "" "${ffwe} yep"
-- Right (TokVar "ffwe")
--
-- >>> parse parseVar "" "${spaces technically allowed}"
-- Right (TokVar "spaces technically allowed")
--
-- >>> isLeft $ parse parseVar "" "Hello ${name}"
-- True
--
-- >>> isLeft $ parse parseVar "" "${}"
-- True
--
-- | Parse variables.
parseVar :: Parser Token
parseVar = try $ do
  _ <- char '$'
  _ <- char '{'
  varName <- many1 (noneOf "}")
  _ <- char '}'
  return $ TokVar (T.pack varName)

-- | Parse preamble.
parsePreamble :: Parser Token
parsePreamble = do
  _ <- parsePreambleStart

  -- "Note the overlapping parsers anyChar and string "-->", and therefore the
  -- use of the try combinator."
  -- (https://hackage.haskell.org/package/parsec-3.1.11/docs/Text-Parsec.html)
  content <- manyTill anyChar (try (string "-->"))
  return $ TokPreamble (T.pack content)

-- | Parse the start of a PREAMBLE.
parsePreambleStart :: Parser String
parsePreambleStart = string "<!--PREAMBLE"


-- | Parse partial commands.
--
-- >>> parse parsePartial "" "${partial(\"my/file/name.html\")}"
-- Right (TokPartial "my/file/name.html")
--
parsePartial :: Parser Token
parsePartial = do
  _ <- string "${partial(\""
  filename <- many (noneOf "\"")
  _ <- string "\")}"
  return $ TokPartial (T.pack filename)

-- | Parse escape sequence "$${"
--
-- >>> parse parseEscape "" "$${example}"
-- Right (TokText "${")
--
parseEscape :: Parser Token
parseEscape = do
  _ <- try $ string "$${"
  return (TokText "${")

-- | Parse boring, boring text.
--
-- >>> parse parseContent "" "hello ${ffwe} you!"
-- Right (TokText "hello ")
--
-- >>> parse parseContent "" "hello $.get() $ $( $$ you!"
-- Right (TokText "hello $.get() $ $( $$ you!")
--
-- Because of our first parser to grab a character that is not a $, we can't
-- grab strings that start with a $, even if it's text. It's a bug, just deal
-- with it for now.
-- >>> isLeft $ parse parseContent "" "$$$ what"
-- True
--
-- >>> isLeft $ parse parseContent "" "${name}!!"
-- True
--
parseContent :: Parser Token
parseContent = do
  -- The manyTill big parser below will accept an empty string, which is bad. So
  -- grab a single character to start things off.
  h <- noneOf "$"

  -- Grab chars until we see something that looks like a ${...}, or eof. Use
  -- both lookAhead (does not consume successful "${" found) and try (does not
  -- consume failure to find "${"). Not having both produces bugs, so.
  --
  -- Also grab "$${", which should be captured as an escape (parseEscape).
  --
  -- https://stackoverflow.com/questions/20020350/parsec-difference-between-try-and-lookahead
  stuff <- manyTill anyChar (try (lookAhead (string "$${")) <|>
                             try (lookAhead (string "${")) <|>
                             try (lookAhead parsePreambleStart) <|>
                             (eof >> return " "))
  return $ TokText (T.pack (h : stuff))

-- | Parse for loop declaration.
--
-- >>> parse parseFor "" "${for(posts)}"
-- Right (TokFor "posts")
--
-- >>> parse parseFor "" "${for(variable name with spaces technically allowed)}"
-- Right (TokFor "variable name with spaces technically allowed")
--
-- >>> isLeft $ parse parseFor "" "${for()}"
-- True
--
-- >>> isLeft $ parse parseFor "" "${for foo}"
-- True
--
parseFor :: Parser Token
parseFor = parseFunction "for" TokFor

-- | Parse if directive.
parseIf :: Parser Token
parseIf = parseFunction "if" TokIf

-- | General parse template functions.
parseFunction :: String -> (T.Text -> Token) -> Parser Token
parseFunction keyword ctor = do
  _ <- char '$'
  _ <- char '{'
  _ <- try $ string (keyword ++ "(")
  varName <- many1 (noneOf ")")
  _ <- char ')'
  _ <- char '}'
  return $ ctor (T.pack varName)

-- | Parse end keyword.
--
-- >>> parse parseEnd "" "${end}"
-- Right TokEnd
--
-- >>> isLeft $ parse parseEnd "" "${enddd}"
-- True
--
parseEnd :: Parser Token
parseEnd = do
  _ <- try $ string "${end}"
  return TokEnd

-- | A hack to capture strings that "almost" are templates. I couldn't figure
-- out another way.
parseFakeVar :: Parser Token
parseFakeVar = do
  _ <- char '$'
  n <- noneOf "{"
  rest <- many1 (noneOf "$")
  return $ TokText (T.pack ("$" ++ [n] ++ rest))

-- | @many1Till p end@ will parse one or more @p@ until @end.
--
-- From https://hackage.haskell.org/package/pandoc-1.10.0.4/docs/Text-Pandoc-Parsing.html
many1Till :: P.Stream s m t => P.ParsecT s u m a -> P.ParsecT s u m end -> P.ParsecT s u m [a]
many1Till p end = do
  first <- p
  rest <- manyTill p end
  return (first : rest)

-- | Find the preamble content from the given @PNode@s.
findPreambleText :: [PNode] -> Maybe T.Text
findPreambleText nodes = DL.find isPreamble nodes >>= preambleText

-- | Returns @True@ if the @PNode@ is a @PPreamble@.
isPreamble :: PNode -> Bool
isPreamble (PPreamble _) = True
isPreamble _ = False

-- | Gets the content of the @PPreamble@
preambleText :: PNode -> Maybe T.Text
preambleText (PPreamble t) = Just t
preambleText _ = Nothing