-- |
-- Module      :  Data.SExpresso.Parse.Generic
-- Copyright   :  © 2019 Vincent Archambault
-- License     :  0BSD
--
-- Maintainer  :  Vincent Archambault <archambault.v@gmail.com>
-- Stability   :  experimental
--
-- This module includes everything you need to write a parser for
-- S-expression ('SExpr'). It is based on the "Text.Megaparsec"
-- library and parsers can be defined for any kind of ('MonadParsec' e
-- s m) instance. This is quite generic, if you are working with
-- streams of 'Char', we suggest you also import
-- "Data.SExpresso.Parse.Char" or simply "Data.SExpresso.Parse" which
-- re-exports everything.
--
-- You can customize your 'SExpr' parser by specifying the following:
--
--   * The parser for atoms
--
--   * The opening tag, the closing tag, and a possible dependency of
--     the closing tag on the opening one.
--
--   * If some space is required or optional between any pair of
--     atoms.
--
--   * How to parse space (ex: treat comments as whitespace)

{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ExistentialQuantification #-}

module Data.SExpresso.Parse.Generic
  (
    SExprParser(..),

    getAtom,
    getSpace,
    getSpacingRule,

    setTags,
    setTagsFromList,
    setTagsFromMap,
    setSpace,
    setSpacingRule,
    setAtom,

    SpacingRule(..),
    spaceIsMandatory,
    spaceIsOptional,
    mkSpacingRule,

    withLocation,

    parseSExprList,
    parseSExpr,
    decodeOne,
    decode
   )
  where

import Data.Maybe
import qualified Data.Map as M
import Control.Applicative
import Text.Megaparsec
import Data.SExpresso.SExpr
import Data.SExpresso.Parse.Location

-- | The 'SpacingRule' datatype is used to indicate if space is optional or mandatory between two consecutive @'SAtom' _@.
data SpacingRule =
  -- | Space is mandatory
  SMandatory
  -- | Space is optional
  | SOptional
   deriving (Show, Eq)

-- | The @'SExprParser' m b a@ datatype defines how to parse an
-- @'SExpr' b a@. Most parsing functions require the underlying monad
-- @m@ to be an instance of ('MonadParsec' e s m).

data SExprParser m b a
  -- | The @c@ parameter in the first two arguments is the type of the
  -- relation between the opening tag and the closing one.
  = forall c. SExprParser
  (m c) -- ^ The parser for the opening tag. Returns an object of an
        -- arbitrary type @c@ that will be used to create the closing
        -- tag parser.
  (c -> m b) -- ^ A function that takes the object returned by the
             -- opening tag parser and provide a parser for the
             -- closing tag.
  (m a) -- ^ The parser for atoms
  (m ()) -- ^ A parser for space tokens which does not accept empty
         -- input (e.g. 'Text.Megaparsec.Char.space1')
  (a -> a -> SpacingRule) -- ^ A function to tell if two consecutive
                          -- atoms must be separated by space or
                          -- not. See also 'mkSpacingRule' and
                          -- 'setSpacingRule'

-- | The 'getSpace' function returns the parser for whitespace of an 'SExprParser' object.
getSpace :: SExprParser m b a -> m ()
getSpace (SExprParser _ _ _ sp _) = sp

-- | The 'getSpacingRule' function returns spacing rule function of an 'SExprParser' object.
getSpacingRule :: SExprParser m b a -> (a -> a -> SpacingRule)
getSpacingRule (SExprParser _ _ _ _ sr) = sr

-- | The 'getAtom' function returns the parser for atoms of an 'SExprParser' object.
getAtom :: SExprParser m b a -> m a
getAtom (SExprParser _ _ a _ _) = a

-- | The 'withLocation' function adds source location to a @'SExprParser'@. See also 'Location'.
withLocation :: (MonadParsec e s m) => SExprParser m b a -> SExprParser m (Located b) (Located a)
withLocation (SExprParser pSTag pETag atom sp sr) =
  let s = do
        pos <- getSourcePos
        c <- pSTag
        return (pos, c)
      e = \(pos, c) -> do
        b <- pETag c
        pos2 <- getSourcePos
        return $ At (Span pos pos2) b
  in SExprParser s e (located atom) sp (\(At _ a1) (At _ a2) -> sr a1 a2)

-- | The 'setAtom' function updates a parser with a new parser for atoms and and new spacing rule function.
setAtom :: m a -> (a -> a -> SpacingRule) -> SExprParser m b a' -> SExprParser m b a
setAtom a sr (SExprParser pSTag pETag _ sp _) = SExprParser pSTag pETag a sp sr

-- | The 'setTags' function updates a parser with a new parser for the opening and closing tags.
setTags :: m c -> (c -> m b) -> SExprParser m b' a -> SExprParser m b a
setTags s e (SExprParser _ _ a sp sr) = SExprParser s e a sp sr

-- | The 'setTagsFromList' function helps you build the opening and
-- closing parsers from a list of triplets. Each triplet specifies a
-- stream of tokens to parse as the opening tag, a stream of tokens to
-- parse at the closing tag and what to return when this pair is
-- encountered. The 'setTagsFromList' can handle multiple triplets
-- with the same opening tags. See also 'setTagsFromMap'.
--
-- The example e1 parses "()" as @'SList' () []@.
--
-- > e1 = setTagsFromList [("(", ")", ()] p
--
-- The example e2 parses both "()" and "[]" as @'SList' () []@ but does
-- not parse "(]" or "[)"
--
-- > e2 = setTagsFromList [("(", ")", ()), ("[", "]", ())] p 
--
-- The example e3 parses "()" as @'SList' List []@ and "#()" as
-- @'SList' Vector []@, but does not parse "(]" or "[)"
--
-- > e3 = setTagsFromList [("(", ")", List), ("#(",")",Vector)] p
--
-- The example e4 parses "()" as @'SList' ')' []@ and "(]" as
-- @'SList' ']' []@, but does not parse "])"
--
-- > e4 = setTagsFromList [("(", ")", ')'), ("(", "]", ']')] p 
setTagsFromList ::  (MonadParsec e s m) =>
                    [(Tokens s, Tokens s, b)] -> SExprParser m b' a -> SExprParser m b a
setTagsFromList l p =
  let m = M.fromListWith (++) $ map (\(s,e,b) -> (s, [(e,b)])) l
  in setTagsFromMap m p

-- | The 'setTagsFromMap' function helps you build the opening and
-- closing parsers from a map. Each key specifies a stream of tokens to
-- parse as the opening tag and the value of the map specifies one or
-- more streams of tokens to parse at the closing tag and what to
-- return when this pair is encountered. See also 'setTagsFromList'.
--
-- The example e1 parses "()" as @'SList' () []@.
--
-- > e1 = setTagsFromList $ M.fromList [("(", [")", ()]] p
--
-- The example e2 parses both "()" and "[]" as @'SList' () []@ but does
-- not parse "(]" or "[)"
--
-- > e2 = setTagsFromList $ M.fromList [("(", [")", ()]), ("[", ["]", ()])] p 
--
-- The example e3 parses "()" as @'SList' List []@ and "#()" as
-- @'SList' Vector []@, but does not parse "(]" or "[)"
--
-- > e3 = setTagsFromList $ M.fromList [("(", [")", List]), ("#(", [")",Vector])] p
--
-- The example e4 parses "()" as @'SList' ')' []@ and "(]" as
-- @'SList' ']' []@, but does not parse "])"
--
-- > e4 = setTagsFromList $ M.fromList [("(", [(")", ')'), ("]", ']')])] p 
setTagsFromMap :: (MonadParsec e s m) =>
                  M.Map (Tokens s) [(Tokens s, b)] -> SExprParser m b' a -> SExprParser m b a
setTagsFromMap m p =
  let l = M.toList m

      choose [] = empty
      choose ((s, eb) : ts) = (chunk s >> return eb) <|> choose ts

      stag = choose l

      etag = \xs -> choice $ map (\(e, b) -> chunk e >> return b) xs
  in setTags stag etag p

-- | The 'spaceIsMandatory' function is a spacing rule where space is always mandatory. See also 'getSpacingRule'.
spaceIsMandatory :: a -> a -> SpacingRule
spaceIsMandatory = \_ _ -> SMandatory

-- | The 'spaceIsOptional' function is a spacing rule where space is always optional. See also 'getSpacingRule'.
spaceIsOptional :: a -> a -> SpacingRule
spaceIsOptional = \_ _ -> SOptional

-- | The 'setSpacingRule' function modifies a 'SExprParser' by setting
-- the function to tell if two consecutive atoms must be separated by
-- space or not. See also 'mkSpacingRule'.
setSpacingRule :: (a -> a -> SpacingRule) -> SExprParser m b a -> SExprParser m b a
setSpacingRule r p@(SExprParser pSTag pETag _ _ _) = SExprParser pSTag pETag (getAtom p) (getSpace p) r

-- | The 'mkSpacingRule' function is a helper to create a valid
-- spacing rule function for 'SExprParser' when some atoms have the
-- same 'SpacingRule' both before and after no matter what the other
-- atom is. It takes as argument a function @f@ that takes a single
-- atom and returns the 'SpacingRule' that applies both before and
-- after this atom.
--
-- For example, to create a spacing rule where space is optional both
-- before and after the fictitious @MyString@ token:
--
-- > s (MyString _) = SOptional
-- > s _ = Mandatory
-- > spacingRule = mkSpacingRule s
--
-- The above is equivalent to :
--
-- > spacingRule (MyString _) _ = SOptional
-- > spacingRule _ (MyString _) = SOptional
-- > spacingRule _ _ = SMandatory

mkSpacingRule :: (a -> SpacingRule) -> (a -> a -> SpacingRule)
mkSpacingRule f = \a1 a2 -> case f a1 of
                              SOptional -> SOptional
                              SMandatory -> f a2

-- | The 'setSpace' function modifies a 'SExprParser' by setting the
-- parser to parse whitespace. The parser for whitespace must not
-- accept the empty input (e.g. 'Text.Megaparsec.Char.space1')
setSpace :: m () -> SExprParser m b a -> SExprParser m b a
setSpace sp (SExprParser s e a _ sr) = SExprParser s e a sp sr

-- Tells if the space (or absence of) between two atoms is valid or not 
spaceIsOK :: (a -> a -> SpacingRule) -> (SExpr b a) -> (SExpr b a) -> Bool -> Bool
spaceIsOK getSpacingRule' sexp1 sexp2 spaceInBetween =
  case (sexp1, sexp2, spaceInBetween) of
    (_, _, True) -> True
    (SList _ _, _, _) -> True
    (_, SList _ _, _) -> True
    (SAtom a1, SAtom a2, _) -> getSpacingRule' a1 a2 == SOptional

sepEndBy' :: (MonadParsec e s m) => m (SExpr b a) -> m () -> (a -> a -> SpacingRule) -> m [SExpr b a]
sepEndBy' p sep f = sepEndBy1' p sep f <|> pure []

sepEndBy1' :: (MonadParsec e s m) => m (SExpr b a) -> m () -> (a -> a -> SpacingRule) -> m [SExpr b a]
sepEndBy1' p sep f = do
  x <- p
  xs <- parseContent x
  return $ x : xs

  where parseContent a1 = do
          s <- maybe False (const True) <$> optional sep
          mpos <- if not s then Just <$> getSourcePos else return Nothing
          mx <- optional p
          case mx of
            Nothing -> return []
            Just a2 ->
              if spaceIsOK f a1 a2 s
              then do
                xs <- parseContent a2
                return $ a2 : xs
              else fail ("The previous two atoms are not separated by space.\n" ++
                         "A space was expected at " ++ sourcePosPretty (fromJust mpos))

-- | The 'parseSExprList' function return a parser for parsing S-expression of the form @'SList' _ _@.
parseSExprList :: (MonadParsec e s m) =>
                SExprParser m b a -> m (SExpr b a)
parseSExprList def@(SExprParser pSTag pETag _ sp sr)  = do
          c <- pSTag
          _ <- optional sp
          xs <- sepEndBy' (parseSExpr def) sp sr
          b <- pETag c
          return $ SList b xs

-- | The 'parseSExpr' function return a parser for parsing
-- S-expression ('SExpr'), that is either an atom (@'SAtom' _@) or a
-- list @'SList' _ _@. See also 'decodeOne' and 'decode'.
parseSExpr :: (MonadParsec e s m) =>
              SExprParser m b a -> m (SExpr b a)
parseSExpr def = (getAtom def >>= return . SAtom) <|> (parseSExprList def)

-- | The 'decodeOne' function return a parser for parsing a file
-- containing only one S-expression ('SExpr'). It can parse extra
-- whitespace at the beginning and at the end of the file. See also
-- 'parseSExpr' and 'decode'.
decodeOne :: (MonadParsec e s m) => SExprParser m b a -> m (SExpr b a)
decodeOne def =
  let ws = getSpace def
  in optional ws *> parseSExpr def <* (optional ws >> eof)

-- | The 'decode' function return a parser for parsing a file
-- containing many S-expression ('SExpr'). It can parse extra
-- whitespace at the beginning and at the end of the file. See also
-- 'parseSExpr' and 'decodeOne'.
decode :: (MonadParsec e s m) => SExprParser m b a -> m [SExpr b a]
decode def =
  let ws = getSpace def
  in optional ws *> sepEndBy' (parseSExpr def) ws (getSpacingRule def) <* eof