{-# LANGUAGE DeriveDataTypeable #-}

module Language.Sexp.Parser (
        Sexp(..), sexpParser,
        ParseException(..), parse, parseExn, parseMaybe
    ) where

import Control.Applicative ( (<$>), (<*), (*>), many )
import Control.Exception ( Exception )
import Data.Attoparsec.ByteString.Char8 ( char, space, notInClass, (<?>) )
import Data.Attoparsec.ByteString.Lazy ( Parser, Result(..) )
import Data.Attoparsec.Combinator ( choice )
import Data.ByteString.Lazy.Char8 ( ByteString )
import Data.Sexp ( Sexp(..), unescape )
import Data.Typeable ( Typeable )
import qualified Control.Exception as CE
import qualified Data.Attoparsec.ByteString.Char8 as AC
import qualified Data.Attoparsec.ByteString.Lazy as A
import qualified Data.ByteString.Lazy.Char8 as BL

data ParseException = ParseException String ByteString
                    deriving ( Show, Typeable )

instance Exception ParseException

-- | Parse S-Expressions from a lazy 'ByteString'.  If the parse was
-- successful, @Right sexps@ is returned; otherwise, @Left (errorMsg,
-- leftover)@ is returned.
parse :: ByteString -> Either (String, ByteString) [Sexp]
parse = resultToEither . A.parse (whiteSpace *> many sexpParser)
  where
    resultToEither (Fail leftover _ctxs reason) =
        Left (reason, leftover)
    resultToEither (Done leftover sexps) =
        if BL.null leftover
        then Right sexps
        else Left ("garbage at end", leftover)

-- | A variant of 'parse' that returns 'Nothing' if the parse fails.
parseMaybe :: ByteString -> Maybe [Sexp]
parseMaybe s =
    case parse s of
        Left _      -> Nothing
        Right sexps -> Just sexps

-- | A variant of 'parse' that throws a 'ParseException' if the parse
-- fails.
parseExn :: ByteString -> [Sexp]
parseExn text =
    case parse text of
        Left (reason, leftover) -> CE.throw (ParseException reason leftover)
        Right sexps             -> sexps

-- | A parser for S-Expressions.  Ignoring whitespace, we follow the
-- following EBNF:
--
-- SEXP           ::= '(' ATOM* ')' | ATOM
-- ATOM           ::= '"' ESCAPED_STRING* '"' | [^ \t\n()]+
-- ESCAPED_STRING ::= ...
--
sexpParser :: Parser Sexp
sexpParser =
    choice [ list <?> "list"
           , atom <?> "atom"
           ]
  where
    list = List <$> (char '(' *> whiteSpace *> many sexpParser <* char ')') <* whiteSpace
    atom = Atom . unescape <$> (choice [string, anything]) <* whiteSpace
    string = BL.fromChunks . (:[]) <$> (char '"' *> AC.scan False escapedStringScanner <* char '"')
    anything = BL.fromChunks . (:[]) <$> AC.takeWhile1 (notInClass " \t\n()")

    -- Scan an escaped string.
    escapedStringScanner :: Bool -> Char -> Maybe Bool
    escapedStringScanner True  _    = Just False
    escapedStringScanner False '\\' = Just True
    escapedStringScanner False '"'  = Nothing
    escapedStringScanner False _    = Just False

-- | A parser for conventional ASCII whitespace and ";" line comments.
whiteSpace :: Parser ()
whiteSpace = do
    _ <- many space
    _ <- many comment
    return ()
  where
    comment = char ';' >> many (AC.notChar '\n') >> many space