src/Text/Ogmarkup/Private/Parser.hs

{-|
Module      : Text.Ogmarkup.Private.Parser
Copyright   : (c) Ogma Project, 2016
License     : MIT
Stability   : experimental

This module provides several parsers that can be used in order to
extract the 'Ast' of an Ogmarkup document.

Please consider that only 'document' should be used outside this
module.
-}

{-# LANGUAGE OverloadedStrings #-}

module Text.Ogmarkup.Private.Parser where

import           Control.Monad
import           Data.String
import Text.ParserCombinators.Parsec hiding (parse)

import qualified Text.Ogmarkup.Private.Ast     as Ast

-- | Keep track of the currently opened formats.
data ParserState = ParserState { -- | Already parsing text with emphasis
                                 parseWithEmph        :: Bool
                                 -- | Already parsing text with strong
                                 --   emphasis
                               , parseWithStrongEmph  :: Bool
                                 -- | Already parsing a quote
                               , parseWithinQuote     :: Bool
                               }

-- | Update the 'ParserState' to guard against nested emphasis.
enterEmph :: OgmarkupParser ()
enterEmph = do st <- getState
               if parseWithEmph st
                 then fail "guard against nested emphasis"
                 else do setState st { parseWithEmph = True }
                         return ()

-- | Update the 'ParserState' to be able to parse input with emphasis
-- again.
leaveEmph :: OgmarkupParser ()
leaveEmph = do st <- getState
               if parseWithEmph st
                 then do setState st { parseWithEmph = False }
                         return ()
                 else fail "cannot leave emphasis when you did not enter"

-- | Update the 'ParserState' to guard against nested strong emphasis.
enterStrongEmph :: OgmarkupParser ()
enterStrongEmph = do st <- getState
                     if parseWithStrongEmph st
                       then fail "guard against nested strong emphasis"
                       else do setState st { parseWithStrongEmph = True }
                               return ()

-- | Update the 'ParserState' to be able to parse input with strong emphasis
-- again.
leaveStrongEmph :: OgmarkupParser ()
leaveStrongEmph = do st <- getState
                     if parseWithStrongEmph st
                       then do setState st { parseWithStrongEmph = False }
                               return ()
                       else fail "cannot leave strong emphasis when you did not enter"

-- | Update the 'ParserState' to guard against nested quoted inputs.
enterQuote :: OgmarkupParser ()
enterQuote = do st <- getState
                if parseWithinQuote st
                  then fail "guard against nested quotes"
                  else do setState st { parseWithinQuote = True }
                          return ()

-- | Update the 'ParserState' to be able to parse an input
-- surrounded by quotes again.
leaveQuote :: OgmarkupParser ()
leaveQuote = do st <- getState
                if parseWithinQuote st
                  then do setState st { parseWithinQuote = False }
                          return ()
                  else fail "cannot leave quote when you did not enter"

-- | A initial ParserState instance to be used at the begining of
-- a document parsing.
initParserState :: ParserState
initParserState = ParserState False False False

-- | An ogmarkup parser processes 'Char' tokens and carries a 'ParserState'.
type OgmarkupParser = GenParser Char ParserState

-- | A wrapper around the 'runParser' function of Parsec. It uses
-- 'initParserState' as an initial state.
parse :: OgmarkupParser a -> String -> String -> Either ParseError a
parse ogma = runParser ogma initParserState

-- | Try its best to parse an ogmarkup document. When it encounters an
--   error, it returns an Ast and the remaining input.
--
--   See 'Ast.Document'.
document :: IsString a
         => OgmarkupParser (Ast.Document a, String)
document = do spaces
              sects <- many (try section)
              input <- getInput

              return (sects, input)

-- | See 'Ast.Section'.
section :: IsString a
           => OgmarkupParser (Ast.Section a)
section = aside <|> story

-- | See 'Ast.Aside'.
aside :: IsString a
         => OgmarkupParser (Ast.Section a)
aside = do asideSeparator
           cls <- optionMaybe asideClass
           spaces
           ps <- many1 (paragraph <* spaces)
           asideSeparator
           manyTill space (skip (char '\n') <|> eof)
           spaces

           return $ Ast.Aside cls ps
  where
    asideClass :: IsString a
               => OgmarkupParser a
    asideClass = do a <- many1 letter
                    asideSeparator

                    return $ fromString a

-- | See 'Ast.Story'.
story :: IsString a
      => OgmarkupParser (Ast.Section a)
story = Ast.Story `fmap` many1 (paragraph <* spaces)

-- | See 'Ast.Paragraph'.
paragraph :: IsString a
          => OgmarkupParser (Ast.Paragraph a)
paragraph = many1 component <* blank

-- | See 'Ast.Component'.
component :: IsString a
          => OgmarkupParser (Ast.Component a)
component = try (dialogue <|> thought <|> teller) <|> illformed

-- | See 'Ast.IllFormed'.
illformed :: IsString a
          => OgmarkupParser (Ast.Component a)
illformed = Ast.IllFormed `fmap` restOfParagraph

-- | Parse the rest of the current paragraph with no regards for the
-- ogmarkup syntax. This Parser is used when the document is ill-formed, to
-- find a new point of synchronization.
restOfParagraph :: IsString a
                => OgmarkupParser a
restOfParagraph = do lookAhead anyToken
                     notFollowedBy endOfParagraph
                     str <- manyTill anyToken (lookAhead $ try endOfParagraph)
                     return $ fromString str

-- | See 'Ast.Teller'.
teller :: IsString a
       => OgmarkupParser (Ast.Component a)
teller = Ast.Teller `fmap` many1 format

-- | See 'Ast.Dialogue'.
dialogue :: IsString a
         => OgmarkupParser (Ast.Component a)
dialogue = talk '[' ']' Ast.Dialogue

-- | See 'Ast.Thought'.
thought :: IsString a
        => OgmarkupParser (Ast.Component a)
thought = talk '<' '>' Ast.Thought

-- | @'talk' c c' constr@ wraps a reply surrounded by @c@ and @c'@ inside
--   @constr@ (either 'Ast.Dialogue' or 'Ast.Thought').
talk :: IsString a
     => Char -- ^ A character to mark the begining of a reply
     -> Char -- ^ A character to mark the end of a reply
     -> (Ast.Reply a -> Maybe a -> Ast.Component a) -- ^ Either 'Ast.Dialogue' or 'Ast.Thought' according to the situation
     -> OgmarkupParser (Ast.Component a)
talk c c' constructor = do
  rep <- reply c c'
  auth <- optionMaybe characterName
  blank

  return $ constructor rep auth

-- | Parse the name of the character which speaks or thinks. According to
-- the ogmarkup syntax, it is surrounded by parentheses.
characterName :: IsString a
           => OgmarkupParser a
characterName = do
  char '('
  notFollowedBy (char ')') <?> "Empty character names are not allowed"
  auth <- manyTill anyToken (char ')') <?> "Missing closing )"

  return $ fromString auth

-- | 'reply' parses a 'Ast.Reply'.
reply :: IsString a
      => Char
      -> Char
      -> OgmarkupParser (Ast.Reply a)
reply c c' = do char c
                blank
                p1 <- many1 format
                x <- oneOf ['|', c']

                case x of '|' -> do blank
                                    ws <- many1 format
                                    char '|' <?> "Missing | to close the with say"
                                    blank
                                    p2 <- many format
                                    char c'

                                    return $ Ast.WithSay p1 ws p2
                          _ -> return $ Ast.Simple p1

-- | See 'Ast.Format'.
format :: IsString a
       => OgmarkupParser (Ast.Format a)
format = choice [ raw
                , emph
                , strongEmph
                , quote
                ]

-- | See 'Ast.Raw'.
raw :: IsString a
    => OgmarkupParser (Ast.Format a)
raw = Ast.Raw `fmap` many1 atom 

-- | See 'Ast.Emph'.
emph :: IsString a
     => OgmarkupParser (Ast.Format a)
emph = do char '*'
          blank
          enterEmph
          f <- format
          fs <- manyTill format (char '*' >> blank)
          leaveEmph
          return . Ast.Emph $ (f:fs)

-- | See 'Ast.StrongEmph'.
strongEmph :: IsString a
           => OgmarkupParser (Ast.Format a)
strongEmph = do char '+'
                blank
                enterStrongEmph
                f <- format
                fs <- manyTill format (char '+' >> blank)
                leaveStrongEmph
                return . Ast.StrongEmph $ (f:fs)

-- | See 'Ast.Quote'.
quote :: IsString a
      => OgmarkupParser (Ast.Format a)
quote = do char '"'
           blank
           enterQuote
           f <- format
           fs <- manyTill format (char '"' >> blank)
           leaveQuote
           return . Ast.Quote $ (f:fs)

-- | See 'Ast.Atom'.
atom :: IsString a
     => OgmarkupParser (Ast.Atom a)
atom = (mark <|> longword <|> word) <* blank

-- | See 'Ast.Word'. This parser does not consume the following spaces, so
--   the caller needs to take care of it.
word :: IsString a
     => OgmarkupParser (Ast.Atom a)
word = do lookAhead anyToken -- not the end of the parser
          notFollowedBy endOfWord

          str <- manyTill anyToken (lookAhead $ try endOfWord)

          return $ Ast.Word (fromString str)
  where
    specChar = "\"«»`+*[]<>|_\'’"

    endOfWord :: OgmarkupParser ()
    endOfWord =     eof <|> skip space <|> skip (oneOf specChar) <|> skip mark

-- | Wrap a raw string surrounded by @`@ inside a 'Ast.Word'.
--
--   >>> parse longword "" "`test *ei*`"
--   Right (Ast.Word "test *ei*")
--
--   Therefore, @`@ can be used to insert normally reserved symbol
--   inside a generated document.
longword :: IsString a
         => OgmarkupParser (Ast.Atom a)
longword = do char '`'
              notFollowedBy (char '`') <?> "empty raw string are not accepted"
              str <- manyTill anyToken (char '`')
              return $ Ast.Word (fromString str)

-- | See 'Ast.Punctuation'. Be aware that 'mark' does not parse the quotes
--   because they are processed 'quote'.
mark :: OgmarkupParser (Ast.Atom a)
mark = Ast.Punctuation `fmap` (semicolon
        <|> colon
        <|> question
        <|> exclamation
        <|> try longDash
        <|> try dash
        <|> hyphen
        <|> comma
        <|> apostrophe
        <|> try suspensionPoints
        <|> point)
  where
    parseMark p m = p >> return m

    semicolon        = parseMark (char ';') Ast.Semicolon
    colon            = parseMark (char ':') Ast.Colon
    question         = parseMark (char '?') Ast.Question
    exclamation      = parseMark (char '!') Ast.Exclamation
    longDash         = parseMark (string "—" <|> string "---") Ast.LongDash
    dash             = parseMark (string "–" <|> string "--") Ast.Dash
    hyphen           = parseMark (char '-') Ast.Hyphen
    comma            = parseMark (char ',') Ast.Comma
    point            = parseMark (char '.') Ast.Point
    apostrophe       = parseMark (char '\'' <|> char '’') Ast.Apostrophe
    suspensionPoints = parseMark (string ".." >> many (char '.')) Ast.SuspensionPoints

-- | See 'Ast.OpenQuote'. This parser consumes the following blank (see 'blank')
--   and skip the result.
openQuote :: OgmarkupParser ()
openQuote = do char '«' <|> char '"'
               blank

-- | See 'Ast.CloseQuote'. This parser consumes the following blank (see 'blank')
--   and skip the result.
closeQuote :: OgmarkupParser ()
closeQuote = do char '»' <|> char '"'
                blank

-- | An aside section (see 'Ast.Aside') is a particular region
--   surrounded by two lines of underscores (at least three).
--   This parser consumes one such line.
asideSeparator :: OgmarkupParser ()
asideSeparator = do string "__"
                    many1 (char '_')

                    return ()

-- | The end of a paragraph is the end of the document or two blank lines
-- or an aside separator, that is a line of underscores.
endOfParagraph :: OgmarkupParser ()
endOfParagraph = try betweenTwoSections
                 <|> asideSeparator
                 <|> eof
  where
    betweenTwoSections :: OgmarkupParser ()
    betweenTwoSections = do count 2 $ manyTill space (eof <|> skip (char '\n'))
                            spaces

-- | This parser consumes all the white spaces until it finds either an aside
--   surrounding marker (see 'Ast.Aside'), the end of the document or
--   one blank line. The latter marks the end of the current paragraph.
blank :: OgmarkupParser ()
blank = optional (notFollowedBy endOfParagraph >> spaces)

-- | @skip p@ parses @p@ and skips the result.
skip :: OgmarkupParser a -> OgmarkupParser ()
skip = void