{-# LANGUAGE TypeOperators, FlexibleInstances #-}

-- |
-- This module provides the same functions and combinators as "GLL.Combinators.Interface".
-- The only difference is that the combinators of this module construct only symbol expressions ('SymbExpr'/'BNF').
-- The combinators are therefore easier to use: they are just as freely combined but with simpler types and simpler type-errors.
-- However, the the underlying grammars are binarised, resulting in slower parsing.
module GLL.Combinators.BinaryInterface (
    -- * Elementary parsers
    term_parser, satisfy,
    -- ** Elementary parsers using the 'Token' datatype 
    keychar, keyword, int_lit, float_lit, bool_lit, char_lit, string_lit, alt_id_lit, id_lit, token,
    -- ** Elementary character-level parsers
    char,
    -- * Elementary combinators
    -- *** Sequencing
    (<**>),
    -- *** Choice
    (<||>),
    -- *** Semantic actions
    (<$$>),
    -- *** Nonterminal introduction
    (<:=>),(<::=>),chooses,chooses_prec,
    -- * Types
    -- ** Grammar (combinator expression) types
    BNF, SymbExpr, toSymb, mkRule,
    -- ** Parseable token types 
    Token(..), Parseable(..), SubsumesToken(..), unlexTokens, unlexToken,
    -- * Running a parser 
    grammarOf, parse, printParseData, evaluatorWithParseData,
    -- **  Running a parser with options
    parseWithOptions, parseWithParseOptions, printParseDataWithOptions, evaluatorWithParseDataAndOptions,printGrammarData,
    -- *** Possible options
    CombinatorOptions, CombinatorOption,
             GLL.Combinators.Options.maximumErrors, throwErrors,
             maximumPivot, maximumPivotAtNt, leftBiased,
    -- **** Parser options
    fullSPPF, allNodes, packedNodesOnly, strictBinarisation,
      GLL.Parser.noSelectTest,
    -- *** Running a parser with options and explicit failure
    parseWithOptionsAndError, parseWithParseOptionsAndError,
    -- ** Runing a parser to obtain 'ParseResult'.
    parseResult, parseResultWithOptions,ParseResult(..),
    -- ** Builtin lexers.
    default_lexer,
    -- *** Lexer settings
        lexer, LexerSettings(..), emptyLanguage,
    -- * Derived combinators
    mkNt,
    -- *** Ignoring semantic results
    (<$$), (**>), (<**),
    -- *** EBNF patterns
    optional, preferably, reluctantly, optionalWithDef,
    multiple, multiple1, multipleSepBy, multipleSepBy1,
      multipleSepBy2, within, parens, braces, brackets, angles,
     -- *** Disambiguation  
            (<:=), (<::=),(<<<**>), (<**>>>), (<<**>), (<<<**), (**>>>), (<**>>),
            longest_match,shortest_match,
            many, many1, some, some1,
            manySepBy, manySepBy1, manySepBy2,
              someSepBy, someSepBy1,someSepBy2,
     -- * Memoisation
    memo, newMemoTable, memClear, MemoTable, MemoRef, useMemoisation,
    module GLL.Combinators.Interface
    ) where

import GLL.Combinators.Interface hiding (within, (**>), (<**>), (<**), (<<<**>), (<<<**), (**>>>), (<**>>>), satisfy, (<||>), (<||), (||>), (<$$>), (<$$), (<:=>), (<:=),(<::=>), (<::=), mkNt, manySepBy, manySepBy1, manySepBy2, multiple, multipleSepBy, many, multipleSepBy1, multipleSepBy2, someSepBy, someSepBy1, someSepBy2, some, memo, some1, many1, multiple1, shortest_match, longest_match, (<**>>), (<<**>), angles, braces, brackets, parens, within, optional, optionalWithDef, preferably, reluctantly, chooses, chooses_prec)
import qualified GLL.Combinators.Interface as IF
import GLL.Combinators.Options
import GLL.Combinators.Visit.Join
import GLL.Combinators.Visit.Sem (emptyAncestors)
import GLL.Combinators.Memoisation
import GLL.Combinators.Lexer
import GLL.Types.Grammar
import GLL.Parser hiding (parse, parseWithOptions)
import qualified GLL.Parser as GLL

import Control.Compose (OO(..))
import Control.Arrow
import qualified Data.Array as A
import qualified Data.IntMap as IM
import qualified Data.Map as M
import Data.Text (pack)
import Data.IORef
import Data.Time.Clock
import System.IO.Unsafe


infixl 2 <:=>
-- | 
-- Form a rule by giving the name of the left-hand side of the new rule.
-- Use this combinator on recursive non-terminals.
(<:=>) :: (Show t, Ord t) => String -> BNF t a -> BNF t a
n <:=> p = n IF.<:=> p
infixl 2 <::=>

-- | 
--  Variant of '<:=>' for recursive non-terminals that have a potentially infinite
--  number of derivations for some input string.
--
--  A non-terminal yields infinitely many derivations  
--  if and only if it is left-recursive and would be
--  left-recursive if all the right-hand sides of the productions of the
--  grammar are reversed.
(<::=>) :: (Show t, Ord t) => String -> BNF t a -> BNF t a
n <::=> p = n IF.<::=> p

-- | Variant of '<::=>' that can be supplied with a list of alternates
chooses :: (Show t, Ord t) => String -> [BNF t a] -> BNF t a
chooses p alts = IF.chooses p alts

-- | Variant of '<::=' that can be supplied with a list of alternates
chooses_prec :: (Show t, Ord t) => String -> [BNF t a] -> BNF t a
chooses_prec p alts = IF.chooses_prec p alts

infixl 4 <$$>
-- |
-- Form an 'AltExpr' by mapping some semantic action overy the result
-- of the second argument.
(<$$>) :: (Show t, Ord t) => (a -> b) -> BNF t a -> BNF t b
f <$$> p' = IF.toSymb (f IF.<$$> p')

infixl 4 <**>,<<<**>,<**>>>
-- | 
-- Add a 'SymbExpr' to the right-hand side represented by an 'AltExpr'
-- creating a new 'AltExpr'. 
-- The semantic result of the first argument is applied to the second 
-- as a cross-product. 
(<**>) :: (Show t, Ord t) =>  BNF t (a -> b) -> BNF t a -> BNF t b
pl' <**> pr' = IF.toSymb (pl' IF.<**> pr')

-- | Variant of '<**>' that applies longest match on the left operand.
(<**>>>) :: (Show t, Ord t) => BNF t (a -> b) -> BNF t a -> BNF t b
pl' <**>>> pr' = IF.toSymb (pl' IF.<**>>> pr')

-- | Variant of '<**>' that applies shortest match on the left operand.
(<<<**>) :: (Show t, Ord t) => BNF t (a -> b) -> BNF t a -> BNF t b
pl' <<<**> pr' = IF.toSymb (pl' IF.<<<**> pr')

infixr 3 <||>
-- |
-- Add an 'AltExpr' to a list of 'AltExpr'
-- The resuling  '[] :. AltExpr' forms the right-hand side of a rule.
(<||>) :: (Show t, Ord t) => BNF t a -> BNF t a -> BNF t a
l' <||> r' = IF.toSymb (l' IF.<||> r')

-- |
-- Apply this combinator to an alternative to turn all underlying occurrences
-- of '<**>' (or variants) apply 'longest match'.
longest_match :: (Show t, Ord t) => BNF t a -> BNF t a
longest_match isalt = IF.toSymb (IF.longest_match isalt)

-- Apply this combinator to an alternative to turn all underlying occurrences
-- of '<**>' (or variants) apply 'shortest match'.
shortest_match :: (Show t, Ord t) => BNF t a -> BNF t a
shortest_match isalt = IF.toSymb (IF.shortest_match isalt)

-- | The empty right-hand side that yields its 
--  first argument as a semantic result.
satisfy :: (Show t, Ord t ) => a -> BNF t a
satisfy a = IF.toSymb (IF.satisfy a)

-- | 
-- This function memoises a parser, given:
--
-- * A 'MemoRef' pointing to a fresh 'MemoTable', created using 'newMemoTable'.
-- * The 'SymbExpr' to memoise.
--
-- Use 'memo' on those parsers that are expected to derive the same 
-- substring multiple times. If the same combinator expression is used
-- to parse multiple times the 'MemoRef' needs to be cleared using 'memClear'.
--
-- 'memo' relies on 'unsafePerformIO' and is therefore potentially unsafe.
-- The option 'useMemoisation' enables memoisation.
-- It is off by default, even if 'memo' is used in a combinator expression.
memo :: (Ord t, Show t) => MemoRef [a] -> BNF t a -> BNF t a
memo ref p' = IF.memo ref p'
-- | 
-- Helper function for defining new combinators.
-- Use 'mkNt' to form a new unique non-terminal name based on
-- the symbol of a given 'SymbExpr' and a 'String' that is unique to
-- the newly defined combinator.
mkNt :: (Show t, Ord t) => BNF t a -> String -> String
mkNt p str = IF.mkNt p str

-- | 
-- Variant of '<$$>' that ignores the semantic result of its second argument. 
(<$$) :: (Show t, Ord t) => b -> BNF t a -> BNF t b
f <$$ p = const f <$$> p
infixl 4 <$$

-- | 
infixl 4 **>, <<**>, **>>>

-- | 
-- Variant of '<**>' that ignores the semantic result of the first argument.
(**>) :: (Show t, Ord t) => BNF t a -> BNF t b -> BNF t b
l **> r = flip const <$$> l <**> r

-- Variant of '<**>' that applies longest match on its left operand. 
(**>>>) :: (Show t, Ord t) => BNF t a -> BNF t b -> BNF t b
l **>>> r = flip const <$$> l <**>>> r

-- Variant of '<**>' that ignores shortest match on its left operand.
(<<**>) :: (Show t, Ord t) => BNF t a -> BNF t b -> BNF t b
l <<**>r = flip const <$$> l <<<**> r


infixl 4 <**, <<<**, <**>>
-- | 
-- Variant of '<**>' that ignores the semantic result of the second argument.
(<**) :: (Show t, Ord t) => BNF t a -> BNF t b -> BNF t a
l <** r = const <$$> l <**> r

-- | Variant of '<**' that applies longest match on its left operand.
(<**>>) :: (Show t, Ord t) => BNF t a -> BNF t b -> BNF t a
l <**>> r = const <$$> l <**>>> r

-- | Variant '<**' that applies shortest match on its left operand
(<<<**) :: (Show t, Ord t) => BNF t a -> BNF t b -> BNF t a
l <<<** r = const <$$> l <<<**> r

-- | 
-- Variant of '<::=>' that prioritises productions from left-to-right (or top-to-bottom).
x <::= altPs = x IF.<::= altPs
infixl 2 <::=

-- | 
-- Variant of '<:=>' that prioritises productions from left-to-right (or top-to-bottom).
x <:= altPs = x IF.<:= altPs
infixl 2 <:=

-- | Try to apply a parser multiple times (0 or more) with shortest match
-- applied to each occurrence of the parser.
many :: (Show t, Ord t) => BNF t a -> BNF t [a]
many = multiple_ (<<<**>)

-- | Try to apply a parser multiple times (1 or more) with shortest match
-- applied to each occurrence of the parser.
many1 :: (Show t, Ord t) => BNF t a -> BNF t [a]
many1 = multiple1_ (<<<**>)

-- | Try to apply a parser multiple times (0 or more) with longest match
-- applied to each occurrence of the parser.
some :: (Show t, Ord t) => BNF t a -> BNF t [a]
some = multiple_ (<**>>>)

-- | Try to apply a parser multiple times (1 or more) with longest match
-- applied to each occurrence of the parser.
some1 :: (Show t, Ord t) => BNF t a -> BNF t [a]
some1 = multiple1_ (<**>>>)

-- | Try to apply a parser multiple times (0 or more). The results are returned in a list.
-- In the case of ambiguity the largest list is returned.
multiple :: (Show t, Ord t) => BNF t a -> BNF t [a]
multiple = multiple_ (<**>)

-- | Try to apply a parser multiple times (1 or more). The results are returned in a list.
-- In the case of ambiguity the largest list is returned.
multiple1 :: (Show t, Ord t) => BNF t a -> BNF t [a]
multiple1 = multiple1_ (<**>)

-- | Internal
multiple_ disa p = let fresh = mkNt p "*"
                    in fresh <::=> ((:) <$$> p) `disa` (multiple_ disa p) <||> satisfy []

-- | Internal
multiple1_ disa p = let fresh = mkNt p "+"
                     in fresh <::=> ((:) <$$> p) `disa` (multiple_ disa p)

-- | Same as 'many' but with an additional separator.
manySepBy :: (Show t, Ord t) => BNF t a -> BNF t b -> BNF t [a]
manySepBy = sepBy many
-- | Same as 'many1' but with an additional separator.
manySepBy1 :: (Show t, Ord t) => BNF t a -> BNF t b -> BNF t [a]
manySepBy1 = sepBy1 many
-- | Same as 'some1' but with an additional separator.
someSepBy :: (Show t, Ord t) => BNF t a -> BNF t b -> BNF t [a]
someSepBy = sepBy some
-- | Same as 'some1' but with an additional separator.
someSepBy1 :: (Show t, Ord t) => BNF t a -> BNF t b -> BNF t [a]
someSepBy1 = sepBy1 some
-- | Same as 'multiple' but with an additional separator.
multipleSepBy :: (Show t, Ord t) => BNF t a -> BNF t b -> BNF t [a]
multipleSepBy = sepBy multiple
-- | Same as 'multiple1' but with an additional separator.
multipleSepBy1 :: (Show t, Ord t) => BNF t a -> BNF t b -> BNF t [a]
multipleSepBy1 = sepBy1 multiple

sepBy :: (Show t, Ord t) => (BNF t a -> BNF t [a]) -> BNF t a -> BNF t b -> BNF t [a]
sepBy mult p c = mkRule $ satisfy [] <||> (:) <$$> p <**> mult (c **> p)

sepBy1 :: (Show t, Ord t) => (BNF t a -> BNF t [a]) -> BNF t a -> BNF t b -> BNF t [a]
sepBy1 mult p c = mkRule $ (:) <$$> p <**> mult (c **> p)

-- | Like 'multipleSepBy1' but matching at least two occurrences of the 
-- first argument. The returned list is therefore always of at least
-- length 2. At least one separator will be consumed.
multipleSepBy2 p s = mkRule $
  (:) <$$> p <** s <**> multipleSepBy1 p s

-- | Like 'multipleSepBy2' but matching the minimum number of 
-- occurrences of the first argument as possible (at least 2).
someSepBy2 p s = mkRule $
  (:) <$$> p <** s <**> someSepBy1 p s

-- | Like 'multipleSepBy2' but matching the maximum number of
-- occurrences of the first argument as possible (at least 2).
manySepBy2 p s = mkRule $
  (:) <$$> p <** s <**> manySepBy1 p s

-- | Derive either from the given symbol or the empty string.
optional :: (Show t, Ord t) => BNF t a -> BNF t (Maybe a)
optional p = fresh
  <:=>  Just <$$> p
  <||>  satisfy Nothing
  where fresh = mkNt p "?"

-- | Version of 'optional' that prefers to derive from the given symbol,
-- affects only nullable nonterminal symbols
preferably :: (Show t, Ord t) => BNF t a -> BNF t (Maybe a)
preferably p = fresh
  <:=   Just <$$> p
  <||>  satisfy Nothing
  where fresh = mkNt p "?"

-- | Version of 'optional' that prefers to derive the empty string from 
-- the given symbol, affects only nullable nonterminal symbols
reluctantly :: (Show t, Ord t) => BNF t a -> BNF t (Maybe a)
reluctantly p = fresh
  <:=   satisfy Nothing
  <||>  Just <$$> p
  where fresh = mkNt p "?"

optionalWithDef :: (Show t, Ord t) => BNF t a -> a -> BNF t a
optionalWithDef p def = mkNt p "?" <:=> id <$$> p <||> satisfy def

-- | Place a piece of BNF /within/ two other BNF fragments, ignoring their semantics.
within :: (Show t, Ord t) => BNF t a -> BNF t b -> BNF t c -> BNF t b
within l p r = IF.toSymb (l **> p <** r)

-- | Place a piece of BNF between the characters '(' and ')'.
parens p = within (keychar '(') p (keychar ')')
-- | Place a piece of BNF between the characters '{' and '}'.
braces p = within (keychar '{') p (keychar '}')
-- | Place a piece of BNF between the characters '[' and ']'.
brackets p = within (keychar '[') p (keychar ']')
-- | Place a piece of BNF between the characters '<' and '>'.
angles p = within (keychar '<') p (keychar '>')
-- | Place a piece of BNF between two single quotes.
quotes p = within (keychar '\'') p (keychar '\'')
-- | Place a piece of BNF between two double quotes.
dquotes p = within (keychar '"') p (keychar '"')