{-# LANGUAGE Trustworthy #-}
{-# LANGUAGE DeriveFunctor, StandaloneDeriving, RecordWildCards, NamedFieldPuns, CPP #-}
#include "portable-unlifted.h"
{-# OPTIONS_HADDOCK hide #-}
{-|
Module      : Text.Gigaparsec.Internal
Description : Internals of Gigaparsec
License     : BSD-3-Clause
Maintainer  : Jamie Willis, Gigaparsec Maintainers
Stability   : unstable

This module does __not__ adhere to PVP, and can change at any time as
required by the maintainers of the library. Use this functionality at your
own risk.

@since 0.1.0.0
-}
module Text.Gigaparsec.Internal (module Text.Gigaparsec.Internal) where

import Text.Gigaparsec.Internal.RT (RT)
import Text.Gigaparsec.Internal.Errors (ParseError, ExpectItem, CaretWidth)
import Text.Gigaparsec.Internal.Errors qualified as Errors (
    emptyErr, expectedErr, specialisedErr, mergeErr, unexpectedErr,
    expecteds, isExpectedEmpty, presentationOffset, useHints
  )

import Control.Applicative (Applicative(liftA2), Alternative(empty, (<|>), many, some)) -- liftA2 required until 9.6
import Control.Selective (Selective(select))

import Data.Set (Set)
import Data.Set qualified as Set (empty, union)

CPP_import_PortableUnlifted

{-
Notes:

We are making a stripped back implementation, where there are way fewer generalisations
on the type: for now, no monad transformers, generalised input types, etc etc.
For consistency with other libraries, this is usually called `Parsec`.

Experimentally, it seems like dual-continuation implementations may be
faster than quad-continuation implementations. This will need some more
investigation and benchmarking to be sure about this however. We'll get a
core representation settled before doing any "hard" work (the composite
combinator API, however, can be done whenever).
-}

{-|
This type represents parsers as a first-class value.

Values of this type are constructed using the library's combinators, to build
up a final 'Parsec' value that can be passed to 'Text.Gigaparsec.parse' or one
of the similar functions. This is implemented internally similar to other
libraries like @parsec@ and @gigaparsec@.
-}
type Parsec :: * -> *
newtype Parsec a = Parsec {
    forall a.
Parsec a
-> forall r.
   State
   -> (a -> State -> RT r) -> (ParseError -> State -> RT r) -> RT r
unParsec :: forall r. State
             -> (a -> State -> RT r) -- the good continuation
             -> (ParseError -> State -> RT r)      -- the bad continuation
             -> RT r
  }

deriving stock instance Functor Parsec -- not clear if there is a point to implementing this

instance Applicative Parsec where
  pure :: a -> Parsec a
  pure :: forall a. a -> Parsec a
pure a
x = forall a.
(forall r.
 State
 -> (a -> State -> RT r) -> (ParseError -> State -> RT r) -> RT r)
-> Parsec a
Parsec forall a b. (a -> b) -> a -> b
$ \State
st a -> State -> RT r
ok ParseError -> State -> RT r
_ -> a -> State -> RT r
ok a
x State
st
  -- Continue with x and no input consumed.

  liftA2 :: (a -> b -> c) -> Parsec a -> Parsec b -> Parsec c
  liftA2 :: forall a b c. (a -> b -> c) -> Parsec a -> Parsec b -> Parsec c
liftA2 a -> b -> c
f (Parsec forall r.
State
-> (a -> State -> RT r) -> (ParseError -> State -> RT r) -> RT r
p) (Parsec forall r.
State
-> (b -> State -> RT r) -> (ParseError -> State -> RT r) -> RT r
q) = forall a.
(forall r.
 State
 -> (a -> State -> RT r) -> (ParseError -> State -> RT r) -> RT r)
-> Parsec a
Parsec forall a b. (a -> b) -> a -> b
$ \State
st c -> State -> RT r
ok ParseError -> State -> RT r
err ->
    let ok' :: a -> State -> RT r
ok' a
x State
st' = forall r.
State
-> (b -> State -> RT r) -> (ParseError -> State -> RT r) -> RT r
q State
st' (c -> State -> RT r
ok forall b c a. (b -> c) -> (a -> b) -> a -> c
. a -> b -> c
f a
x) ParseError -> State -> RT r
err
    --                    ^^^^^^^^^^
    -- continue with (f x y), where y is the output of q
    in  forall r.
State
-> (a -> State -> RT r) -> (ParseError -> State -> RT r) -> RT r
p State
st a -> State -> RT r
ok' ParseError -> State -> RT r
err

  (*>) :: Parsec a -> Parsec b -> Parsec b
  *> :: forall a b. Parsec a -> Parsec b -> Parsec b
(*>) = forall (f :: * -> *) a b c.
Applicative f =>
(a -> b -> c) -> f a -> f b -> f c
liftA2 (forall a b. a -> b -> a
const forall a. a -> a
id)

  (<*) :: Parsec a -> Parsec b -> Parsec a
  <* :: forall a b. Parsec a -> Parsec b -> Parsec a
(<*) = forall (f :: * -> *) a b c.
Applicative f =>
(a -> b -> c) -> f a -> f b -> f c
liftA2 forall a b. a -> b -> a
const

  {-# INLINE pure #-}
  {-# INLINE liftA2 #-}
  {-# INLINE (<*) #-}
  {-# INLINE (*>) #-}

instance Selective Parsec where
  select :: Parsec (Either a b) -> Parsec (a -> b) -> Parsec b
  select :: forall a b. Parsec (Either a b) -> Parsec (a -> b) -> Parsec b
select Parsec (Either a b)
p Parsec (a -> b)
q = forall a b c.
Parsec (Either a b)
-> Parsec (a -> c) -> Parsec (b -> c) -> Parsec c
_branch Parsec (Either a b)
p Parsec (a -> b)
q (forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a. a -> a
id)

  {-# INLINE select #-}

{-# INLINE _branch #-}
{-|
This is an internal implementation of `branch`, which is more efficient than
the Selective default `branch`. We should be using this internally, and it
can be dropped if https://github.com/snowleopard/selective/issues/74 is implemented.
-}
_branch :: Parsec (Either a b) -> Parsec (a -> c) -> Parsec (b -> c) -> Parsec c
_branch :: forall a b c.
Parsec (Either a b)
-> Parsec (a -> c) -> Parsec (b -> c) -> Parsec c
_branch (Parsec forall r.
State
-> (Either a b -> State -> RT r)
-> (ParseError -> State -> RT r)
-> RT r
p) (Parsec forall r.
State
-> ((a -> c) -> State -> RT r)
-> (ParseError -> State -> RT r)
-> RT r
q1) (Parsec forall r.
State
-> ((b -> c) -> State -> RT r)
-> (ParseError -> State -> RT r)
-> RT r
q2) = forall a.
(forall r.
 State
 -> (a -> State -> RT r) -> (ParseError -> State -> RT r) -> RT r)
-> Parsec a
Parsec forall a b. (a -> b) -> a -> b
$ \State
st c -> State -> RT r
ok ParseError -> State -> RT r
err ->
  let ok' :: Either a b -> State -> RT r
ok' Either a b
x State
st' = case Either a b
x of
        Left a
a  -> forall r.
State
-> ((a -> c) -> State -> RT r)
-> (ParseError -> State -> RT r)
-> RT r
q1 State
st' (c -> State -> RT r
ok forall b c a. (b -> c) -> (a -> b) -> a -> c
. (forall a b. (a -> b) -> a -> b
$ a
a)) ParseError -> State -> RT r
err
        --                ^^^^^^^^^^^^
        Right b
b -> forall r.
State
-> ((b -> c) -> State -> RT r)
-> (ParseError -> State -> RT r)
-> RT r
q2 State
st' (c -> State -> RT r
ok forall b c a. (b -> c) -> (a -> b) -> a -> c
. (forall a b. (a -> b) -> a -> b
$ b
b)) ParseError -> State -> RT r
err
        --                ^^^^^^^^^^^^
        -- feed a/b to the function of the good continuation
  in  forall r.
State
-> (Either a b -> State -> RT r)
-> (ParseError -> State -> RT r)
-> RT r
p State
st Either a b -> State -> RT r
ok' ParseError -> State -> RT r
err

instance Monad Parsec where
  return :: a -> Parsec a
  return :: forall a. a -> Parsec a
return = forall (f :: * -> *) a. Applicative f => a -> f a
pure

  (>>=) :: Parsec a -> (a -> Parsec b) -> Parsec b
  Parsec forall r.
State
-> (a -> State -> RT r) -> (ParseError -> State -> RT r) -> RT r
p >>= :: forall a b. Parsec a -> (a -> Parsec b) -> Parsec b
>>= a -> Parsec b
f = forall a.
(forall r.
 State
 -> (a -> State -> RT r) -> (ParseError -> State -> RT r) -> RT r)
-> Parsec a
Parsec forall a b. (a -> b) -> a -> b
$ \State
st b -> State -> RT r
ok ParseError -> State -> RT r
err ->
    let ok' :: a -> State -> RT r
ok' a
x State
st' = forall a.
Parsec a
-> forall r.
   State
   -> (a -> State -> RT r) -> (ParseError -> State -> RT r) -> RT r
unParsec (a -> Parsec b
f a
x) State
st' b -> State -> RT r
ok ParseError -> State -> RT r
err
    --              ^^^^^^^^^^^^^^
    -- get the parser obtained from feeding the output of p to f
    in  forall r.
State
-> (a -> State -> RT r) -> (ParseError -> State -> RT r) -> RT r
p State
st a -> State -> RT r
ok' ParseError -> State -> RT r
err

  (>>) :: Parsec a -> Parsec b -> Parsec b
  >> :: forall a b. Parsec a -> Parsec b -> Parsec b
(>>) = forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
(*>)

  {-# INLINE return #-}
  {-# INLINE (>>=) #-}

raise :: (State -> ParseError) -> Parsec a
raise :: forall a. (State -> ParseError) -> Parsec a
raise State -> ParseError
mkErr = forall a.
(forall r.
 State
 -> (a -> State -> RT r) -> (ParseError -> State -> RT r) -> RT r)
-> Parsec a
Parsec forall a b. (a -> b) -> a -> b
$ \State
st a -> State -> RT r
_ ParseError -> State -> RT r
bad -> forall r.
(ParseError -> State -> RT r) -> ParseError -> State -> RT r
useHints ParseError -> State -> RT r
bad (State -> ParseError
mkErr State
st) State
st

instance Alternative Parsec where
  empty :: Parsec a
  empty :: forall a. Parsec a
empty = forall a. (State -> ParseError) -> Parsec a
raise (State -> Word -> ParseError
`emptyErr` Word
0)

  -- FIXME: I feel like there is something missing here with hint merging from ctx.mergeHints
  -- if the hint stack is not real then it lives in the continuation trace, but I don't know... which
  (<|>) :: Parsec a -> Parsec a -> Parsec a
  Parsec forall r.
State
-> (a -> State -> RT r) -> (ParseError -> State -> RT r) -> RT r
p <|> :: forall a. Parsec a -> Parsec a -> Parsec a
<|> Parsec forall r.
State
-> (a -> State -> RT r) -> (ParseError -> State -> RT r) -> RT r
q = forall a.
(forall r.
 State
 -> (a -> State -> RT r) -> (ParseError -> State -> RT r) -> RT r)
-> Parsec a
Parsec forall a b. (a -> b) -> a -> b
$ \State
st a -> State -> RT r
ok ParseError -> State -> RT r
bad ->
    let bad' :: ParseError -> State -> RT r
bad' ParseError
err State
st'
          | State -> Word
consumed State
st' forall a. Ord a => a -> a -> Bool
> State -> Word
consumed State
st = ParseError -> State -> RT r
bad ParseError
err State
st'
          --  ^ fail if p failed *and* consumed
          | Bool
otherwise    = forall r.
State
-> (a -> State -> RT r) -> (ParseError -> State -> RT r) -> RT r
q State
st' (\a
x State
st'' -> a -> State -> RT r
ok a
x (State -> ParseError -> State
errorToHints State
st'' ParseError
err))
                                 (\ParseError
err' -> ParseError -> State -> RT r
bad (ParseError -> ParseError -> ParseError
Errors.mergeErr ParseError
err ParseError
err'))
    in  forall r.
State
-> (a -> State -> RT r) -> (ParseError -> State -> RT r) -> RT r
p State
st a -> State -> RT r
ok ParseError -> State -> RT r
bad'

  many :: Parsec a -> Parsec [a]
  many :: forall a. Parsec a -> Parsec [a]
many = forall a b. (a -> b -> b) -> b -> Parsec a -> Parsec b
manyr (:) []

  some :: Parsec a -> Parsec [a]
  some :: forall a. Parsec a -> Parsec [a]
some = forall a b. (a -> b -> b) -> b -> Parsec a -> Parsec b
somer (:) []

  {-# INLINE empty #-}
  {-# INLINE (<|>) #-}
  {-# INLINE many #-}
  {-# INLINE some #-}

{-# INLINE manyr #-}
manyr :: (a -> b -> b) -> b -> Parsec a -> Parsec b
manyr :: forall a b. (a -> b -> b) -> b -> Parsec a -> Parsec b
manyr a -> b -> b
f b
k Parsec a
p = let go :: Parsec b
go = forall (f :: * -> *) a b c.
Applicative f =>
(a -> b -> c) -> f a -> f b -> f c
liftA2 a -> b -> b
f Parsec a
p Parsec b
go forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall (f :: * -> *) a. Applicative f => a -> f a
pure b
k in Parsec b
go

{-# INLINE somer #-}
somer :: (a -> b -> b) -> b -> Parsec a -> Parsec b
somer :: forall a b. (a -> b -> b) -> b -> Parsec a -> Parsec b
somer a -> b -> b
f b
k Parsec a
p = forall (f :: * -> *) a b c.
Applicative f =>
(a -> b -> c) -> f a -> f b -> f c
liftA2 a -> b -> b
f Parsec a
p (forall a b. (a -> b -> b) -> b -> Parsec a -> Parsec b
manyr a -> b -> b
f b
k Parsec a
p)

instance Semigroup m => Semigroup (Parsec m) where
  (<>) :: Parsec m -> Parsec m -> Parsec m
  <> :: Parsec m -> Parsec m -> Parsec m
(<>) = forall (f :: * -> *) a b c.
Applicative f =>
(a -> b -> c) -> f a -> f b -> f c
liftA2 forall a. Semigroup a => a -> a -> a
(<>)

  {-# INLINE (<>) #-}

instance Monoid m => Monoid (Parsec m) where
  mempty :: Parsec m
  mempty :: Parsec m
mempty = forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a. Monoid a => a
mempty

  {-# INLINE mempty #-}

type State :: UnliftedDatatype
data State = State {
    -- | the input string, in future this may be generalised
    State -> String
input :: !String,
    -- | has the parser consumed input since the last relevant handler?
    State -> Word
consumed :: {-# UNPACK #-} !Word,
    -- | the current line number (incremented by \n)
    State -> Word
line :: {-# UNPACK #-} !Word,
    -- | the current column number (have to settle on a tab handling scheme)
    State -> Word
col  :: {-# UNPACK #-} !Word,
    -- | the valid for which hints can be used
    State -> Word
hintsValidOffset :: {-# UNPACK #-} !Word,
    -- | the hints at this point in time
    State -> Set ExpectItem
hints :: !(Set ExpectItem),
    -- | Debug nesting
    State -> Int
debugLevel :: {-# UNPACK #-} !Int
  }

emptyState :: String -> State
emptyState :: String -> State
emptyState !String
str = State { input :: String
input = String
str
                        , consumed :: Word
consumed = Word
0
                        , line :: Word
line = Word
1
                        , col :: Word
col = Word
1
                        , hintsValidOffset :: Word
hintsValidOffset = Word
0
                        , hints :: Set ExpectItem
hints = forall a. Set a
Set.empty
                        , debugLevel :: Int
debugLevel = Int
0
                        }

emptyErr :: State -> Word -> ParseError
emptyErr :: State -> Word -> ParseError
emptyErr State{Int
String
Word
Set ExpectItem
debugLevel :: Int
hints :: Set ExpectItem
hintsValidOffset :: Word
col :: Word
line :: Word
consumed :: Word
input :: String
debugLevel :: State -> Int
hints :: State -> Set ExpectItem
hintsValidOffset :: State -> Word
col :: State -> Word
line :: State -> Word
input :: State -> String
consumed :: State -> Word
..} = Word -> Word -> Word -> Word -> ParseError
Errors.emptyErr Word
consumed Word
line Word
col

expectedErr :: State -> Set ExpectItem -> Word -> ParseError
expectedErr :: State -> Set ExpectItem -> Word -> ParseError
expectedErr State{Int
String
Word
Set ExpectItem
debugLevel :: Int
hints :: Set ExpectItem
hintsValidOffset :: Word
col :: Word
line :: Word
consumed :: Word
input :: String
debugLevel :: State -> Int
hints :: State -> Set ExpectItem
hintsValidOffset :: State -> Word
col :: State -> Word
line :: State -> Word
input :: State -> String
consumed :: State -> Word
..} = String
-> Word -> Word -> Word -> Set ExpectItem -> Word -> ParseError
Errors.expectedErr String
input Word
consumed Word
line Word
col

specialisedErr :: State -> [String] -> CaretWidth -> ParseError
specialisedErr :: State -> [String] -> CaretWidth -> ParseError
specialisedErr State{Int
String
Word
Set ExpectItem
debugLevel :: Int
hints :: Set ExpectItem
hintsValidOffset :: Word
col :: Word
line :: Word
consumed :: Word
input :: String
debugLevel :: State -> Int
hints :: State -> Set ExpectItem
hintsValidOffset :: State -> Word
col :: State -> Word
line :: State -> Word
input :: State -> String
consumed :: State -> Word
..} = Word -> Word -> Word -> [String] -> CaretWidth -> ParseError
Errors.specialisedErr Word
consumed Word
line Word
col

unexpectedErr :: State -> Set ExpectItem -> String -> CaretWidth -> ParseError
unexpectedErr :: State -> Set ExpectItem -> String -> CaretWidth -> ParseError
unexpectedErr State{Int
String
Word
Set ExpectItem
debugLevel :: Int
hints :: Set ExpectItem
hintsValidOffset :: Word
col :: Word
line :: Word
consumed :: Word
input :: String
debugLevel :: State -> Int
hints :: State -> Set ExpectItem
hintsValidOffset :: State -> Word
col :: State -> Word
line :: State -> Word
input :: State -> String
consumed :: State -> Word
..} = Word
-> Word
-> Word
-> Set ExpectItem
-> String
-> CaretWidth
-> ParseError
Errors.unexpectedErr Word
consumed Word
line Word
col

errorToHints :: State -> ParseError -> State
errorToHints :: State -> ParseError -> State
errorToHints st :: State
st@State{Int
String
Word
Set ExpectItem
debugLevel :: Int
hints :: Set ExpectItem
hintsValidOffset :: Word
col :: Word
line :: Word
consumed :: Word
input :: String
debugLevel :: State -> Int
hints :: State -> Set ExpectItem
hintsValidOffset :: State -> Word
col :: State -> Word
line :: State -> Word
input :: State -> String
consumed :: State -> Word
..} ParseError
err
  | Word
consumed forall a. Eq a => a -> a -> Bool
== ParseError -> Word
Errors.presentationOffset ParseError
err
  , Bool -> Bool
not (ParseError -> Bool
Errors.isExpectedEmpty ParseError
err) =
    if Word
hintsValidOffset forall a. Ord a => a -> a -> Bool
< Word
consumed then State
st { hints :: Set ExpectItem
hints = ParseError -> Set ExpectItem
Errors.expecteds ParseError
err, hintsValidOffset :: Word
hintsValidOffset = Word
consumed }
    else                                State
st { hints :: Set ExpectItem
hints = forall a. Ord a => Set a -> Set a -> Set a
Set.union Set ExpectItem
hints (ParseError -> Set ExpectItem
Errors.expecteds ParseError
err) }
errorToHints State
st ParseError
_ = State
st

useHints :: (ParseError -> State -> RT r) -> (ParseError -> State -> RT r)
useHints :: forall r.
(ParseError -> State -> RT r) -> ParseError -> State -> RT r
useHints ParseError -> State -> RT r
bad ParseError
err st :: State
st@State{Word
hintsValidOffset :: Word
hintsValidOffset :: State -> Word
hintsValidOffset, Set ExpectItem
hints :: Set ExpectItem
hints :: State -> Set ExpectItem
hints}
  | Word
presentationOffset forall a. Eq a => a -> a -> Bool
== Word
hintsValidOffset = ParseError -> State -> RT r
bad (Set ExpectItem -> ParseError -> ParseError
Errors.useHints Set ExpectItem
hints ParseError
err) State
st
  | Bool
otherwise                              = ParseError -> State -> RT r
bad ParseError
err State
st{ hintsValidOffset :: Word
hintsValidOffset = Word
presentationOffset, hints :: Set ExpectItem
hints = forall a. Set a
Set.empty }
  where !presentationOffset :: Word
presentationOffset = ParseError -> Word
Errors.presentationOffset ParseError
err

adjustErr :: (ParseError -> ParseError) -> Parsec a -> Parsec a
adjustErr :: forall a. (ParseError -> ParseError) -> Parsec a -> Parsec a
adjustErr ParseError -> ParseError
f (Parsec forall r.
State
-> (a -> State -> RT r) -> (ParseError -> State -> RT r) -> RT r
p) = forall a.
(forall r.
 State
 -> (a -> State -> RT r) -> (ParseError -> State -> RT r) -> RT r)
-> Parsec a
Parsec forall a b. (a -> b) -> a -> b
$ \State
st a -> State -> RT r
good ParseError -> State -> RT r
bad -> forall r.
State
-> (a -> State -> RT r) -> (ParseError -> State -> RT r) -> RT r
p State
st a -> State -> RT r
good forall a b. (a -> b) -> a -> b
$ \ParseError
err -> ParseError -> State -> RT r
bad (ParseError -> ParseError
f ParseError
err)