{-# OPTIONS_GHC -Wall #-}
{-# LANGUAGE StandaloneDeriving #-}
module Parser.Tokeniser (
Tokeniser',
Tokens',
add_token,
current_line_and_char,
delete_char,
gather_token,
get_char,
get_line_and_char,
get_token,
take_token,
tokenisation_error,
tokenise,
tokens_ended) where
import Control.Monad.Except (MonadError (..))
import Control.Monad.RWS.Strict (RWST, execRWST)
import Control.Monad.Reader (MonadReader (..))
import Control.Monad.State.Strict (MonadState (..))
import Control.Monad.Writer.Strict (MonadWriter (..))
import Parser.Errors (Error (..))
import Parser.Line_and_char (L (..), Line_and_char, init_line_and_char)
data State char = State {State char -> Line_and_char
state_line_and_char :: Line_and_char, State char -> [char]
state_text :: [char]}
data Tokeniser' char token err t =
Tokeniser {Tokeniser' char token err t
-> RWST
(char -> Line_and_char -> Line_and_char)
[L token]
(State char)
(Either err)
t
run_tokeniser :: RWST (char -> Line_and_char -> Line_and_char) [L token] (State char) (Either err) t}
data Tokens' token = Tokens [L token] Line_and_char
instance Applicative (Tokeniser' char token err) where
Tokeniser RWST
(char -> Line_and_char -> Line_and_char)
[L token]
(State char)
(Either err)
(a -> b)
tokenise_0 <*> :: Tokeniser' char token err (a -> b)
-> Tokeniser' char token err a -> Tokeniser' char token err b
<*> Tokeniser RWST
(char -> Line_and_char -> Line_and_char)
[L token]
(State char)
(Either err)
a
tokenise_1 = RWST
(char -> Line_and_char -> Line_and_char)
[L token]
(State char)
(Either err)
b
-> Tokeniser' char token err b
forall char token err t.
RWST
(char -> Line_and_char -> Line_and_char)
[L token]
(State char)
(Either err)
t
-> Tokeniser' char token err t
Tokeniser (RWST
(char -> Line_and_char -> Line_and_char)
[L token]
(State char)
(Either err)
(a -> b)
tokenise_0 RWST
(char -> Line_and_char -> Line_and_char)
[L token]
(State char)
(Either err)
(a -> b)
-> RWST
(char -> Line_and_char -> Line_and_char)
[L token]
(State char)
(Either err)
a
-> RWST
(char -> Line_and_char -> Line_and_char)
[L token]
(State char)
(Either err)
b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> RWST
(char -> Line_and_char -> Line_and_char)
[L token]
(State char)
(Either err)
a
tokenise_1)
pure :: a -> Tokeniser' char token err a
pure a
x = RWST
(char -> Line_and_char -> Line_and_char)
[L token]
(State char)
(Either err)
a
-> Tokeniser' char token err a
forall char token err t.
RWST
(char -> Line_and_char -> Line_and_char)
[L token]
(State char)
(Either err)
t
-> Tokeniser' char token err t
Tokeniser (a
-> RWST
(char -> Line_and_char -> Line_and_char)
[L token]
(State char)
(Either err)
a
forall (m :: * -> *) a. Monad m => a -> m a
return a
x)
instance Functor (Tokeniser' char token err) where
fmap :: (a -> b)
-> Tokeniser' char token err a -> Tokeniser' char token err b
fmap a -> b
f (Tokeniser RWST
(char -> Line_and_char -> Line_and_char)
[L token]
(State char)
(Either err)
a
tokenise') = RWST
(char -> Line_and_char -> Line_and_char)
[L token]
(State char)
(Either err)
b
-> Tokeniser' char token err b
forall char token err t.
RWST
(char -> Line_and_char -> Line_and_char)
[L token]
(State char)
(Either err)
t
-> Tokeniser' char token err t
Tokeniser (a -> b
f (a -> b)
-> RWST
(char -> Line_and_char -> Line_and_char)
[L token]
(State char)
(Either err)
a
-> RWST
(char -> Line_and_char -> Line_and_char)
[L token]
(State char)
(Either err)
b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> RWST
(char -> Line_and_char -> Line_and_char)
[L token]
(State char)
(Either err)
a
tokenise')
instance Monad (Tokeniser' char token err) where
Tokeniser RWST
(char -> Line_and_char -> Line_and_char)
[L token]
(State char)
(Either err)
a
tokenise' >>= :: Tokeniser' char token err a
-> (a -> Tokeniser' char token err b)
-> Tokeniser' char token err b
>>= a -> Tokeniser' char token err b
f = RWST
(char -> Line_and_char -> Line_and_char)
[L token]
(State char)
(Either err)
b
-> Tokeniser' char token err b
forall char token err t.
RWST
(char -> Line_and_char -> Line_and_char)
[L token]
(State char)
(Either err)
t
-> Tokeniser' char token err t
Tokeniser (RWST
(char -> Line_and_char -> Line_and_char)
[L token]
(State char)
(Either err)
a
tokenise' RWST
(char -> Line_and_char -> Line_and_char)
[L token]
(State char)
(Either err)
a
-> (a
-> RWST
(char -> Line_and_char -> Line_and_char)
[L token]
(State char)
(Either err)
b)
-> RWST
(char -> Line_and_char -> Line_and_char)
[L token]
(State char)
(Either err)
b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Tokeniser' char token err b
-> RWST
(char -> Line_and_char -> Line_and_char)
[L token]
(State char)
(Either err)
b
forall char token err t.
Tokeniser' char token err t
-> RWST
(char -> Line_and_char -> Line_and_char)
[L token]
(State char)
(Either err)
t
run_tokeniser (Tokeniser' char token err b
-> RWST
(char -> Line_and_char -> Line_and_char)
[L token]
(State char)
(Either err)
b)
-> (a -> Tokeniser' char token err b)
-> a
-> RWST
(char -> Line_and_char -> Line_and_char)
[L token]
(State char)
(Either err)
b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> a -> Tokeniser' char token err b
f)
deriving instance Show char => Show (State char)
deriving instance Show token => Show (Tokens' token)
add_token :: Line_and_char -> token -> Tokeniser' char token err ()
add_token :: Line_and_char -> token -> Tokeniser' char token err ()
add_token Line_and_char
line_and_char token
token = [L token] -> Tokeniser' char token err ()
forall token char err. [L token] -> Tokeniser' char token err ()
tell_Tokeniser [Line_and_char -> token -> L token
forall t. Line_and_char -> t -> L t
L Line_and_char
line_and_char token
token]
ask_Tokeniser :: Tokeniser' char token err (char -> Line_and_char -> Line_and_char)
ask_Tokeniser :: Tokeniser' char token err (char -> Line_and_char -> Line_and_char)
ask_Tokeniser = RWST
(char -> Line_and_char -> Line_and_char)
[L token]
(State char)
(Either err)
(char -> Line_and_char -> Line_and_char)
-> Tokeniser'
char token err (char -> Line_and_char -> Line_and_char)
forall char token err t.
RWST
(char -> Line_and_char -> Line_and_char)
[L token]
(State char)
(Either err)
t
-> Tokeniser' char token err t
Tokeniser RWST
(char -> Line_and_char -> Line_and_char)
[L token]
(State char)
(Either err)
(char -> Line_and_char -> Line_and_char)
forall r (m :: * -> *). MonadReader r m => m r
ask
current_line_and_char :: Tokens' token -> Line_and_char
current_line_and_char :: Tokens' token -> Line_and_char
current_line_and_char (Tokens [L token]
tokens Line_and_char
end_line_and_char) =
case [L token]
tokens of
[] -> Line_and_char
end_line_and_char
L Line_and_char
line_and_char token
_ : [L token]
_ -> Line_and_char
line_and_char
delete_char :: Tokeniser' char token err ()
delete_char :: Tokeniser' char token err ()
delete_char =
do
char -> Line_and_char -> Line_and_char
next_line_and_char <- Tokeniser' char token err (char -> Line_and_char -> Line_and_char)
forall char token err.
Tokeniser' char token err (char -> Line_and_char -> Line_and_char)
ask_Tokeniser
State Line_and_char
line_and_char [char]
text <- Tokeniser' char token err (State char)
forall char token err. Tokeniser' char token err (State char)
get_Tokeniser
case [char]
text of
[] -> () -> Tokeniser' char token err ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
char
char : [char]
text' -> State char -> Tokeniser' char token err ()
forall char token err. State char -> Tokeniser' char token err ()
put_Tokeniser (Line_and_char -> [char] -> State char
forall char. Line_and_char -> [char] -> State char
State (char -> Line_and_char -> Line_and_char
next_line_and_char char
char Line_and_char
line_and_char) [char]
text')
gather_token :: (char -> Maybe Char) -> (String -> token) -> Tokeniser' char token err ()
gather_token :: (char -> Maybe Char)
-> (String -> token) -> Tokeniser' char token err ()
gather_token char -> Maybe Char
recognise_char String -> token
string_to_token =
do
Line_and_char
line_and_char <- Tokeniser' char token err Line_and_char
forall char token err. Tokeniser' char token err Line_and_char
get_line_and_char
String
token <- (char -> Maybe Char) -> Tokeniser' char token err String
forall char token err.
(char -> Maybe Char) -> Tokeniser' char token err String
gather_token' char -> Maybe Char
recognise_char
Line_and_char -> token -> Tokeniser' char token err ()
forall token char err.
Line_and_char -> token -> Tokeniser' char token err ()
add_token Line_and_char
line_and_char (String -> token
string_to_token String
token)
gather_token' :: (char -> Maybe Char) -> Tokeniser' char token err String
gather_token' :: (char -> Maybe Char) -> Tokeniser' char token err String
gather_token' char -> Maybe Char
recognise_char =
let
f :: Tokeniser' char token err String
f = (char -> Maybe Char) -> Tokeniser' char token err String
forall char token err.
(char -> Maybe Char) -> Tokeniser' char token err String
gather_token' char -> Maybe Char
recognise_char
in
do
Maybe char
maybe_char <- Int -> Tokeniser' char token err (Maybe char)
forall char token err.
Int -> Tokeniser' char token err (Maybe char)
get_char Int
0
case Maybe char
maybe_char Maybe char -> (char -> Maybe Char) -> Maybe Char
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= char -> Maybe Char
recognise_char of
Maybe Char
Nothing -> String -> Tokeniser' char token err String
forall (m :: * -> *) a. Monad m => a -> m a
return String
""
Just Char
char ->
do
Tokeniser' char token err ()
forall char token err. Tokeniser' char token err ()
delete_char
String
token <- Tokeniser' char token err String
forall token err. Tokeniser' char token err String
f
String -> Tokeniser' char token err String
forall (m :: * -> *) a. Monad m => a -> m a
return (Char
char Char -> ShowS
forall a. a -> [a] -> [a]
: String
token)
get_Tokeniser :: Tokeniser' char token err (State char)
get_Tokeniser :: Tokeniser' char token err (State char)
get_Tokeniser = RWST
(char -> Line_and_char -> Line_and_char)
[L token]
(State char)
(Either err)
(State char)
-> Tokeniser' char token err (State char)
forall char token err t.
RWST
(char -> Line_and_char -> Line_and_char)
[L token]
(State char)
(Either err)
t
-> Tokeniser' char token err t
Tokeniser RWST
(char -> Line_and_char -> Line_and_char)
[L token]
(State char)
(Either err)
(State char)
forall s (m :: * -> *). MonadState s m => m s
get
get_char :: Int -> Tokeniser' char token err (Maybe char)
get_char :: Int -> Tokeniser' char token err (Maybe char)
get_char Int
i =
do
[char]
text <- State char -> [char]
forall char. State char -> [char]
state_text (State char -> [char])
-> Tokeniser' char token err (State char)
-> Tokeniser' char token err [char]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Tokeniser' char token err (State char)
forall char token err. Tokeniser' char token err (State char)
get_Tokeniser
Maybe char -> Tokeniser' char token err (Maybe char)
forall (m :: * -> *) a. Monad m => a -> m a
return
(case Int
0 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
i Bool -> Bool -> Bool
&& Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< [char] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [char]
text of
Bool
False -> Maybe char
forall a. Maybe a
Nothing
Bool
True -> char -> Maybe char
forall a. a -> Maybe a
Just ([char]
text [char] -> Int -> char
forall a. [a] -> Int -> a
!! Int
i))
get_line_and_char :: Tokeniser' char token err Line_and_char
get_line_and_char :: Tokeniser' char token err Line_and_char
get_line_and_char = State char -> Line_and_char
forall char. State char -> Line_and_char
state_line_and_char (State char -> Line_and_char)
-> Tokeniser' char token err (State char)
-> Tokeniser' char token err Line_and_char
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Tokeniser' char token err (State char)
forall char token err. Tokeniser' char token err (State char)
get_Tokeniser
get_token :: Tokens' token -> Maybe token
get_token :: Tokens' token -> Maybe token
get_token (Tokens [L token]
tokens Line_and_char
_) =
case [L token]
tokens of
[] -> Maybe token
forall a. Maybe a
Nothing
L Line_and_char
_ token
token : [L token]
_ -> token -> Maybe token
forall a. a -> Maybe a
Just token
token
put_Tokeniser :: State char -> Tokeniser' char token err ()
put_Tokeniser :: State char -> Tokeniser' char token err ()
put_Tokeniser State char
st = RWST
(char -> Line_and_char -> Line_and_char)
[L token]
(State char)
(Either err)
()
-> Tokeniser' char token err ()
forall char token err t.
RWST
(char -> Line_and_char -> Line_and_char)
[L token]
(State char)
(Either err)
t
-> Tokeniser' char token err t
Tokeniser (State char
-> RWST
(char -> Line_and_char -> Line_and_char)
[L token]
(State char)
(Either err)
()
forall s (m :: * -> *). MonadState s m => s -> m ()
put State char
st)
take_token :: (token -> Maybe t) -> Tokens' token -> Maybe (t, Tokens' token)
take_token :: (token -> Maybe t) -> Tokens' token -> Maybe (t, Tokens' token)
take_token token -> Maybe t
f (Tokens [L token]
tokens Line_and_char
end_line_and_char) =
case [L token]
tokens of
[] -> Maybe (t, Tokens' token)
forall a. Maybe a
Nothing
L Line_and_char
_ token
token : [L token]
tokens' ->
do
t
x <- token -> Maybe t
f token
token
(t, Tokens' token) -> Maybe (t, Tokens' token)
forall (m :: * -> *) a. Monad m => a -> m a
return (t
x, [L token] -> Line_and_char -> Tokens' token
forall token. [L token] -> Line_and_char -> Tokens' token
Tokens [L token]
tokens' Line_and_char
end_line_and_char)
tell_Tokeniser :: [L token] -> Tokeniser' char token err ()
tell_Tokeniser :: [L token] -> Tokeniser' char token err ()
tell_Tokeniser [L token]
tokens = RWST
(char -> Line_and_char -> Line_and_char)
[L token]
(State char)
(Either err)
()
-> Tokeniser' char token err ()
forall char token err t.
RWST
(char -> Line_and_char -> Line_and_char)
[L token]
(State char)
(Either err)
t
-> Tokeniser' char token err t
Tokeniser ([L token]
-> RWST
(char -> Line_and_char -> Line_and_char)
[L token]
(State char)
(Either err)
()
forall w (m :: * -> *). MonadWriter w m => w -> m ()
tell [L token]
tokens)
throwError_Tokeniser :: err -> Tokeniser' char token err t
throwError_Tokeniser :: err -> Tokeniser' char token err t
throwError_Tokeniser err
err = RWST
(char -> Line_and_char -> Line_and_char)
[L token]
(State char)
(Either err)
t
-> Tokeniser' char token err t
forall char token err t.
RWST
(char -> Line_and_char -> Line_and_char)
[L token]
(State char)
(Either err)
t
-> Tokeniser' char token err t
Tokeniser (err
-> RWST
(char -> Line_and_char -> Line_and_char)
[L token]
(State char)
(Either err)
t
forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError err
err)
tokenisation_error :: (Line_and_char -> err) -> Tokeniser' char token err t
tokenisation_error :: (Line_and_char -> err) -> Tokeniser' char token err t
tokenisation_error Line_and_char -> err
err =
do
Line_and_char
line_and_char <- Tokeniser' char token err Line_and_char
forall char token err. Tokeniser' char token err Line_and_char
get_line_and_char
err -> Tokeniser' char token err t
forall err char token t. err -> Tokeniser' char token err t
throwError_Tokeniser (Line_and_char -> err
err Line_and_char
line_and_char)
tokenise ::
(
(Char -> char) ->
(char -> Line_and_char -> Line_and_char) ->
Tokeniser' char token err () ->
String ->
Either Error (Either err (Tokens' token)))
tokenise :: (Char -> char)
-> (char -> Line_and_char -> Line_and_char)
-> Tokeniser' char token err ()
-> String
-> Either Error (Either err (Tokens' token))
tokenise Char -> char
classify_char char -> Line_and_char -> Line_and_char
next_line_and_char (Tokeniser RWST
(char -> Line_and_char -> Line_and_char)
[L token]
(State char)
(Either err)
()
tokenise') String
text =
case RWST
(char -> Line_and_char -> Line_and_char)
[L token]
(State char)
(Either err)
()
-> (char -> Line_and_char -> Line_and_char)
-> State char
-> Either err (State char, [L token])
forall (m :: * -> *) r w s a.
Monad m =>
RWST r w s m a -> r -> s -> m (s, w)
execRWST RWST
(char -> Line_and_char -> Line_and_char)
[L token]
(State char)
(Either err)
()
tokenise' char -> Line_and_char -> Line_and_char
next_line_and_char (Line_and_char -> [char] -> State char
forall char. Line_and_char -> [char] -> State char
State Line_and_char
init_line_and_char (Char -> char
classify_char (Char -> char) -> String -> [char]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> String
text)) of
Left err
err -> Either err (Tokens' token)
-> Either Error (Either err (Tokens' token))
forall a b. b -> Either a b
Right (err -> Either err (Tokens' token)
forall a b. a -> Either a b
Left err
err)
Right (State Line_and_char
line_and_char [char]
text', [L token]
tokens) ->
case [char]
text' of
[] -> Either err (Tokens' token)
-> Either Error (Either err (Tokens' token))
forall a b. b -> Either a b
Right (Tokens' token -> Either err (Tokens' token)
forall a b. b -> Either a b
Right ([L token] -> Line_and_char -> Tokens' token
forall token. [L token] -> Line_and_char -> Tokens' token
Tokens [L token]
tokens Line_and_char
line_and_char))
[char]
_ -> Error -> Either Error (Either err (Tokens' token))
forall a b. a -> Either a b
Left Error
Incomplete_tokenisation
tokens_ended :: Tokens' token -> Bool
tokens_ended :: Tokens' token -> Bool
tokens_ended (Tokens [L token]
tokens Line_and_char
_) = [L token] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [L token]
tokens