{-|
Module      : Toml.Lexer.Utils
Description : Wrapper and actions for generated lexer
Copyright   : (c) Eric Mertens, 2023
License     : ISC
Maintainer  : emertens@gmail.com

This module provides a custom engine for the Alex generated
lexer. This lexer drive provides nested states, unicode support,
and file location tracking.

The various states of this module are needed to deal with the varying
lexing rules while lexing values, keys, and string-literals.

-}
module Toml.Lexer.Utils (

    -- * Types
    Action,
    Context(..),

    -- * Input processing
    locatedUncons,

    -- * Actions
    value,
    value_,
    token,
    token_,

    squareO,
    squareC,
    curlyO,
    curlyC,

    equals,
    timeValue,
    eofToken,

    -- * String literals
    strFrag,
    startMlStr,
    startStr,
    endStr,
    unicodeEscape,
    ) where

import Control.Monad.Trans.State.Strict (State, state)
import Data.Char (ord, chr, isAscii)
import Data.Foldable (asum)
import Data.Time.Format (parseTimeM, defaultTimeLocale, ParseTime)
import Numeric (readHex)

import Toml.Located (Located(..))
import Toml.Position (move, Position)
import Toml.Lexer.Token (Token(..))

-- | Type of actions associated with lexer patterns
type Action = Located String -> State [Context] [Located Token]

-- | Representation of the current lexer state.
data Context
  = ListContext Position -- ^ processing an inline list, lex values
  | TableContext Position -- ^ processing an inline table, don't lex values
  | ValueContext -- ^ processing after an equals, lex one value
  | MlStrContext Position [String] -- ^ position of opening delimiter and list of fragments
  | StrContext   Position [String] -- ^ position of opening delimiter and list of fragments
  deriving Int -> Context -> ShowS
[Context] -> ShowS
Context -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Context] -> ShowS
$cshowList :: [Context] -> ShowS
show :: Context -> String
$cshow :: Context -> String
showsPrec :: Int -> Context -> ShowS
$cshowsPrec :: Int -> Context -> ShowS
Show

-- | Add a literal fragment of a string to the current string state.
strFrag :: Action
strFrag :: Action
strFrag Located String
s = forall (m :: * -> *) s a. Monad m => (s -> (a, s)) -> StateT s m a
state \case
  StrContext   Position
p [String]
acc : [Context]
st -> ([], Position -> [String] -> Context
StrContext   Position
p (forall a. Located a -> a
locThing Located String
s forall a. a -> [a] -> [a]
: [String]
acc) forall a. a -> [a] -> [a]
: [Context]
st)
  MlStrContext Position
p [String]
acc : [Context]
st -> ([], Position -> [String] -> Context
MlStrContext Position
p (forall a. Located a -> a
locThing Located String
s forall a. a -> [a] -> [a]
: [String]
acc) forall a. a -> [a] -> [a]
: [Context]
st)
  [Context]
_                       -> forall a. HasCallStack => String -> a
error String
"strFrag: panic"

-- | End the current string state and emit the string literal token.
endStr :: Action
endStr :: Action
endStr Located String
x = forall (m :: * -> *) s a. Monad m => (s -> (a, s)) -> StateT s m a
state \case
    StrContext   Position
p [String]
acc : [Context]
st -> ([forall a. Position -> a -> Located a
Located Position
p (String -> Token
TokString   (forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat (forall a. [a] -> [a]
reverse (forall a. Located a -> a
locThing Located String
x forall a. a -> [a] -> [a]
: [String]
acc))))], [Context]
st)
    MlStrContext Position
p [String]
acc : [Context]
st -> ([forall a. Position -> a -> Located a
Located Position
p (String -> Token
TokMlString (forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat (forall a. [a] -> [a]
reverse (forall a. Located a -> a
locThing Located String
x forall a. a -> [a] -> [a]
: [String]
acc))))], [Context]
st)
    [Context]
_                       -> forall a. HasCallStack => String -> a
error String
"endStr: panic"

-- | Start a basic string literal
startStr :: Action
startStr :: Action
startStr Located String
t = forall (m :: * -> *) s a. Monad m => (s -> (a, s)) -> StateT s m a
state \case
  Context
ValueContext : [Context]
st -> ([], Position -> [String] -> Context
StrContext (forall a. Located a -> Position
locPosition Located String
t) [] forall a. a -> [a] -> [a]
: [Context]
st)
  [Context]
st                -> ([], Position -> [String] -> Context
StrContext (forall a. Located a -> Position
locPosition Located String
t) [] forall a. a -> [a] -> [a]
: [Context]
st)

-- | Start a multi-line basic string literal
startMlStr :: Action
startMlStr :: Action
startMlStr Located String
t = forall (m :: * -> *) s a. Monad m => (s -> (a, s)) -> StateT s m a
state \case
  Context
ValueContext : [Context]
st -> ([], Position -> [String] -> Context
MlStrContext (forall a. Located a -> Position
locPosition Located String
t) [] forall a. a -> [a] -> [a]
: [Context]
st)
  [Context]
st                -> ([], Position -> [String] -> Context
MlStrContext (forall a. Located a -> Position
locPosition Located String
t) [] forall a. a -> [a] -> [a]
: [Context]
st)

-- | Resolve a unicode escape sequence and add it to the current string literal
unicodeEscape :: Action
unicodeEscape :: Action
unicodeEscape (Located Position
p String
lexeme) =
  case forall a. (Eq a, Num a) => ReadS a
readHex (forall a. Int -> [a] -> [a]
drop Int
2 String
lexeme) of
    [(Int
n,String
_)] | Int
0xd800 forall a. Ord a => a -> a -> Bool
<= Int
n, Int
n forall a. Ord a => a -> a -> Bool
< Int
0xe000 -> forall (f :: * -> *) a. Applicative f => a -> f a
pure [forall a. Position -> a -> Located a
Located Position
p (String -> Token
TokError String
"non-scalar unicode escape")]
      | Int
n forall a. Ord a => a -> a -> Bool
>= Int
0x110000                 -> forall (f :: * -> *) a. Applicative f => a -> f a
pure [forall a. Position -> a -> Located a
Located Position
p (String -> Token
TokError String
"unicode escape too large")]
      | Bool
otherwise                     -> Action
strFrag (forall a. Position -> a -> Located a
Located Position
p [Int -> Char
chr Int
n])
    [(Int, String)]
_                                 -> forall a. HasCallStack => String -> a
error String
"unicodeEscape: panic"

-- | Record an @=@ token and update the state
equals :: Action
equals :: Action
equals Located String
t = forall (m :: * -> *) s a. Monad m => (s -> (a, s)) -> StateT s m a
state \case
  [Context]
st -> ([Token
TokEquals forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Located String
t], Context
ValueContext forall a. a -> [a] -> [a]
: [Context]
st)

-- | Record an opening square bracket and update the state
squareO :: Action
squareO :: Action
squareO Located String
t = forall (m :: * -> *) s a. Monad m => (s -> (a, s)) -> StateT s m a
state \case
  Context
ValueContext  : [Context]
st -> ([Token
TokSquareO forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Located String
t], Position -> Context
ListContext (forall a. Located a -> Position
locPosition Located String
t) forall a. a -> [a] -> [a]
: [Context]
st)
  ListContext Position
p : [Context]
st -> ([Token
TokSquareO forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Located String
t], Position -> Context
ListContext (forall a. Located a -> Position
locPosition Located String
t)forall a. a -> [a] -> [a]
: Position -> Context
ListContext Position
p forall a. a -> [a] -> [a]
: [Context]
st)
  [Context]
st                 -> ([Token
TokSquareO forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Located String
t], [Context]
st)

-- | Record a closing square bracket and update the state
squareC :: Action
squareC :: Action
squareC Located String
t = forall (m :: * -> *) s a. Monad m => (s -> (a, s)) -> StateT s m a
state \case
  ListContext Position
_ : [Context]
st -> ([Token
TokSquareC forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Located String
t], [Context]
st)
  [Context]
st                 -> ([Token
TokSquareC forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Located String
t], [Context]
st)

-- | Record an opening curly bracket and update the state
curlyO :: Action
curlyO :: Action
curlyO Located String
t = forall (m :: * -> *) s a. Monad m => (s -> (a, s)) -> StateT s m a
state \case
  Context
ValueContext  : [Context]
st -> ([Token
TokCurlyO forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Located String
t], Position -> Context
TableContext (forall a. Located a -> Position
locPosition Located String
t) forall a. a -> [a] -> [a]
: [Context]
st)
  ListContext Position
p : [Context]
st -> ([Token
TokCurlyO forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Located String
t], Position -> Context
TableContext (forall a. Located a -> Position
locPosition Located String
t) forall a. a -> [a] -> [a]
: Position -> Context
ListContext Position
p forall a. a -> [a] -> [a]
: [Context]
st)
  [Context]
st                 -> ([Token
TokCurlyO forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Located String
t], [Context]
st)

-- | Record a closing curly bracket and update the state
curlyC :: Action
curlyC :: Action
curlyC Located String
t = forall (m :: * -> *) s a. Monad m => (s -> (a, s)) -> StateT s m a
state \case
  TableContext Position
_ : [Context]
st -> ([Token
TokCurlyC forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Located String
t], [Context]
st)
  [Context]
st                  -> ([Token
TokCurlyC forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Located String
t], [Context]
st)

-- | Emit a token ignoring the current lexeme
token_ :: Token -> Action
token_ :: Token -> Action
token_ Token
t Located String
x = forall (f :: * -> *) a. Applicative f => a -> f a
pure [Token
t forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Located String
x]

-- | Emit a token using the current lexeme
token :: (String -> Token) -> Action
token :: (String -> Token) -> Action
token String -> Token
f Located String
x = forall (f :: * -> *) a. Applicative f => a -> f a
pure [String -> Token
f forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Located String
x]

-- | Emit a value token and update the current state
value_ :: Token -> Action
value_ :: Token -> Action
value_ Token
t = (String -> Token) -> Action
value (forall a b. a -> b -> a
const Token
t)

-- | Emit a value token using the current lexeme and update the current state
value :: (String -> Token) -> Action
value :: (String -> Token) -> Action
value String -> Token
f Located String
x = forall (m :: * -> *) s a. Monad m => (s -> (a, s)) -> StateT s m a
state \[Context]
st ->
  case [Context]
st of
    Context
ValueContext : [Context]
st' -> ([String -> Token
f forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Located String
x], [Context]
st')
    [Context]
_                  -> ([String -> Token
f forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Located String
x], [Context]
st )

-- | Attempt to parse the current lexeme as a date-time token.
timeValue ::
  ParseTime a =>
  String       {- ^ description for error messages -} ->
  [String]     {- ^ possible valid patterns        -} ->
  (a -> Token) {- ^ token constructor              -} ->
  Action
timeValue :: forall a.
ParseTime a =>
String -> [String] -> (a -> Token) -> Action
timeValue String
description [String]
patterns a -> Token
constructor = (String -> Token) -> Action
value \String
str ->
  case forall (t :: * -> *) (f :: * -> *) a.
(Foldable t, Alternative f) =>
t (f a) -> f a
asum [forall (m :: * -> *) t.
(MonadFail m, ParseTime t) =>
Bool -> TimeLocale -> String -> String -> m t
parseTimeM Bool
False TimeLocale
defaultTimeLocale String
pat String
str | String
pat <- [String]
patterns] of
    Maybe a
Nothing -> String -> Token
TokError (String
"malformed " forall a. [a] -> [a] -> [a]
++ String
description)
    Just a
t  -> a -> Token
constructor a
t

-- | Pop the first character off a located string if it's not empty.
-- The resulting 'Int' will either be the ASCII value of the character
-- or @1@ for non-ASCII Unicode values. To avoid a clash, @\x1@ is
-- remapped to @0@.
locatedUncons :: Located String -> Maybe (Int, Located String)
locatedUncons :: Located String -> Maybe (Int, Located String)
locatedUncons Located { locPosition :: forall a. Located a -> Position
locPosition = Position
p, locThing :: forall a. Located a -> a
locThing = String
str } =
  case String
str of
    String
"" -> forall a. Maybe a
Nothing
    Char
x:String
xs
      | Char
x forall a. Eq a => a -> a -> Bool
== Char
'\1' -> forall a. a -> Maybe a
Just (Int
0,     Located String
rest)
      | Char -> Bool
isAscii Char
x -> forall a. a -> Maybe a
Just (Char -> Int
ord Char
x, Located String
rest)
      | Bool
otherwise -> forall a. a -> Maybe a
Just (Int
1,     Located String
rest)
      where
        rest :: Located String
rest = Located { locPosition :: Position
locPosition = Char -> Position -> Position
move Char
x Position
p, locThing :: String
locThing = String
xs }

-- | Generate the correct terminating token given the current lexer state.
eofToken :: [Context] -> Located String -> Located Token
eofToken :: [Context] -> Located String -> Located Token
eofToken (MlStrContext Position
p [String]
_ : [Context]
_) Located String
_ = forall a. Position -> a -> Located a
Located Position
p (String -> Token
TokError String
"unterminated multi-line string literal")
eofToken (StrContext   Position
p [String]
_ : [Context]
_) Located String
_ = forall a. Position -> a -> Located a
Located Position
p (String -> Token
TokError String
"unterminated string literal")
eofToken (ListContext  Position
p   : [Context]
_) Located String
_ = forall a. Position -> a -> Located a
Located Position
p (String -> Token
TokError String
"unterminated '['")
eofToken (TableContext Position
p   : [Context]
_) Located String
_ = forall a. Position -> a -> Located a
Located Position
p (String -> Token
TokError String
"unterminated '{'")
eofToken (Context
ValueContext     : [Context]
s) Located String
t = [Context] -> Located String -> Located Token
eofToken [Context]
s Located String
t
eofToken [Context]
_                      Located String
t = Token
TokEOF forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Located String
t