{-| This module contains the building blocks used to construct the lexer.
-}
module Agda.Syntax.Parser.LexActions
    ( -- * Main function
      lexToken
      -- * Lex actions
      -- ** General actions
    , token
    , withInterval, withInterval', withInterval_
    , withLayout
    , andThen, skip
    , begin, end, beginWith, endWith
    , begin_, end_
    , lexError
      -- ** Specialized actions
    , keyword, symbol, identifier, literal, literal', integer
      -- * Lex predicates
    , followedBy, eof, inState
    ) where

import Control.Monad.State (modify)

import Data.Bifunctor
import Data.Char
import Data.Foldable (foldl')
import Data.Maybe

import Agda.Syntax.Common (pattern Ranged)
import Agda.Syntax.Parser.Lexer
import Agda.Syntax.Parser.Alex
import Agda.Syntax.Parser.Monad
import Agda.Syntax.Parser.Tokens
import Agda.Syntax.Position
import Agda.Syntax.Literal

import Agda.Utils.List
import Agda.Utils.List1 (String1, toList)
import qualified Agda.Utils.List1 as List1

import Agda.Utils.Impossible

{--------------------------------------------------------------------------
    Scan functions
 --------------------------------------------------------------------------}

-- | Called at the end of a file. Returns 'TokEOF'.
returnEOF :: AlexInput -> Parser Token
returnEOF :: AlexInput -> Parser Token
returnEOF AlexInput{ SrcFile
lexSrcFile :: AlexInput -> SrcFile
lexSrcFile :: SrcFile
lexSrcFile, PositionWithoutFile
lexPos :: AlexInput -> PositionWithoutFile
lexPos :: PositionWithoutFile
lexPos } = do
  -- Andreas, 2018-12-30, issue #3480
  -- The following setLastPos leads to parse error reporting
  -- far away from the interesting position, in particular
  -- if there is a long comment before the EOF.
  -- (Such a long comment is frequent in interactive programming, as
  -- commenting out until the end of the file is a common habit.)
  -- -- setLastPos lexPos
  -- Without it, we get much more useful error locations.
  String -> Parser ()
setPrevToken String
"<EOF>"
  forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Interval -> Token
TokEOF forall a b. (a -> b) -> a -> b
$ forall a.
a -> PositionWithoutFile -> PositionWithoutFile -> Interval' a
posToInterval SrcFile
lexSrcFile PositionWithoutFile
lexPos PositionWithoutFile
lexPos

-- | Set the current input and lex a new token (calls 'lexToken').
skipTo :: AlexInput -> Parser Token
skipTo :: AlexInput -> Parser Token
skipTo AlexInput
inp = do
  AlexInput -> Parser ()
setLexInput AlexInput
inp
  Parser Token
lexToken

{-| Scan the input to find the next token. Calls
'Agda.Syntax.Parser.Lexer.alexScanUser'. This is the main lexing function
where all the work happens. The function 'Agda.Syntax.Parser.Lexer.lexer',
used by the parser is the continuation version of this function.
-}
lexToken :: Parser Token
lexToken :: Parser Token
lexToken =
    do  AlexInput
inp <- Parser AlexInput
getLexInput
        [LexState]
lss <- Parser [LexState]
getLexState
        ParseFlags
flags <- Parser ParseFlags
getParseFlags
        case ([LexState], ParseFlags)
-> AlexInput -> LexState -> AlexReturn (LexAction Token)
alexScanUser ([LexState]
lss, ParseFlags
flags) AlexInput
inp (forall a. a -> [a] -> a
headWithDefault forall a. HasCallStack => a
__IMPOSSIBLE__ [LexState]
lss) of
            AlexReturn (LexAction Token)
AlexEOF                     -> AlexInput -> Parser Token
returnEOF AlexInput
inp
            AlexSkip AlexInput
inp' LexState
len           -> AlexInput -> Parser Token
skipTo AlexInput
inp'
            AlexToken AlexInput
inp' LexState
len LexAction Token
action   -> Token -> Token
postToken forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall r.
LexAction r -> AlexInput -> AlexInput -> LexState -> Parser r
runLexAction LexAction Token
action AlexInput
inp AlexInput
inp' LexState
len
            AlexError AlexInput
i                 -> forall a. String -> Parser a
parseError forall a b. (a -> b) -> a -> b
$ forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat
              [ String
"Lexical error"
              , case forall a. [a] -> Maybe a
listToMaybe forall a b. (a -> b) -> a -> b
$ AlexInput -> String
lexInput AlexInput
i of
                  Just Char
'\t'                -> String
" (you may want to replace tabs with spaces)"
                  Just Char
c | Bool -> Bool
not (Char -> Bool
isPrint Char
c) -> String
" (unprintable character)"
                  Maybe Char
_ -> String
""
              , String
":"
              ]

isSub :: Char -> Bool
isSub :: Char -> Bool
isSub Char
c = Char
'\x2080' forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\x2089'

readSubscript :: [Char] -> Integer
readSubscript :: String -> Integer
readSubscript = forall a. Read a => String -> a
read forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a -> b) -> [a] -> [b]
map (\Char
c -> forall a. Enum a => LexState -> a
toEnum (forall a. Enum a => a -> LexState
fromEnum Char
c forall a. Num a => a -> a -> a
- LexState
0x2080 forall a. Num a => a -> a -> a
+ forall a. Enum a => a -> LexState
fromEnum Char
'0'))

postToken :: Token -> Token
postToken :: Token -> Token
postToken (TokId (Interval
r, String
"\x03bb")) = Symbol -> Interval -> Token
TokSymbol Symbol
SymLambda Interval
r
postToken (TokId (Interval
r, String
"\x2026")) = Symbol -> Interval -> Token
TokSymbol Symbol
SymEllipsis Interval
r
postToken (TokId (Interval
r, String
"\x2192")) = Symbol -> Interval -> Token
TokSymbol Symbol
SymArrow Interval
r
postToken (TokId (Interval
r, String
"\x2983")) = Symbol -> Interval -> Token
TokSymbol Symbol
SymDoubleOpenBrace Interval
r
postToken (TokId (Interval
r, String
"\x2984")) = Symbol -> Interval -> Token
TokSymbol Symbol
SymDoubleCloseBrace Interval
r
postToken (TokId (Interval
r, String
"\x2987")) = Symbol -> Interval -> Token
TokSymbol Symbol
SymOpenIdiomBracket Interval
r
postToken (TokId (Interval
r, String
"\x2988")) = Symbol -> Interval -> Token
TokSymbol Symbol
SymCloseIdiomBracket Interval
r
postToken (TokId (Interval
r, String
"\x2987\x2988")) = Symbol -> Interval -> Token
TokSymbol Symbol
SymEmptyIdiomBracket Interval
r
postToken (TokId (Interval
r, String
"\x2200")) = Keyword -> Interval -> Token
TokKeyword Keyword
KwForall Interval
r
postToken Token
t = Token
t

{--------------------------------------------------------------------------
    Lex actions
 --------------------------------------------------------------------------}

-- | The most general way of parsing a token.
token :: (String -> Parser tok) -> LexAction tok
token :: forall tok. (String -> Parser tok) -> LexAction tok
token String -> Parser tok
action = forall r.
(AlexInput -> AlexInput -> LexState -> Parser r) -> LexAction r
LexAction forall a b. (a -> b) -> a -> b
$ \ AlexInput
inp AlexInput
inp' LexState
len ->
    do  AlexInput -> Parser ()
setLexInput AlexInput
inp'
        let t :: String
t = forall a. LexState -> [a] -> [a]
take LexState
len forall a b. (a -> b) -> a -> b
$ AlexInput -> String
lexInput AlexInput
inp
        String -> Parser ()
setPrevToken String
t
        PositionWithoutFile -> Parser ()
setLastPos forall a b. (a -> b) -> a -> b
$ AlexInput -> PositionWithoutFile
lexPos AlexInput
inp
        String -> Parser tok
action String
t

-- | Parse a token from an 'Interval' and the lexed string.
withInterval :: ((Interval, String) -> tok) -> LexAction tok
withInterval :: forall tok. ((Interval, String) -> tok) -> LexAction tok
withInterval (Interval, String) -> tok
f = forall tok. (String -> Parser tok) -> LexAction tok
token forall a b. (a -> b) -> a -> b
$ \String
s -> do
                   Interval
r <- Parser Interval
getParseInterval
                   forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ (Interval, String) -> tok
f (Interval
r,String
s)

-- | Like 'withInterval', but applies a function to the string.
withInterval' :: (String -> a) -> ((Interval, a) -> tok) -> LexAction tok
withInterval' :: forall a tok.
(String -> a) -> ((Interval, a) -> tok) -> LexAction tok
withInterval' String -> a
f (Interval, a) -> tok
t = forall tok. ((Interval, String) -> tok) -> LexAction tok
withInterval ((Interval, a) -> tok
t forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (p :: * -> * -> *) b c a.
Bifunctor p =>
(b -> c) -> p a b -> p a c
second String -> a
f)

-- | Return a token without looking at the lexed string.
withInterval_ :: (Interval -> r) -> LexAction r
withInterval_ :: forall r. (Interval -> r) -> LexAction r
withInterval_ Interval -> r
f = forall tok. ((Interval, String) -> tok) -> LexAction tok
withInterval (Interval -> r
f forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a, b) -> a
fst)


-- | Executed for layout keywords. Enters the 'Agda.Syntax.Parser.Lexer.layout'
--   state and performs the given action.
withLayout :: Keyword -> LexAction r -> LexAction r
withLayout :: forall r. Keyword -> LexAction r -> LexAction r
withLayout Keyword
kw LexAction r
a = LexState -> Parser ()
pushLexState LexState
layout forall r. Parser () -> LexAction r -> LexAction r
`andThen` Parser ()
setLayoutKw forall r. Parser () -> LexAction r -> LexAction r
`andThen` LexAction r
a
  where
  setLayoutKw :: Parser ()
setLayoutKw = forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify forall a b. (a -> b) -> a -> b
$ \ ParseState
st -> ParseState
st { parseLayKw :: Keyword
parseLayKw = Keyword
kw }

infixr 1 `andThen`

-- | Prepend some parser manipulation to an action.
andThen :: Parser () -> LexAction r -> LexAction r
andThen :: forall r. Parser () -> LexAction r -> LexAction r
andThen Parser ()
cmd LexAction r
a = forall r.
(AlexInput -> AlexInput -> LexState -> Parser r) -> LexAction r
LexAction forall a b. (a -> b) -> a -> b
$ \ AlexInput
inp AlexInput
inp' LexState
n -> do
  Parser ()
cmd
  forall r.
LexAction r -> AlexInput -> AlexInput -> LexState -> Parser r
runLexAction LexAction r
a AlexInput
inp AlexInput
inp' LexState
n

-- | Visit the current lexeme again.
revisit :: LexAction Token
revisit :: LexAction Token
revisit = forall r.
(AlexInput -> AlexInput -> LexState -> Parser r) -> LexAction r
LexAction forall a b. (a -> b) -> a -> b
$ \ AlexInput
_ AlexInput
_ LexState
_ -> Parser Token
lexToken

-- | Throw away the current lexeme.
skip :: LexAction Token
skip :: LexAction Token
skip = forall r.
(AlexInput -> AlexInput -> LexState -> Parser r) -> LexAction r
LexAction forall a b. (a -> b) -> a -> b
$ \ AlexInput
_ AlexInput
inp' LexState
_ -> AlexInput -> Parser Token
skipTo AlexInput
inp'

-- | Enter a new state without consuming any input.
begin :: LexState -> LexAction Token
begin :: LexState -> LexAction Token
begin LexState
code = forall a. LexState -> LexAction a -> LexAction a
beginWith LexState
code LexAction Token
revisit

-- | Exit the current state without consuming any input.
end :: LexAction Token
end :: LexAction Token
end = forall a. LexAction a -> LexAction a
endWith LexAction Token
revisit

-- | Enter a new state throwing away the current lexeme.
begin_ :: LexState -> LexAction Token
begin_ :: LexState -> LexAction Token
begin_ LexState
code = forall a. LexState -> LexAction a -> LexAction a
beginWith LexState
code LexAction Token
skip

-- | Exit the current state throwing away the current lexeme.
end_ :: LexAction Token
end_ :: LexAction Token
end_ = forall a. LexAction a -> LexAction a
endWith LexAction Token
skip

-- | Enter a new state and perform the given action.
beginWith :: LexState -> LexAction a -> LexAction a
beginWith :: forall a. LexState -> LexAction a -> LexAction a
beginWith LexState
code LexAction a
a = LexState -> Parser ()
pushLexState LexState
code forall r. Parser () -> LexAction r -> LexAction r
`andThen` LexAction a
a

-- | Exit the current state and perform the given action.
endWith :: LexAction a -> LexAction a
endWith :: forall a. LexAction a -> LexAction a
endWith LexAction a
a = Parser ()
popLexState forall r. Parser () -> LexAction r -> LexAction r
`andThen` LexAction a
a


-- | Parse a 'Keyword' token, triggers layout for 'layoutKeywords'.
keyword :: Keyword -> LexAction Token
keyword :: Keyword -> LexAction Token
keyword Keyword
k =
    case Keyword
k of

        -- Unconditional layout keyword.
        Keyword
_ | Keyword
k forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Keyword]
layoutKeywords ->
            forall r. Keyword -> LexAction r -> LexAction r
withLayout Keyword
k LexAction Token
cont

        -- Andreas, 2021-05-06, issue #5356:
        -- @constructor@ is not a layout keyword after all, replaced by @data _ where@.
        -- -- @constructor@ is not a layout keyword in @record ... where@ blocks,
        -- -- only in @interleaved mutual@ blocks.
        -- KwConstructor -> do
        --     cxt <- getContext
        --     if inMutualAndNotInWhereBlock cxt
        --       then withLayout k cont
        --       else cont

        Keyword
_ -> LexAction Token
cont
    where
    cont :: LexAction Token
cont = forall r. (Interval -> r) -> LexAction r
withInterval_ (Keyword -> Interval -> Token
TokKeyword Keyword
k)

    -- Andreas, 2021-05-06, issue #5356:
    -- @constructor@ is not a layout keyword after all, replaced by @data _ where@.
    -- -- Most recent block decides ...
    -- inMutualAndNotInWhereBlock = \case
    --   Layout KwMutual _ _ : _ -> True
    --   Layout KwWhere  _ _ : _ -> False
    --   _ : bs                  -> inMutualAndNotInWhereBlock bs
    --   []                      -> True  -- For better errors on stray @constructor@ decls.


-- | Parse a 'Symbol' token.
symbol :: Symbol -> LexAction Token
symbol :: Symbol -> LexAction Token
symbol Symbol
s = forall r. (Interval -> r) -> LexAction r
withInterval_ (Symbol -> Interval -> Token
TokSymbol Symbol
s)


-- | Parse a number.

number :: String -> Integer
number :: String -> Integer
number String
str = case String
str of
    Char
'0' : Char
'x' : String
num -> Integer -> String -> Integer
parseNumber Integer
16 String
num
    Char
'0' : Char
'b' : String
num -> Integer -> String -> Integer
parseNumber Integer
2  String
num
    String
num             -> Integer -> String -> Integer
parseNumber Integer
10 String
num
    where
        parseNumber :: Integer -> String -> Integer
        parseNumber :: Integer -> String -> Integer
parseNumber Integer
radix = forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (Integer -> Integer -> Char -> Integer
addDigit Integer
radix) Integer
0

        -- We rely on Agda.Syntax.Parser.Lexer to enforce that the digits are
        -- in the correct range (so e.g. the digit 'E' cannot appear in a
        -- binary number).
        addDigit :: Integer -> Integer -> Char -> Integer
        addDigit :: Integer -> Integer -> Char -> Integer
addDigit Integer
radix Integer
n Char
'_' = Integer
n
        addDigit Integer
radix Integer
n Char
c   = Integer
n forall a. Num a => a -> a -> a
* Integer
radix forall a. Num a => a -> a -> a
+ forall a b. (Integral a, Num b) => a -> b
fromIntegral (Char -> LexState
digitToInt Char
c)

integer :: String -> Integer
integer :: String -> Integer
integer = \case
  Char
'-' : String
str -> - (String -> Integer
number String
str)
  String
str       -> String -> Integer
number String
str

-- | Parse a literal.
literal' :: (String -> a) -> (a -> Literal) -> LexAction Token
literal' :: forall a. (String -> a) -> (a -> Literal) -> LexAction Token
literal' String -> a
read a -> Literal
lit = forall a tok.
(String -> a) -> ((Interval, a) -> tok) -> LexAction tok
withInterval' String -> a
read forall a b. (a -> b) -> a -> b
$ \ (Interval
r, a
a) ->
  RLiteral -> Token
TokLiteral forall a b. (a -> b) -> a -> b
$ forall a. Range -> a -> Ranged a
Ranged (forall a. HasRange a => a -> Range
getRange Interval
r) forall a b. (a -> b) -> a -> b
$ a -> Literal
lit a
a

literal :: Read a => (a -> Literal) -> LexAction Token
literal :: forall a. Read a => (a -> Literal) -> LexAction Token
literal = forall a. (String -> a) -> (a -> Literal) -> LexAction Token
literal' forall a. Read a => String -> a
read

-- | Parse an identifier. Identifiers can be qualified (see 'Name').
--   Example: @Foo.Bar.f@
identifier :: LexAction Token
identifier :: LexAction Token
identifier = forall a.
(Either (Interval, String1) [(Interval, String1)] -> a)
-> LexAction a
qualified forall a b. (a -> b) -> a -> b
$ forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either ((Interval, String) -> Token
TokId forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (p :: * -> * -> *) b c a.
Bifunctor p =>
(b -> c) -> p a b -> p a c
second forall l. IsList l => l -> [Item l]
toList) ([(Interval, String)] -> Token
TokQId forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a -> b) -> [a] -> [b]
map (forall (p :: * -> * -> *) b c a.
Bifunctor p =>
(b -> c) -> p a b -> p a c
second forall l. IsList l => l -> [Item l]
toList))


-- | Parse a possibly qualified name.
qualified :: (Either (Interval, String1) [(Interval, String1)] -> a) -> LexAction a
qualified :: forall a.
(Either (Interval, String1) [(Interval, String1)] -> a)
-> LexAction a
qualified Either (Interval, String1) [(Interval, String1)] -> a
tok =
    forall tok. (String -> Parser tok) -> LexAction tok
token forall a b. (a -> b) -> a -> b
$ \String
s ->
    do  Interval
i <- Parser Interval
getParseInterval
        case Interval -> [String1] -> [(Interval, String1)]
mkName Interval
i forall a b. (a -> b) -> a -> b
$ forall a. (a -> Bool) -> [a] -> [List1 a]
List1.wordsBy (forall a. Eq a => a -> a -> Bool
== Char
'.') String
s of
            []  -> forall a. String -> Parser a
lexError String
"lex error on .."
            [(Interval, String1)
x] -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Either (Interval, String1) [(Interval, String1)] -> a
tok forall a b. (a -> b) -> a -> b
$ forall a b. a -> Either a b
Left  (Interval, String1)
x
            [(Interval, String1)]
xs  -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Either (Interval, String1) [(Interval, String1)] -> a
tok forall a b. (a -> b) -> a -> b
$ forall a b. b -> Either a b
Right [(Interval, String1)]
xs
    where
        -- Compute the ranges for the substrings (separated by '.') of
        -- a name. Dots are included: the intervals generated for
        -- "A.B.x" correspond to "A.", "B." and "x".
        mkName :: Interval -> [String1] -> [(Interval, String1)]
        mkName :: Interval -> [String1] -> [(Interval, String1)]
mkName Interval
_ []     = []
        mkName Interval
i [String1
x]    = [(Interval
i, String1
x)]
        mkName Interval
i (String1
x:[String1]
xs) = (Interval
i0, String1
x) forall a. a -> [a] -> [a]
: Interval -> [String1] -> [(Interval, String1)]
mkName Interval
i1 [String1]
xs
            where
                p0 :: Position' SrcFile
p0 = forall a. Interval' a -> Position' a
iStart Interval
i
                p1 :: Position' SrcFile
p1 = forall a. Interval' a -> Position' a
iEnd Interval
i
                p' :: Position' SrcFile
p' = forall a. Position' a -> Char -> Position' a
movePos (forall (t :: * -> *) a.
Foldable t =>
Position' a -> t Char -> Position' a
movePosByString Position' SrcFile
p0 String1
x) Char
'.'
                i0 :: Interval
i0 = forall a. Position' a -> Position' a -> Interval' a
Interval Position' SrcFile
p0 Position' SrcFile
p'
                i1 :: Interval
i1 = forall a. Position' a -> Position' a -> Interval' a
Interval Position' SrcFile
p' Position' SrcFile
p1


{--------------------------------------------------------------------------
    Predicates
 --------------------------------------------------------------------------}

-- | True when the given character is the next character of the input string.
followedBy :: Char -> LexPredicate
followedBy :: Char -> LexPredicate
followedBy Char
c' ([LexState], ParseFlags)
_ AlexInput
_ LexState
_ AlexInput
inp =
    case AlexInput -> String
lexInput AlexInput
inp of
        []  -> Bool
False
        Char
c:String
_ -> Char
c forall a. Eq a => a -> a -> Bool
== Char
c'

-- | True if we are at the end of the file.
eof :: LexPredicate
eof :: LexPredicate
eof ([LexState], ParseFlags)
_ AlexInput
_ LexState
_ AlexInput
inp = forall (t :: * -> *) a. Foldable t => t a -> Bool
null forall a b. (a -> b) -> a -> b
$ AlexInput -> String
lexInput AlexInput
inp

-- | True if the given state appears somewhere on the state stack
inState :: LexState -> LexPredicate
inState :: LexState -> LexPredicate
inState LexState
s ([LexState]
ls, ParseFlags
_) AlexInput
_ LexState
_ AlexInput
_ = LexState
s forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [LexState]
ls