{-| This module contains the building blocks used to construct the lexer.
-}
module Agda.Syntax.Parser.LexActions
    ( -- * Main function
      lexToken
      -- * Lex actions
      -- ** General actions
    , token
    , withInterval, withInterval', withInterval_
    , withLayout
    , begin, end, beginWith, endWith
    , begin_, end_
    , lexError
      -- ** Specialized actions
    , keyword, symbol, identifier, literal, literal', integer
      -- * Lex predicates
    , followedBy, eof, inState
    ) where

import Data.Char
import Data.Maybe

import Agda.Syntax.Parser.Lexer
import Agda.Syntax.Parser.Alex
import Agda.Syntax.Parser.Monad
import Agda.Syntax.Parser.Tokens
import Agda.Syntax.Position
import Agda.Syntax.Literal

import Agda.Utils.List
import Agda.Utils.Tuple

import Agda.Utils.Impossible

{--------------------------------------------------------------------------
    Scan functions
 --------------------------------------------------------------------------}

-- | Called at the end of a file. Returns 'TokEOF'.
returnEOF :: AlexInput -> Parser Token
returnEOF :: AlexInput -> Parser Token
returnEOF AlexInput{ SrcFile
lexSrcFile :: AlexInput -> SrcFile
lexSrcFile :: SrcFile
lexSrcFile, PositionWithoutFile
lexPos :: AlexInput -> PositionWithoutFile
lexPos :: PositionWithoutFile
lexPos } = do
  -- Andreas, 2018-12-30, issue #3480
  -- The following setLastPos leads to parse error reporting
  -- far away from the interesting position, in particular
  -- if there is a long comment before the EOF.
  -- (Such a long comment is frequent in interactive programming, as
  -- commenting out until the end of the file is a common habit.)
  -- -- setLastPos lexPos
  -- Without it, we get much more useful error locations.
  String -> Parser ()
setPrevToken String
"<EOF>"
  Token -> Parser Token
forall (m :: * -> *) a. Monad m => a -> m a
return (Token -> Parser Token) -> Token -> Parser Token
forall a b. (a -> b) -> a -> b
$ Interval -> Token
TokEOF (Interval -> Token) -> Interval -> Token
forall a b. (a -> b) -> a -> b
$ SrcFile -> PositionWithoutFile -> PositionWithoutFile -> Interval
forall a.
a -> PositionWithoutFile -> PositionWithoutFile -> Interval' a
posToInterval SrcFile
lexSrcFile PositionWithoutFile
lexPos PositionWithoutFile
lexPos

-- | Set the current input and lex a new token (calls 'lexToken').
skipTo :: AlexInput -> Parser Token
skipTo :: AlexInput -> Parser Token
skipTo AlexInput
inp = do
  AlexInput -> Parser ()
setLexInput AlexInput
inp
  Parser Token
lexToken

{-| Scan the input to find the next token. Calls
'Agda.Syntax.Parser.Lexer.alexScanUser'. This is the main lexing function
where all the work happens. The function 'Agda.Syntax.Parser.Lexer.lexer',
used by the parser is the continuation version of this function.
-}
lexToken :: Parser Token
lexToken :: Parser Token
lexToken =
    do  AlexInput
inp <- Parser AlexInput
getLexInput
        [LexState]
lss <- Parser [LexState]
getLexState
        ParseFlags
flags <- Parser ParseFlags
getParseFlags
        case ([LexState], ParseFlags)
-> AlexInput -> LexState -> AlexReturn (LexAction Token)
alexScanUser ([LexState]
lss, ParseFlags
flags) AlexInput
inp (LexState -> [LexState] -> LexState
forall a. a -> [a] -> a
headWithDefault LexState
forall a. HasCallStack => a
__IMPOSSIBLE__ [LexState]
lss) of
            AlexReturn (LexAction Token)
AlexEOF                     -> AlexInput -> Parser Token
returnEOF AlexInput
inp
            AlexSkip AlexInput
inp' LexState
len           -> AlexInput -> Parser Token
skipTo AlexInput
inp'
            AlexToken AlexInput
inp' LexState
len LexAction Token
action   -> (Token -> Token) -> Parser Token -> Parser Token
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Token -> Token
postToken (Parser Token -> Parser Token) -> Parser Token -> Parser Token
forall a b. (a -> b) -> a -> b
$ LexAction Token
action AlexInput
inp AlexInput
inp' LexState
len
            AlexError AlexInput
i                 -> String -> Parser Token
forall a. String -> Parser a
parseError (String -> Parser Token) -> String -> Parser Token
forall a b. (a -> b) -> a -> b
$ [String] -> String
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat
              [ String
"Lexical error"
              , case String -> Maybe Char
forall a. [a] -> Maybe a
listToMaybe (String -> Maybe Char) -> String -> Maybe Char
forall a b. (a -> b) -> a -> b
$ AlexInput -> String
lexInput AlexInput
i of
                  Just Char
'\t'                -> String
" (you may want to replace tabs with spaces)"
                  Just Char
c | Bool -> Bool
not (Char -> Bool
isPrint Char
c) -> String
" (unprintable character)"
                  Maybe Char
_ -> String
""
              , String
":"
              ]

isSub :: Char -> Bool
isSub :: Char -> Bool
isSub Char
c = Char
'\x2080' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x2089'

readSubscript :: [Char] -> Integer
readSubscript :: String -> Integer
readSubscript = String -> Integer
forall a. Read a => String -> a
read (String -> Integer) -> (String -> String) -> String -> Integer
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Char) -> String -> String
forall a b. (a -> b) -> [a] -> [b]
map (\Char
c -> LexState -> Char
forall a. Enum a => LexState -> a
toEnum (Char -> LexState
forall a. Enum a => a -> LexState
fromEnum Char
c LexState -> LexState -> LexState
forall a. Num a => a -> a -> a
- LexState
0x2080 LexState -> LexState -> LexState
forall a. Num a => a -> a -> a
+ Char -> LexState
forall a. Enum a => a -> LexState
fromEnum Char
'0'))

postToken :: Token -> Token
postToken :: Token -> Token
postToken (TokId (Interval
r, String
"\x03bb")) = Symbol -> Interval -> Token
TokSymbol Symbol
SymLambda Interval
r
postToken (TokId (Interval
r, String
"\x2026")) = Symbol -> Interval -> Token
TokSymbol Symbol
SymEllipsis Interval
r
postToken (TokId (Interval
r, String
"\x2192")) = Symbol -> Interval -> Token
TokSymbol Symbol
SymArrow Interval
r
postToken (TokId (Interval
r, String
"\x2983")) = Symbol -> Interval -> Token
TokSymbol Symbol
SymDoubleOpenBrace Interval
r
postToken (TokId (Interval
r, String
"\x2984")) = Symbol -> Interval -> Token
TokSymbol Symbol
SymDoubleCloseBrace Interval
r
postToken (TokId (Interval
r, String
"\x2987")) = Symbol -> Interval -> Token
TokSymbol Symbol
SymOpenIdiomBracket Interval
r
postToken (TokId (Interval
r, String
"\x2988")) = Symbol -> Interval -> Token
TokSymbol Symbol
SymCloseIdiomBracket Interval
r
postToken (TokId (Interval
r, String
"\x2987\x2988")) = Symbol -> Interval -> Token
TokSymbol Symbol
SymEmptyIdiomBracket Interval
r
postToken (TokId (Interval
r, String
"\x2200")) = Keyword -> Interval -> Token
TokKeyword Keyword
KwForall Interval
r
postToken (TokId (Interval
r, String
s))
  | String
set String -> String -> Bool
forall a. Eq a => a -> a -> Bool
== String
"Set" Bool -> Bool -> Bool
&& (Char -> Bool) -> String -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Char -> Bool
isSub String
n = (Interval, Integer) -> Token
TokSetN (Interval
r, String -> Integer
readSubscript String
n)
  where
    (String
set, String
n)      = LexState -> String -> (String, String)
forall a. LexState -> [a] -> ([a], [a])
splitAt LexState
3 String
s
postToken (TokId (Interval
r, String
s))
  | String
prop String -> String -> Bool
forall a. Eq a => a -> a -> Bool
== String
"Prop" Bool -> Bool -> Bool
&& (Char -> Bool) -> String -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Char -> Bool
isSub String
n = (Interval, Integer) -> Token
TokPropN (Interval
r, String -> Integer
readSubscript String
n)
  where
    (String
prop, String
n)     = LexState -> String -> (String, String)
forall a. LexState -> [a] -> ([a], [a])
splitAt LexState
4 String
s
postToken Token
t = Token
t

{--------------------------------------------------------------------------
    Lex actions
 --------------------------------------------------------------------------}

-- | The most general way of parsing a token.
token :: (String -> Parser tok) -> LexAction tok
token :: (String -> Parser tok) -> LexAction tok
token String -> Parser tok
action AlexInput
inp AlexInput
inp' LexState
len =
    do  AlexInput -> Parser ()
setLexInput AlexInput
inp'
        String -> Parser ()
setPrevToken String
t
        PositionWithoutFile -> Parser ()
setLastPos (PositionWithoutFile -> Parser ())
-> PositionWithoutFile -> Parser ()
forall a b. (a -> b) -> a -> b
$ AlexInput -> PositionWithoutFile
lexPos AlexInput
inp
        String -> Parser tok
action String
t
    where
        t :: String
t = LexState -> String -> String
forall a. LexState -> [a] -> [a]
take LexState
len (String -> String) -> String -> String
forall a b. (a -> b) -> a -> b
$ AlexInput -> String
lexInput AlexInput
inp

-- | Parse a token from an 'Interval' and the lexed string.
withInterval :: ((Interval, String) -> tok) -> LexAction tok
withInterval :: ((Interval, String) -> tok) -> LexAction tok
withInterval (Interval, String) -> tok
f = (String -> Parser tok) -> LexAction tok
forall tok. (String -> Parser tok) -> LexAction tok
token ((String -> Parser tok) -> LexAction tok)
-> (String -> Parser tok) -> LexAction tok
forall a b. (a -> b) -> a -> b
$ \String
s -> do
                   Interval
r <- Parser Interval
getParseInterval
                   tok -> Parser tok
forall (m :: * -> *) a. Monad m => a -> m a
return (tok -> Parser tok) -> tok -> Parser tok
forall a b. (a -> b) -> a -> b
$ (Interval, String) -> tok
f (Interval
r,String
s)

-- | Like 'withInterval', but applies a function to the string.
withInterval' :: (String -> a) -> ((Interval, a) -> tok) -> LexAction tok
withInterval' :: (String -> a) -> ((Interval, a) -> tok) -> LexAction tok
withInterval' String -> a
f (Interval, a) -> tok
t = ((Interval, String) -> tok) -> LexAction tok
forall tok. ((Interval, String) -> tok) -> LexAction tok
withInterval ((Interval, a) -> tok
t ((Interval, a) -> tok)
-> ((Interval, String) -> (Interval, a))
-> (Interval, String)
-> tok
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Interval -> Interval
forall a. a -> a
id (Interval -> Interval)
-> (String -> a) -> (Interval, String) -> (Interval, a)
forall a c b d. (a -> c) -> (b -> d) -> (a, b) -> (c, d)
-*- String -> a
f))

-- | Return a token without looking at the lexed string.
withInterval_ :: (Interval -> r) -> LexAction r
withInterval_ :: (Interval -> r) -> LexAction r
withInterval_ Interval -> r
f = ((Interval, String) -> r) -> LexAction r
forall tok. ((Interval, String) -> tok) -> LexAction tok
withInterval (Interval -> r
f (Interval -> r)
-> ((Interval, String) -> Interval) -> (Interval, String) -> r
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Interval, String) -> Interval
forall a b. (a, b) -> a
fst)


-- | Executed for layout keywords. Enters the 'Agda.Syntax.Parser.Lexer.layout'
--   state and performs the given action.
withLayout :: LexAction r -> LexAction r
withLayout :: LexAction r -> LexAction r
withLayout LexAction r
a AlexInput
i1 AlexInput
i2 LexState
n =
    do  LexState -> Parser ()
pushLexState LexState
layout
        LexAction r
a AlexInput
i1 AlexInput
i2 LexState
n


-- | Enter a new state without consuming any input.
begin :: LexState -> LexAction Token
begin :: LexState -> LexAction Token
begin LexState
code AlexInput
_ AlexInput
_ LexState
_ =
    do  LexState -> Parser ()
pushLexState LexState
code
        Parser Token
lexToken


-- | Enter a new state throwing away the current lexeme.
begin_ :: LexState -> LexAction Token
begin_ :: LexState -> LexAction Token
begin_ LexState
code AlexInput
_ AlexInput
inp' LexState
_ =
    do  LexState -> Parser ()
pushLexState LexState
code
        AlexInput -> Parser Token
skipTo AlexInput
inp'

-- | Exit the current state throwing away the current lexeme.
end_ :: LexAction Token
end_ :: LexAction Token
end_ AlexInput
_ AlexInput
inp' LexState
_ =
    do  Parser ()
popLexState
        AlexInput -> Parser Token
skipTo AlexInput
inp'


-- | Enter a new state and perform the given action.
beginWith :: LexState -> LexAction a -> LexAction a
beginWith :: LexState -> LexAction a -> LexAction a
beginWith LexState
code LexAction a
a AlexInput
inp AlexInput
inp' LexState
n =
    do  LexState -> Parser ()
pushLexState LexState
code
        LexAction a
a AlexInput
inp AlexInput
inp' LexState
n

-- | Exit the current state and perform the given action.
endWith :: LexAction a -> LexAction a
endWith :: LexAction a -> LexAction a
endWith LexAction a
a AlexInput
inp AlexInput
inp' LexState
n =
    do  Parser ()
popLexState
        LexAction a
a AlexInput
inp AlexInput
inp' LexState
n


-- | Exit the current state without consuming any input
end :: LexAction Token
end :: LexAction Token
end AlexInput
_ AlexInput
_ LexState
_ =
    do  Parser ()
popLexState
        Parser Token
lexToken

-- | Parse a 'Keyword' token, triggers layout for 'layoutKeywords'.
keyword :: Keyword -> LexAction Token
keyword :: Keyword -> LexAction Token
keyword Keyword
k = LexAction Token -> LexAction Token
forall r. LexAction r -> LexAction r
layout (LexAction Token -> LexAction Token)
-> LexAction Token -> LexAction Token
forall a b. (a -> b) -> a -> b
$ (Interval -> Token) -> LexAction Token
forall r. (Interval -> r) -> LexAction r
withInterval_ (Keyword -> Interval -> Token
TokKeyword Keyword
k)
    where
        layout :: LexAction r -> LexAction r
layout | Keyword
k Keyword -> [Keyword] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Keyword]
layoutKeywords  = LexAction r -> LexAction r
forall r. LexAction r -> LexAction r
withLayout
               | Bool
otherwise              = LexAction r -> LexAction r
forall a. a -> a
id


-- | Parse a 'Symbol' token.
symbol :: Symbol -> LexAction Token
symbol :: Symbol -> LexAction Token
symbol Symbol
s = (Interval -> Token) -> LexAction Token
forall r. (Interval -> r) -> LexAction r
withInterval_ (Symbol -> Interval -> Token
TokSymbol Symbol
s)


-- | Parse a number.

number :: String -> Integer
number :: String -> Integer
number String
str = String -> Integer
forall a. Read a => String -> a
read (String -> Integer) -> String -> Integer
forall a b. (a -> b) -> a -> b
$ case String
str of
  Char
'0' : Char
'x' : String
num -> String
str
  String
_               -> [String] -> String
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat ([String] -> String) -> [String] -> String
forall a b. (a -> b) -> a -> b
$ (Char -> Bool) -> String -> [String]
forall a. (a -> Bool) -> [a] -> [[a]]
wordsBy (Char
'_' Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==) String
str

integer :: String -> Integer
integer :: String -> Integer
integer = \case
  Char
'-' : String
str -> - (String -> Integer
number String
str)
  String
str       -> String -> Integer
number String
str

-- | Parse a literal.
literal' :: (String -> a) -> (Range -> a -> Literal) -> LexAction Token
literal' :: (String -> a) -> (Range -> a -> Literal) -> LexAction Token
literal' String -> a
read Range -> a -> Literal
lit =
  (String -> a) -> ((Interval, a) -> Token) -> LexAction Token
forall a tok.
(String -> a) -> ((Interval, a) -> tok) -> LexAction tok
withInterval' String -> a
read (Literal -> Token
TokLiteral (Literal -> Token)
-> ((Interval, a) -> Literal) -> (Interval, a) -> Token
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Range -> a -> Literal) -> (Range, a) -> Literal
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry Range -> a -> Literal
lit ((Range, a) -> Literal)
-> ((Interval, a) -> (Range, a)) -> (Interval, a) -> Literal
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Interval -> Range) -> (Interval, a) -> (Range, a)
forall a c b. (a -> c) -> (a, b) -> (c, b)
mapFst Interval -> Range
forall t. HasRange t => t -> Range
getRange)

literal :: Read a => (Range -> a -> Literal) -> LexAction Token
literal :: (Range -> a -> Literal) -> LexAction Token
literal = (String -> a) -> (Range -> a -> Literal) -> LexAction Token
forall a.
(String -> a) -> (Range -> a -> Literal) -> LexAction Token
literal' String -> a
forall a. Read a => String -> a
read

-- | Parse an identifier. Identifiers can be qualified (see 'Name').
--   Example: @Foo.Bar.f@
identifier :: LexAction Token
identifier :: LexAction Token
identifier = (Either (Interval, String) [(Interval, String)] -> Token)
-> LexAction Token
forall a.
(Either (Interval, String) [(Interval, String)] -> a)
-> LexAction a
qualified (((Interval, String) -> Token)
-> ([(Interval, String)] -> Token)
-> Either (Interval, String) [(Interval, String)]
-> Token
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (Interval, String) -> Token
TokId [(Interval, String)] -> Token
TokQId)


-- | Parse a possibly qualified name.
qualified :: (Either (Interval, String) [(Interval, String)] -> a) -> LexAction a
qualified :: (Either (Interval, String) [(Interval, String)] -> a)
-> LexAction a
qualified Either (Interval, String) [(Interval, String)] -> a
tok =
    (String -> Parser a) -> LexAction a
forall tok. (String -> Parser tok) -> LexAction tok
token ((String -> Parser a) -> LexAction a)
-> (String -> Parser a) -> LexAction a
forall a b. (a -> b) -> a -> b
$ \String
s ->
    do  Interval
i <- Parser Interval
getParseInterval
        case Interval -> [String] -> [(Interval, String)]
mkName Interval
i ([String] -> [(Interval, String)])
-> [String] -> [(Interval, String)]
forall a b. (a -> b) -> a -> b
$ (Char -> Bool) -> String -> [String]
forall a. (a -> Bool) -> [a] -> [[a]]
wordsBy (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'.') String
s of
            []  -> String -> Parser a
forall a. String -> Parser a
lexError String
"lex error on .."
            [(Interval, String)
x] -> a -> Parser a
forall (m :: * -> *) a. Monad m => a -> m a
return (a -> Parser a) -> a -> Parser a
forall a b. (a -> b) -> a -> b
$ Either (Interval, String) [(Interval, String)] -> a
tok (Either (Interval, String) [(Interval, String)] -> a)
-> Either (Interval, String) [(Interval, String)] -> a
forall a b. (a -> b) -> a -> b
$ (Interval, String)
-> Either (Interval, String) [(Interval, String)]
forall a b. a -> Either a b
Left  (Interval, String)
x
            [(Interval, String)]
xs  -> a -> Parser a
forall (m :: * -> *) a. Monad m => a -> m a
return (a -> Parser a) -> a -> Parser a
forall a b. (a -> b) -> a -> b
$ Either (Interval, String) [(Interval, String)] -> a
tok (Either (Interval, String) [(Interval, String)] -> a)
-> Either (Interval, String) [(Interval, String)] -> a
forall a b. (a -> b) -> a -> b
$ [(Interval, String)]
-> Either (Interval, String) [(Interval, String)]
forall a b. b -> Either a b
Right [(Interval, String)]
xs
    where
        -- Compute the ranges for the substrings (separated by '.') of
        -- a name. Dots are included: the intervals generated for
        -- "A.B.x" correspond to "A.", "B." and "x".
        mkName :: Interval -> [String] -> [(Interval, String)]
        mkName :: Interval -> [String] -> [(Interval, String)]
mkName Interval
_ []     = []
        mkName Interval
i [String
x]    = [(Interval
i, String
x)]
        mkName Interval
i (String
x:[String]
xs) = (Interval
i0, String
x) (Interval, String) -> [(Interval, String)] -> [(Interval, String)]
forall a. a -> [a] -> [a]
: Interval -> [String] -> [(Interval, String)]
mkName Interval
i1 [String]
xs
            where
                p0 :: Position' SrcFile
p0 = Interval -> Position' SrcFile
forall a. Interval' a -> Position' a
iStart Interval
i
                p1 :: Position' SrcFile
p1 = Interval -> Position' SrcFile
forall a. Interval' a -> Position' a
iEnd Interval
i
                p' :: Position' SrcFile
p' = Position' SrcFile -> Char -> Position' SrcFile
forall a. Position' a -> Char -> Position' a
movePos (Position' SrcFile -> String -> Position' SrcFile
forall a. Position' a -> String -> Position' a
movePosByString Position' SrcFile
p0 String
x) Char
'.'
                i0 :: Interval
i0 = Position' SrcFile -> Position' SrcFile -> Interval
forall a. Position' a -> Position' a -> Interval' a
Interval Position' SrcFile
p0 Position' SrcFile
p'
                i1 :: Interval
i1 = Position' SrcFile -> Position' SrcFile -> Interval
forall a. Position' a -> Position' a -> Interval' a
Interval Position' SrcFile
p' Position' SrcFile
p1


{--------------------------------------------------------------------------
    Predicates
 --------------------------------------------------------------------------}

-- | True when the given character is the next character of the input string.
followedBy :: Char -> LexPredicate
followedBy :: Char -> LexPredicate
followedBy Char
c' ([LexState], ParseFlags)
_ AlexInput
_ LexState
_ AlexInput
inp =
    case AlexInput -> String
lexInput AlexInput
inp of
        []  -> Bool
False
        Char
c:String
_ -> Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
c'

-- | True if we are at the end of the file.
eof :: LexPredicate
eof :: LexPredicate
eof ([LexState], ParseFlags)
_ AlexInput
_ LexState
_ AlexInput
inp = String -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null (String -> Bool) -> String -> Bool
forall a b. (a -> b) -> a -> b
$ AlexInput -> String
lexInput AlexInput
inp

-- | True if the given state appears somewhere on the state stack
inState :: LexState -> LexPredicate
inState :: LexState -> LexPredicate
inState LexState
s ([LexState]
ls, ParseFlags
_) AlexInput
_ LexState
_ AlexInput
_ = LexState
s LexState -> [LexState] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [LexState]
ls