{-| The code to lex string and character literals. Basically the same code
    as in GHC.
-}
module Agda.Syntax.Parser.StringLiterals
    ( litString, litChar
    ) where

import Data.Bifunctor
import Data.Char
import qualified Data.Text as T

import Agda.Syntax.Common (pattern Ranged)
import Agda.Syntax.Parser.Alex
import Agda.Syntax.Parser.Monad
import Agda.Syntax.Parser.Tokens
import Agda.Syntax.Parser.LookAhead
import Agda.Syntax.Position
import Agda.Syntax.Literal

{--------------------------------------------------------------------------
    Exported actions
 --------------------------------------------------------------------------}

-- | Lex a string literal. Assumes that a double quote has been lexed.
litString :: LexAction Token
litString :: LexAction Token
litString = forall tok.
Char -> (Interval -> String -> Parser tok) -> LexAction tok
stringToken Char
'"' forall a b. (a -> b) -> a -> b
$ \ Interval
i String
s ->
  forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ RLiteral -> Token
TokLiteral forall a b. (a -> b) -> a -> b
$ forall a. Range -> a -> Ranged a
Ranged (forall a. HasRange a => a -> Range
getRange Interval
i) forall a b. (a -> b) -> a -> b
$ Text -> Literal
LitString forall a b. (a -> b) -> a -> b
$ String -> Text
T.pack String
s

{-| Lex a character literal. Assumes that a single quote has been lexed.  A
    character literal is lexed in exactly the same way as a string literal.
    Only before returning the token do we check that the lexed string is of
    length 1. This is maybe not the most efficient way of doing things, but on
    the other hand it will only be inefficient if there is a lexical error.
-}
litChar :: LexAction Token
litChar :: LexAction Token
litChar = forall tok.
Char -> (Interval -> String -> Parser tok) -> LexAction tok
stringToken Char
'\'' forall a b. (a -> b) -> a -> b
$ \ Interval
i -> \case
  [Char
c] -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ RLiteral -> Token
TokLiteral forall a b. (a -> b) -> a -> b
$ forall a. Range -> a -> Ranged a
Ranged (forall a. HasRange a => a -> Range
getRange Interval
i) forall a b. (a -> b) -> a -> b
$ Char -> Literal
LitChar Char
c
  String
_   -> forall a. String -> Parser a
lexError String
"character literal must contain a single character"


{--------------------------------------------------------------------------
    Errors
 --------------------------------------------------------------------------}

-- | Custom error function.
litError :: String -> LookAhead a
litError :: forall a. String -> LookAhead a
litError String
msg =
    do  LookAhead ()
sync
        forall a. Parser a -> LookAhead a
liftP forall a b. (a -> b) -> a -> b
$ forall a. String -> Parser a
lexError forall a b. (a -> b) -> a -> b
$
            String
"Lexical error in string or character literal: " forall a. [a] -> [a] -> [a]
++ String
msg


{--------------------------------------------------------------------------
    The meat
 --------------------------------------------------------------------------}

-- | The general function to lex a string or character literal token. The
--   character argument is the delimiter (@\"@ for strings and @\'@ for
--   characters).
stringToken :: Char -> (Interval -> String -> Parser tok) -> LexAction tok
stringToken :: forall tok.
Char -> (Interval -> String -> Parser tok) -> LexAction tok
stringToken Char
del Interval -> String -> Parser tok
mkTok = forall r.
(PreviousInput -> PreviousInput -> Int -> Parser r) -> LexAction r
LexAction forall a b. (a -> b) -> a -> b
$ \ PreviousInput
inp PreviousInput
inp' Int
n ->
    do  PositionWithoutFile -> Parser ()
setLastPos (forall a. Position' a -> Position' a
backupPos forall a b. (a -> b) -> a -> b
$ PreviousInput -> PositionWithoutFile
lexPos PreviousInput
inp')
        PreviousInput -> Parser ()
setLexInput PreviousInput
inp'
        -- TODO: Should setPrevToken be run here? Compare with
        -- Agda.Syntax.Parser.LexActions.token.
        String
tok <- forall a.
(forall a. String -> LookAhead a) -> LookAhead a -> Parser a
runLookAhead forall a. String -> LookAhead a
litError forall a b. (a -> b) -> a -> b
$ Char -> String -> LookAhead String
lexString Char
del String
""
        Interval
i   <- Parser Interval
getParseInterval
        Interval -> String -> Parser tok
mkTok Interval
i String
tok


-- | This is where the work happens. The string argument is an accumulating
--   parameter for the string being lexed.
lexString :: Char -> String -> LookAhead String
lexString :: Char -> String -> LookAhead String
lexString Char
del String
s =

    do  Char
c <- LookAhead Char
nextChar
        case Char
c of

            Char
c | Char
c forall a. Eq a => a -> a -> Bool
== Char
del  -> LookAhead ()
sync forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return (forall a. [a] -> [a]
reverse String
s)

            Char
'\\' ->
                do  Char
c' <- LookAhead Char
nextChar
                    case Char
c' of
                        Char
'&'             -> LookAhead ()
sync forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Char -> String -> LookAhead String
lexString Char
del String
s
                        Char
c | Char -> Bool
isSpace Char
c   -> LookAhead ()
sync forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Char -> String -> LookAhead String
lexStringGap Char
del String
s
                        Char
_               -> LookAhead String
normalChar

            Char
_ -> LookAhead String
normalChar
    where
        normalChar :: LookAhead String
normalChar =
            do  LookAhead ()
rollback
                Char
c <- LookAhead Char
lexChar
                Char -> String -> LookAhead String
lexString Char
del (Char
cforall a. a -> [a] -> [a]
:String
s)


-- | A string gap consists of whitespace (possibly including line breaks)
--   enclosed in backslashes. The gap is not part of the resulting string.
lexStringGap :: Char -> String -> LookAhead String
lexStringGap :: Char -> String -> LookAhead String
lexStringGap Char
del String
s =
    do  Char
c <- LookAhead Char
eatNextChar
        case Char
c of
            Char
'\\'            -> Char -> String -> LookAhead String
lexString Char
del String
s
            Char
c | Char -> Bool
isSpace Char
c   -> Char -> String -> LookAhead String
lexStringGap Char
del String
s
            Char
_               -> forall a. String -> LookAhead a
lookAheadError String
"non-space in string gap"

-- | Lex a single character.
lexChar :: LookAhead Char
lexChar :: LookAhead Char
lexChar =
    do  Char
c <- LookAhead Char
eatNextChar
        case Char
c of
            Char
'\\'    -> LookAhead Char
lexEscape
            Char
_       -> forall (m :: * -> *) a. Monad m => a -> m a
return Char
c

-- | Lex an escaped character. Assumes the backslash has been lexed.
lexEscape :: LookAhead Char
lexEscape :: LookAhead Char
lexEscape =
    do  Char
c <- LookAhead Char
eatNextChar
        case Char
c of
            Char
'^'     -> do Char
c <- LookAhead Char
eatNextChar
                          if Char
c forall a. Ord a => a -> a -> Bool
>= Char
'@' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'_'
                            then forall (m :: * -> *) a. Monad m => a -> m a
return (Int -> Char
chr (Char -> Int
ord Char
c forall a. Num a => a -> a -> a
- Char -> Int
ord Char
'@'))
                            else forall a. String -> LookAhead a
lookAheadError String
"invalid control character"

            Char
'x'     -> (Char -> Bool) -> Int -> (Char -> Int) -> LookAhead Char
readNum Char -> Bool
isHexDigit Int
16 Char -> Int
digitToInt
            Char
'o'     -> (Char -> Bool) -> Int -> (Char -> Int) -> LookAhead Char
readNum Char -> Bool
isOctDigit  Int
8 Char -> Int
digitToInt
            Char
x | Char -> Bool
isDigit Char
x
                    -> (Char -> Bool) -> Int -> (Char -> Int) -> Int -> LookAhead Char
readNumAcc Char -> Bool
isDigit Int
10 Char -> Int
digitToInt (Char -> Int
digitToInt Char
x)

            Char
c ->
                -- Try to match the input (starting with c) against the
                -- silly escape codes.
                do  Char
esc <- forall a.
Char -> [(String, LookAhead a)] -> LookAhead a -> LookAhead a
match' Char
c (forall a b. (a -> b) -> [a] -> [b]
map (forall (p :: * -> * -> *) b c a.
Bifunctor p =>
(b -> c) -> p a b -> p a c
second forall (m :: * -> *) a. Monad m => a -> m a
return) [(String, Char)]
sillyEscapeChars)
                                    (forall a. String -> LookAhead a
lookAheadError String
"bad escape code")
                    LookAhead ()
sync
                    forall (m :: * -> *) a. Monad m => a -> m a
return Char
esc

-- | Read a number in the specified base.
readNum :: (Char -> Bool) -> Int -> (Char -> Int) -> LookAhead Char
readNum :: (Char -> Bool) -> Int -> (Char -> Int) -> LookAhead Char
readNum Char -> Bool
isDigit Int
base Char -> Int
conv =
    do  Char
c <- LookAhead Char
eatNextChar
        if Char -> Bool
isDigit Char
c
            then (Char -> Bool) -> Int -> (Char -> Int) -> Int -> LookAhead Char
readNumAcc Char -> Bool
isDigit Int
base Char -> Int
conv (Char -> Int
conv Char
c)
            else forall a. String -> LookAhead a
lookAheadError String
"non-digit in numeral"

-- | Same as 'readNum' but with an accumulating parameter.
readNumAcc :: (Char -> Bool) -> Int -> (Char -> Int) -> Int -> LookAhead Char
readNumAcc :: (Char -> Bool) -> Int -> (Char -> Int) -> Int -> LookAhead Char
readNumAcc Char -> Bool
isDigit Int
base Char -> Int
conv Int
i = Int -> LookAhead Char
scan Int
i
    where
        scan :: Int -> LookAhead Char
scan Int
i =
            do  PreviousInput
inp <- LookAhead PreviousInput
getInput
                Char
c   <- LookAhead Char
nextChar
                case Char
c of
                    Char
c | Char -> Bool
isDigit Char
c -> Int -> LookAhead Char
scan (Int
iforall a. Num a => a -> a -> a
*Int
base forall a. Num a => a -> a -> a
+ Char -> Int
conv Char
c)
                    Char
_             ->
                        do  PreviousInput -> LookAhead ()
setInput PreviousInput
inp
                            LookAhead ()
sync
                            if Int
i forall a. Ord a => a -> a -> Bool
>= Char -> Int
ord forall a. Bounded a => a
minBound Bool -> Bool -> Bool
&& Int
i forall a. Ord a => a -> a -> Bool
<= Char -> Int
ord forall a. Bounded a => a
maxBound
                                then forall (m :: * -> *) a. Monad m => a -> m a
return (Int -> Char
chr Int
i)
                                else forall a. String -> LookAhead a
lookAheadError String
"character literal out of bounds"

-- | The escape codes.
sillyEscapeChars :: [(String, Char)]
sillyEscapeChars :: [(String, Char)]
sillyEscapeChars =
    [ (String
"a", Char
'\a')
    , (String
"b", Char
'\b')
    , (String
"f", Char
'\f')
    , (String
"n", Char
'\n')
    , (String
"r", Char
'\r')
    , (String
"t", Char
'\t')
    , (String
"v", Char
'\v')
    , (String
"\\", Char
'\\')
    , (String
"\"", Char
'\"')
    , (String
"'", Char
'\'')
    , (String
"NUL", Char
'\NUL')
    , (String
"SOH", Char
'\SOH')
    , (String
"STX", Char
'\STX')
    , (String
"ETX", Char
'\ETX')
    , (String
"EOT", Char
'\EOT')
    , (String
"ENQ", Char
'\ENQ')
    , (String
"ACK", Char
'\ACK')
    , (String
"BEL", Char
'\BEL')
    , (String
"BS", Char
'\BS')
    , (String
"HT", Char
'\HT')
    , (String
"LF", Char
'\LF')
    , (String
"VT", Char
'\VT')
    , (String
"FF", Char
'\FF')
    , (String
"CR", Char
'\CR')
    , (String
"SO", Char
'\SO')
    , (String
"SI", Char
'\SI')
    , (String
"DLE", Char
'\DLE')
    , (String
"DC1", Char
'\DC1')
    , (String
"DC2", Char
'\DC2')
    , (String
"DC3", Char
'\DC3')
    , (String
"DC4", Char
'\DC4')
    , (String
"NAK", Char
'\NAK')
    , (String
"SYN", Char
'\SYN')
    , (String
"ETB", Char
'\ETB')
    , (String
"CAN", Char
'\CAN')
    , (String
"EM", Char
'\EM')
    , (String
"SUB", Char
'\SUB')
    , (String
"ESC", Char
'\ESC')
    , (String
"FS", Char
'\FS')
    , (String
"GS", Char
'\GS')
    , (String
"RS", Char
'\RS')
    , (String
"US", Char
'\US')
    , (String
"SP", Char
'\SP')
    , (String
"DEL", Char
'\DEL')
    ]