{-# LANGUAGE CPP #-}
{-# LANGUAGE OverloadedStrings #-}
module Language.Lua.LexerUtils where
import Data.Text (Text)
import qualified Data.Text as Text
#if !MIN_VERSION_base(4,8,0)
import Control.Applicative (Applicative(..))
#endif
import Language.Lua.Token
import AlexTools
-- | A 'Token' bundled with a 'SourceRange' and a piece of 'Text'.
--
-- Nothing in the visible part of this module builds or consumes this record;
-- the field meanings below are read off the field names and types.
data LexToken = LexToken
  { ltokToken :: Token
    -- ^ The token itself.
  , ltokRange :: SourceRange
    -- ^ Source range associated with the token.
  , ltokLexeme :: Text
    -- ^ Text associated with the token (presumably the matched source text;
    -- confirm with whoever constructs these).
  } deriving (Show)
-- | User state threaded through the lexer actions in this module: which kind
-- of multi-part token, if any, is currently being scanned.
data Mode
  = NormalMode
    -- ^ Not inside any multi-part token. 'endMode', 'invalidChar' and
    -- 'unterminatedString' all switch back to this state.
  | StringMode StringMode [SourceRange] Input
    -- ^ Inside a quoted string. Carries the quote style passed to
    -- 'enterString', the ranges recorded so far by 'invalidEsc' (most recent
    -- first), and the input captured when the string was entered.
  | CommentMode Input
    -- ^ Inside a comment entered through 'enterComment'; carries the input
    -- captured at that point.
  | QuoteMode Input
    -- Input captured by 'enterLongString' / 'enterLongComment'.
              Int
    -- Opening delimiter length; 'endStringPredicate' requires a candidate
    -- closing match to have exactly this length.
              Bool
    -- 'True' when entered via 'enterLongComment', 'False' when entered via
    -- 'enterLongString'.
-- | Quote style of a short string literal, supplied by the caller of
-- 'enterString' and stored in the 'StringMode' lexer state. None of the
-- actions visible in this module inspect it.
data StringMode = SingleQuote | DoubleQuote
-- | Give up on the string literal currently being lexed.
--
-- Resets the lexer to 'NormalMode' and produces a single 'TokUntermString'
-- lexeme, built by 'longToken' from the input that was saved when the string
-- was entered. The state is matched lazily, so running this outside
-- 'StringMode' only fails once that saved input is actually demanded.
unterminatedString :: Action Mode [Lexeme Token]
unterminatedString =
  getLexerState >>= \ ~(StringMode _ _ opened) ->
    setLexerState NormalMode >>
    longToken opened TokUntermString
-- | Flag the current match as unexpected.
--
-- Switches the lexer back to 'NormalMode' first, then hands 'TokUnexpected'
-- to 'lexeme' to produce the resulting lexeme list.
invalidChar :: Action Mode [Lexeme Token]
invalidChar = setLexerState NormalMode >> lexeme TokUnexpected
-- | Record the current match as a bad escape inside the string being lexed.
--
-- The recorded range runs from the position at the start of the match to the
-- previous-position field of the input at its end. It is pushed onto the
-- front of the error list held in 'StringMode'; the quote style and the
-- saved start-of-string input are left untouched. No lexemes are emitted.
invalidEsc :: Action Mode [Lexeme Token]
invalidEsc =
  do escStart <- startInput
     escEnd <- endInput
     mode <- getLexerState
     -- 'let' patterns are lazy, so a non-'StringMode' state is only noticed
     -- if one of these components is later forced.
     let StringMode quoteStyle earlier opened = mode
         badEscape = SourceRange
           { sourceFrom = inputPos escStart
           , sourceTo = inputPrev escEnd
           }
     setLexerState (StringMode quoteStyle (badEscape : earlier) opened)
     return []
-- | Lexemes to emit when the input runs out while the lexer is in @mode@.
--
-- 'NormalMode' and 'CommentMode' yield nothing. An open 'QuoteMode' yields
-- 'TokUntermComment' when its comment flag is set and 'TokUntermString'
-- otherwise; an open 'StringMode' yields 'TokUntermString'. The reported
-- range starts at the saved opening input's position and ends at the
-- previous-position field of the input passed in; the lexeme text is the
-- saved opening input's text as-is (it is not truncated here).
checkEOF :: Mode -> Input -> [Lexeme Token]
checkEOF mode Input { inputPrev = lastPos } =
  case mode of
    NormalMode -> []
    CommentMode _ -> []
    StringMode _ _ opened -> unterminated TokUntermString opened
    QuoteMode opened _ isComment
      | isComment -> unterminated TokUntermComment opened
      | otherwise -> unterminated TokUntermString opened
  where
    unterminated tok Input { inputPos = firstPos, inputText = remaining } =
      [ Lexeme
          { lexemeToken = tok
          , lexemeRange =
              SourceRange { sourceFrom = firstPos, sourceTo = lastPos }
          , lexemeText = remaining
          }
      ]
-- | Begin a long string.
--
-- Saves the input at the start of the current match together with the
-- match length, and moves the lexer into 'QuoteMode' with the comment flag
-- cleared. Emits no lexemes.
enterLongString :: Action Mode [Lexeme Token]
enterLongString =
  startInput >>= \opened ->
  matchLength >>= \delimLen ->
  setLexerState (QuoteMode opened delimLen False) >>
  return []
-- | Begin a quoted string of the given style.
--
-- Moves the lexer into 'StringMode' with an empty error list and the input
-- at the start of the current match. Emits no lexemes.
enterString :: StringMode -> Action Mode [Lexeme Token]
enterString sm =
  startInput >>= \opened ->
  setLexerState (StringMode sm [] opened) >>
  return []
-- | Begin a long comment.
--
-- Like 'enterLongString', but sets the comment flag in 'QuoteMode' and
-- stores the match length minus two as the delimiter length.
-- NOTE(review): the 2 presumably accounts for a leading @--@ in the match;
-- confirm against the lexer rule that triggers this action.
enterLongComment :: Action Mode [Lexeme Token]
enterLongComment =
  startInput >>= \opened ->
  matchLength >>= \matched ->
  setLexerState (QuoteMode opened (matched - 2) True) >>
  return []
-- | Begin a comment handled through 'CommentMode'.
--
-- Saves the input at the start of the current match in the lexer state and
-- emits no lexemes.
enterComment :: Action Mode [Lexeme Token]
enterComment =
  startInput >>= \opened ->
  setLexerState (CommentMode opened) >>
  return []
-- | Build one lexeme that stretches from a previously saved input to the end
-- of the current match.
--
-- The first argument is the input saved when the token began; the second is
-- the token to attach. The range runs from the saved position to the
-- previous-position field of the current end input, and the lexeme text is
-- the saved input's text cut to the inclusive index distance between those
-- two positions.
longToken ::
  Input ->
  Token ->
  Action Mode [Lexeme Token]
longToken Input { inputPos = firstPos, inputText = fromStart } tok =
  endInput >>= \(Input { inputPrev = lastPos }) ->
    return [spanning lastPos]
  where
    spanning lastPos =
      Lexeme
        { lexemeToken = tok
        , lexemeRange =
            SourceRange { sourceFrom = firstPos, sourceTo = lastPos }
        , lexemeText = Text.take (charCount lastPos) fromStart
        }

    -- Both endpoints are inclusive, hence the extra one.
    charCount lastPos = 1 + sourceIndex lastPos - sourceIndex firstPos
-- | Decide whether a candidate closing delimiter ends the current long
-- string or long comment.
--
-- True only in 'QuoteMode', and only when the 'Int' argument equals the
-- delimiter length stored on entry. Both 'Input' arguments are ignored.
-- NOTE(review): the argument order matches the shape of an Alex predicate
-- (user state, input before, match length, input after) — confirm at the
-- use site in the lexer specification.
endStringPredicate ::
  Mode ->
  Input ->
  Int ->
  Input ->
  Bool
endStringPredicate (QuoteMode _ openLen _) _ closeLen _ = closeLen == openLen
endStringPredicate _ _ _ _ = False
-- | Finish the multi-part token in progress and return to 'NormalMode'.
--
-- The lexeme is built by 'longToken' from the input saved on entry:
--
-- * 'StringMode': 'TokSLit' if no bad escapes were recorded, otherwise
--   'TokUnexpected'.
-- * 'CommentMode': 'TokComment'.
-- * 'QuoteMode': 'TokComment' when the comment flag is set, else 'TokSLit'.
--
-- Being called in 'NormalMode' is treated as an internal error.
endMode :: Action Mode [Lexeme Token]
endMode =
  do finished <- getLexerState
     setLexerState NormalMode
     close finished
  where
    close (StringMode _ errs opened) = longToken opened (stringToken errs)
    close (CommentMode opened) = longToken opened TokComment
    close (QuoteMode opened _ isComment) = longToken opened (quoteToken isComment)
    close NormalMode = error "endMode: internal lexer error"

    -- A string with any recorded escape error is reported as unexpected.
    stringToken errs
      | null errs = TokSLit
      | otherwise = TokUnexpected

    quoteToken isComment
      | isComment = TokComment
      | otherwise = TokSLit
-- | Strip a leading @#@ line from the input.
--
-- When the text starts with @#@, everything up to (but not including) the
-- first newline is removed, so the newline itself stays in the result.
-- Text that does not start with @#@ is returned unchanged.
dropSpecialComment :: Text -> Text
dropSpecialComment text =
  case Text.uncons text of
    Just ('#', _) -> Text.dropWhile (/= '\n') text
    _ -> text
-- | Remove whitespace and comment lexemes, keeping all others in their
-- original order.
dropWhiteSpace :: [Lexeme Token] -> [Lexeme Token]
dropWhiteSpace lexemes =
  [ l | l <- lexemes, significant (lexemeToken l) ]
  where
    significant TokWhiteSpace = False
    significant TokComment = False
    significant _ = True