{-# LANGUAGE CPP #-}
{-# LANGUAGE OverloadedStrings #-}
module Language.Lua.LexerUtils where
import Control.DeepSeq (NFData(..))
import Data.Char (ord)
import Data.Text (Text)
import qualified Data.Text as Text
import Data.Word (Word8)
#if !MIN_VERSION_base(4,8,0)
import Control.Applicative (Applicative(..))
#endif
import Language.Lua.Token
data LexToken = LexToken
{ ltokToken :: Token
, ltokRange :: SourceRange
, ltokLexeme :: Text
} deriving (Show,Eq)
type Action =
AlexInput ->
AlexInput ->
Int ->
Mode ->
(Mode, [LexToken])
data AlexInput = AlexInput
{ input_pos :: {-# UNPACK #-} !SourcePos
, input_prev :: {-# UNPACK #-} !SourcePos
, input_text :: {-# UNPACK #-} !Text
}
data Mode
= NormalMode
| StringMode StringMode [SourceRange] AlexInput
| CommentMode AlexInput
| QuoteMode AlexInput
Int
Bool
data StringMode = SingleQuote | DoubleQuote
unterminatedString :: Action
unterminatedString _inp1 inp2 _len mode =
case mode of
StringMode _strTy _errs inp0 -> ( NormalMode
, [ longToken inp0 inp2 TokUntermString ]
)
_ -> error "[bug] unterminatedString outside a string."
invalidChar :: Action
invalidChar inp1 _ _ _ =
( NormalMode
, [ LexToken { ltokToken = TokUnexpected
, ltokRange = singleRange (input_pos inp1)
, ltokLexeme = Text.take 1 (input_text inp1)
}
]
)
invalidEsc :: Action
invalidEsc inp1 inp2 _ mode =
case mode of
StringMode m errs inp0 -> (StringMode m (err : errs) inp0, [])
where err = SourceRange { sourceFrom = input_pos inp1
, sourceTo = input_prev inp2
}
_ -> error "[bug] invalidEsc outside a string."
checkEOF :: Mode -> AlexInput -> [LexToken]
checkEOF mode AlexInput { input_prev = end } =
case mode of
NormalMode {} -> []
CommentMode {} -> []
QuoteMode inp _ True -> ret TokUntermComment inp
QuoteMode inp _ _ -> ret TokUntermString inp
StringMode _ _ inp -> ret TokUntermString inp
where
ret t AlexInput { input_pos = start, input_text = rest } =
[ LexToken { ltokToken = t
, ltokRange = SourceRange { sourceFrom = start, sourceTo = end }
, ltokLexeme = rest
} ]
enterLongString :: Action
enterLongString inp _ len _ = (QuoteMode inp len False, [])
enterString :: StringMode -> Action
enterString sm inp _ _ _ = (StringMode sm [] inp, [])
enterLongComment :: Action
enterLongComment inp _ len _ = (QuoteMode inp (len - 2) True, [])
enterComment :: Action
enterComment inp _ _ _ = (CommentMode inp, [])
longToken ::
AlexInput ->
AlexInput ->
Token ->
LexToken
longToken AlexInput { input_pos = start, input_text = text }
AlexInput { input_prev = end }
t =
LexToken
{ ltokToken = t
, ltokRange = SourceRange { sourceFrom = start, sourceTo = end }
, ltokLexeme = Text.take lexLen text
}
where
lexLen = 1 + sourcePosIndex end - sourcePosIndex start
endStringPredicate ::
Mode ->
AlexInput ->
Int ->
AlexInput ->
Bool
endStringPredicate mode _ len _ =
case mode of
QuoteMode _ startlen _ -> len == startlen
_ -> False
endMode :: Action
endMode _ inp2 _ mode = (NormalMode, [lexeme])
where
lexeme =
case mode of
StringMode _ err inp -> longToken inp inp2
$ if null err then TokSLit
else TokUnexpected
CommentMode inp -> longToken inp inp2 TokComment
QuoteMode inp _ isComment -> longToken inp inp2
$ if isComment then TokComment
else TokSLit
NormalMode -> error "endMode: internal lexer error"
tok :: Token -> Action
tok token inp1 inp2 len mode = (mode, [t])
where
t = LexToken
{ ltokToken = token
, ltokRange = SourceRange { sourceFrom = input_pos inp1
, sourceTo = input_prev inp2
}
, ltokLexeme = Text.take len (input_text inp1)
}
dropSpecialComment :: Text -> Text
dropSpecialComment text
| "#" `Text.isPrefixOf` text = Text.dropWhile (/='\n') text
| otherwise = text
dropWhiteSpace :: [LexToken] -> [LexToken]
dropWhiteSpace = filter (not . isWhite . ltokToken)
where
isWhite TokWhiteSpace = True
isWhite TokComment = True
isWhite _ = False
data SourcePos = SourcePos
{ sourcePosName :: String
, sourcePosIndex, sourcePosLine, sourcePosColumn :: {-# UNPACK #-}!Int
}
deriving (Show,Eq)
instance NFData SourcePos where
rnf (SourcePos _ _ _ _) = ()
data SourceRange = SourceRange
{ sourceFrom :: !SourcePos, sourceTo :: !SourcePos
} deriving (Show,Eq)
instance NFData SourceRange where
rnf (SourceRange _ _) = ()
singleRange :: SourcePos -> SourceRange
singleRange p = SourceRange { sourceFrom = p, sourceTo = p }
startPos :: String -> SourcePos
startPos n = SourcePos n 0 1 1
showPos :: SourcePos -> String
showPos p = show (sourcePosLine p) ++ ":" ++ show (sourcePosColumn p)
showRange :: SourceRange -> String
showRange p = showPos (sourceFrom p) ++ "--" ++ showPos (sourceTo p)
alexInputPrevChar :: a -> ()
alexInputPrevChar _ = ()
alexGetByte :: AlexInput -> Maybe (Word8,AlexInput)
alexGetByte AlexInput { input_pos = p, input_text = text } =
do (c,text') <- Text.uncons text
let p' = move p c
x = fromIntegral (min 127 (ord c))
inp = AlexInput { input_prev = p, input_pos = p', input_text = text' }
x `seq` inp `seq` return (x, inp)
move :: SourcePos -> Char -> SourcePos
move (SourcePos name index line column) c =
case c of
'\t' -> SourcePos name (index+1) line (((column + 7) `div` 8) * 8 + 1)
'\n' -> SourcePos name (index+1) (line + 1) 1
_ -> SourcePos name (index+1) line (column + 1)