module Language.Lua.Lexer
( luaLexer
, LexicalError(..)
, TokenStream(..)
, runLexer
, streamToList
, streamToEitherList
) where
import Language.Lua.Token
import Data.Char (chr, isDigit, isHexDigit, isSpace)
import Data.List (foldl')
import Data.Maybe (fromMaybe)
import Data.Monoid
import Language.Lexer.Applicative
import Numeric (readHex)
import Text.Regex.Applicative
luaLexer :: Lexer Token
luaLexer = luaTokens <> luaWhitespace
luaTokens :: Lexer Token
luaTokens = token (longest luaToken)
<> token (longestShortest luaLongBracketStringLitPrefix luaLongBracketStringLitSuffix)
luaToken :: RE Char Token
luaToken =
TkAnd <$ "and"
<|> TkBreak <$ "break"
<|> TkDo <$ "do"
<|> TkElse <$ "else"
<|> TkElseif <$ "elseif"
<|> TkEnd <$ "end"
<|> TkFalse <$ "false"
<|> TkFor <$ "for"
<|> TkFunction <$ "function"
<|> TkGoto <$ "goto"
<|> TkIf <$ "if"
<|> TkIn <$ "in"
<|> TkLocal <$ "local"
<|> TkNil <$ "nil"
<|> TkNot <$ "not"
<|> TkOr <$ "or"
<|> TkRepeat <$ "repeat"
<|> TkReturn <$ "return"
<|> TkThen <$ "then"
<|> TkTrue <$ "true"
<|> TkUntil <$ "until"
<|> TkWhile <$ "while"
<|> TkAssign <$ "="
<|> TkNeq <$ "~="
<|> TkPlus <$ "+"
<|> TkDash <$ "-"
<|> TkMult <$ "*"
<|> TkFloatDiv <$ "/"
<|> TkModulo <$ "%"
<|> TkExponent <$ "^"
<|> TkLength <$ "#"
<|> TkBitwiseAnd <$ "&"
<|> TkBitwiseOr <$ "|"
<|> TkLt <$ "<"
<|> TkGt <$ ">"
<|> TkLShift <$ "<<"
<|> TkRShift <$ ">>"
<|> TkFloorDiv <$ "//"
<|> TkEq <$ "=="
<|> TkLeq <$ "<="
<|> TkGeq <$ ">="
<|> TkLParen <$ "("
<|> TkRParen <$ ")"
<|> TkLBrace <$ "{"
<|> TkRBrace <$ "}"
<|> TkLBracket <$ "["
<|> TkRBracket <$ "]"
<|> TkSemi <$ ";"
<|> TkColon <$ ":"
<|> TkComma <$ ","
<|> TkLabel <$ "::"
<|> TkDot <$ "."
<|> TkConcat <$ ".."
<|> TkVararg <$ "..."
<|> TkTilde <$ "~"
<|> TkQuote <$ "'"
<|> TkDoubleQuote <$ "\""
<|> TkIdent <$> luaIdentifier
<|> TkStringLit <$> luaStringLit
<|> TkFloatLit <$> luaFloatLit
<|> TkIntLit <$> luaIntLit
luaIdentifier :: RE Char String
luaIdentifier = (:)
<$> psym identFirst
<*> many (psym identRest)
where
identFirst :: Char -> Bool
identFirst = (||) <$> (== '_') <*> isAlpha
identRest :: Char -> Bool
identRest = (||) <$> identFirst <*> isDigit
luaStringLit :: RE Char String
luaStringLit = singleQuoted <|> doubleQuoted
where
singleQuoted :: RE Char String
singleQuoted = quoted '\''
doubleQuoted :: RE Char String
doubleQuoted = quoted '\"'
quoted :: Char -> RE Char String
quoted c = between c c $ many (escapeSequence <|> not_c)
where
not_c :: RE Char Char
not_c = psym (/= c)
escapeSequence :: RE Char Char
escapeSequence = sym '\\' *> sequences
where
sequences :: RE Char Char
sequences =
'\a' <$ sym 'a'
<|> '\b' <$ sym 'b'
<|> '\f' <$ sym 'f'
<|> '\n' <$ sym 'n'
<|> '\r' <$ sym 'r'
<|> '\t' <$ sym 't'
<|> '\v' <$ sym 'v'
<|> sym '\\'
<|> sym '"'
<|> sym '\''
<|> sym '\n'
<|> hexEscape
<|> decimalEscape
where
hexEscape :: RE Char Char
hexEscape = go <$> (sym 'x' *> hexDigit) <*> hexDigit
where
go :: Char -> Char -> Char
go x y = let [(n,"")] = readHex [x,y] in chr n
decimalEscape :: RE Char Char
decimalEscape = chr . read <$> betweenN 1 3 digit
luaIntLit :: RE Char String
luaIntLit = hexLit <|> some digit
where
hexLit :: RE Char String
hexLit = (\a b c -> a:b:c)
<$> sym '0'
<*> oneOf "xX"
<*> some hexDigit
luaFloatLit :: RE Char String
luaFloatLit = hexLit <|> decimalLit
where
hexLit :: RE Char String
hexLit = (\a b cs ds -> a : b : cs ++ ds)
<$> sym '0'
<*> oneOf "xX"
<*> some hexDigit
<*> andOr (fractionalSuffix hexDigit) (exponentPart "pP")
decimalLit :: RE Char String
decimalLit = (++) <$> some digit <*> andOr (fractionalSuffix digit) (exponentPart "eE")
exponentPart :: String -> RE Char String
exponentPart cs = f
<$> oneOf cs
<*> optional (oneOf "-+")
<*> some digit
where
f :: Char -> Maybe Char -> String -> String
f x Nothing zs = x:zs
f x (Just y) zs = x:y:zs
fractionalSuffix :: RE Char Char -> RE Char String
fractionalSuffix c = (:) <$> sym '.' <*> many c
luaLongBracketStringLitPrefix :: RE Char String
luaLongBracketStringLitPrefix = between '[' '[' (many (sym '='))
luaLongBracketStringLitSuffix :: String -> RE Char Token
luaLongBracketStringLitSuffix eqs = TkStringLit <$>
(optional (sym '\n') *> many anySym <* between ']' ']' (exactlyN (length eqs) (sym '=')))
luaWhitespace :: Lexer Token
luaWhitespace = mconcat
[ whitespace $ longest (psym isSpace)
, whitespace $ longestShortest luaBlockCommentPrefix luaBlockCommentSuffix
, whitespace $ longest luaLineComment
]
luaBlockCommentPrefix :: RE Char String
luaBlockCommentPrefix = "--[["
luaBlockCommentSuffix :: String -> RE Char String
luaBlockCommentSuffix _ = many anySym *> "]]"
luaLineComment :: RE Char String
luaLineComment =
"--"
<|> "--\n"
<|> "--" *> psym p *> many (psym (/= '\n'))
<|> "--[\n"
<|> "--[" *> psym p *> many (psym (/= '\n'))
where
p :: Char -> Bool
p c = c /= '[' && c /= '\n'
oneOf :: Eq s => [s] -> RE s s
oneOf = foldl' (\acc x -> acc <|> sym x) empty
andOr :: forall m. Monoid m => RE Char m -> RE Char m -> RE Char m
andOr rx ry = ry <|> liftA2 (\x my -> x <> fromMaybe mempty my) rx (optional ry)
between :: Char -> Char -> RE Char a -> RE Char a
between x y z = sym x *> z <* sym y
hexDigit :: RE Char Char
hexDigit = psym isHexDigit
digit :: RE Char Char
digit = psym isDigit
isAlpha :: Char -> Bool
isAlpha = (||) <$> isLowercase <*> isUppercase
isLowercase :: Char -> Bool
isLowercase = charBetween 'a' 'z'
isUppercase :: Char -> Bool
isUppercase = charBetween 'A' 'Z'
charBetween :: Char -> Char -> Char -> Bool
charBetween x y = (&&) <$> (>= x) <*> (<= y)
exactlyN :: Alternative f => Int -> f a -> f [a]
exactlyN 0 _ = pure []
exactlyN 1 f = pure <$> f
exactlyN n f = (:) <$> f <*> exactlyN (n1) f
betweenN :: Alternative f => Int -> Int -> f a -> f [a]
betweenN x y _ | x > y = error "betweenN: x > y"
betweenN 0 y f = upToN y f
betweenN x y f = liftA2 (++) (exactlyN x f) (upToN (yx) f)
upToN :: Alternative f => Int -> f a -> f [a]
upToN 0 _ = pure []
upToN 1 f = fmap pure f <|> pure []
upToN n f = let g = upToN (n1) f
in liftA2 (:) f g <|> g