{-# LANGUAGE CPP, DeriveDataTypeable #-}
module Language.Python.Common.Token (
Token (..),
debugTokenString,
tokenString,
hasLiteral,
TokenClass (..),
classifyToken
) where
import Language.Python.Common.Pretty
import Language.Python.Common.SrcLocation (SrcSpan (..), Span(getSpan))
import Data.Data
-- | Lexical tokens of Python source code.
--
-- Every constructor stores the source span it covers in 'token_span'.
-- The constructors for comments, identifiers, strings and numbers also
-- have a 'token_literal' field; the numeric constructors additionally have
-- a 'token_integer' or 'token_double' field.
--
-- 'token_literal', 'token_integer' and 'token_double' are partial record
-- selectors: they only exist on the constructors that declare them. Check
-- with 'hasLiteral' before selecting 'token_literal'.
--
-- The order of the constructors determines the derived 'Ord' instance.
data Token
   -- Layout
   = IndentToken { token_span :: !SrcSpan }             -- indentation (no concrete text)
   | DedentToken { token_span :: !SrcSpan }             -- dedentation (no concrete text)
   | NewlineToken { token_span :: !SrcSpan }            -- newline (no concrete text)
   | LineJoinToken { token_span :: !SrcSpan }           -- line join: backslash

   -- Comments
   | CommentToken { token_span :: !SrcSpan, token_literal :: !String }   -- comment with its text

   -- Identifiers
   | IdentifierToken { token_span :: !SrcSpan, token_literal :: !String }   -- identifier with its text

   -- Literals: strings
   | StringToken { token_span :: !SrcSpan, token_literal :: !String }          -- string literal
   | ByteStringToken { token_span :: !SrcSpan, token_literal :: !String }      -- byte string literal
   | UnicodeStringToken { token_span :: !SrcSpan, token_literal :: !String }   -- unicode string literal

   -- Literals: numbers (literal text plus a numeric field)
   | IntegerToken { token_span :: !SrcSpan, token_literal :: !String, token_integer :: !Integer }       -- integer literal
   | LongIntegerToken { token_span :: !SrcSpan, token_literal :: !String, token_integer :: !Integer }   -- long integer literal
   | FloatToken { token_span :: !SrcSpan, token_literal :: !String, token_double :: !Double }           -- floating point literal
   | ImaginaryToken { token_span :: !SrcSpan, token_literal :: !String, token_double :: !Double }       -- imaginary literal

   -- Keywords
   | DefToken { token_span :: !SrcSpan }        -- keyword: def
   | WhileToken { token_span :: !SrcSpan }      -- keyword: while
   | IfToken { token_span :: !SrcSpan }         -- keyword: if
   | TrueToken { token_span :: !SrcSpan }       -- keyword: True
   | FalseToken { token_span :: !SrcSpan }      -- keyword: False
   | ReturnToken { token_span :: !SrcSpan }     -- keyword: return
   | TryToken { token_span :: !SrcSpan }        -- keyword: try
   | ExceptToken { token_span :: !SrcSpan }     -- keyword: except
   | RaiseToken { token_span :: !SrcSpan }      -- keyword: raise
   | InToken { token_span :: !SrcSpan }         -- keyword: in
   | IsToken { token_span :: !SrcSpan }         -- keyword: is
   | LambdaToken { token_span :: !SrcSpan }     -- keyword: lambda
   | ClassToken { token_span :: !SrcSpan }      -- keyword: class
   | FinallyToken { token_span :: !SrcSpan }    -- keyword: finally
   | NoneToken { token_span :: !SrcSpan }       -- keyword: None
   | ForToken { token_span :: !SrcSpan }        -- keyword: for
   | FromToken { token_span :: !SrcSpan }       -- keyword: from
   | GlobalToken { token_span :: !SrcSpan }     -- keyword: global
   | WithToken { token_span :: !SrcSpan }       -- keyword: with
   | AsToken { token_span :: !SrcSpan }         -- keyword: as
   | ElifToken { token_span :: !SrcSpan }       -- keyword: elif
   | YieldToken { token_span :: !SrcSpan }      -- keyword: yield
   | AssertToken { token_span :: !SrcSpan }     -- keyword: assert
   | ImportToken { token_span :: !SrcSpan }     -- keyword: import
   | PassToken { token_span :: !SrcSpan }       -- keyword: pass
   | BreakToken { token_span :: !SrcSpan }      -- keyword: break
   | ContinueToken { token_span :: !SrcSpan }   -- keyword: continue
   | DeleteToken { token_span :: !SrcSpan }     -- keyword: del (the Python delete statement)
   | ElseToken { token_span :: !SrcSpan }       -- keyword: else
   | NotToken { token_span :: !SrcSpan }        -- keyword: not
   | AndToken { token_span :: !SrcSpan }        -- keyword: and
   | OrToken { token_span :: !SrcSpan }         -- keyword: or
   | NonLocalToken { token_span :: !SrcSpan }   -- keyword: nonlocal
   | AsyncToken { token_span :: !SrcSpan }      -- keyword: async
   | AwaitToken { token_span :: !SrcSpan }      -- keyword: await
   | PrintToken { token_span :: !SrcSpan }      -- keyword: print
   | ExecToken { token_span :: !SrcSpan }       -- keyword: exec

   -- Delimiters
   | AtToken { token_span :: !SrcSpan }                   -- at sign: @
   | LeftRoundBracketToken { token_span :: !SrcSpan }     -- bracket: (
   | RightRoundBracketToken { token_span :: !SrcSpan }    -- bracket: )
   | LeftSquareBracketToken { token_span :: !SrcSpan }    -- bracket: [
   | RightSquareBracketToken { token_span :: !SrcSpan }   -- bracket: ]
   | LeftBraceToken { token_span :: !SrcSpan }            -- bracket: {
   | RightBraceToken { token_span :: !SrcSpan }           -- bracket: }
   | DotToken { token_span :: !SrcSpan }                  -- dot: .
   | CommaToken { token_span :: !SrcSpan }                -- comma: ,
   | SemiColonToken { token_span :: !SrcSpan }            -- semicolon: ;
   | ColonToken { token_span :: !SrcSpan }                -- colon: :
   | EllipsisToken { token_span :: !SrcSpan }             -- ellipsis: ...
   | RightArrowToken { token_span :: !SrcSpan }           -- right arrow: ->

   -- Assignment
   | AssignToken { token_span :: !SrcSpan }             -- assignment: =
   | PlusAssignToken { token_span :: !SrcSpan }         -- assignment: +=
   | MinusAssignToken { token_span :: !SrcSpan }        -- assignment: -=
   | MultAssignToken { token_span :: !SrcSpan }         -- assignment: *=
   | DivAssignToken { token_span :: !SrcSpan }          -- assignment: /=
   | ModAssignToken { token_span :: !SrcSpan }          -- assignment: %=
   | PowAssignToken { token_span :: !SrcSpan }          -- assignment: **=
   | BinAndAssignToken { token_span :: !SrcSpan }       -- assignment: &=
   | BinOrAssignToken { token_span :: !SrcSpan }        -- assignment: |=
   | BinXorAssignToken { token_span :: !SrcSpan }       -- assignment: ^=
   | LeftShiftAssignToken { token_span :: !SrcSpan }    -- assignment: <<=
   | RightShiftAssignToken { token_span :: !SrcSpan }   -- assignment: >>=
   | FloorDivAssignToken { token_span :: !SrcSpan }     -- assignment: //=
   | MatrixMultAssignToken { token_span :: !SrcSpan }   -- assignment: @=
   | BackQuoteToken { token_span :: !SrcSpan }          -- back quote: `

   -- Operators
   | PlusToken { token_span :: !SrcSpan }                -- operator: +
   | MinusToken { token_span :: !SrcSpan }               -- operator: -
   | MultToken { token_span :: !SrcSpan }                -- operator: *
   | DivToken { token_span :: !SrcSpan }                 -- operator: /
   | GreaterThanToken { token_span :: !SrcSpan }         -- operator: >
   | LessThanToken { token_span :: !SrcSpan }            -- operator: <
   | EqualityToken { token_span :: !SrcSpan }            -- operator: ==
   | GreaterThanEqualsToken { token_span :: !SrcSpan }   -- operator: >=
   | LessThanEqualsToken { token_span :: !SrcSpan }      -- operator: <=
   | ExponentToken { token_span :: !SrcSpan }            -- operator: **
   | BinaryOrToken { token_span :: !SrcSpan }            -- operator: |
   | XorToken { token_span :: !SrcSpan }                 -- operator: ^
   | BinaryAndToken { token_span :: !SrcSpan }           -- operator: &
   | ShiftLeftToken { token_span :: !SrcSpan }           -- operator: <<
   | ShiftRightToken { token_span :: !SrcSpan }          -- operator: >>
   | ModuloToken { token_span :: !SrcSpan }              -- operator: %
   | FloorDivToken { token_span :: !SrcSpan }            -- operator: //
   | TildeToken { token_span :: !SrcSpan }               -- operator: ~
   | NotEqualsToken { token_span :: !SrcSpan }           -- operator: !=
   | NotEqualsV2Token { token_span :: !SrcSpan }         -- operator: <> (alternative not-equals spelling)

   -- End of input
   | EOFToken { token_span :: !SrcSpan }                 -- end of input (no concrete text)
   deriving (Eq,Ord,Show,Typeable,Data)
-- | The span of a token is the source span stored in its 'token_span' field.
instance Span Token where
   getSpan tok = token_span tok
-- | Render a token for debugging purposes: the name of its constructor,
-- followed by its source span and, for tokens that carry one, the literal
-- text of the token.
debugTokenString :: Token -> String
debugTokenString tok = render (constructorDoc <+> spanDoc <+> literalDoc)
   where
   -- Name of the data constructor, obtained generically via 'Data'.
   constructorDoc = text (show (toConstr tok))
   -- Pretty-printed source location of the token.
   spanDoc = pretty (token_span tok)
   -- 'token_literal' is a partial selector, so it is only applied after
   -- 'hasLiteral' has confirmed that the field exists.
   literalDoc
      | hasLiteral tok = text (token_literal tok)
      | otherwise = empty
-- | Test whether a token carries a 'token_literal' field. This holds for
-- comments, identifiers, string literals and numeric literals, and for no
-- other token.
hasLiteral :: Token -> Bool
hasLiteral CommentToken {} = True
hasLiteral IdentifierToken {} = True
hasLiteral StringToken {} = True
hasLiteral ByteStringToken {} = True
hasLiteral UnicodeStringToken {} = True
hasLiteral IntegerToken {} = True
hasLiteral LongIntegerToken {} = True
hasLiteral FloatToken {} = True
hasLiteral ImaginaryToken {} = True
hasLiteral _ = False
-- | Broad categories of tokens, as assigned by 'classifyToken'.
--
-- The order of the constructors determines the derived 'Ord' instance.
data TokenClass
   = Comment       -- comment tokens
   | Number        -- integer, long integer, float and imaginary literals
   | Identifier    -- identifier tokens
   | Punctuation   -- comma, semicolon, colon, right arrow and back quote
   | Bracket       -- round, square and brace brackets
   | Layout        -- indent, dedent, newline, line join and end of input
   | Keyword       -- reserved words; classifyToken also places the at sign and the ellipsis here
   | String        -- string, byte string and unicode string literals
   | Operator      -- arithmetic, comparison and bitwise operators, and the dot
   | Assignment    -- plain and augmented assignment
   deriving (Show, Eq, Ord)
-- | Assign a token to its broad category ('TokenClass'). The function is
-- total: every constructor of 'Token' has a clause.
classifyToken :: Token -> TokenClass
-- Layout tokens, including line joins and the end-of-input marker.
classifyToken IndentToken {} = Layout
classifyToken DedentToken {} = Layout
classifyToken NewlineToken {} = Layout
classifyToken LineJoinToken {} = Layout
classifyToken EOFToken {} = Layout
-- Comments and identifiers.
classifyToken CommentToken {} = Comment
classifyToken IdentifierToken {} = Identifier
-- String literals of every flavour.
classifyToken StringToken {} = String
classifyToken ByteStringToken {} = String
classifyToken UnicodeStringToken {} = String
-- Numeric literals.
classifyToken IntegerToken {} = Number
classifyToken LongIntegerToken {} = Number
classifyToken FloatToken {} = Number
classifyToken ImaginaryToken {} = Number
-- Keywords. The at sign and the ellipsis are also classified as keywords.
classifyToken DefToken {} = Keyword
classifyToken WhileToken {} = Keyword
classifyToken IfToken {} = Keyword
classifyToken TrueToken {} = Keyword
classifyToken FalseToken {} = Keyword
classifyToken ReturnToken {} = Keyword
classifyToken TryToken {} = Keyword
classifyToken ExceptToken {} = Keyword
classifyToken RaiseToken {} = Keyword
classifyToken InToken {} = Keyword
classifyToken IsToken {} = Keyword
classifyToken LambdaToken {} = Keyword
classifyToken ClassToken {} = Keyword
classifyToken FinallyToken {} = Keyword
classifyToken NoneToken {} = Keyword
classifyToken ForToken {} = Keyword
classifyToken FromToken {} = Keyword
classifyToken GlobalToken {} = Keyword
classifyToken WithToken {} = Keyword
classifyToken AsToken {} = Keyword
classifyToken ElifToken {} = Keyword
classifyToken YieldToken {} = Keyword
classifyToken AsyncToken {} = Keyword
classifyToken AwaitToken {} = Keyword
classifyToken AssertToken {} = Keyword
classifyToken ImportToken {} = Keyword
classifyToken PassToken {} = Keyword
classifyToken BreakToken {} = Keyword
classifyToken ContinueToken {} = Keyword
classifyToken DeleteToken {} = Keyword
classifyToken ElseToken {} = Keyword
classifyToken NotToken {} = Keyword
classifyToken AndToken {} = Keyword
classifyToken OrToken {} = Keyword
classifyToken NonLocalToken {} = Keyword
classifyToken PrintToken {} = Keyword
classifyToken ExecToken {} = Keyword
classifyToken AtToken {} = Keyword
classifyToken EllipsisToken {} = Keyword
-- Brackets.
classifyToken LeftRoundBracketToken {} = Bracket
classifyToken RightRoundBracketToken {} = Bracket
classifyToken LeftSquareBracketToken {} = Bracket
classifyToken RightSquareBracketToken {} = Bracket
classifyToken LeftBraceToken {} = Bracket
classifyToken RightBraceToken {} = Bracket
-- Punctuation.
classifyToken CommaToken {} = Punctuation
classifyToken SemiColonToken {} = Punctuation
classifyToken ColonToken {} = Punctuation
classifyToken RightArrowToken {} = Punctuation
classifyToken BackQuoteToken {} = Punctuation
-- Plain and augmented assignment.
classifyToken AssignToken {} = Assignment
classifyToken PlusAssignToken {} = Assignment
classifyToken MinusAssignToken {} = Assignment
classifyToken MultAssignToken {} = Assignment
classifyToken DivAssignToken {} = Assignment
classifyToken ModAssignToken {} = Assignment
classifyToken PowAssignToken {} = Assignment
classifyToken BinAndAssignToken {} = Assignment
classifyToken BinOrAssignToken {} = Assignment
classifyToken BinXorAssignToken {} = Assignment
classifyToken LeftShiftAssignToken {} = Assignment
classifyToken RightShiftAssignToken {} = Assignment
classifyToken FloorDivAssignToken {} = Assignment
classifyToken MatrixMultAssignToken {} = Assignment
-- Operators; the dot is classified as an operator as well.
classifyToken DotToken {} = Operator
classifyToken PlusToken {} = Operator
classifyToken MinusToken {} = Operator
classifyToken MultToken {} = Operator
classifyToken DivToken {} = Operator
classifyToken GreaterThanToken {} = Operator
classifyToken LessThanToken {} = Operator
classifyToken EqualityToken {} = Operator
classifyToken GreaterThanEqualsToken {} = Operator
classifyToken LessThanEqualsToken {} = Operator
classifyToken ExponentToken {} = Operator
classifyToken BinaryOrToken {} = Operator
classifyToken XorToken {} = Operator
classifyToken BinaryAndToken {} = Operator
classifyToken ShiftLeftToken {} = Operator
classifyToken ShiftRightToken {} = Operator
classifyToken ModuloToken {} = Operator
classifyToken FloorDivToken {} = Operator
classifyToken TildeToken {} = Operator
classifyToken NotEqualsToken {} = Operator
classifyToken NotEqualsV2Token {} = Operator
-- | Produce the concrete Python syntax of a token.
--
-- Tokens that carry a literal (comments, identifiers, strings, numbers)
-- yield that literal. Indent, dedent, newline and end-of-input tokens have
-- no visible text and yield the empty string. Every other token yields its
-- fixed spelling.
tokenString :: Token -> String
tokenString token =
   case token of
      -- Invisible layout tokens.
      IndentToken {} -> ""
      DedentToken {} -> ""
      NewlineToken {} -> ""
      -- Tokens whose text is stored in the token itself.
      CommentToken {} -> token_literal token
      IdentifierToken {} -> token_literal token
      StringToken {} -> token_literal token
      ByteStringToken {} -> token_literal token
      UnicodeStringToken {} -> token_literal token
      IntegerToken {} -> token_literal token
      LongIntegerToken {} -> token_literal token
      FloatToken {} -> token_literal token
      ImaginaryToken {} -> token_literal token
      -- Keywords.
      DefToken {} -> "def"
      WhileToken {} -> "while"
      IfToken {} -> "if"
      TrueToken {} -> "True"
      FalseToken {} -> "False"
      ReturnToken {} -> "return"
      TryToken {} -> "try"
      ExceptToken {} -> "except"
      RaiseToken {} -> "raise"
      InToken {} -> "in"
      IsToken {} -> "is"
      LambdaToken {} -> "lambda"
      ClassToken {} -> "class"
      FinallyToken {} -> "finally"
      NoneToken {} -> "None"
      ForToken {} -> "for"
      FromToken {} -> "from"
      GlobalToken {} -> "global"
      WithToken {} -> "with"
      AsToken {} -> "as"
      ElifToken {} -> "elif"
      YieldToken {} -> "yield"
      AsyncToken {} -> "async"
      AwaitToken {} -> "await"
      AssertToken {} -> "assert"
      ImportToken {} -> "import"
      PassToken {} -> "pass"
      BreakToken {} -> "break"
      ContinueToken {} -> "continue"
      -- The Python keyword is spelled "del"; "delete" is not Python syntax.
      DeleteToken {} -> "del"
      ElseToken {} -> "else"
      NotToken {} -> "not"
      AndToken {} -> "and"
      OrToken {} -> "or"
      NonLocalToken {} -> "nonlocal"
      PrintToken {} -> "print"
      ExecToken {} -> "exec"
      -- Delimiters. The at sign is the symbol "@", not the word "at"
      -- (compare "@=" for MatrixMultAssignToken below).
      AtToken {} -> "@"
      LeftRoundBracketToken {} -> "("
      RightRoundBracketToken {} -> ")"
      LeftSquareBracketToken {} -> "["
      RightSquareBracketToken {} -> "]"
      LeftBraceToken {} -> "{"
      RightBraceToken {} -> "}"
      DotToken {} -> "."
      CommaToken {} -> ","
      SemiColonToken {} -> ";"
      ColonToken {} -> ":"
      EllipsisToken {} -> "..."
      RightArrowToken {} -> "->"
      -- Plain and augmented assignment.
      AssignToken {} -> "="
      PlusAssignToken {} -> "+="
      MinusAssignToken {} -> "-="
      MultAssignToken {} -> "*="
      DivAssignToken {} -> "/="
      ModAssignToken {} -> "%="
      PowAssignToken {} -> "**="
      BinAndAssignToken {} -> "&="
      BinOrAssignToken {} -> "|="
      BinXorAssignToken {} -> "^="
      LeftShiftAssignToken {} -> "<<="
      RightShiftAssignToken {} -> ">>="
      FloorDivAssignToken {} -> "//="
      MatrixMultAssignToken {} -> "@="
      BackQuoteToken {} -> "`"
      -- Operators.
      PlusToken {} -> "+"
      MinusToken {} -> "-"
      MultToken {} -> "*"
      DivToken {} -> "/"
      GreaterThanToken {} -> ">"
      LessThanToken {} -> "<"
      EqualityToken {} -> "=="
      GreaterThanEqualsToken {} -> ">="
      LessThanEqualsToken {} -> "<="
      ExponentToken {} -> "**"
      BinaryOrToken {} -> "|"
      XorToken {} -> "^"
      BinaryAndToken {} -> "&"
      ShiftLeftToken {} -> "<<"
      ShiftRightToken {} -> ">>"
      ModuloToken {} -> "%"
      FloorDivToken {} -> "//"
      TildeToken {} -> "~"
      NotEqualsToken {} -> "!="
      NotEqualsV2Token {} -> "<>"
      -- Explicit line continuation: a single backslash.
      LineJoinToken {} -> "\\"
      -- End of input has no visible text.
      EOFToken {} -> ""