{-# OPTIONS_HADDOCK hide #-}
module Language.Haskell.Exts.InternalLexer (Token(..), showToken, lexer, topLexer) where
import Language.Haskell.Exts.ParseMonad
import Language.Haskell.Exts.SrcLoc hiding (loc)
import Language.Haskell.Exts.Comments
import Language.Haskell.Exts.Extension
import Language.Haskell.Exts.ExtScheme
import Prelude hiding (id, exponent)
import Data.Char
import Data.Ratio
import Data.List (intercalate, isPrefixOf)
import Control.Monad (when)
data Token
= VarId String
| LabelVarId String
| QVarId (String,String)
| IDupVarId (String)
| ILinVarId (String)
| ConId String
| QConId (String,String)
| DVarId [String]
| VarSym String
| ConSym String
| QVarSym (String,String)
| QConSym (String,String)
| IntTok (Integer, String)
| FloatTok (Rational, String)
| Character (Char, String)
| StringTok (String, String)
| IntTokHash (Integer, String)
| WordTokHash (Integer, String)
| FloatTokHash (Rational, String)
| DoubleTokHash (Rational, String)
| CharacterHash (Char, String)
| StringHash (String, String)
| LeftParen
| RightParen
| LeftHashParen
| RightHashParen
| SemiColon
| LeftCurly
| RightCurly
| VRightCurly
| LeftSquare
| RightSquare
| ParArrayLeftSquare
| ParArrayRightSquare
| Comma
| Underscore
| BackQuote
| Dot
| DotDot
| Colon
| QuoteColon
| DoubleColon
| Equals
| Backslash
| Bar
| LeftArrow
| RightArrow
| At
| TApp
| Tilde
| DoubleArrow
| Minus
| Exclamation
| Star
| LeftArrowTail
| RightArrowTail
| LeftDblArrowTail
| RightDblArrowTail
| THExpQuote
| THPatQuote
| THDecQuote
| THTypQuote
| THCloseQuote
| THIdEscape (String)
| THParenEscape
| THVarQuote
| THTyQuote
| THQuasiQuote (String,String)
| RPGuardOpen
| RPGuardClose
| RPCAt
| XCodeTagOpen
| XCodeTagClose
| XStdTagOpen
| XStdTagClose
| XCloseTagOpen
| XEmptyTagClose
| XChildTagOpen
| XPCDATA String
| XRPatOpen
| XRPatClose
| PragmaEnd
| RULES
| INLINE Bool
| INLINE_CONLIKE
| SPECIALISE
| SPECIALISE_INLINE Bool
| SOURCE
| DEPRECATED
| WARNING
| SCC
| GENERATED
| CORE
| UNPACK
| NOUNPACK
| OPTIONS (Maybe String,String)
| LANGUAGE
| ANN
| MINIMAL
| NO_OVERLAP
| OVERLAP
| OVERLAPPING
| OVERLAPPABLE
| OVERLAPS
| INCOHERENT
| COMPLETE
| KW_As
| KW_By
| KW_Case
| KW_Class
| KW_Data
| KW_Default
| KW_Deriving
| KW_Do
| KW_MDo
| KW_Else
| KW_Family
| KW_Forall
| KW_Group
| KW_Hiding
| KW_If
| KW_Import
| KW_In
| KW_Infix
| KW_InfixL
| KW_InfixR
| KW_Instance
| KW_Let
| KW_Module
| KW_NewType
| KW_Of
| KW_Proc
| KW_Rec
| KW_Role
| KW_Then
| KW_Type
| KW_Using
| KW_Where
| KW_Qualified
| KW_Pattern
| KW_Stock
| KW_Anyclass
| KW_Via
| KW_Foreign
| KW_Export
| KW_Safe
| KW_Unsafe
| KW_Threadsafe
| KW_Interruptible
| KW_StdCall
| KW_CCall
| KW_CPlusPlus
| KW_DotNet
| KW_Jvm
| KW_Js
| KW_JavaScript
| KW_CApi
| EOF
deriving (Eq,Show)
reserved_ops :: [(String,(Token, Maybe ExtScheme))]
reserved_ops = [
( "..", (DotDot, Nothing) ),
( ":", (Colon, Nothing) ),
( "::", (DoubleColon, Nothing) ),
( "=", (Equals, Nothing) ),
( "\\", (Backslash, Nothing) ),
( "|", (Bar, Nothing) ),
( "<-", (LeftArrow, Nothing) ),
( "->", (RightArrow, Nothing) ),
( "@", (At, Nothing) ),
( "@:", (RPCAt, Just (Any [RegularPatterns])) ),
( "~", (Tilde, Nothing) ),
( "=>", (DoubleArrow, Nothing) ),
( "*", (Star, Just (Any [KindSignatures])) ),
( "[:", (ParArrayLeftSquare, Just (Any [ParallelArrays])) ),
( ":]", (ParArrayRightSquare, Just (Any [ParallelArrays])) ),
( "-<", (LeftArrowTail, Just (Any [Arrows])) ),
( ">-", (RightArrowTail, Just (Any [Arrows])) ),
( "-<<", (LeftDblArrowTail, Just (Any [Arrows])) ),
( ">>-", (RightDblArrowTail, Just (Any [Arrows])) ),
( "\x2190", (LeftArrow, Just (Any [UnicodeSyntax])) ),
( "\x2192", (RightArrow, Just (Any [UnicodeSyntax])) ),
( "\x21d2", (DoubleArrow, Just (Any [UnicodeSyntax])) ),
( "\x2237", (DoubleColon, Just (Any [UnicodeSyntax])) ),
( "\x2919", (LeftArrowTail, Just (All [UnicodeSyntax, Arrows])) ),
( "\x291a", (RightArrowTail, Just (All [UnicodeSyntax, Arrows])) ),
( "\x291b", (LeftDblArrowTail, Just (All [UnicodeSyntax, Arrows])) ),
( "\x291c", (RightDblArrowTail, Just (All [UnicodeSyntax, Arrows])) ),
( "\x2605", (Star, Just (All [UnicodeSyntax, KindSignatures])) ),
( "\x2200", (KW_Forall, Just (All [UnicodeSyntax, ExplicitForAll])) )
]
special_varops :: [(String,(Token, Maybe ExtScheme))]
special_varops = [
( ".", (Dot, Just (Any [ExplicitForAll, ExistentialQuantification])) ),
( "-", (Minus, Nothing) ),
( "!", (Exclamation, Nothing) )
]
reserved_ids :: [(String,(Token, Maybe ExtScheme))]
reserved_ids = [
( "_", (Underscore, Nothing) ),
( "by", (KW_By, Just (Any [TransformListComp])) ),
( "case", (KW_Case, Nothing) ),
( "class", (KW_Class, Nothing) ),
( "data", (KW_Data, Nothing) ),
( "default", (KW_Default, Nothing) ),
( "deriving", (KW_Deriving, Nothing) ),
( "do", (KW_Do, Nothing) ),
( "else", (KW_Else, Nothing) ),
( "family", (KW_Family, Just (Any [TypeFamilies])) ),
( "forall", (KW_Forall, Just (Any [ExplicitForAll, ExistentialQuantification])) ),
( "group", (KW_Group, Just (Any [TransformListComp])) ),
( "if", (KW_If, Nothing) ),
( "import", (KW_Import, Nothing) ),
( "in", (KW_In, Nothing) ),
( "infix", (KW_Infix, Nothing) ),
( "infixl", (KW_InfixL, Nothing) ),
( "infixr", (KW_InfixR, Nothing) ),
( "instance", (KW_Instance, Nothing) ),
( "let", (KW_Let, Nothing) ),
( "mdo", (KW_MDo, Just (Any [RecursiveDo])) ),
( "module", (KW_Module, Nothing) ),
( "newtype", (KW_NewType, Nothing) ),
( "of", (KW_Of, Nothing) ),
( "proc", (KW_Proc, Just (Any [Arrows])) ),
( "rec", (KW_Rec, Just (Any [Arrows, RecursiveDo, DoRec])) ),
( "then", (KW_Then, Nothing) ),
( "type", (KW_Type, Nothing) ),
( "using", (KW_Using, Just (Any [TransformListComp])) ),
( "where", (KW_Where, Nothing) ),
( "role", (KW_Role, Just (Any [RoleAnnotations]))),
( "pattern", (KW_Pattern, Just (Any [PatternSynonyms]))),
( "stock", (KW_Stock, Just (Any [DerivingStrategies]))),
( "anyclass", (KW_Anyclass, Just (Any [DerivingStrategies]))),
( "via", (KW_Via, Just (Any [DerivingVia]))),
( "foreign", (KW_Foreign, Just (Any [ForeignFunctionInterface])) )
]
special_varids :: [(String,(Token, Maybe ExtScheme))]
special_varids = [
( "as", (KW_As, Nothing) ),
( "qualified", (KW_Qualified, Nothing) ),
( "hiding", (KW_Hiding, Nothing) ),
( "export", (KW_Export, Just (Any [ForeignFunctionInterface])) ),
( "safe", (KW_Safe, Just (Any [ForeignFunctionInterface, SafeImports, Safe, Trustworthy])) ),
( "unsafe", (KW_Unsafe, Just (Any [ForeignFunctionInterface])) ),
( "threadsafe", (KW_Threadsafe, Just (Any [ForeignFunctionInterface])) ),
( "interruptible", (KW_Interruptible, Just (Any [InterruptibleFFI])) ),
( "stdcall", (KW_StdCall, Just (Any [ForeignFunctionInterface])) ),
( "ccall", (KW_CCall, Just (Any [ForeignFunctionInterface])) ),
( "cplusplus", (KW_CPlusPlus, Just (Any [ForeignFunctionInterface])) ),
( "dotnet", (KW_DotNet, Just (Any [ForeignFunctionInterface])) ),
( "jvm", (KW_Jvm, Just (Any [ForeignFunctionInterface])) ),
( "js", (KW_Js, Just (Any [ForeignFunctionInterface])) ),
( "javascript", (KW_JavaScript, Just (Any [ForeignFunctionInterface])) ),
( "capi", (KW_CApi, Just (Any [CApiFFI])) )
]
pragmas :: [(String,Token)]
pragmas = [
( "rules", RULES ),
( "inline", INLINE True ),
( "noinline", INLINE False ),
( "notinline", INLINE False ),
( "specialise", SPECIALISE ),
( "specialize", SPECIALISE ),
( "source", SOURCE ),
( "deprecated", DEPRECATED ),
( "warning", WARNING ),
( "ann", ANN ),
( "scc", SCC ),
( "generated", GENERATED ),
( "core", CORE ),
( "unpack", UNPACK ),
( "nounpack", NOUNPACK ),
( "language", LANGUAGE ),
( "minimal", MINIMAL ),
( "no_overlap", NO_OVERLAP ),
( "overlap", OVERLAP ),
( "overlaps", OVERLAPS ),
( "overlapping", OVERLAPPING ),
( "overlappable", OVERLAPPABLE ),
( "incoherent", INCOHERENT ),
( "complete", COMPLETE ),
( "options", OPTIONS undefined )
]
isIdent, isHSymbol, isPragmaChar :: Char -> Bool
isIdent c = isAlphaNum c || c == '\'' || c == '_'
isHSymbol c = c `elem` ":!#%&*./?@\\-" || ((isSymbol c || isPunctuation c) && not (c `elem` "(),;[]`{}_\"'"))
isPragmaChar c = isAlphaNum c || c == '_'
isOpSymbol :: Char -> Bool
isOpSymbol c = c `elem` "!#$%&*+./<=>?@\\^|-~"
isPossiblyQvar :: Char -> Bool
isPossiblyQvar c = isIdent (toLower c) || c == '.'
matchChar :: Char -> String -> Lex a ()
matchChar c msg = do
s <- getInput
if null s || head s /= c then fail msg else discard 1
lexer :: (Loc Token -> P a) -> P a
lexer = runL topLexer
topLexer :: Lex a (Loc Token)
topLexer = do
b <- pullCtxtFlag
if b then
setBOL >> getSrcLocL >>= \l -> return (Loc (mkSrcSpan l l) VRightCurly)
else do
bol <- checkBOL
(bol', ws) <- lexWhiteSpace bol
ec <- getExtContext
case ec of
Just ChildCtxt | not bol' && ws -> getSrcLocL >>= \l -> return $ Loc (mkSrcSpan l l) $ XPCDATA " "
_ -> do startToken
sl <- getSrcLocL
t <- if bol' then lexBOL
else lexToken
el <- getSrcLocL
return $ Loc (mkSrcSpan sl el) t
lexWhiteSpace :: Bool -> Lex a (Bool, Bool)
lexWhiteSpace bol = do
s <- getInput
ignL <- ignoreLinePragmasL
case s of
'{':'-':'#':rest | isRecognisedPragma rest -> return (bol, False)
| isLinePragma rest && not ignL -> do
(l, fn) <- lexLinePragma
setSrcLineL l
setLineFilenameL fn
lexWhiteSpace True
'{':'-':_ -> do
loc <- getSrcLocL
discard 2
(bol1, c) <- lexNestedComment bol ""
loc2 <- getSrcLocL
pushComment $ Comment True (mkSrcSpan loc loc2) (reverse c)
(bol2, _) <- lexWhiteSpace bol1
return (bol2, True)
'-':'-':s1 | all (== '-') (takeWhile isHSymbol s1) -> do
loc <- getSrcLocL
discard 2
dashes <- lexWhile (== '-')
rest <- lexWhile (/= '\n')
s' <- getInput
loc2 <- getSrcLocL
let com = Comment False (mkSrcSpan loc loc2) $ dashes ++ rest
case s' of
[] -> pushComment com >> return (False, True)
_ -> do
pushComment com
lexNewline
lexWhiteSpace_ True
return (True, True)
'\n':_ -> do
lexNewline
lexWhiteSpace_ True
return (True, True)
'\t':_ -> do
lexTab
(bol', _) <- lexWhiteSpace bol
return (bol', True)
c:_ | isSpace c -> do
discard 1
(bol', _) <- lexWhiteSpace bol
return (bol', True)
_ -> return (bol, False)
lexWhiteSpace_ :: Bool -> Lex a ()
lexWhiteSpace_ bol = do _ <- lexWhiteSpace bol
return ()
isRecognisedPragma, isLinePragma :: String -> Bool
isRecognisedPragma str = let pragma = takeWhile isPragmaChar . dropWhile isSpace $ str
in case lookupKnownPragma pragma of
Nothing -> False
_ -> True
isLinePragma str = let pragma = map toLower . takeWhile isAlphaNum . dropWhile isSpace $ str
in case pragma of
"line" -> True
_ -> False
lexLinePragma :: Lex a (Int, String)
lexLinePragma = do
discard 3
lexWhile_ isSpace
discard 4
lexWhile_ isSpace
i <- lexWhile isDigit
when (null i) $ fail "Improperly formatted LINE pragma"
lexWhile_ isSpace
matchChar '"' "Improperly formatted LINE pragma"
fn <- lexWhile (/= '"')
matchChar '"' "Impossible - lexLinePragma"
lexWhile_ isSpace
mapM_ (flip matchChar "Improperly formatted LINE pragma") "#-}"
lexNewline
return (read i, fn)
lexNestedComment :: Bool -> String -> Lex a (Bool, String)
lexNestedComment bol str = do
s <- getInput
case s of
'-':'}':_ -> discard 2 >> return (bol, str)
'{':'-':_ -> do
discard 2
(bol', c) <- lexNestedComment bol ("-{" ++ str)
lexNestedComment bol' ("}-" ++ c )
'\t':_ -> lexTab >> lexNestedComment bol ('\t':str)
'\n':_ -> lexNewline >> lexNestedComment True ('\n':str)
c:_ -> discard 1 >> lexNestedComment bol (c:str)
[] -> fail "Unterminated nested comment"
lexBOL :: Lex a Token
lexBOL = do
pos <- getOffside
case pos of
LT -> do
setBOL
popContextL "lexBOL"
return VRightCurly
EQ ->
return SemiColon
GT -> lexToken
lexToken :: Lex a Token
lexToken = do
ec <- getExtContext
case ec of
Just HarpCtxt -> lexHarpToken
Just TagCtxt -> lexTagCtxt
Just CloseTagCtxt -> lexCloseTagCtxt
Just ChildCtxt -> lexChildCtxt
Just CodeTagCtxt -> lexCodeTagCtxt
_ -> lexStdToken
lexChildCtxt :: Lex a Token
lexChildCtxt = do
s <- getInput
case s of
'<':'%':'>':_ -> do discard 3
pushExtContextL ChildCtxt
return XChildTagOpen
'<':'%':_ -> do discard 2
pushExtContextL CodeTagCtxt
return XCodeTagOpen
'<':'/':_ -> do discard 2
popExtContextL "lexChildCtxt"
pushExtContextL CloseTagCtxt
return XCloseTagOpen
'<':'[':_ -> do discard 2
pushExtContextL HarpCtxt
return XRPatOpen
'<':_ -> do discard 1
pushExtContextL TagCtxt
return XStdTagOpen
_ -> lexPCDATA
lexPCDATA :: Lex a Token
lexPCDATA = do
s <- getInput
case s of
[] -> return EOF
_ -> case s of
'\n':_ -> do
x <- lexNewline >> lexPCDATA
case x of
XPCDATA p -> return $ XPCDATA $ '\n':p
EOF -> return EOF
_ -> fail $ "lexPCDATA: unexpected token: " ++ show x
'<':_ -> return $ XPCDATA ""
_ -> do let pcd = takeWhile (\c -> c `notElem` "<\n") s
l = length pcd
discard l
x <- lexPCDATA
case x of
XPCDATA pcd' -> return $ XPCDATA $ pcd ++ pcd'
EOF -> return EOF
_ -> fail $ "lexPCDATA: unexpected token: " ++ show x
lexCodeTagCtxt :: Lex a Token
lexCodeTagCtxt = do
s <- getInput
case s of
'%':'>':_ -> do discard 2
popExtContextL "lexCodeTagContext"
return XCodeTagClose
_ -> lexStdToken
lexCloseTagCtxt :: Lex a Token
lexCloseTagCtxt = do
s <- getInput
case s of
'%':'>':_ -> do discard 2
popExtContextL "lexCloseTagCtxt"
return XCodeTagClose
'>':_ -> do discard 1
popExtContextL "lexCloseTagCtxt"
return XStdTagClose
_ -> lexStdToken
lexTagCtxt :: Lex a Token
lexTagCtxt = do
s <- getInput
case s of
'/':'>':_ -> do discard 2
popExtContextL "lexTagCtxt: Empty tag"
return XEmptyTagClose
'>':_ -> do discard 1
popExtContextL "lexTagCtxt: Standard tag"
pushExtContextL ChildCtxt
return XStdTagClose
_ -> lexStdToken
lexHarpToken :: Lex a Token
lexHarpToken = do
s <- getInput
case s of
']':'>':_ -> do discard 2
popExtContextL "lexHarpToken"
return XRPatClose
_ -> lexStdToken
lexStdToken :: Lex a Token
lexStdToken = do
s <- getInput
exts <- getExtensionsL
let intHash = lexHash IntTok IntTokHash (Right WordTokHash)
case s of
[] -> return EOF
'0':c:d:_ | toLower c == 'o' && isOctDigit d -> do
discard 2
(n, str) <- lexOctal
con <- intHash
return (con (n, '0':c:str))
| toLower c == 'b' && isBinDigit d && BinaryLiterals `elem` exts -> do
discard 2
(n, str) <- lexBinary
con <- intHash
return (con (n, '0':c:str))
| toLower c == 'x' && isHexDigit d -> do
discard 2
(n, str) <- lexHexadecimal
con <- intHash
return (con (n, '0':c:str))
'?':c:_ | isLower c && ImplicitParams `elem` exts -> do
discard 1
id <- lexWhile isIdent
return $ IDupVarId id
'%':c:_ | isLower c && ImplicitParams `elem` exts -> do
discard 1
id <- lexWhile isIdent
return $ ILinVarId id
'(':'|':c:_ | RegularPatterns `elem` exts && not (isHSymbol c) ->
do discard 2
return RPGuardOpen
'|':')':_ | RegularPatterns `elem` exts ->
do discard 2
return RPGuardClose
'[':'|':_ | TemplateHaskell `elem` exts -> do
discard 2
return THExpQuote
'[':c:'|':_ | c == 'e' && TemplateHaskell `elem` exts -> do
discard 3
return THExpQuote
| c == 'p' && TemplateHaskell `elem` exts -> do
discard 3
return THPatQuote
| c == 'd' && TemplateHaskell `elem` exts -> do
discard 3
return THDecQuote
| c == 't' && TemplateHaskell `elem` exts -> do
discard 3
return THTypQuote
'[':'$':c:_ | isLower c && QuasiQuotes `elem` exts ->
discard 2 >> lexQuasiQuote c
'[':c:s' | isLower c && QuasiQuotes `elem` exts && case dropWhile isIdent s' of { '|':_ -> True;_->False} ->
discard 1 >> lexQuasiQuote c
| isUpper c && QuasiQuotes `elem` exts && case dropWhile isPossiblyQvar s' of { '|':_ -> True;_->False} ->
discard 1 >> lexQuasiQuote c
'|':']':_ | TemplateHaskell `elem` exts -> do
discard 2
return THCloseQuote
'$':c:_ | isLower c && TemplateHaskell `elem` exts -> do
discard 1
id <- lexWhile isIdent
return $ THIdEscape id
| c == '(' && TemplateHaskell `elem` exts -> do
discard 2
return THParenEscape
'<':'%':c:_ | XmlSyntax `elem` exts ->
case c of
'>' -> do discard 3
pushExtContextL ChildCtxt
return XChildTagOpen
_ -> do discard 2
pushExtContextL CodeTagCtxt
return XCodeTagOpen
'<':c:_ | isAlpha c && XmlSyntax `elem` exts -> do
discard 1
pushExtContextL TagCtxt
return XStdTagOpen
'(':'#':c:_ | unboxed exts && not (isHSymbol c) -> discard 2 >> return LeftHashParen
'#':')':_ | unboxed exts -> discard 2 >> return RightHashParen
'{':'-':'#':_ -> saveExtensionsL >> discard 3 >> lexPragmaStart
'#':'-':'}':_ -> restoreExtensionsL >> discard 3 >> return PragmaEnd
'[':':':_ | ParallelArrays `elem` exts -> discard 2 >> return ParArrayLeftSquare
':':']':_ | ParallelArrays `elem` exts -> discard 2 >> return ParArrayRightSquare
'@':c:_ | TypeApplications `elem` exts
&& not (isOpSymbol c) -> do
lc <- getLastChar
if isIdent lc
then discard 1 >> return At
else discard 1 >> return TApp
'#':c:_ | OverloadedLabels `elem` exts
&& isLower c || c == '_' -> do
discard 1
[ident] <- lexIdents
return $ LabelVarId ident
c:_ | isDigit c -> lexDecimalOrFloat
| isUpper c -> lexConIdOrQual ""
| isLower c || c == '_' -> do
idents <- lexIdents
case idents of
[ident] -> case lookup ident (reserved_ids ++ special_varids) of
Just (keyword, scheme) ->
if isEnabled scheme exts
then flagKW keyword >> return keyword
else return $ VarId ident
Nothing -> return $ VarId ident
_ -> return $ DVarId idents
| isHSymbol c -> do
sym <- lexWhile isHSymbol
return $ case lookup sym (reserved_ops ++ special_varops) of
Just (t , scheme) ->
if isEnabled scheme exts
then t
else case c of
':' -> ConSym sym
_ -> VarSym sym
Nothing -> case c of
':' -> ConSym sym
_ -> VarSym sym
| otherwise -> do
discard 1
case c of
'(' -> return LeftParen
')' -> return RightParen
',' -> return Comma
';' -> return SemiColon
'[' -> return LeftSquare
']' -> return RightSquare
'`' -> return BackQuote
'{' -> do
pushContextL NoLayout
return LeftCurly
'}' -> do
popContextL "lexStdToken"
return RightCurly
'\'' -> lexCharacter
'"' -> lexString
_ -> fail ("Illegal character \'" ++ show c ++ "\'\n")
where lexIdents :: Lex a [String]
lexIdents = do
ident <- lexWhile isIdent
s <- getInput
exts <- getExtensionsL
case s of
'-':c:_ | XmlSyntax `elem` exts && isAlpha c -> do
discard 1
idents <- lexIdents
return $ ident : idents
'#':_ | MagicHash `elem` exts -> do
hashes <- lexWhile (== '#')
return [ident ++ hashes]
_ -> return [ident]
lexQuasiQuote :: Char -> Lex a Token
lexQuasiQuote c = do
ident <- lexQuoter
matchChar '|' "Malformed quasi-quote quoter"
body <- lexQQBody
return $ THQuasiQuote (ident, body)
where lexQuoter
| isLower c = lexWhile isIdent
| otherwise = do
qualThing <- lexConIdOrQual ""
case qualThing of
QVarId (s1,s2) -> return $ s1 ++ '.':s2
QVarSym (s1, s2) -> return $ s1 ++ '.':s2
_ -> fail "Malformed quasi-quote quoter"
lexQQBody :: Lex a String
lexQQBody = do
s <- getInput
case s of
'\\':']':_ -> do discard 2
str <- lexQQBody
return (']':str)
'\\':'|':_ -> do discard 2
str <- lexQQBody
return ('|':str)
'|':']':_ -> discard 2 >> return ""
'|':_ -> do discard 1
str <- lexQQBody
return ('|':str)
']':_ -> do discard 1
str <- lexQQBody
return (']':str)
'\\':_ -> do discard 1
str <- lexQQBody
return ('\\':str)
'\n':_ -> do lexNewline
str <- lexQQBody
return ('\n':str)
[] -> fail "Unexpected end of input while lexing quasi-quoter"
_ -> do str <- lexWhile (not . (`elem` "\\|\n"))
rest <- lexQQBody
return (str++rest)
unboxed :: [KnownExtension] -> Bool
unboxed exts = UnboxedSums `elem` exts || UnboxedTuples `elem` exts
lookupKnownPragma :: String -> Maybe Token
lookupKnownPragma s =
case map toLower s of
x | "options_" `isPrefixOf` x -> Just $ OPTIONS (Just $ drop 8 s, undefined)
| "options" == x -> Just $ OPTIONS (Nothing, undefined)
| otherwise -> lookup x pragmas
lexPragmaStart :: Lex a Token
lexPragmaStart = do
lexWhile_ isSpace
pr <- lexWhile isPragmaChar
case lookupKnownPragma pr of
Just (INLINE True) -> do
s <- getInput
case map toLower s of
' ':'c':'o':'n':'l':'i':'k':'e':_ -> do
discard 8
return INLINE_CONLIKE
_ -> return $ INLINE True
Just SPECIALISE -> do
s <- getInput
case dropWhile isSpace $ map toLower s of
'i':'n':'l':'i':'n':'e':_ -> do
lexWhile_ isSpace
discard 6
return $ SPECIALISE_INLINE True
'n':'o':'i':'n':'l':'i':'n':'e':_ -> do
lexWhile_ isSpace
discard 8
return $ SPECIALISE_INLINE False
'n':'o':'t':'i':'n':'l':'i':'n':'e':_ -> do
lexWhile_ isSpace
discard 9
return $ SPECIALISE_INLINE False
_ -> return SPECIALISE
Just (OPTIONS opt) ->
let dropIfSpace (' ':xs) = xs
dropIfSpace xs = xs
in
case fst opt of
Just opt' -> do
rest <- lexRawPragma
return $ OPTIONS (Just opt', dropIfSpace rest)
Nothing -> do
s <- getInput
case s of
x:_ | isSpace x -> do
rest <- lexRawPragma
return $ OPTIONS (Nothing, dropIfSpace rest)
_ -> fail "Malformed Options pragma"
Just RULES -> do
addExtensionL ScopedTypeVariables
return RULES
Just p -> return p
_ -> fail "Internal error: Unrecognised recognised pragma"
lexRawPragma :: Lex a String
lexRawPragma = lexRawPragmaAux
where lexRawPragmaAux = do
rpr <- lexWhile (/='#')
s <- getInput
case s of
'#':'-':'}':_ -> return rpr
"" -> fail "End-of-file inside pragma"
_ -> do
discard 1
rpr' <- lexRawPragma
return $ rpr ++ '#':rpr'
lexDecimalOrFloat :: Lex a Token
lexDecimalOrFloat = do
ds <- lexWhile isDigit
rest <- getInput
exts <- getExtensionsL
case rest of
('.':d:_) | isDigit d -> do
discard 1
frac <- lexWhile isDigit
let num = parseInteger 10 (ds ++ frac)
decimals = toInteger (length frac)
(exponent, estr) <- do
rest2 <- getInput
case rest2 of
'e':_ -> lexExponent
'E':_ -> lexExponent
_ -> return (0,"")
con <- lexHash FloatTok FloatTokHash (Right DoubleTokHash)
return $ con ((num%1) * 10^^(exponent - decimals), ds ++ '.':frac ++ estr)
e:_ | toLower e == 'e' -> do
(exponent, estr) <- lexExponent
con <- lexHash FloatTok FloatTokHash (Right DoubleTokHash)
return $ con ((parseInteger 10 ds%1) * 10^^exponent, ds ++ estr)
'#':'#':_ | MagicHash `elem` exts -> discard 2 >> return (WordTokHash (parseInteger 10 ds, ds))
'#':_ | MagicHash `elem` exts -> discard 1 >> return (IntTokHash (parseInteger 10 ds, ds))
_ -> return (IntTok (parseInteger 10 ds, ds))
where
lexExponent :: Lex a (Integer, String)
lexExponent = do
(e:r) <- getInput
discard 1
case r of
'+':d:_ | isDigit d -> do
discard 1
(n, str) <- lexDecimal
return (n, e:'+':str)
'-':d:_ | isDigit d -> do
discard 1
(n, str) <- lexDecimal
return (negate n, e:'-':str)
d:_ | isDigit d -> lexDecimal >>= \(n,str) -> return (n, e:str)
_ -> fail "Float with missing exponent"
lexHash :: (b -> Token) -> (b -> Token) -> Either String (b -> Token) -> Lex a (b -> Token)
lexHash a b c = do
exts <- getExtensionsL
if MagicHash `elem` exts
then do
r <- getInput
case r of
'#':'#':_ -> case c of
Right c' -> discard 2 >> return c'
Left s -> fail s
'#':_ -> discard 1 >> return b
_ -> return a
else return a
lexConIdOrQual :: String -> Lex a Token
lexConIdOrQual qual = do
con <- lexWhile isIdent
let conid | null qual = ConId con
| otherwise = QConId (qual,con)
qual' | null qual = con
| otherwise = qual ++ '.':con
just_a_conid <- alternative (return conid)
rest <- getInput
exts <- getExtensionsL
case rest of
'.':c:_
| isLower c || c == '_' -> do
discard 1
ident <- lexWhile isIdent
s <- getInput
exts' <- getExtensionsL
ident' <- case s of
'#':_ | MagicHash `elem` exts' -> discard 1 >> return (ident ++ "#")
_ -> return ident
case lookup ident' reserved_ids of
Just (_,scheme) | isEnabled scheme exts' -> just_a_conid
_ -> return (QVarId (qual', ident'))
| isUpper c -> do
discard 1
lexConIdOrQual qual'
| isHSymbol c -> do
discard 1
sym <- lexWhile isHSymbol
exts' <- getExtensionsL
case lookup sym reserved_ops of
Just (_,scheme) | isEnabled scheme exts' -> just_a_conid
_ -> return $ case c of
':' -> QConSym (qual', sym)
_ -> QVarSym (qual', sym)
'#':cs
| null cs ||
not (isHSymbol $ head cs) &&
not (isIdent $ head cs) && MagicHash `elem` exts -> do
discard 1
case conid of
ConId con' -> return $ ConId $ con' ++ "#"
QConId (q,con') -> return $ QConId (q,con' ++ "#")
_ -> fail $ "lexConIdOrQual: unexpected token: " ++ show conid
_ -> return conid
lexCharacter :: Lex a Token
lexCharacter = do
s <- getInput
exts <- getExtensionsL
case s of
'\'':_ | TemplateHaskell `elem` exts -> discard 1 >> return THTyQuote
'\\':_ -> do
(c,raw) <- lexEscape
matchQuote
con <- lexHash Character CharacterHash
(Left "Double hash not available for character literals")
return (con (c, '\\':raw))
c:'\'':_ -> do
discard 2
con <- lexHash Character CharacterHash
(Left "Double hash not available for character literals")
return (con (c, [c]))
_ | any (`elem` exts) [TemplateHaskell, DataKinds] -> return THVarQuote
_ -> fail "Improper character constant or misplaced \'"
where matchQuote = matchChar '\'' "Improperly terminated character constant"
lexString :: Lex a Token
lexString = loop ("","")
where
loop (s,raw) = do
r <- getInput
exts <- getExtensionsL
case r of
'\\':'&':_ -> do
discard 2
loop (s, '&':'\\':raw)
'\\':c:_ | isSpace c -> do
discard 1
wcs <- lexWhiteChars
matchChar '\\' "Illegal character in string gap"
loop (s, '\\':reverse wcs ++ '\\':raw)
| otherwise -> do
(ce, str) <- lexEscape
loop (ce:s, reverse str ++ '\\':raw)
'"':'#':_ | MagicHash `elem` exts -> do
discard 2
return (StringHash (reverse s, reverse raw))
'"':_ -> do
discard 1
return (StringTok (reverse s, reverse raw))
c:_ | c /= '\n' -> do
discard 1
loop (c:s, c:raw)
_ -> fail "Improperly terminated string"
lexWhiteChars :: Lex a String
lexWhiteChars = do
s <- getInput
case s of
'\n':_ -> do
lexNewline
wcs <- lexWhiteChars
return $ '\n':wcs
'\t':_ -> do
lexTab
wcs <- lexWhiteChars
return $ '\t':wcs
c:_ | isSpace c -> do
discard 1
wcs <- lexWhiteChars
return $ c:wcs
_ -> return ""
lexEscape :: Lex a (Char, String)
lexEscape = do
discard 1
r <- getInput
case r of
'a':_ -> discard 1 >> return ('\a', "a")
'b':_ -> discard 1 >> return ('\b', "b")
'f':_ -> discard 1 >> return ('\f', "f")
'n':_ -> discard 1 >> return ('\n', "n")
'r':_ -> discard 1 >> return ('\r', "r")
't':_ -> discard 1 >> return ('\t', "t")
'v':_ -> discard 1 >> return ('\v', "v")
'\\':_ -> discard 1 >> return ('\\', "\\")
'"':_ -> discard 1 >> return ('\"', "\"")
'\'':_ -> discard 1 >> return ('\'', "\'")
'^':c:_ -> discard 2 >> cntrl c
'N':'U':'L':_ -> discard 3 >> return ('\NUL', "NUL")
'S':'O':'H':_ -> discard 3 >> return ('\SOH', "SOH")
'S':'T':'X':_ -> discard 3 >> return ('\STX', "STX")
'E':'T':'X':_ -> discard 3 >> return ('\ETX', "ETX")
'E':'O':'T':_ -> discard 3 >> return ('\EOT', "EOT")
'E':'N':'Q':_ -> discard 3 >> return ('\ENQ', "ENQ")
'A':'C':'K':_ -> discard 3 >> return ('\ACK', "ACK")
'B':'E':'L':_ -> discard 3 >> return ('\BEL', "BEL")
'B':'S':_ -> discard 2 >> return ('\BS', "BS")
'H':'T':_ -> discard 2 >> return ('\HT', "HT")
'L':'F':_ -> discard 2 >> return ('\LF', "LF")
'V':'T':_ -> discard 2 >> return ('\VT', "VT")
'F':'F':_ -> discard 2 >> return ('\FF', "FF")
'C':'R':_ -> discard 2 >> return ('\CR', "CR")
'S':'O':_ -> discard 2 >> return ('\SO', "SO")
'S':'I':_ -> discard 2 >> return ('\SI', "SI")
'D':'L':'E':_ -> discard 3 >> return ('\DLE', "DLE")
'D':'C':'1':_ -> discard 3 >> return ('\DC1', "DC1")
'D':'C':'2':_ -> discard 3 >> return ('\DC2', "DC2")
'D':'C':'3':_ -> discard 3 >> return ('\DC3', "DC3")
'D':'C':'4':_ -> discard 3 >> return ('\DC4', "DC4")
'N':'A':'K':_ -> discard 3 >> return ('\NAK', "NAK")
'S':'Y':'N':_ -> discard 3 >> return ('\SYN', "SYN")
'E':'T':'B':_ -> discard 3 >> return ('\ETB', "ETB")
'C':'A':'N':_ -> discard 3 >> return ('\CAN', "CAN")
'E':'M':_ -> discard 2 >> return ('\EM', "EM")
'S':'U':'B':_ -> discard 3 >> return ('\SUB', "SUB")
'E':'S':'C':_ -> discard 3 >> return ('\ESC', "ESC")
'F':'S':_ -> discard 2 >> return ('\FS', "FS")
'G':'S':_ -> discard 2 >> return ('\GS', "GS")
'R':'S':_ -> discard 2 >> return ('\RS', "RS")
'U':'S':_ -> discard 2 >> return ('\US', "US")
'S':'P':_ -> discard 2 >> return ('\SP', "SP")
'D':'E':'L':_ -> discard 3 >> return ('\DEL', "DEL")
'o':c:_ | isOctDigit c -> do
discard 1
(n, raw) <- lexOctal
n' <- checkChar n
return (n', 'o':raw)
'x':c:_ | isHexDigit c -> do
discard 1
(n, raw) <- lexHexadecimal
n' <- checkChar n
return (n', 'x':raw)
c:_ | isDigit c -> do
(n, raw) <- lexDecimal
n' <- checkChar n
return (n', raw)
_ -> fail "Illegal escape sequence"
where
checkChar n | n <= 0x10FFFF = return (chr (fromInteger n))
checkChar _ = fail "Character constant out of range"
cntrl :: Char -> Lex a (Char, String)
cntrl c | c >= '@' && c <= '_' = return (chr (ord c - ord '@'), '^':c:[])
cntrl _ = fail "Illegal control character"
lexOctal :: Lex a (Integer, String)
lexOctal = do
ds <- lexWhile isOctDigit
return (parseInteger 8 ds, ds)
lexBinary :: Lex a (Integer, String)
lexBinary = do
ds <- lexWhile isBinDigit
return (parseInteger 2 ds, ds)
lexHexadecimal :: Lex a (Integer, String)
lexHexadecimal = do
ds <- lexWhile isHexDigit
return (parseInteger 16 ds, ds)
lexDecimal :: Lex a (Integer, String)
lexDecimal = do
ds <- lexWhile isDigit
return (parseInteger 10 ds, ds)
parseInteger :: Integer -> String -> Integer
parseInteger radix ds =
foldl1 (\n d -> n * radix + d) (map (toInteger . digitToInt) ds)
flagKW :: Token -> Lex a ()
flagKW t =
when (t `elem` [KW_Do, KW_MDo]) $ do
exts <- getExtensionsL
when (NondecreasingIndentation `elem` exts) flagDo
isBinDigit :: Char -> Bool
isBinDigit c = c >= '0' && c <= '1'
showToken :: Token -> String
showToken t = case t of
VarId s -> s
LabelVarId s -> '#':s
QVarId (q,s) -> q ++ '.':s
IDupVarId s -> '?':s
ILinVarId s -> '%':s
ConId s -> s
QConId (q,s) -> q ++ '.':s
DVarId ss -> intercalate "-" ss
VarSym s -> s
ConSym s -> s
QVarSym (q,s) -> q ++ '.':s
QConSym (q,s) -> q ++ '.':s
IntTok (_, s) -> s
FloatTok (_, s) -> s
Character (_, s) -> '\'':s ++ "'"
StringTok (_, s) -> '"':s ++ "\""
IntTokHash (_, s) -> s ++ "#"
WordTokHash (_, s) -> s ++ "##"
FloatTokHash (_, s) -> s ++ "#"
DoubleTokHash (_, s) -> s ++ "##"
CharacterHash (_, s) -> '\'':s ++ "'#"
StringHash (_, s) -> '"':s ++ "\"#"
LeftParen -> "("
RightParen -> ")"
LeftHashParen -> "(#"
RightHashParen -> "#)"
SemiColon -> ";"
LeftCurly -> "{"
RightCurly -> "}"
VRightCurly -> "virtual }"
LeftSquare -> "["
RightSquare -> "]"
ParArrayLeftSquare -> "[:"
ParArrayRightSquare -> ":]"
Comma -> ","
Underscore -> "_"
BackQuote -> "`"
QuoteColon -> "':"
Dot -> "."
DotDot -> ".."
Colon -> ":"
DoubleColon -> "::"
Equals -> "="
Backslash -> "\\"
Bar -> "|"
LeftArrow -> "<-"
RightArrow -> "->"
At -> "@"
TApp -> "@"
Tilde -> "~"
DoubleArrow -> "=>"
Minus -> "-"
Exclamation -> "!"
Star -> "*"
LeftArrowTail -> "-<"
RightArrowTail -> ">-"
LeftDblArrowTail -> "-<<"
RightDblArrowTail -> ">>-"
THExpQuote -> "[|"
THPatQuote -> "[p|"
THDecQuote -> "[d|"
THTypQuote -> "[t|"
THCloseQuote -> "|]"
THIdEscape s -> '$':s
THParenEscape -> "$("
THVarQuote -> "'"
THTyQuote -> "''"
THQuasiQuote (n,q) -> "[$" ++ n ++ "|" ++ q ++ "]"
RPGuardOpen -> "(|"
RPGuardClose -> "|)"
RPCAt -> "@:"
XCodeTagOpen -> "<%"
XCodeTagClose -> "%>"
XStdTagOpen -> "<"
XStdTagClose -> ">"
XCloseTagOpen -> "</"
XEmptyTagClose -> "/>"
XPCDATA s -> "PCDATA " ++ s
XRPatOpen -> "<["
XRPatClose -> "]>"
PragmaEnd -> "#-}"
RULES -> "{-# RULES"
INLINE b -> "{-# " ++ if b then "INLINE" else "NOINLINE"
INLINE_CONLIKE -> "{-# " ++ "INLINE CONLIKE"
SPECIALISE -> "{-# SPECIALISE"
SPECIALISE_INLINE b -> "{-# SPECIALISE " ++ if b then "INLINE" else "NOINLINE"
SOURCE -> "{-# SOURCE"
DEPRECATED -> "{-# DEPRECATED"
WARNING -> "{-# WARNING"
SCC -> "{-# SCC"
GENERATED -> "{-# GENERATED"
CORE -> "{-# CORE"
UNPACK -> "{-# UNPACK"
NOUNPACK -> "{-# NOUNPACK"
OPTIONS (mt,_) -> "{-# OPTIONS" ++ maybe "" (':':) mt ++ " ..."
LANGUAGE -> "{-# LANGUAGE"
ANN -> "{-# ANN"
MINIMAL -> "{-# MINIMAL"
NO_OVERLAP -> "{-# NO_OVERLAP"
OVERLAP -> "{-# OVERLAP"
OVERLAPPING -> "{-# OVERLAPPING"
OVERLAPPABLE -> "{-# OVERLAPPABLE"
OVERLAPS -> "{-# OVERLAPS"
INCOHERENT -> "{-# INCOHERENT"
COMPLETE -> "{-# COMPLETE"
KW_As -> "as"
KW_By -> "by"
KW_Case -> "case"
KW_Class -> "class"
KW_Data -> "data"
KW_Default -> "default"
KW_Deriving -> "deriving"
KW_Do -> "do"
KW_MDo -> "mdo"
KW_Else -> "else"
KW_Family -> "family"
KW_Forall -> "forall"
KW_Group -> "group"
KW_Hiding -> "hiding"
KW_If -> "if"
KW_Import -> "import"
KW_In -> "in"
KW_Infix -> "infix"
KW_InfixL -> "infixl"
KW_InfixR -> "infixr"
KW_Instance -> "instance"
KW_Let -> "let"
KW_Module -> "module"
KW_NewType -> "newtype"
KW_Of -> "of"
KW_Proc -> "proc"
KW_Rec -> "rec"
KW_Then -> "then"
KW_Type -> "type"
KW_Using -> "using"
KW_Where -> "where"
KW_Qualified -> "qualified"
KW_Foreign -> "foreign"
KW_Export -> "export"
KW_Safe -> "safe"
KW_Unsafe -> "unsafe"
KW_Threadsafe -> "threadsafe"
KW_Interruptible -> "interruptible"
KW_StdCall -> "stdcall"
KW_CCall -> "ccall"
XChildTagOpen -> "<%>"
KW_CPlusPlus -> "cplusplus"
KW_DotNet -> "dotnet"
KW_Jvm -> "jvm"
KW_Js -> "js"
KW_JavaScript -> "javascript"
KW_CApi -> "capi"
KW_Role -> "role"
KW_Pattern -> "pattern"
KW_Stock -> "stock"
KW_Anyclass -> "anyclass"
KW_Via -> "via"
EOF -> "EOF"