{-# OPTIONS -Wall #-} module Language.Haskell.HBB.Internal.Lexer ( getVariableIdUsingLexerAt, LexingFailReason(..), IncludeQualified(..) ) where import Language.Haskell.HBB.Internal.SrcSpan import StringBuffer import FastString (unpackFS,mkFastString) import GhcMonad (liftIO,GhcMonad) import SrcLoc import Lexer (lexTokenStream,ParseResult(..),Token(..)) import GHC (getSessionDynFlags) -- | This type holds possible return values of getVariableIdUsingLexerAt. data LexingFailReason = LexingFailed | VarNotFound data IncludeQualified = IncludeQualifiedVars | ExcludeQualifiedVars deriving (Eq) -- | This function uses GHCs lexer to determine the token that is under the -- cursor (the passed SrcLoc). -- -- Currently only the tokens ITvarid (a variable id) and ITqvarid (a qualified -- variable id) are supported. If IncludeQualified euqal ExcludeQualifiedVars -- then ITqvarid will be ignored. A token of type ITqvarid has two strings -- attached, the name of the module (the qualifier) and the name of the -- variable. Of these twos only the name is contained by the result. getVariableIdUsingLexerAt :: GhcMonad m => (FilePath,BufLoc) -> IncludeQualified -> m (Either LexingFailReason (String,RealSrcSpan)) getVariableIdUsingLexerAt (filename,loc) behaviour = do let wholeFileLoc :: RealSrcLoc wholeFileLoc = mkRealSrcLoc (mkFastString filename) 1 1 ghcDynFlags <- getSessionDynFlags fileContent <- liftIO $ hGetStringBuffer filename let isRelevantToken :: Token -> Bool isRelevantToken (ITvarid _) = True isRelevantToken (ITqvarid _) | (behaviour == IncludeQualifiedVars) = True isRelevantToken _ = False token2Result :: Token -> String token2Result (ITvarid s ) = unpackFS s token2Result (ITqvarid (_,s)) = unpackFS s token2Result _ = error "Internal error (unexected wrong token type)" case lexTokenStream fileContent wholeFileLoc ghcDynFlags of -- Experiences showed that the lexer adds additional tokens -- to the token stream that have length 0 (e.g. ITvocurly or -- ITsemi). We want to filter the token stream for the -- (single) token that matches the SrcLoc passed as command -- line parameter. As this is obviously not enough -- (additional ITvocurly start at the same location), we have -- to filter them again. There are two possibilities: -- - only use non-zero-length tokens -- - only use ITvarid Tokens (variable IDs) -- -- The current solution is to filter the tokens for elements that -- contain the passed source location and from the result only use -- the tokens of type ITvarid. This makes sense as we anyway have -- to extract the string from this token. POk _ xs -> let relevantByLoc = [ tok | tok@(L (RealSrcSpan s) _) <- xs , (toBufLoc $ realSrcSpanStart s) <= loc , (toBufLoc $ realSrcSpanEnd s) > loc ] relevant = [ (token2Result t,r) | (L (RealSrcSpan r) t) <- relevantByLoc , isRelevantToken t ] in case relevant of [x] -> return $ Right x [] -> return $ Left VarNotFound _ -> error "internal error (too many tokens)" PFailed _ _ -> return $ Left LexingFailed