{-# LANGUAGE RankNTypes,DeriveDataTypeable #-}
{-# OPTIONS -Wall #-}
module Language.Haskell.HBB.Internal.GHCHighlevel (
    searchFunctionBindingM,
    searchFunctionBindingForNameM,
    searchTokenForNameM,
    whatIsAt,
    WhatIsAt(..),
    realSrcSpansOfBinding,
    getThingsAt,
    SearchTokenException(..),
    LexingFailReason(..),
    SearchedTokenInfo(..),
    FunBindInfo,
    BufSpan(..),
    BufLoc(..)
    ) where

-- This file contains a high-level wrapper for GHC functionalities.  It uses
-- the lower-level functionalities from GHC, AST and Lexer to provide
-- easy-to-use blocks of functionalities.
--
--         +-----------------+
--         | LibGHCHighlevel |
--         +-----------------+
--                | | |
--                | | |
--      +---------+ | +-------+----------+
--      |           |         |          |
--      v           v         v          v
--    +----+    +-----+  +-------+  +---------+
--    |GHC |    | AST |  | Lexer |  | SrcSpan |
--    +----+    +-----+  +-------+  +---------+
--

import Language.Haskell.HBB.Internal.SrcSpan
import Language.Haskell.HBB.Internal.Lexer
import Language.Haskell.HBB.Internal.AST
import Language.Haskell.HBB.Internal.GHC
import Control.Exception (throw,Exception)
import Data.Generics
import FastString (mkFastString)
import Outputable
import Data.Maybe (fromJust)
import Data.List (sortBy)
import GhcMonad
import SrcLoc (realSrcSpanStart,realSrcSpanEnd,mkRealSrcSpan,mkRealSrcLoc)
import Name (nameModule_maybe)
import GHC

-- FunBindInfo is the type returned by the function 'searchFunctionBinding'.
data SearchedTokenInfo a = SearchedTokenInfo { 
                 -- | GHC internally uses Outputable for things that may be
                 -- printed to the user for example.  As the printing functions
                 -- depend on the DynFlags used at compilation they can't be
                 -- used any more when the GHC run has finished. So 'printFun'
                 -- uses a closure to save the DynFlags in a curried function
                 -- to make it possible for clients to get a string
                 -- representation of a GHC internal data type.
                 printFun :: (forall b. Outputable b => b -> String)
                 -- | This is the Src-Span covering the full function name the
                 -- searchFunctionBinding function has determined (the
                 -- function gets passed only a certain point in a file
                 -- pointing to a (part) of the function name)
               , occSpan  :: RealSrcSpan
                 -- | The name that was at the location that has been passed to
                 -- searchFunctionBinding(M). In the case of inlining the name
                 -- is what is to be replaced by the function definition. This
                 -- name is completely enclosed by occSpan.
               , name     :: Name
                 -- | The type of the result is determined by the GenericQ a
                 -- passed as parameter.
               , result   :: a }

type FunBindInfo = SearchedTokenInfo (LHsBindLR Name Name,Maybe (LSig Name))

-- | These are exceptions searchFunctionBinding(M) may throw.
--
-- Each exception can be converted to a meaningful string. Moreover
-- searchFunctionBinding is throwing internal errors via error (exception
-- ErrorCall must be catched).
data SearchTokenException = LexingSearchError LexingFailReason
                          | TokenIsntAName
                          | TokenNotEndingAccordingly
                          | IsFunctionApplication
                          | IsExternalName ModuleName
                          | IsntNameOfABinding       -- The name at this location refers to for example a function parameter
                          deriving (Typeable)

instance Show SearchTokenException where
    show (LexingSearchError LexingFailed) = "Lexing failed. Input file isn't valid Haskell"
    show (LexingSearchError VarNotFound ) = "There is no variable at the specififed location"
    show  TokenIsntAName            = "Didn't find an name at the specified location.\n" ++
                                      "(Is this really an expression or something like a function parameter?)"
    show  TokenNotEndingAccordingly = "There is at least one name starting at the location but none ending accordingly.\n" ++
                                      "Did you specify the end of the source-span correctly?"
    show  IsFunctionApplication     = "This seems to be a function application of which inlining isn't supported so far."
    show (IsExternalName m)         = "The name refers to an external binding (module " ++ moduleNameString m ++ ")"
    show  IsntNameOfABinding        = "No according function binding found.\n" ++
                                      "Is this really the name of a binding (or for example a parameter to a function)?"

instance Exception SearchTokenException

-- | Parses the renamed AST of the module and returns all elements that start
-- at the passed location sorted by length in increasing oder.
getThingsAt 
    :: (GhcMonad m,Typeable a) 
    => (a -> BufLoc -> Maybe BufSpan) 
    -- ^ Tells how to extract the span of an a.  If the a element is at the
    -- BufLoc specified then the according BufSpan is returned. Note that the
    -- results are sorted by the end location of the BufSpan that is returned
    -- here. So if this function returns weird data then the sorting of the
    -- results is not warranted.
    -> FilePath       -- ^ The filename of the module to be considered.
    -> BufLoc         -- ^ The required start-location of the tokens.
    -> m [a]          -- ^ A sorted list of results.
getThingsAt isAtLoc filename location = do

    loadTargetsFromFilename filename

    let asTypeOf' :: a -> (a -> BufLoc -> Maybe BufSpan) -> a
        asTypeOf' a _ = a

    checkedMod <- searchModGraphFor (Left filename) >>= return . snd >>= parseModule >>= typecheckModule

    let rnSource = fromJust $ tm_renamed_source checkedMod

        locateIt s x = case isAtLoc x s of 
            Nothing            -> []
            Just (BufSpan _ e) -> [(x,e)]

        collectedExprs = queryRenamedAST [] (++) (mkQ [] (\x -> locateIt location (asTypeOf' x isAtLoc))) rnSource

    return $ map fst $ sortBy sortByEnding collectedExprs
        where
            sortByEnding :: (a,BufLoc) -> (a,BufLoc) -> Ordering
            sortByEnding (_,end1) (_,end2) = end1 `compare` end2

-- | This function takes a file name and the location that is of interest and
-- searches out the value or function binding for the name that stands at this
-- location. The returned value contains all informations that are needed to
-- inline the function definition or describe how to inline it (smart-inline).
--
-- If the name refers to a name that is not part of the module graph (because
-- it has been loaded by a library for example) this function will fail.
searchFunctionBindingM :: GhcMonad m => FilePath -> BufLoc -> Maybe BufLoc -> m FunBindInfo
searchFunctionBindingM filename loc1 mbBL = do

    updateDynFlagsToSuppressFileOutput

    (blStart,blEnd) <- case mbBL of
        Nothing -> do
            lexerResult  <- getVariableIdUsingLexerAt (filename,loc1) IncludeQualifiedVars

            let (_,rSpan) = case lexerResult of Right x -> x
                                                Left  x -> throw (LexingSearchError x)
            return ((toBufLoc $ realSrcSpanStart rSpan) 
                   ,(toBufLoc $ realSrcSpanEnd   rSpan))
        Just loc2 -> return (loc1,loc2)

    -- We search out the name that is at the position specified.
    -- With the name it is possible to get out the module and the location
    -- where the function or value bindings is defined.
    ourName <- do

        let -- Returns a list of (LHsExpr Name) instances that start at the passed
            -- position. The first instance is the shortest match which means that it spans
            -- fewest character. This is the name of the function that should be applied or
            -- the name of the value bindings.
            getSortedLHsExprsAt :: GhcMonad m => FilePath -> BufLoc -> m [LHsExpr Name]
            getSortedLHsExprsAt fn' location = getThingsAt extractBufSpan fn' location
                where extractBufSpan :: LHsExpr Name -> BufLoc -> Maybe BufSpan
                      extractBufSpan (L (RealSrcSpan r) (HsVar _  )) bl = if (spanStart $ toBufSpan r) == bl then Just $ toBufSpan r else Nothing
                      extractBufSpan (L (RealSrcSpan r) (HsApp _ _)) bl = if (spanStart $ toBufSpan r) == bl then Just $ toBufSpan r else Nothing
                      extractBufSpan _                               _  = Nothing

        -- getSortedLHsExprsAt returns a sorted list where the first element covers
        -- the smallest range (it is directly a (LHsExpr Name) and no function
        -- application).
        exprs <- getSortedLHsExprsAt filename blStart

        let endsAt :: SrcSpan -> BufLoc -> Bool
            endsAt (RealSrcSpan r) l = l == (toBufLoc $ realSrcSpanEnd r)
            endsAt _               _ = False

        case exprs of
            [] -> throw TokenIsntAName
            xs -> case [ e | (L l e) <- xs , l `endsAt` blEnd ] of
                  []  -> throw TokenNotEndingAccordingly
                  [(HsVar n)]   -> return n
                  [(HsApp _ _)] -> throw IsFunctionApplication
                  _             -> error "Internal error (too much results for matching expressions)."

    searchFunctionBindingForNameM (ourName,(BufSpan blStart blEnd),filename)

data FunBindAndSig = FunBindAndSig [LHsBindLR Name Name] [LSig Name]

joinFunBindAndSig :: FunBindAndSig -> FunBindAndSig -> FunBindAndSig
joinFunBindAndSig (FunBindAndSig b1 s1) (FunBindAndSig b2 s2) = FunBindAndSig (b1 ++ b2) (s1 ++ s2)

-- | This version to search a function binding takes a Name and some details
-- about it.
searchFunctionBindingForNameM :: GhcMonad m => (Name,BufSpan,FilePath) -> m FunBindInfo
searchFunctionBindingForNameM (ourName,nameSpan,filename) = do

    let genericQuery :: BufLoc -> GenericQ FunBindAndSig
        genericQuery = 

            let hasSearchedStartLoc :: BufLoc -> SrcSpan -> Bool
                hasSearchedStartLoc loc (RealSrcSpan rss) =
                    loc == (toBufLoc $ realSrcSpanStart rss)
                hasSearchedStartLoc _   _ = False

                -- TODO is it better to only use the Name-comparison?
                -- Currently first the Name of a function is compared with
                -- ourName and then its location...
                
                -- This function is used to create a generic SYB-query to collect the function
                -- bindings (usually only one) that start at a certain location.
                locateFunctionsQ :: BufLoc -> (LHsBindLR Name Name) -> FunBindAndSig
                locateFunctionsQ _   x@(L _ (FunBind { 
                                    fun_id = (L _ itsName) })) | (ourName == itsName) = FunBindAndSig [x] []
                locateFunctionsQ loc x@(L l (FunBind {})) | hasSearchedStartLoc loc l = FunBindAndSig [x] []
                -- Pattern bindings (e.g. sq :: Int -> Int = \x -> x * x)
                -- are also collected to be able to give a clear message to the user...
                locateFunctionsQ loc x@(L l (PatBind {})) | hasSearchedStartLoc loc l = FunBindAndSig [x] []
                locateFunctionsQ _     (L _ (VarBind {}))                             =
                    error $ "Internal error (unexpected VarBind: " ++ 
                            "GHC doc says that they are introduced by the typechecker)"
                locateFunctionsQ _     (L _ (AbsBinds {}))                            =
                    error $ "Internal error (unexpected AbsBind: " ++ 
                            "GHC doc says that they are introduced by the typechecker)"
                locateFunctionsQ _     _                                              = FunBindAndSig [] []


                sigQ :: Name -> BufLoc -> HsValBindsLR Name Name -> FunBindAndSig
                sigQ na _ (ValBindsOut _ x) = FunBindAndSig [] (filter isCorrectSig x)
                    where
                        -- Tests proved that the location stored within
                        -- (LSig Name) contains weird data. Therefore the
                        -- name is used for the comparison (which should be
                        -- better anyway)...
                        isCorrectSig :: LSig Name -> Bool
                        isCorrectSig (L _ (TypeSig lNames _)) = any (\(L _ n) -> n == na) lNames
                        isCorrectSig _                        = False
                sigQ _ _ _ = FunBindAndSig [] []

            in  (\x -> mkQ (FunBindAndSig [] []) (locateFunctionsQ x) `extQ` (sigQ ourName x))

    sti@(SearchedTokenInfo {
        result = (FunBindAndSig funBinds sigs)
        }) <- 
        searchTokenForNameM 
            (ourName,nameSpan,filename)
            (FunBindAndSig [] [])
            joinFunBindAndSig
            genericQuery

    let ourFun = case funBinds of 
            []                     -> throw IsntNameOfABinding
            [b@(L _ (FunBind {}))] -> b
            [b@(L _ (PatBind {}))] -> b
            _                      -> error "Internal error (more that one matching function binding found)"

    let ourSig = case sigs of 
            []    -> Nothing
            (x:_) -> Just x
    
    fgs <- getSessionDynFlags

    return SearchedTokenInfo { printFun = showPpr fgs
                             , occSpan  = occSpan sti
                             , name     = name    sti
                             , result   = (ourFun,ourSig) }

-- | This is a generic function that takes informations about a Name and
-- queries the renamed AST according to the parameters.
searchTokenForNameM 
    :: GhcMonad m 
    => (Name,BufSpan,FilePath) -- ^ informations about the Name instance, its location and the file 
    -> b                       -- ^ neutral result
    -> (b -> b -> b)           -- ^ function to join 2 results
    -> (BufLoc -> GenericQ b)  -- ^ generic query (special cases) for the
                               --   result, produced by 'mkQ' for example...
    -> m (SearchedTokenInfo b)
searchTokenForNameM (ourName,nameSpan,filename) neutralResult joinResult queryResult = do

    let startLoc = case nameSrcSpan ourName of
             (RealSrcSpan rss) -> toBufLoc $ realSrcSpanStart rss
             _                 -> 
                -- In this case the name doesn't refer to the current unit of compilation
                -- but to a library. We throw an exception but insert the module name.
                throw $ IsExternalName (moduleName $ nameModule ourName)

    renamedAST <- do
        modSum <- do
            -- Ok now there are two possibilities.
            -- Either the name refers to something at module scope (a name
            -- defined in this or another module) or it refers to a nested
            -- function binding.
            -- In the first case 'nameModule_maybe' will return the module of
            -- interest. In the second case the binding must be located in the
            -- same module as the name. We cycle through the module graph to
            -- find the current module and return it.
            modName <- case nameModule_maybe ourName of
                       Just m  -> return $ moduleName m
                       Nothing -> searchModGraphFor (Left filename) >>= return . fst
            getModSummary modName
                                        
        extractRenamedAST modSum

    let ourResult = queryRenamedAST neutralResult joinResult (queryResult startLoc) renamedAST

        occurrenceSpan :: RealSrcSpan
        occurrenceSpan = 
            let (BufLoc l1 c1) = spanStart nameSpan
                (BufLoc l2 c2) = spanEnd   nameSpan
            in mkRealSrcSpan (mkRealSrcLoc (mkFastString filename) l1 c1) 
                             (mkRealSrcLoc (mkFastString filename) l2 c2) 

    getSessionDynFlags >>= (\fgs -> return SearchedTokenInfo { 
                                                printFun = showPpr fgs
                                              , occSpan  = occurrenceSpan
                                              , name     = ourName
                                              , result   = ourResult })

-- | This is a internal type used by whatIsAt.
data WhatIsAtIndirection = NoRecursion
                         | RecFromName
                         | RecFromSig
                         | RecFromIEDecl

-- | This function is responsible to detect what kind of thing is located at
-- the passed location (the token).
whatIsAt
    :: GhcMonad m 
    => FilePath    -- ^ The file where the token occurred
    -> BufSpan     -- ^ The location of the token to consider
    -> m WhatIsAt
whatIsAt = whatIsAtInternal NoRecursion
    where
        whatIsAtInternal
            :: GhcMonad m 
            => WhatIsAtIndirection  -- ^ Is this a recursive call from following a name/sig...
            -> FilePath
            -> BufSpan 
            -> m WhatIsAt
        whatIsAtInternal rec filename (BufSpan startLoc@(BufLoc _ c1) (BufLoc _ c2)) = do

            let followName :: Name -> Maybe (FilePath,BufSpan)
                followName n = case nameSrcSpan n of 
                                UnhelpfulSpan _  -> Nothing
                                RealSrcSpan   rs -> Just (unpackRealSrcSpan rs)

            let isTokenName :: GhcMonad m => FilePath -> BufLoc -> m WhatIsAt
                isTokenName fn sl = do
                    let considerLHsExprVar :: LHsExpr Name -> BufLoc -> Maybe BufSpan
                        considerLHsExprVar (L (RealSrcSpan r) (HsVar _  )) bl = if (spanStart $ toBufSpan r) == bl
                                                                                then Just $ toBufSpan r else Nothing
                        considerLHsExprVar _                               _  = Nothing

                    things <- getThingsAt considerLHsExprVar fn sl
                    case things of
                        [(L _ (HsVar n))] -> case followName n of
                                             Nothing    -> return $ ThereIsAnExternalName n
                                             Just (f,s) -> whatIsAtInternal RecFromName f s
                        _                 -> return $ UnknownElement

            let isTokenIEDecl :: GhcMonad m => FilePath -> BufLoc -> m WhatIsAt
                isTokenIEDecl fn bl = do
                    let considerIEDeclAt :: LIE Name -> BufLoc -> Maybe BufSpan
                        considerIEDeclAt (L (RealSrcSpan r) (IEVar _)) l = if (spanStart $ toBufSpan r) == l
                                                                           then Just $ toBufSpan r else Nothing
                        considerIEDeclAt _                            _  = Nothing

                    things <- getThingsAt considerIEDeclAt fn bl
                    case things of
                        [(L (RealSrcSpan _) (IEVar n))] -> 
                            case followName n of
                                Nothing    -> return $ ThereIsAnIEDeclToExtern n
                                Just (f,s) -> whatIsAtInternal RecFromIEDecl f s
                        _                  -> return UnknownElement
                    

            let isValBindAt :: GhcMonad m => FilePath -> BufLoc -> m WhatIsAt
                isValBindAt fn sl = do
                    let considerBindsAt :: LHsBindLR Name Name -> BufLoc -> Maybe BufSpan
                        considerBindsAt (L (RealSrcSpan r) b@(FunBind {})) bl = 
                            let allSpansOfThisBinding = realSrcSpansOfBinding (c2 - c1) b

                                foldArg :: Bool -> RealSrcSpan -> Bool
                                foldArg True _  = True
                                foldArg _    rs = (spanStart $ toBufSpan rs) == bl
                            
                            in  if foldl foldArg False allSpansOfThisBinding
                                then Just $ toBufSpan r
                                else Nothing
                        considerBindsAt _                                _  = Nothing
                    
                    funBinds <- getThingsAt considerBindsAt fn sl

                    case funBinds of
                        [b@(L _ (FunBind { fun_id = (L _ _) }))] -> return $
                            case rec of NoRecursion   -> ThereIsABinding                     b
                                        RecFromName   -> ThereIsANameFor    (ThereIsABinding b)
                                        RecFromSig    -> ThereIsATypeSigFor (ThereIsABinding b)
                                        RecFromIEDecl -> ThereIsAnIEDeclFor (ThereIsABinding b)
                        _                                        -> return $ UnknownElement
                    
            let isFunParameterAt :: GhcMonad m => FilePath -> BufLoc -> m WhatIsAt
                isFunParameterAt fn sl = do
                    let considerLPat :: LPat Name -> BufLoc -> Maybe BufSpan
                        considerLPat (L (RealSrcSpan r) (VarPat _)) bl = if (spanStart $ toBufSpan r) == bl
                                                                         then Just $ toBufSpan r else Nothing
                        considerLPat _                              _  = Nothing

                    things <- getThingsAt considerLPat fn sl
                    case things of
                        [p@(L _ (VarPat _))] -> return $
                            case rec of RecFromName -> ThereIsANameFor (ThereIsAFunParameter p)
                                        _           -> ThereIsAFunParameter p
                        _                  -> return   UnknownElement

            let filterByStartLoc :: BufLoc -> Located Name -> Bool
                filterByStartLoc bl' (L (RealSrcSpan r) _) = (spanStart $ toBufSpan r) == bl'
                filterByStartLoc _   _                     = False

            let isFunSignatureAt :: GhcMonad m => FilePath -> BufLoc -> m WhatIsAt
                isFunSignatureAt fn sl = do
                    let considerLSig :: LSig Name -> BufLoc -> Maybe BufSpan
                        considerLSig (L (RealSrcSpan _) (TypeSig lnames _)) bl = 
                            case filter (filterByStartLoc bl) lnames of
                             [(L (RealSrcSpan r) _)] -> Just $ toBufSpan r
                             _                       -> Nothing
                        considerLSig _                                    _  = Nothing
                            
                    sigs <- getThingsAt considerLSig fn sl

                    case sigs of
                        [(L (RealSrcSpan _) (TypeSig lnames _))] ->
                            case filter (filterByStartLoc startLoc) lnames of
                                [L _ n] -> case followName n of
                                           Nothing    -> return UnknownElement 
                                           Just (f,s) -> whatIsAtInternal RecFromSig f s
                                _       -> return UnknownElement
                        _               -> return UnknownElement

            
            tokenIsName         <- isTokenName      filename startLoc 
            tokenIsIEDecl       <- isTokenIEDecl    filename startLoc
            tokenIsValBind      <- isValBindAt      filename startLoc 
            tokenIsFunParameter <- isFunParameterAt filename startLoc
            tokenIsFunSignature <- isFunSignatureAt filename startLoc
                    
            let orIfUnknown :: WhatIsAt -> WhatIsAt -> WhatIsAt
                orIfUnknown UnknownElement x = x
                orIfUnknown x              _ = x

            return $ tokenIsName         `orIfUnknown` 
                     tokenIsIEDecl       `orIfUnknown` 
                     tokenIsValBind      `orIfUnknown` 
                     tokenIsFunParameter `orIfUnknown` 
                     tokenIsFunSignature

data WhatIsAt = 
              -- | Names are used for value- and function bindings as well as
              -- function parameters. External names refer to things outside of
              -- the module graph (external libraries for example)
                ThereIsAnExternalName    Name
              -- | (I)mport/(E)xport declaration that points to another
              -- compilation unit (package).
              | ThereIsAnIEDeclToExtern  Name
              -- | (I)mport/(E)xport declaration that points to the thing
              -- stored as first parameter.
              | ThereIsAnIEDeclFor       WhatIsAt
              -- | Names are just pointers to other things. When such a name is
              -- discovered, another run of WhatIsAt is triggered which
              -- searches for the thing that is at the location pointed to by
              -- this name. This can only be a binding (ThereIsABinding) or a
              -- function parameter (ThereIsAFunParameter).
              | ThereIsANameFor          WhatIsAt
              -- | The token pointed to a binding.
              | ThereIsABinding          (LHsBindLR Name Name)
              -- | Function parameters are of type (LPat Name) at the location
              -- where they are defined.
              | ThereIsAFunParameter     (LPat Name)
              -- | The location specified points to a function or value
              -- bindings signature.
              | ThereIsATypeSigFor       WhatIsAt
              -- | There is something that is currently not supported (e.g. a
              -- type declaration).
              | UnknownElement

-- | This function extracts the RealSrcSpan elements of a function binding.
-- The problem is that a function binding may contain several entry points of
-- which each has its own src-span attached. Each of these spans will be
-- contained by the result list produced by this function.
--
-- This is the heading @myfunction@ in @myfunction x = x * x@.
realSrcSpansOfBinding 
    :: Int                -- ^ Length of the function name (determined by the lexer)
    -> HsBindLR Name Name -- ^ The actual binding
    -> [RealSrcSpan]      -- ^ A list with one name for each match of the
                          -- function (or [] if this is a pattern binding or
                          -- infix declaration)
realSrcSpansOfBinding funNameLen (FunBind { fun_infix = False 
                                          , fun_matches = (MatchGroup lmatches _) }) =

    -- A function binding does not contain its Name instance explicitely. This
    -- is a problem at this point and the only way to surround it is to guess
    -- that the name of the function always starts with the match. This is also
    -- the reason why infix notation currently isn't supported (extracting the
    -- extract name of the function is a little tricky isn't it?)

    let extractNameSpanFromLMatch :: Int -> LMatch Name -> [RealSrcSpan]
        extractNameSpanFromLMatch len (L (RealSrcSpan l) _) = 
            let sta = realSrcSpanStart l
                (sl,sc) = (srcLocLine sta,srcLocCol sta)
                -- We extract the file name to be able to normalise it
                f = srcLocFile sta
                s = mkRealSrcLoc f sl  sc
                e = mkRealSrcLoc f sl (sc + len)
            in  [mkRealSrcSpan s e]
        extractNameSpanFromLMatch _       _                 = []

    in  concatMap (extractNameSpanFromLMatch funNameLen) lmatches
realSrcSpansOfBinding _ _ = []