{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE ScopedTypeVariables #-}

module Calligraphy.Phases.Parse
  ( parseHieFiles,
    ppParseError,
    ppParsePhaseDebugInfo,
    ParseError (..),
    ParsePhaseDebugInfo (..),
  )
where

import qualified Calligraphy.Compat.GHC as GHC
import Calligraphy.Compat.Lib (isDerivingNode, isInlineNode, isInstanceNode, isMinimalNode, isTypeSignatureNode, mergeSpans, sourceInfo)
import qualified Calligraphy.Compat.Lib as GHC
import Calligraphy.Util.LexTree (LexTree, TreeError (..), foldLexTree)
import qualified Calligraphy.Util.LexTree as LT
import Calligraphy.Util.Printer
import Calligraphy.Util.Types
import Control.Monad.Except
import Control.Monad.State
import Data.Array (Array)
import qualified Data.Array as Array
import Data.EnumMap (EnumMap)
import qualified Data.EnumMap as EnumMap
import Data.EnumSet (EnumSet)
import qualified Data.EnumSet as EnumSet
import qualified Data.Foldable as Foldable
import Data.List (unzip4)
import Data.Map (Map)
import qualified Data.Map as M
import qualified Data.Map as Map
import Data.Semigroup
import Data.Set (Set)
import qualified Data.Set as Set
import Data.Tree (Forest, Tree (..))

-- | A declaration extracted from the source code.
--
-- A single symbol can apparently declare a name multiple times in the same place, with multiple distinct keys D:
-- This happens, for example, with default methods; the name refers to both the method name and the default implementation's name.
-- We have to account for that to _some_ degree, which is why keys is a set.
-- The actual resolution of these happens wit 'dedup' in mkForest
data RawDecl = RawDecl
  { RawDecl -> String
_rdName :: !String,
    RawDecl -> EnumSet GHCKey
rdKeys :: !(EnumSet GHCKey),
    RawDecl -> DeclType
_rdTyp :: !DeclType,
    RawDecl -> Loc
rdStart :: !Loc,
    RawDecl -> Loc
rdEnd :: !Loc
  }

data ParseError = TreeError
  { ParseError -> String
_peModuleName :: String,
    ParseError -> String
_peModulePath :: FilePath,
    ParseError -> TreeError Loc RawDecl
_peError :: TreeError Loc RawDecl
  }

ppParseError :: Prints ParseError
ppParseError :: Prints ParseError
ppParseError (TreeError String
str String
path TreeError Loc RawDecl
err) = do
  forall (m :: * -> *). MonadPrint m => String -> m ()
strLn forall a b. (a -> b) -> a -> b
$ String
"Parse error in module " forall a. Semigroup a => a -> a -> a
<> String
str forall a. Semigroup a => a -> a -> a
<> String
" (" forall a. Semigroup a => a -> a -> a
<> String
path forall a. Semigroup a => a -> a -> a
<> String
")"
  forall (m :: * -> *) a. MonadPrint m => m a -> m a
indent forall a b. (a -> b) -> a -> b
$ Prints (TreeError Loc RawDecl)
ppTreeError TreeError Loc RawDecl
err
  where
    ppTreeError :: Prints (TreeError Loc RawDecl)
    ppTreeError :: Prints (TreeError Loc RawDecl)
ppTreeError (InvalidBounds Loc
l RawDecl
decl Loc
r) = do
      forall (m :: * -> *). MonadPrint m => String -> m ()
strLn forall a b. (a -> b) -> a -> b
$ String
"Invalid bounds " forall a. Semigroup a => a -> a -> a
<> forall a. Show a => a -> String
show (Loc
l, Loc
r) forall a. Semigroup a => a -> a -> a
<> String
"while inserting"
      forall (m :: * -> *) a. MonadPrint m => m a -> m a
indent forall a b. (a -> b) -> a -> b
$ Prints RawDecl
ppRawDecl RawDecl
decl
    ppTreeError (OverlappingBounds RawDecl
a RawDecl
b Loc
l Loc
r) = do
      forall (m :: * -> *). MonadPrint m => String -> m ()
strLn forall a b. (a -> b) -> a -> b
$ String
"Clashing bounds: (" forall a. Semigroup a => a -> a -> a
<> forall a. Show a => a -> String
show (Loc
l, Loc
r) forall a. Semigroup a => a -> a -> a
<> String
")"
      forall (m :: * -> *). MonadPrint m => String -> m ()
strLn String
"Node 1:"
      forall (m :: * -> *) a. MonadPrint m => m a -> m a
indent forall a b. (a -> b) -> a -> b
$ Prints RawDecl
ppRawDecl RawDecl
a
      forall (m :: * -> *). MonadPrint m => String -> m ()
strLn String
"Node 2:"
      forall (m :: * -> *) a. MonadPrint m => m a -> m a
indent forall a b. (a -> b) -> a -> b
$ Prints RawDecl
ppRawDecl RawDecl
b
    ppTreeError TreeError Loc RawDecl
MidSplit = forall (m :: * -> *). MonadPrint m => String -> m ()
strLn String
"MidSplit"
    ppTreeError (LexicalError Loc
l RawDecl
decl Loc
r LexTree Loc RawDecl
t) = do
      forall (m :: * -> *). MonadPrint m => String -> m ()
strLn String
"Lexical error while inserting"
      forall (m :: * -> *). MonadPrint m => String -> m ()
strLn String
"Node:"
      forall (m :: * -> *) a. MonadPrint m => m a -> m a
indent forall a b. (a -> b) -> a -> b
$ Prints RawDecl
ppRawDecl RawDecl
decl
      forall (m :: * -> *). MonadPrint m => String -> m ()
strLn String
"Bounds:"
      forall (m :: * -> *) a. MonadPrint m => m a -> m a
indent forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *) a. (MonadPrint m, Show a) => a -> m ()
showLn (Loc
l, Loc
r)
      forall (m :: * -> *). MonadPrint m => String -> m ()
strLn String
"Tree:"
      forall (m :: * -> *) a. MonadPrint m => m a -> m a
indent forall a b. (a -> b) -> a -> b
$ do
        Prints (LexTree Loc RawDecl)
ppLexTree LexTree Loc RawDecl
t

ppRawDecl :: Prints RawDecl
ppRawDecl :: Prints RawDecl
ppRawDecl (RawDecl String
name EnumSet GHCKey
keys DeclType
typ Loc
st Loc
end) = do
  forall (m :: * -> *). MonadPrint m => String -> m ()
strLn String
name
  forall (m :: * -> *) a. MonadPrint m => m a -> m a
indent forall a b. (a -> b) -> a -> b
$ do
    forall (m :: * -> *). MonadPrint m => String -> m ()
strLn forall a b. (a -> b) -> a -> b
$ String
"Type: " forall a. Semigroup a => a -> a -> a
<> forall a. Show a => a -> String
show DeclType
typ
    forall (m :: * -> *). MonadPrint m => String -> m ()
strLn forall a b. (a -> b) -> a -> b
$ String
"Span: " forall a. Semigroup a => a -> a -> a
<> forall a. Show a => a -> String
show (Loc
st, Loc
end)
    forall (m :: * -> *). MonadPrint m => String -> m ()
strLn forall a b. (a -> b) -> a -> b
$ String
"Keys: " forall a. Semigroup a => a -> a -> a
<> [String] -> String
unwords (forall a. Show a => a -> String
show forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall k. Enum k => EnumSet k -> [k]
EnumSet.toList EnumSet GHCKey
keys)

ppLexTree :: Prints (LexTree Loc RawDecl)
ppLexTree :: Prints (LexTree Loc RawDecl)
ppLexTree = forall r p a.
r -> (r -> p -> a -> r -> p -> r -> r) -> LexTree p a -> r
foldLexTree (forall (f :: * -> *) a. Applicative f => a -> f a
pure ()) forall a b. (a -> b) -> a -> b
$ \Printer ()
ls Loc
l RawDecl
decl Printer ()
m Loc
r Printer ()
rs -> do
  Printer ()
ls
  forall (m :: * -> *) a. (MonadPrint m, Show a) => a -> m ()
showLn (Loc
l, Loc
r)
  Prints RawDecl
ppRawDecl RawDecl
decl
  forall (m :: * -> *) a. MonadPrint m => m a -> m a
indent Printer ()
m
  Printer ()
rs

ghcNameKey :: GHC.Name -> GHCKey
ghcNameKey :: Name -> GHCKey
ghcNameKey = TypeIndex -> GHCKey
GHCKey forall b c a. (b -> c) -> (a -> b) -> a -> c
. Unique -> TypeIndex
GHC.getKey forall b c a. (b -> c) -> (a -> b) -> a -> c
. Name -> Unique
GHC.nameUnique

newtype ParsePhaseDebugInfo = ParsePhaseDebugInfo {ParsePhaseDebugInfo -> [(String, LexTree Loc RawDecl)]
modulesLexTrees :: [(String, LexTree Loc RawDecl)]}

ppParsePhaseDebugInfo :: Prints ParsePhaseDebugInfo
ppParsePhaseDebugInfo :: Prints ParsePhaseDebugInfo
ppParsePhaseDebugInfo (ParsePhaseDebugInfo [(String, LexTree Loc RawDecl)]
mods) = forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ [(String, LexTree Loc RawDecl)]
mods forall a b. (a -> b) -> a -> b
$ \(String
modName, LexTree Loc RawDecl
ltree) -> do
  forall (m :: * -> *). MonadPrint m => String -> m ()
strLn String
modName
  forall (m :: * -> *) a. MonadPrint m => m a -> m a
indent forall a b. (a -> b) -> a -> b
$ Prints (LexTree Loc RawDecl)
ppLexTree LexTree Loc RawDecl
ltree

data ParsedFile = ParsedFile
  { ParsedFile -> String
_pfModuleName :: String,
    ParsedFile -> String
_pfFilePath :: FilePath,
    ParsedFile -> Forest Decl
_pfDecls :: Forest Decl,
    ParsedFile -> Set (GHCKey, GHCKey)
_pfCalls :: Set (GHCKey, GHCKey),
    ParsedFile -> EnumMap GHCKey (EnumSet GHCKey)
_pfTypings :: EnumMap GHCKey (EnumSet GHCKey),
    ParsedFile -> LexTree Loc RawDecl
_pfDebugTree :: LexTree Loc RawDecl
  }

-- | Assigns and maintains a mapping of GHCKeys to Key
type HieParse a = StateT (Key, EnumMap GHCKey Key) (Either ParseError) a

parseHieFiles ::
  [GHC.HieFile] ->
  Either ParseError (ParsePhaseDebugInfo, CallGraph)
parseHieFiles :: [HieFile] -> Either ParseError (ParsePhaseDebugInfo, CallGraph)
parseHieFiles [HieFile]
files = (\([ParsedFile]
parsed, (Key
_, EnumMap GHCKey Key
keymap)) -> [ParsedFile]
-> EnumMap GHCKey Key -> (ParsePhaseDebugInfo, CallGraph)
mkCallGraph [ParsedFile]
parsed EnumMap GHCKey Key
keymap) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall s (m :: * -> *) a. StateT s m a -> s -> m (a, s)
runStateT (forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM HieFile -> HieParse ParsedFile
parseHieFile [HieFile]
files) (TypeIndex -> Key
Key TypeIndex
0, forall a. Monoid a => a
mempty)
  where
    mkCallGraph :: [ParsedFile] -> EnumMap GHCKey Key -> (ParsePhaseDebugInfo, CallGraph)
    mkCallGraph :: [ParsedFile]
-> EnumMap GHCKey Key -> (ParsePhaseDebugInfo, CallGraph)
mkCallGraph [ParsedFile]
parsed EnumMap GHCKey Key
keymap =
      let ([Module]
mods, [(String, LexTree Loc RawDecl)]
debugs, [Set (GHCKey, GHCKey)]
calls, [EnumMap GHCKey (EnumSet GHCKey)]
typings) = forall a b c d. [(a, b, c, d)] -> ([a], [b], [c], [d])
unzip4 (forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (\(ParsedFile String
name String
path Forest Decl
decls Set (GHCKey, GHCKey)
call EnumMap GHCKey (EnumSet GHCKey)
typing LexTree Loc RawDecl
ltree) -> (String -> String -> Forest Decl -> Module
Module String
name String
path Forest Decl
decls, (String
name, LexTree Loc RawDecl
ltree), Set (GHCKey, GHCKey)
call, EnumMap GHCKey (EnumSet GHCKey)
typing)) [ParsedFile]
parsed)
          typeEdges :: Set (Key, Key)
typeEdges = forall a b.
(Enum a, Ord b) =>
EnumMap a b -> Set (a, a) -> Set (b, b)
rekeyCalls EnumMap GHCKey Key
keymap forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Ord a => [a] -> Set a
Set.fromList forall a b. (a -> b) -> a -> b
$ do
            (GHCKey
term, EnumSet GHCKey
types) <- forall k a. Enum k => EnumMap k a -> [(k, a)]
EnumMap.toList (forall a. Monoid a => [a] -> a
mconcat [EnumMap GHCKey (EnumSet GHCKey)]
typings)
            GHCKey
typ <- forall k. Enum k => EnumSet k -> [k]
EnumSet.toList EnumSet GHCKey
types
            forall (f :: * -> *) a. Applicative f => a -> f a
pure (GHCKey
term, GHCKey
typ)
       in ([(String, LexTree Loc RawDecl)] -> ParsePhaseDebugInfo
ParsePhaseDebugInfo [(String, LexTree Loc RawDecl)]
debugs, [Module] -> Set (Key, Key) -> Set (Key, Key) -> CallGraph
CallGraph [Module]
mods (forall a b.
(Enum a, Ord b) =>
EnumMap a b -> Set (a, a) -> Set (b, b)
rekeyCalls EnumMap GHCKey Key
keymap (forall a. Monoid a => [a] -> a
mconcat [Set (GHCKey, GHCKey)]
calls)) Set (Key, Key)
typeEdges)

parseHieFile :: GHC.HieFile -> HieParse ParsedFile
parseHieFile :: HieFile -> HieParse ParsedFile
parseHieFile file :: HieFile
file@(GHC.HieFile String
filepath Module
mdl Array TypeIndex HieTypeFlat
_ HieASTs TypeIndex
_ [AvailInfo]
avails ByteString
_) = do
  LexTree Loc RawDecl
lextree <- forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String -> TreeError Loc RawDecl -> ParseError
TreeError String
modname String
filepath) forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a b. (a -> b) -> a -> b
$ [RawDecl] -> Either (TreeError Loc RawDecl) (LexTree Loc RawDecl)
structure [RawDecl]
decls
  let calls :: Set (GHCKey, GHCKey)
calls = LexTree Loc GHCKey -> Set (GHCKey, GHCKey)
resolveCalls (forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (forall k. Enum k => EnumSet k -> k
EnumSet.findMin forall b c a. (b -> c) -> (a -> b) -> a -> c
. RawDecl -> EnumSet GHCKey
rdKeys) LexTree Loc RawDecl
lextree)
  Forest Decl
forest <- forall a b. Traversal (Forest a) (Forest b) a b
forestT (EnumSet GHCKey -> RawDecl -> HieParse Decl
mkDecl EnumSet GHCKey
exportKeys) (LexTree Loc RawDecl -> Forest RawDecl
mkForest LexTree Loc RawDecl
lextree)
  forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a b. (a -> b) -> a -> b
$ String
-> String
-> Forest Decl
-> Set (GHCKey, GHCKey)
-> EnumMap GHCKey (EnumSet GHCKey)
-> LexTree Loc RawDecl
-> ParsedFile
ParsedFile String
modname String
filepath Forest Decl
forest Set (GHCKey, GHCKey)
calls EnumMap GHCKey (EnumSet GHCKey)
types LexTree Loc RawDecl
lextree
  where
    modname :: String
modname = ModuleName -> String
GHC.moduleNameString (forall unit. GenModule unit -> ModuleName
GHC.moduleName Module
mdl)
    exportKeys :: EnumSet GHCKey
exportKeys = forall k. Enum k => [k] -> EnumSet k
EnumSet.fromList forall a b. (a -> b) -> a -> b
$ forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Name -> GHCKey
ghcNameKey forall a b. (a -> b) -> a -> b
$ [AvailInfo]
avails forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= AvailInfo -> [Name]
GHC.availNames
    Collect [RawDecl]
decls [(Loc, GHCKey)]
useSites EnumMap GHCKey (EnumSet GHCKey)
types = HieFile -> Collect
collect HieFile
file

    resolveCalls :: LexTree Loc GHCKey -> Set (GHCKey, GHCKey)
    resolveCalls :: LexTree Loc GHCKey -> Set (GHCKey, GHCKey)
resolveCalls LexTree Loc GHCKey
lextree = forall a b c. (a -> b -> c) -> b -> a -> c
flip forall (t :: * -> *) m a.
(Foldable t, Monoid m) =>
(a -> m) -> t a -> m
foldMap [(Loc, GHCKey)]
useSites forall a b. (a -> b) -> a -> b
$ \(Loc
loc, GHCKey
callee) ->
      case forall p a. Ord p => p -> LexTree p a -> Maybe a
LT.lookup Loc
loc LexTree Loc GHCKey
lextree of
        Maybe GHCKey
Nothing -> forall a. Monoid a => a
mempty
        Just GHCKey
rep -> forall a. a -> Set a
Set.singleton (GHCKey
rep, GHCKey
callee)

    mkForest :: LexTree Loc RawDecl -> Forest RawDecl
    mkForest :: LexTree Loc RawDecl -> Forest RawDecl
mkForest = forall s t a b. Traversal s t a b -> (a -> b) -> s -> t
over forall a b. Traversal (Forest a) (Forest b) a b
forestT (String, (EnumSet GHCKey, Max DeclType, First Loc, First Loc))
-> RawDecl
fromKV forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall k v. (Ord k, Semigroup v) => Forest (k, v) -> Forest (k, v)
dedup forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall s t a b. Traversal s t a b -> (a -> b) -> s -> t
over forall a b. Traversal (Forest a) (Forest b) a b
forestT forall {a} {c}.
(a, RawDecl, c)
-> (String, (EnumSet GHCKey, Max DeclType, First Loc, First Loc))
toKV forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall p a. LexTree p a -> Forest (p, a, p)
LT.toForest
      where
        toKV :: (a, RawDecl, c)
-> (String, (EnumSet GHCKey, Max DeclType, First Loc, First Loc))
toKV (a
_, RawDecl String
name EnumSet GHCKey
keys DeclType
typ Loc
s Loc
e, c
_) = (String
name, (EnumSet GHCKey
keys, forall a. a -> Max a
Max DeclType
typ, forall a. a -> First a
First Loc
s, forall a. a -> First a
First Loc
e))
        fromKV :: (String, (EnumSet GHCKey, Max DeclType, First Loc, First Loc))
-> RawDecl
fromKV (String
name, (EnumSet GHCKey
keys, Max DeclType
typ, First Loc
s, First Loc
e)) = String -> EnumSet GHCKey -> DeclType -> Loc -> Loc -> RawDecl
RawDecl String
name EnumSet GHCKey
keys DeclType
typ Loc
s Loc
e

    -- TODO this is the only part that touches the state, maybe it's worth lifting it out
    mkDecl :: EnumSet GHCKey -> RawDecl -> HieParse Decl
    mkDecl :: EnumSet GHCKey -> RawDecl -> HieParse Decl
mkDecl EnumSet GHCKey
exportSet (RawDecl String
str EnumSet GHCKey
ghcKeys DeclType
typ Loc
start Loc
_) = do
      Key
key <- HieParse Key
fresh
      forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ (forall k. Enum k => EnumSet k -> [k]
EnumSet.toList EnumSet GHCKey
ghcKeys) (Key -> GHCKey -> HieParse ()
assoc Key
key)
      let exported :: Bool
exported = forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any (forall a b c. (a -> b -> c) -> b -> a -> c
flip forall k. Enum k => k -> EnumSet k -> Bool
EnumSet.member EnumSet GHCKey
exportSet) (forall k. Enum k => EnumSet k -> [k]
EnumSet.toList EnumSet GHCKey
ghcKeys)
      forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a b. (a -> b) -> a -> b
$ String -> Key -> EnumSet GHCKey -> Bool -> DeclType -> Loc -> Decl
Decl String
str Key
key EnumSet GHCKey
ghcKeys Bool
exported DeclType
typ Loc
start

    fresh :: HieParse Key
    fresh :: HieParse Key
fresh = forall s (m :: * -> *) a. MonadState s m => (s -> (a, s)) -> m a
state forall a b. (a -> b) -> a -> b
$ \(Key TypeIndex
n, EnumMap GHCKey Key
m) -> (TypeIndex -> Key
Key TypeIndex
n, (TypeIndex -> Key
Key (TypeIndex
n forall a. Num a => a -> a -> a
+ TypeIndex
1), EnumMap GHCKey Key
m))

    assoc :: Key -> GHCKey -> HieParse ()
    assoc :: Key -> GHCKey -> HieParse ()
assoc Key
key GHCKey
ghckey = forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify forall a b. (a -> b) -> a -> b
$ forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (forall k a. Enum k => k -> a -> EnumMap k a -> EnumMap k a
EnumMap.insert GHCKey
ghckey Key
key)

dedup :: (Ord k, Semigroup v) => Forest (k, v) -> Forest (k, v)
dedup :: forall k v. (Ord k, Semigroup v) => Forest (k, v) -> Forest (k, v)
dedup = forall {a} {b}. Dedup a b -> [Tree (a, b)]
fromDedup forall b c a. (b -> c) -> (a -> b) -> a -> c
. Forest (k, v) -> Dedup k v
toDedup
  where
    fromDedup :: Dedup a b -> [Tree (a, b)]
fromDedup = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (\(a
k, (b
v, Dedup a b
d)) -> forall a. a -> [Tree a] -> Tree a
Node (a
k, b
v) (Dedup a b -> [Tree (a, b)]
fromDedup Dedup a b
d)) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall k a. Map k a -> [(k, a)]
Map.toList forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall k v. Dedup k v -> Map k (v, Dedup k v)
unDedup
    toDedup :: Forest (k, v) -> Dedup k v
toDedup = forall k v. Map k (v, Dedup k v) -> Dedup k v
Dedup forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall k a. Ord k => (a -> a -> a) -> [(k, a)] -> Map k a
Map.fromListWith forall a. Semigroup a => a -> a -> a
(<>) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (\(Node (k
k, v
v) Forest (k, v)
f) -> (k
k, (v
v, Forest (k, v) -> Dedup k v
toDedup Forest (k, v)
f)))

newtype Dedup k v = Dedup {forall k v. Dedup k v -> Map k (v, Dedup k v)
unDedup :: Map k (v, Dedup k v)}

instance (Ord k, Semigroup v) => Semigroup (Dedup k v) where
  Dedup Map k (v, Dedup k v)
a <> :: Dedup k v -> Dedup k v -> Dedup k v
<> Dedup Map k (v, Dedup k v)
b = forall k v. Map k (v, Dedup k v) -> Dedup k v
Dedup (forall k a. Ord k => (a -> a -> a) -> Map k a -> Map k a -> Map k a
Map.unionWith forall a. Semigroup a => a -> a -> a
(<>) Map k (v, Dedup k v)
a Map k (v, Dedup k v)
b)

structure :: [RawDecl] -> Either (TreeError Loc RawDecl) (LexTree Loc RawDecl)
structure :: [RawDecl] -> Either (TreeError Loc RawDecl) (LexTree Loc RawDecl)
structure = forall (t :: * -> *) (m :: * -> *) b a.
(Foldable t, Monad m) =>
(b -> a -> m b) -> b -> t a -> m b
foldM (\ !LexTree Loc RawDecl
t RawDecl
decl -> forall p a.
Ord p =>
(a -> a -> Maybe a)
-> p
-> a
-> p
-> LexTree p a
-> Either (TreeError p a) (LexTree p a)
LT.insertWith RawDecl -> RawDecl -> Maybe RawDecl
f (RawDecl -> Loc
rdStart RawDecl
decl) RawDecl
decl (RawDecl -> Loc
rdEnd RawDecl
decl) LexTree Loc RawDecl
t) forall p a. LexTree p a
LT.emptyLexTree
  where
    f :: RawDecl -> RawDecl -> Maybe RawDecl
f (RawDecl String
na EnumSet GHCKey
ka DeclType
ta Loc
sa Loc
ea) prev :: RawDecl
prev@(RawDecl String
nb EnumSet GHCKey
kb DeclType
tb Loc
_ Loc
_)
      | DeclType
ta forall a. Eq a => a -> a -> Bool
== DeclType
tb Bool -> Bool -> Bool
&& String
na forall a. Eq a => a -> a -> Bool
== String
nb = forall a. a -> Maybe a
Just (String -> EnumSet GHCKey -> DeclType -> Loc -> Loc -> RawDecl
RawDecl String
na (EnumSet GHCKey
ka forall a. Semigroup a => a -> a -> a
<> EnumSet GHCKey
kb) DeclType
ta Loc
sa Loc
ea)
      | Bool
otherwise = forall a. a -> Maybe a
Just RawDecl
prev

-- | This is the best way I can find of checking whether the name was written by a programmer or not.
-- GHC internally classifies names extensively, but none of those mechanisms seem to allow to distinguish GHC-generated names.
isGenerated :: GHC.Name -> Bool
isGenerated :: Name -> Bool
isGenerated = forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
elem Char
'$' forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. NamedThing a => a -> String
GHC.getOccString

data Collect = Collect
  { Collect -> [RawDecl]
_decls :: [RawDecl],
    Collect -> [(Loc, GHCKey)]
_uses :: [(Loc, GHCKey)],
    Collect -> EnumMap GHCKey (EnumSet GHCKey)
_types :: EnumMap GHCKey (EnumSet GHCKey)
  }

-- | Collect declarations, uses, and types in a HIE file
collect :: GHC.HieFile -> Collect
collect :: HieFile -> Collect
collect (GHC.HieFile String
_ Module
_ Array TypeIndex HieTypeFlat
typeArr (GHC.HieASTs Map HiePath (HieAST TypeIndex)
asts) [AvailInfo]
_ ByteString
_) = forall s a. State s a -> s -> s
execState (forall (m :: * -> *) s t a b.
Applicative m =>
Traversal s t a b -> s -> (a -> m ()) -> m ()
forT_ forall (t :: * -> *) (f :: * -> *) a b.
(Traversable t, Applicative f) =>
(a -> f b) -> t a -> f (t b)
traverse Map HiePath (HieAST TypeIndex)
asts HieAST TypeIndex -> State Collect ()
go) ([RawDecl]
-> [(Loc, GHCKey)] -> EnumMap GHCKey (EnumSet GHCKey) -> Collect
Collect forall a. Monoid a => a
mempty forall a. Monoid a => a
mempty forall a. Monoid a => a
mempty)
  where
    tellDecl :: GHC.Name -> DeclType -> GHC.RealSrcSpan -> State Collect ()
    tellDecl :: Name -> DeclType -> RealSrcSpan -> State Collect ()
tellDecl Name
nm DeclType
typ RealSrcSpan
spn = forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify forall a b. (a -> b) -> a -> b
$ \(Collect [RawDecl]
decls [(Loc, GHCKey)]
uses EnumMap GHCKey (EnumSet GHCKey)
types) -> [RawDecl]
-> [(Loc, GHCKey)] -> EnumMap GHCKey (EnumSet GHCKey) -> Collect
Collect (RawDecl
decl forall a. a -> [a] -> [a]
: [RawDecl]
decls) [(Loc, GHCKey)]
uses EnumMap GHCKey (EnumSet GHCKey)
types
      where
        decl :: RawDecl
decl =
          String -> EnumSet GHCKey -> DeclType -> Loc -> Loc -> RawDecl
RawDecl
            (forall a. NamedThing a => a -> String
GHC.getOccString Name
nm)
            (forall k. Enum k => k -> EnumSet k
EnumSet.singleton forall b c a. (b -> c) -> (a -> b) -> a -> c
. Name -> GHCKey
ghcNameKey forall a b. (a -> b) -> a -> b
$ Name
nm)
            DeclType
typ
            (TypeIndex -> TypeIndex -> Loc
Loc (RealSrcSpan -> TypeIndex
GHC.srcSpanStartLine RealSrcSpan
spn) (RealSrcSpan -> TypeIndex
GHC.srcSpanStartCol RealSrcSpan
spn))
            (TypeIndex -> TypeIndex -> Loc
Loc (RealSrcSpan -> TypeIndex
GHC.srcSpanEndLine RealSrcSpan
spn) (RealSrcSpan -> TypeIndex
GHC.srcSpanEndCol RealSrcSpan
spn))

    tellUse :: GHC.RealSrcLoc -> GHCKey -> State Collect ()
    tellUse :: RealSrcLoc -> GHCKey -> State Collect ()
tellUse RealSrcLoc
loc GHCKey
key = forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify forall a b. (a -> b) -> a -> b
$ \(Collect [RawDecl]
decls [(Loc, GHCKey)]
uses EnumMap GHCKey (EnumSet GHCKey)
types) -> [RawDecl]
-> [(Loc, GHCKey)] -> EnumMap GHCKey (EnumSet GHCKey) -> Collect
Collect [RawDecl]
decls ((TypeIndex -> TypeIndex -> Loc
Loc (RealSrcLoc -> TypeIndex
GHC.srcLocLine RealSrcLoc
loc) (RealSrcLoc -> TypeIndex
GHC.srcLocCol RealSrcLoc
loc), GHCKey
key) forall a. a -> [a] -> [a]
: [(Loc, GHCKey)]
uses) EnumMap GHCKey (EnumSet GHCKey)
types

    tellType :: GHC.Name -> GHC.TypeIndex -> State Collect ()
    tellType :: Name -> TypeIndex -> State Collect ()
tellType Name
name TypeIndex
ix = forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify forall a b. (a -> b) -> a -> b
$ \(Collect [RawDecl]
decls [(Loc, GHCKey)]
uses EnumMap GHCKey (EnumSet GHCKey)
types) -> [RawDecl]
-> [(Loc, GHCKey)] -> EnumMap GHCKey (EnumSet GHCKey) -> Collect
Collect [RawDecl]
decls [(Loc, GHCKey)]
uses (forall k a.
Enum k =>
(a -> a -> a) -> k -> a -> EnumMap k a -> EnumMap k a
EnumMap.insertWith forall a. Semigroup a => a -> a -> a
(<>) (Name -> GHCKey
ghcNameKey Name
name) (EnumMap TypeIndex (EnumSet GHCKey)
typeMap forall k a. Enum k => EnumMap k a -> k -> a
EnumMap.! TypeIndex
ix) EnumMap GHCKey (EnumSet GHCKey)
types)

    typeMap :: EnumMap TypeIndex (EnumSet GHCKey)
typeMap = Array TypeIndex HieTypeFlat -> EnumMap TypeIndex (EnumSet GHCKey)
resolveTypes Array TypeIndex HieTypeFlat
typeArr

    ignoreNode :: NodeInfo a -> Bool
ignoreNode NodeInfo a
nodeInfo = forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any (forall a b. (a -> b) -> a -> b
$ NodeInfo a
nodeInfo) [forall a. NodeInfo a -> Bool
isInstanceNode, forall a. NodeInfo a -> Bool
isTypeSignatureNode, forall a. NodeInfo a -> Bool
isInlineNode, forall a. NodeInfo a -> Bool
isMinimalNode, forall a. NodeInfo a -> Bool
isDerivingNode]

    go :: GHC.HieAST GHC.TypeIndex -> State Collect ()
    go :: HieAST TypeIndex -> State Collect ()
go node :: HieAST TypeIndex
node@(GHC.Node SourcedNodeInfo TypeIndex
_ RealSrcSpan
_ [HieAST TypeIndex]
children) =
      forall (m :: * -> *) s t a b.
Applicative m =>
Traversal s t a b -> s -> (a -> m ()) -> m ()
forT_ forall a. Traversal' (HieAST a) (NodeInfo a)
sourceInfo HieAST TypeIndex
node forall a b. (a -> b) -> a -> b
$ \NodeInfo TypeIndex
nodeInfo ->
        forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless (forall a. NodeInfo a -> Bool
ignoreNode NodeInfo TypeIndex
nodeInfo) forall a b. (a -> b) -> a -> b
$ do
          forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ (forall k a. Map k a -> [(k, a)]
M.toList forall a b. (a -> b) -> a -> b
$ forall a. NodeInfo a -> NodeIdentifiers a
GHC.nodeIdentifiers NodeInfo TypeIndex
nodeInfo) forall a b. (a -> b) -> a -> b
$ \case
            (Right Name
name, GHC.IdentifierDetails Maybe TypeIndex
ty Set ContextInfo
info) | Bool -> Bool
not (Name -> Bool
isGenerated Name
name) -> do
              forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ (Name -> TypeIndex -> State Collect ()
tellType Name
name) Maybe TypeIndex
ty
              case Set ContextInfo -> IdentifierType
classifyIdentifier Set ContextInfo
info of
                IdentifierType
IdnIgnore -> forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
                IdentifierType
IdnUse -> RealSrcLoc -> GHCKey -> State Collect ()
tellUse (RealSrcSpan -> RealSrcLoc
GHC.realSrcSpanStart forall a b. (a -> b) -> a -> b
$ forall a. HieAST a -> RealSrcSpan
GHC.nodeSpan HieAST TypeIndex
node) (Name -> GHCKey
ghcNameKey Name
name)
                IdnDecl DeclType
typ RealSrcSpan
sp
                  | RealSrcSpan -> Bool
GHC.isPointSpan RealSrcSpan
sp -> forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
                  | Bool
otherwise -> Name -> DeclType -> RealSrcSpan -> State Collect ()
tellDecl Name
name DeclType
typ RealSrcSpan
sp
            (Identifier, IdentifierDetails TypeIndex)
_ -> forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
          forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ HieAST TypeIndex -> State Collect ()
go [HieAST TypeIndex]
children

-- TODO This can be faster by storing intermediate restuls, but that has proven tricky to get right.
resolveTypes :: Array GHC.TypeIndex GHC.HieTypeFlat -> EnumMap GHC.TypeIndex (EnumSet GHCKey)
resolveTypes :: Array TypeIndex HieTypeFlat -> EnumMap TypeIndex (EnumSet GHCKey)
resolveTypes Array TypeIndex HieTypeFlat
typeArray = forall k a. Enum k => [(k, a)] -> EnumMap k a
EnumMap.fromList [(TypeIndex
ix, forall s a. State s a -> s -> a
evalState (TypeIndex -> State (EnumSet TypeIndex) (EnumSet GHCKey)
go TypeIndex
ix) forall a. Monoid a => a
mempty) | TypeIndex
ix <- forall i e. Ix i => Array i e -> [i]
Array.indices Array TypeIndex HieTypeFlat
typeArray]
  where
    keys :: GHC.HieType a -> EnumSet GHCKey
    keys :: forall a. HieType a -> EnumSet GHCKey
keys (GHC.HTyConApp (GHC.IfaceTyCon Name
name IfaceTyConInfo
_) HieArgs a
_) = forall k. Enum k => k -> EnumSet k
EnumSet.singleton (Name -> GHCKey
ghcNameKey Name
name)
    keys (GHC.HForAllTy ((Name
name, a
_), ArgFlag
_) a
_) = forall k. Enum k => k -> EnumSet k
EnumSet.singleton (Name -> GHCKey
ghcNameKey Name
name)
    -- These are variables, which we ignore, but it can't hurt
    keys (GHC.HTyVarTy Name
name) = forall k. Enum k => k -> EnumSet k
EnumSet.singleton (Name -> GHCKey
ghcNameKey Name
name)
    keys HieType a
_ = forall a. Monoid a => a
mempty
    go :: GHC.TypeIndex -> State (EnumSet GHC.TypeIndex) (EnumSet GHCKey)
    go :: TypeIndex -> State (EnumSet TypeIndex) (EnumSet GHCKey)
go TypeIndex
current =
      forall s (m :: * -> *) a. MonadState s m => (s -> a) -> m a
gets (forall k. Enum k => k -> EnumSet k -> Bool
EnumSet.member TypeIndex
current) forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
        Bool
True -> forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a. Monoid a => a
mempty
        Bool
False -> do
          forall s (m :: * -> *). MonadState s m => (s -> s) -> m ()
modify (forall k. Enum k => k -> EnumSet k -> EnumSet k
EnumSet.insert TypeIndex
current)
          let ty :: HieTypeFlat
ty = Array TypeIndex HieTypeFlat
typeArray forall i e. Ix i => Array i e -> i -> e
Array.! TypeIndex
current
          forall a. Monoid a => a -> a -> a
mappend (forall a. HieType a -> EnumSet GHCKey
keys HieTypeFlat
ty) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Monoid a => [a] -> a
mconcat forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM TypeIndex -> State (EnumSet TypeIndex) (EnumSet GHCKey)
go (forall (t :: * -> *) a. Foldable t => t a -> [a]
Foldable.toList HieTypeFlat
ty)

data IdentifierType
  = IdnDecl !DeclType !GHC.Span
  | IdnUse
  | IdnIgnore

instance Semigroup IdentifierType where
  IdentifierType
IdnIgnore <> :: IdentifierType -> IdentifierType -> IdentifierType
<> IdentifierType
a = IdentifierType
a
  IdentifierType
IdnUse <> IdentifierType
IdnIgnore = IdentifierType
IdnUse
  IdentifierType
IdnUse <> IdentifierType
a = IdentifierType
a
  IdnDecl DeclType
typ RealSrcSpan
sp <> IdnDecl DeclType
typ' RealSrcSpan
sp' = DeclType -> RealSrcSpan -> IdentifierType
IdnDecl (forall a. Ord a => a -> a -> a
max DeclType
typ DeclType
typ') (RealSrcSpan -> RealSrcSpan -> RealSrcSpan
mergeSpans RealSrcSpan
sp RealSrcSpan
sp')
  IdnDecl DeclType
typ RealSrcSpan
sp <> IdentifierType
_ = DeclType -> RealSrcSpan -> IdentifierType
IdnDecl DeclType
typ RealSrcSpan
sp

instance Monoid IdentifierType where mempty :: IdentifierType
mempty = IdentifierType
IdnIgnore

classifyIdentifier :: Set GHC.ContextInfo -> IdentifierType
classifyIdentifier :: Set ContextInfo -> IdentifierType
classifyIdentifier = forall (t :: * -> *) m a.
(Foldable t, Monoid m) =>
(a -> m) -> t a -> m
foldMap ContextInfo -> IdentifierType
classify
  where
    classify :: GHC.ContextInfo -> IdentifierType
    classify :: ContextInfo -> IdentifierType
classify (GHC.Decl DeclType
GHC.DataDec (Just RealSrcSpan
sp)) = DeclType -> RealSrcSpan -> IdentifierType
IdnDecl DeclType
DataDecl RealSrcSpan
sp
    classify (GHC.Decl DeclType
GHC.PatSynDec (Just RealSrcSpan
sp)) = DeclType -> RealSrcSpan -> IdentifierType
IdnDecl DeclType
DataDecl RealSrcSpan
sp
    classify (GHC.Decl DeclType
GHC.FamDec (Just RealSrcSpan
sp)) = DeclType -> RealSrcSpan -> IdentifierType
IdnDecl DeclType
DataDecl RealSrcSpan
sp
    classify (GHC.Decl DeclType
GHC.SynDec (Just RealSrcSpan
sp)) = DeclType -> RealSrcSpan -> IdentifierType
IdnDecl DeclType
DataDecl RealSrcSpan
sp
    classify (GHC.Decl DeclType
GHC.ConDec (Just RealSrcSpan
sp)) = DeclType -> RealSrcSpan -> IdentifierType
IdnDecl DeclType
ConDecl RealSrcSpan
sp
    classify (GHC.Decl DeclType
GHC.ClassDec (Just RealSrcSpan
sp)) = DeclType -> RealSrcSpan -> IdentifierType
IdnDecl DeclType
ClassDecl RealSrcSpan
sp
    classify (GHC.ClassTyDecl (Just RealSrcSpan
sp)) = DeclType -> RealSrcSpan -> IdentifierType
IdnDecl DeclType
ValueDecl RealSrcSpan
sp
    classify (GHC.ValBind BindType
GHC.RegularBind Scope
_ (Just RealSrcSpan
sp)) = DeclType -> RealSrcSpan -> IdentifierType
IdnDecl DeclType
ValueDecl RealSrcSpan
sp
    classify (GHC.RecField RecFieldContext
GHC.RecFieldDecl (Just RealSrcSpan
sp)) = DeclType -> RealSrcSpan -> IdentifierType
IdnDecl DeclType
RecDecl RealSrcSpan
sp
    -- Use
    classify (GHC.RecField RecFieldContext
GHC.RecFieldAssign Maybe RealSrcSpan
_) = IdentifierType
IdnUse
    classify (GHC.RecField RecFieldContext
GHC.RecFieldOcc Maybe RealSrcSpan
_) = IdentifierType
IdnUse
    classify ContextInfo
GHC.Use = IdentifierType
IdnUse
    -- Ignore
    classify ContextInfo
_ = IdentifierType
IdnIgnore