{-# LANGUAGE AllowAmbiguousTypes #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE DerivingVia #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE NumericUnderscores #-}
{-# LANGUAGE ViewPatterns #-}
{- | Type Points From analysis in the style of
- Cork: Dynamic Memory Leak Detectionfor Garbage-Collected Languages
- https://dl.acm.org/doi/10.1145/1190216.1190224
- -}
module GHC.Debug.TypePointsFrom( typePointsFrom
                               , detectLeaks
                               , TypePointsFrom(..)
                               , getNodes
                               , getEdges
                               , edgeSource
                               , edgeTarget
                               , Key
                               , Edge(..)
                               , getKey
                               ) where

import GHC.Debug.Client.Monad
import GHC.Debug.Client
import Control.Monad.State
import GHC.Debug.ParTrace
import GHC.Debug.Types.Ptr
import qualified Data.Map.Monoidal.Strict as Map
import Data.Map (Map)
import qualified Data.Map.Internal as M
import GHC.Debug.Profile
import Control.Monad.Identity
import Control.Concurrent
import Data.List (sortOn)
import Language.Dot
import qualified Data.Set as S


type Key = InfoTablePtr

data Edge = Edge !Key !Key deriving (Eq, Ord, Show)

edgeSource :: Edge -> Key
edgeTarget :: Edge -> Key
edgeSource (Edge k1 _) = k1
edgeTarget (Edge _ k2) = k2

data TypePointsFrom = TypePointsFrom { nodes :: !(Map.MonoidalMap Key CensusStats)
                                      , edges :: !(Map.MonoidalMap Edge CensusStats)
                                      } deriving (Show)

getNodes :: TypePointsFrom -> Map Key CensusStats
getEdges :: TypePointsFrom -> Map Edge CensusStats
getNodes = Map.getMonoidalMap . nodes
getEdges = Map.getMonoidalMap . edges

instance Monoid TypePointsFrom where
  mempty = TypePointsFrom mempty mempty

instance Semigroup TypePointsFrom where
  (TypePointsFrom a1 a2) <> (TypePointsFrom b1 b2) = TypePointsFrom (a1 <> b1) (a2 <> b2)

singletonTPF :: Key -> CensusStats -> [(Edge, CensusStats)] -> TypePointsFrom
singletonTPF k s es = TypePointsFrom (Map.singleton k s)
                                  (Map.fromList es)

-- | Perform a "type points from" heap census
typePointsFrom :: [ClosurePtr] -> DebugM TypePointsFrom
typePointsFrom cs = traceParFromM funcs (map (ClosurePtrWithInfo Root) cs)

  where
    nop = const (return ())
    funcs = TraceFunctionsIO nop nop clos visit nop

    visit :: ClosurePtr -> Context -> DebugM TypePointsFrom
    visit cp ctx = do
      sc <- dereferenceClosure cp
      let k = tableId $ info (noSize sc)
          v = mkCS (dcSize sc)
          parent_edge = case ctx of
                          Root -> []
                          Parent pk -> [(Edge k pk, v)]
      return $ TypePointsFrom Map.empty (Map.fromList parent_edge)


    clos :: ClosurePtr -> SizedClosure -> Context
              -> DebugM (Context, TypePointsFrom, DebugM () -> DebugM ())
    clos _cp sc ctx = do
      let k = tableId $ info (noSize sc)
      let s :: Size
          s = dcSize sc
          v =  mkCS s

          -- Edges point from the object TO what retains it
          parent_edge = case ctx of
                          Root -> []
                          Parent pk -> [(Edge k pk, v)]

      return (Parent k, singletonTPF k v parent_edge, id)


data Context = Root | Parent Key


-- | Repeatedly call 'typesPointsFrom' and perform the leak detection
-- analysis.
detectLeaks :: Int -> Debuggee -> IO ()
detectLeaks interval e = loop Nothing (M.empty, M.empty) 0
  where
    loop :: Maybe TypePointsFrom -> RankMaps -> Int -> IO ()
    loop prev_census rms i = do
      print i
      threadDelay (interval * 1_000_000)
      pause e
      (gs, r, new_rmaps) <- runTrace e $ do
        _ <- precacheBlocks
        rs <- gcRoots
        traceWrite (length rs)
        res <- typePointsFrom rs
        let !new_rmaps = case prev_census of
                           Nothing -> rms
                           Just pcensus -> updateRankMap rms pcensus res
        let cands = chooseCandidates (fst new_rmaps)
        traceWrite (length cands)
        gs <- mapM (findSlice (snd new_rmaps)) (take 10 cands)
        return (gs, res, new_rmaps)
      resume e
      zipWithM_ (\n g -> writeFile ("slices/"
                                      ++ show @Int i ++ "-"
                                      ++ show @Int n ++ ".dot")
                                   (renderDot g)) [0..] gs
      loop (Just r) new_rmaps (i + 1)


-- Analysis code
--
getKey :: InfoTablePtr -> DebugM String
getKey itblp = do
    loc <- getSourceInfo itblp
    itbl <- dereferenceInfoTable itblp
    case loc of
      Nothing -> getKeyFallback itblp itbl
      Just s -> return $ show (tipe itbl) ++ ":" ++ renderSourceInfo s

getKeyFallback itbp itbl = do
    case tipe itbl of
      t | CONSTR <= t && t <= CONSTR_NOCAF   -> do
        ConstrDesc a b c <- dereferenceConDesc itbp
        return $ a ++ ":" ++ b ++ ":" ++ c
      _ -> return $ show (tipe itbl)

type Rank = Double
type Decay = Double

data RankInfo = RankInfo !Rank !Int deriving Show

getRank :: RankInfo -> Rank
getRank (RankInfo r _) = r

default_decay :: Decay
default_decay = 0.15

rank_threshold :: Double
rank_threshold = 100

min_iterations :: Int
min_iterations = 2

applyRankFilter :: RankInfo -> Bool
applyRankFilter (RankInfo r i) = r >= rank_threshold && i >= min_iterations

-- | Lookup suitable candidates from the RankMap
-- , Chooses values based on 'rank_threshold' and 'min_iterations'
lookupRM :: Key -> RankMap Edge -> [(Edge, RankInfo)]
lookupRM k m = M.assocs filtered_map
  where
    -- TODO, work out how to use these functions O(log n)
    --smaller =  traceShow (M.size m) (M.dropWhileAntitone ((/= k) . edgeSource) $ m)
    --res_map = traceShow (M.size smaller) (M.takeWhileAntitone ((== k) . edgeSource) smaller)
    (res_map, _) = M.partitionWithKey (\e _ -> (== k) . edgeSource $ e) m
    filtered_map = M.filter (\(RankInfo r _) -> r > 0) res_map

mkDotId :: InfoTablePtr -> Id
mkDotId (InfoTablePtr w) = IntegerId (fromIntegral w)

findSlice :: RankMap Edge -> Key -> DebugM Graph
findSlice rm k = Graph StrictGraph DirectedGraph (Just (mkDotId k)) <$> evalStateT (go 3 k) S.empty

  where

    go :: Int -> InfoTablePtr -> StateT (S.Set InfoTablePtr) DebugM [Statement]
    go n cur_k = do
      visited_set <- get
      -- But don't stop going deep until we've seen a decent number of
      -- nodes
      if S.member cur_k visited_set || (n <= 0 && S.size visited_set >= 20)
        then return []
        else do
          label <- lift $ getKey cur_k
          let next_edges = take 20 (lookupRM cur_k rm)
              -- Decoding very wide is bad
              edge_stmts = map mkEdge next_edges
              node_stmt = NodeStatement (NodeId (mkDotId cur_k) Nothing) [AttributeSetValue (StringId "label") (StringId label) ]
              mkEdge (Edge _ e, ri) = EdgeStatement [ENodeId NoEdge (NodeId (mkDotId cur_k) Nothing), ENodeId DirectedEdge (NodeId (mkDotId e) Nothing)] [AttributeSetValue (StringId "label") (StringId (show (getRank ri))) ]

          modify' (S.insert cur_k)
          ss <- concat <$> mapM (go (n-1) . edgeTarget . fst) next_edges
          return $ node_stmt : edge_stmts ++ ss

renderSourceInfo :: SourceInformation -> String
renderSourceInfo s = escapeQuotes (infoName s ++ ":" ++ infoType s ++ ":" ++ infoPosition s)

escapeQuotes :: String -> String
escapeQuotes [] = []
escapeQuotes ('"':xs) = '\\' : '"' : escapeQuotes xs
escapeQuotes (x:xs) = x:escapeQuotes xs


chooseCandidates :: RankMap Key -> [Key]
chooseCandidates = map fst . reverse . sortOn (getRank . snd) . M.assocs . M.filter applyRankFilter

type RankMap k = M.Map k RankInfo

type RankMaps = (RankMap Key, RankMap Edge)

type RankUpdateMap k = M.Map k RankUpdateInfo

type RankUpdateInfo = Int -> Double -> Double

-- | Update the current rank predictions based on the difference between
-- two censuses.
updateRankMap :: (RankMap Key, RankMap Edge)
              -> TypePointsFrom
              -> TypePointsFrom
              -> (RankMap Key, RankMap Edge)
updateRankMap (rm_n, rm_e) t1 t2 = (ns, es)
  where
    !(rnodes, redges) = ratioRank t1 t2
    missingL = M.dropMissing
    missingR = M.mapMissing (\_ f -> RankInfo (f 0 0) 1)
    matched = M.zipWithMatched (\_ (RankInfo r iters) f -> RankInfo (f iters r) (iters + 1))

    !ns = runIdentity $ M.mergeA missingL missingR matched rm_n rnodes
    !es = runIdentity $ M.mergeA missingL missingR matched rm_e redges


compareSize :: CensusStats -> CensusStats -> Maybe (Int -> Double -> Double)
compareSize (cssize -> Size s1) (cssize -> Size s2) =
  if fromIntegral s2 > (1 - default_decay) * fromIntegral s1
    -- Calculate "Q"
    then if s1 > s2
          -- Shrinking phase, penalise rank
          then Just (\phases rank ->
                      rank
                        - ((fromIntegral (phases + 1))
                            * ((fromIntegral s1 / fromIntegral s2) - 1)))
          else Just (\phases rank ->
                        rank +
                          ((fromIntegral (phases + 1))
                            * ((fromIntegral s2 / fromIntegral s1) - 1)))
    else Nothing

-- | Compute how to update the ranks based on the difference between two
-- censuses.
ratioRank :: TypePointsFrom -> TypePointsFrom -> (RankUpdateMap Key, RankUpdateMap Edge)
ratioRank t1 t2 = (candidates, redges)
  where
    ns1 = getNodes t1
    ns2 = getNodes t2

    es1 = getEdges t1
    es2 = getEdges t2
    missingL = M.dropMissing
    missingR = M.dropMissing
    matched = M.zipWithMaybeMatched (\_ cs1 cs2 -> compareSize cs1 cs2)
    !candidates = runIdentity $ M.mergeA missingL missingR matched ns1 ns2

    !redges = runIdentity $ M.mergeA missingL missingR matched es1 es2