{-# LANGUAGE GeneralizedNewtypeDeriving, OverloadedStrings #-}
module NetSpider.Weaver
(
Weaver,
newWeaver,
addFoundNode,
markAsVisited,
getSnapshot,
getSnapshot',
isVisited,
getFoundNodes,
getBoundaryNodes,
visitAllBoundaryNodes
) where
import Data.Foldable (foldl')
import Data.Hashable (Hashable)
import Data.HashMap.Strict (HashMap)
import qualified Data.HashMap.Strict as HM
import qualified Data.HashSet as HS
import Data.List (sort, reverse, sortOn)
import Data.Maybe (listToMaybe, mapMaybe)
import GHC.Exts (groupWith)
import NetSpider.Found (FoundNode(..), LinkState(..), FoundLink(targetNode))
import NetSpider.Log
( runWriterLoggingM, WriterLoggingM, logDebugW, LogLine, spack
)
import NetSpider.Log ()
import NetSpider.Query.Internal (FoundNodePolicy(..))
import NetSpider.Query (policyOverwrite, policyAppend)
import NetSpider.Snapshot.Internal
( SnapshotGraph, SnapshotNode(..), SnapshotLink(..)
)
import NetSpider.Timestamp (Timestamp)
import NetSpider.Unify
( LinkSampleUnifier,
LinkSampleID,
LinkSample(..),
linkSampleId
)
import qualified NetSpider.Unify as Unify
data Weaver n na la =
Weaver
{ visitedNodes :: HashMap n [FoundNode n na la],
foundNodePolicy :: FoundNodePolicy n na
}
deriving (Show,Eq)
newWeaver :: FoundNodePolicy n na -> Weaver n na la
newWeaver p = Weaver HM.empty p
addFoundNode :: (Eq n, Hashable n) => FoundNode n na la -> Weaver n na la -> Weaver n na la
addFoundNode fn weaver = new_weaver
where
nid = subjectNode fn
new_weaver = weaver { visitedNodes = HM.insertWith updater nid [fn] $ visitedNodes weaver }
updater =
case foundNodePolicy weaver of
PolicyOverwrite -> \new old -> if latestTimeOfNodes new >= latestTimeOfNodes old
then new
else old
PolicyAppend -> \new old -> new ++ old
latestTimeOfNodes ns = listToMaybe $ reverse $ sort $ map foundAt ns
markAsVisited :: (Eq n, Hashable n) => n -> Weaver n na la -> Weaver n na la
markAsVisited nid w = w { visitedNodes = HM.insertWith updater nid [] $ visitedNodes w }
where
updater _ old = old
isVisited :: (Eq n, Hashable n) => n -> Weaver n na la -> Bool
isVisited n w = HM.member n (visitedNodes w)
getFoundNodes :: (Eq n, Hashable n) => n -> Weaver n na la -> Maybe [FoundNode n na la]
getFoundNodes n w = HM.lookup n (visitedNodes w)
getSnapshot :: (Ord n, Hashable n, Show n) => LinkSampleUnifier n na fla sla -> Weaver n na fla -> SnapshotGraph n na sla
getSnapshot u w = fst $ getSnapshot' u w
getBoundaryNodes :: (Eq n, Hashable n) => Weaver n na fla -> [n]
getBoundaryNodes weaver = HS.toList boundary_nodes_set
where
boundary_nodes_set = HS.fromList $ filter (\nid -> not $ isVisited nid weaver) $ all_target_nodes
all_target_nodes = (map targetNode . neighborLinks) =<< (concat $ HM.elems $ visitedNodes weaver)
visitAllBoundaryNodes :: (Eq n, Hashable n) => Weaver n na fla -> Weaver n na fla
visitAllBoundaryNodes weaver = foldl' (\w n -> markAsVisited n w) weaver $ getBoundaryNodes weaver
latestFoundNodeFor :: (Eq n, Hashable n) => n -> Weaver n na fla -> Maybe (FoundNode n na fla)
latestFoundNodeFor nid weaver = do
found_nodes <- HM.lookup nid $ visitedNodes weaver
listToMaybe $ reverse $ sortOn foundAt $ found_nodes
makeSnapshotNode :: (Eq n, Hashable n) => Weaver n na fla -> n -> SnapshotNode n na
makeSnapshotNode weaver nid =
SnapshotNode { _nodeId = nid,
_isOnBoundary = not $ isVisited nid weaver,
_nodeTimestamp = m_timestamp,
_nodeAttributes = m_attributes
}
where
mfn = latestFoundNodeFor nid weaver
m_timestamp = fmap foundAt mfn
m_attributes = fmap nodeAttributes mfn
allLinkSamples :: Weaver n na la -> [LinkSample n la]
allLinkSamples w = Unify.toLinkSamples =<< (concat $ HM.elems $ visitedNodes w)
getSnapshot' :: (Ord n, Hashable n, Show n)
=> LinkSampleUnifier n na fla sla
-> Weaver n na fla
-> (SnapshotGraph n na sla, [LogLine])
getSnapshot' unifier weaver = ((nodes, links), logs)
where
nodes = visited_nodes ++ boundary_nodes
visited_nodes = map (makeSnapshotNode weaver) $ HM.keys $ visitedNodes weaver
boundary_nodes = map (makeSnapshotNode weaver) $ getBoundaryNodes weaver
(links, logs) = runWriterLoggingM $ fmap mconcat
$ mapM (makeSnapshotLinks unifier weaver)
$ groupWith linkSampleId $ allLinkSamples weaver
makeSnapshotLinks :: (Eq n, Hashable n, Show n)
=> LinkSampleUnifier n na fla sla
-> Weaver n na fla
-> [LinkSample n fla]
-> WriterLoggingM [SnapshotLink n sla]
makeSnapshotLinks _ _ [] = return []
makeSnapshotLinks unifier weaver link_samples@(head_sample : _) = do
unified <- doUnify link_samples
logUnified unified
return $ mapMaybe makeSnapshotLink unified
where
makeEndNode getter = makeSnapshotNode weaver $ getter $ head_sample
doUnify = unifier (makeEndNode lsSubjectNode) (makeEndNode lsTargetNode)
logUnified unified = logDebugW ( "Unify link [" <> (spack $ lsSubjectNode head_sample) <> "]-["
<> (spack $ lsTargetNode head_sample) <> "]: "
<> "from " <> (spack $ length link_samples) <> " samples "
<> "to " <> (spack $ length unified) <> " samples"
)
makeSnapshotLink unified_sample = do
case lsLinkState unified_sample of
LinkUnused -> Nothing
LinkToTarget -> Just $ sampleToLink unified_sample True True
LinkToSubject -> Just $ sampleToLink unified_sample False True
LinkBidirectional -> Just $ sampleToLink unified_sample True False
sampleToLink sample to_target is_directed =
SnapshotLink { _sourceNode = (if to_target then lsSubjectNode else lsTargetNode) sample,
_destinationNode = (if to_target then lsTargetNode else lsSubjectNode) sample,
_isDirected = is_directed,
_linkTimestamp = lsTimestamp sample,
_linkAttributes = lsLinkAttributes sample
}