{-# LANGUAGE CPP, DeriveGeneric, DeriveFunctor, GeneralizedNewtypeDeriving, NamedFieldPuns, BangPatterns #-} {-# OPTIONS_GHC -fno-warn-orphans #-} -- | An abstraction to help with re-running actions when files or other -- input values they depend on have changed. -- module Distribution.Client.FileMonitor ( -- * Declaring files to monitor MonitorFilePath(..), MonitorKindFile(..), MonitorKindDir(..), FilePathGlob(..), monitorFile, monitorFileHashed, monitorNonExistentFile, monitorFileExistence, monitorDirectory, monitorNonExistentDirectory, monitorDirectoryExistence, monitorFileOrDirectory, monitorFileGlob, monitorFileGlobExistence, monitorFileSearchPath, monitorFileHashedSearchPath, -- * Creating and checking sets of monitored files FileMonitor(..), newFileMonitor, MonitorChanged(..), MonitorChangedReason(..), checkFileMonitorChanged, updateFileMonitor, MonitorTimestamp, beginUpdateFileMonitor, ) where import Prelude () import Distribution.Client.Compat.Prelude #if MIN_VERSION_containers(0,5,0) import qualified Data.Map.Strict as Map #else import qualified Data.Map as Map #endif import qualified Data.ByteString.Lazy as BS import qualified Distribution.Compat.Binary as Binary import qualified Data.Hashable as Hashable import Control.Monad import Control.Monad.Trans (MonadIO, liftIO) import Control.Monad.State (StateT, mapStateT) import qualified Control.Monad.State as State import Control.Monad.Except (ExceptT, runExceptT, withExceptT, throwError) import Control.Exception import Distribution.Compat.Time import Distribution.Client.Glob import Distribution.Simple.Utils (handleDoesNotExist, writeFileAtomic) import Distribution.Client.Utils (mergeBy, MergeResult(..)) import System.FilePath import System.Directory import System.IO ------------------------------------------------------------------------------ -- Types for specifying files to monitor -- -- | A description of a file (or set of files) to monitor for changes. -- -- Where file paths are relative they are relative to a common directory -- (e.g. project root), not necessarily the process current directory. -- data MonitorFilePath = MonitorFile { monitorKindFile :: !MonitorKindFile, monitorKindDir :: !MonitorKindDir, monitorPath :: !FilePath } | MonitorFileGlob { monitorKindFile :: !MonitorKindFile, monitorKindDir :: !MonitorKindDir, monitorPathGlob :: !FilePathGlob } deriving (Eq, Show, Generic) data MonitorKindFile = FileExists | FileModTime | FileHashed | FileNotExists deriving (Eq, Show, Generic) data MonitorKindDir = DirExists | DirModTime | DirNotExists deriving (Eq, Show, Generic) instance Binary MonitorFilePath instance Binary MonitorKindFile instance Binary MonitorKindDir -- | Monitor a single file for changes, based on its modification time. -- The monitored file is considered to have changed if it no longer -- exists or if its modification time has changed. -- monitorFile :: FilePath -> MonitorFilePath monitorFile = MonitorFile FileModTime DirNotExists -- | Monitor a single file for changes, based on its modification time -- and content hash. The monitored file is considered to have changed if -- it no longer exists or if its modification time and content hash have -- changed. -- monitorFileHashed :: FilePath -> MonitorFilePath monitorFileHashed = MonitorFile FileHashed DirNotExists -- | Monitor a single non-existent file for changes. The monitored file -- is considered to have changed if it exists. -- monitorNonExistentFile :: FilePath -> MonitorFilePath monitorNonExistentFile = MonitorFile FileNotExists DirNotExists -- | Monitor a single file for existence only. The monitored file is -- considered to have changed if it no longer exists. -- monitorFileExistence :: FilePath -> MonitorFilePath monitorFileExistence = MonitorFile FileExists DirNotExists -- | Monitor a single directory for changes, based on its modification -- time. The monitored directory is considered to have changed if it no -- longer exists or if its modification time has changed. -- monitorDirectory :: FilePath -> MonitorFilePath monitorDirectory = MonitorFile FileNotExists DirModTime -- | Monitor a single non-existent directory for changes. The monitored -- directory is considered to have changed if it exists. -- monitorNonExistentDirectory :: FilePath -> MonitorFilePath -- Just an alias for monitorNonExistentFile, since you can't -- tell the difference between a non-existent directory and -- a non-existent file :) monitorNonExistentDirectory = monitorNonExistentFile -- | Monitor a single directory for existence. The monitored directory is -- considered to have changed only if it no longer exists. -- monitorDirectoryExistence :: FilePath -> MonitorFilePath monitorDirectoryExistence = MonitorFile FileNotExists DirExists -- | Monitor a single file or directory for changes, based on its modification -- time. The monitored file is considered to have changed if it no longer -- exists or if its modification time has changed. -- monitorFileOrDirectory :: FilePath -> MonitorFilePath monitorFileOrDirectory = MonitorFile FileModTime DirModTime -- | Monitor a set of files (or directories) identified by a file glob. -- The monitored glob is considered to have changed if the set of files -- matching the glob changes (i.e. creations or deletions), or for files if the -- modification time and content hash of any matching file has changed. -- monitorFileGlob :: FilePathGlob -> MonitorFilePath monitorFileGlob = MonitorFileGlob FileHashed DirExists -- | Monitor a set of files (or directories) identified by a file glob for -- existence only. The monitored glob is considered to have changed if the set -- of files matching the glob changes (i.e. creations or deletions). -- monitorFileGlobExistence :: FilePathGlob -> MonitorFilePath monitorFileGlobExistence = MonitorFileGlob FileExists DirExists -- | Creates a list of files to monitor when you search for a file which -- unsuccessfully looked in @notFoundAtPaths@ before finding it at -- @foundAtPath@. monitorFileSearchPath :: [FilePath] -> FilePath -> [MonitorFilePath] monitorFileSearchPath notFoundAtPaths foundAtPath = monitorFile foundAtPath : map monitorNonExistentFile notFoundAtPaths -- | Similar to 'monitorFileSearchPath', but also instructs us to -- monitor the hash of the found file. monitorFileHashedSearchPath :: [FilePath] -> FilePath -> [MonitorFilePath] monitorFileHashedSearchPath notFoundAtPaths foundAtPath = monitorFileHashed foundAtPath : map monitorNonExistentFile notFoundAtPaths ------------------------------------------------------------------------------ -- Implementation types, files status -- -- | The state necessary to determine whether a set of monitored -- files has changed. It consists of two parts: a set of specific -- files to be monitored (index by their path), and a list of -- globs, which monitor may files at once. data MonitorStateFileSet = MonitorStateFileSet ![MonitorStateFile] ![MonitorStateGlob] -- Morally this is not actually a set but a bag (represented by lists). -- There is no principled reason to use a bag here rather than a set, but -- there is also no particular gain either. That said, we do preserve the -- order of the lists just to reduce confusion (and have predictable I/O -- patterns). deriving Show type Hash = Int -- | The state necessary to determine whether a monitored file has changed. -- -- This covers all the cases of 'MonitorFilePath' except for globs which is -- covered separately by 'MonitorStateGlob'. -- -- The @Maybe ModTime@ is to cover the case where we already consider the -- file to have changed, either because it had already changed by the time we -- did the snapshot (i.e. too new, changed since start of update process) or it -- no longer exists at all. -- data MonitorStateFile = MonitorStateFile !MonitorKindFile !MonitorKindDir !FilePath !MonitorStateFileStatus deriving (Show, Generic) data MonitorStateFileStatus = MonitorStateFileExists | MonitorStateFileModTime !ModTime -- ^ cached file mtime | MonitorStateFileHashed !ModTime !Hash -- ^ cached mtime and content hash | MonitorStateDirExists | MonitorStateDirModTime !ModTime -- ^ cached dir mtime | MonitorStateNonExistent | MonitorStateAlreadyChanged deriving (Show, Generic) instance Binary MonitorStateFile instance Binary MonitorStateFileStatus -- | The state necessary to determine whether the files matched by a globbing -- match have changed. -- data MonitorStateGlob = MonitorStateGlob !MonitorKindFile !MonitorKindDir !FilePathRoot !MonitorStateGlobRel deriving (Show, Generic) data MonitorStateGlobRel = MonitorStateGlobDirs !Glob !FilePathGlobRel !ModTime ![(FilePath, MonitorStateGlobRel)] -- invariant: sorted | MonitorStateGlobFiles !Glob !ModTime ![(FilePath, MonitorStateFileStatus)] -- invariant: sorted | MonitorStateGlobDirTrailing deriving (Show, Generic) instance Binary MonitorStateGlob instance Binary MonitorStateGlobRel -- | We can build a 'MonitorStateFileSet' from a set of 'MonitorFilePath' by -- inspecting the state of the file system, and we can go in the reverse -- direction by just forgetting the extra info. -- reconstructMonitorFilePaths :: MonitorStateFileSet -> [MonitorFilePath] reconstructMonitorFilePaths (MonitorStateFileSet singlePaths globPaths) = map getSinglePath singlePaths ++ map getGlobPath globPaths where getSinglePath (MonitorStateFile kindfile kinddir filepath _) = MonitorFile kindfile kinddir filepath getGlobPath (MonitorStateGlob kindfile kinddir root gstate) = MonitorFileGlob kindfile kinddir $ FilePathGlob root $ case gstate of MonitorStateGlobDirs glob globs _ _ -> GlobDir glob globs MonitorStateGlobFiles glob _ _ -> GlobFile glob MonitorStateGlobDirTrailing -> GlobDirTrailing ------------------------------------------------------------------------------ -- Checking the status of monitored files -- -- | A monitor for detecting changes to a set of files. It can be used to -- efficiently test if any of a set of files (specified individually or by -- glob patterns) has changed since some snapshot. In addition, it also checks -- for changes in a value (of type @a@), and when there are no changes in -- either it returns a saved value (of type @b@). -- -- The main use case looks like this: suppose we have some expensive action -- that depends on certain pure inputs and reads some set of files, and -- produces some pure result. We want to avoid re-running this action when it -- would produce the same result. So we need to monitor the files the action -- looked at, the other pure input values, and we need to cache the result. -- Then at some later point, if the input value didn't change, and none of the -- files changed, then we can re-use the cached result rather than re-running -- the action. -- -- This can be achieved using a 'FileMonitor'. Each 'FileMonitor' instance -- saves state in a disk file, so the file for that has to be specified, -- making sure it is unique. The pattern is to use 'checkFileMonitorChanged' -- to see if there's been any change. If there is, re-run the action, keeping -- track of the files, then use 'updateFileMonitor' to record the current -- set of files to monitor, the current input value for the action, and the -- result of the action. -- -- The typical occurrence of this pattern is captured by 'rerunIfChanged' -- and the 'Rebuild' monad. More complicated cases may need to use -- 'checkFileMonitorChanged' and 'updateFileMonitor' directly. -- data FileMonitor a b = FileMonitor { -- | The file where this 'FileMonitor' should store its state. -- fileMonitorCacheFile :: FilePath, -- | Compares a new cache key with old one to determine if a -- corresponding cached value is still valid. -- -- Typically this is just an equality test, but in some -- circumstances it can make sense to do things like subset -- comparisons. -- -- The first arg is the new value, the second is the old cached value. -- fileMonitorKeyValid :: a -> a -> Bool, -- | When this mode is enabled, if 'checkFileMonitorChanged' returns -- 'MonitoredValueChanged' then we have the guarantee that no files -- changed, that the value change was the only change. In the default -- mode no such guarantee is provided which is slightly faster. -- fileMonitorCheckIfOnlyValueChanged :: Bool } -- | Define a new file monitor. -- -- It's best practice to define file monitor values once, and then use the -- same value for 'checkFileMonitorChanged' and 'updateFileMonitor' as this -- ensures you get the same types @a@ and @b@ for reading and writing. -- -- The path of the file monitor itself must be unique because it keeps state -- on disk and these would clash. -- newFileMonitor :: Eq a => FilePath -- ^ The file to cache the state of the -- file monitor. Must be unique. -> FileMonitor a b newFileMonitor path = FileMonitor path (==) False -- | The result of 'checkFileMonitorChanged': either the monitored files or -- value changed (and it tells us which it was) or nothing changed and we get -- the cached result. -- data MonitorChanged a b = -- | The monitored files and value did not change. The cached result is -- @b@. -- -- The set of monitored files is also returned. This is useful -- for composing or nesting 'FileMonitor's. MonitorUnchanged b [MonitorFilePath] -- | The monitor found that something changed. The reason is given. -- | MonitorChanged (MonitorChangedReason a) deriving Show -- | What kind of change 'checkFileMonitorChanged' detected. -- data MonitorChangedReason a = -- | One of the files changed (existence, file type, mtime or file -- content, depending on the 'MonitorFilePath' in question) MonitoredFileChanged FilePath -- | The pure input value changed. -- -- The previous cached key value is also returned. This is sometimes -- useful when using a 'fileMonitorKeyValid' function that is not simply -- '(==)', when invalidation can be partial. In such cases it can make -- sense to 'updateFileMonitor' with a key value that's a combination of -- the new and old (e.g. set union). | MonitoredValueChanged a -- | There was no saved monitor state, cached value etc. Ie the file -- for the 'FileMonitor' does not exist. | MonitorFirstRun -- | There was existing state, but we could not read it. This typically -- happens when the code has changed compared to an existing 'FileMonitor' -- cache file and type of the input value or cached value has changed such -- that we cannot decode the values. This is completely benign as we can -- treat is just as if there were no cache file and re-run. | MonitorCorruptCache deriving (Eq, Show, Functor) -- | Test if the input value or files monitored by the 'FileMonitor' have -- changed. If not, return the cached value. -- -- See 'FileMonitor' for a full explanation. -- checkFileMonitorChanged :: (Binary a, Binary b) => FileMonitor a b -- ^ cache file path -> FilePath -- ^ root directory -> a -- ^ guard or key value -> IO (MonitorChanged a b) -- ^ did the key or any paths change? checkFileMonitorChanged monitor@FileMonitor { fileMonitorKeyValid, fileMonitorCheckIfOnlyValueChanged } root currentKey = -- Consider it a change if the cache file does not exist, -- or we cannot decode it. Sadly ErrorCall can still happen, despite -- using decodeFileOrFail, e.g. Data.Char.chr errors handleDoesNotExist (MonitorChanged MonitorFirstRun) $ handleErrorCall (MonitorChanged MonitorCorruptCache) $ readCacheFile monitor >>= either (\_ -> return (MonitorChanged MonitorCorruptCache)) checkStatusCache where checkStatusCache (cachedFileStatus, cachedKey, cachedResult) = do change <- checkForChanges case change of Just reason -> return (MonitorChanged reason) Nothing -> return (MonitorUnchanged cachedResult monitorFiles) where monitorFiles = reconstructMonitorFilePaths cachedFileStatus where -- In fileMonitorCheckIfOnlyValueChanged mode we want to guarantee that -- if we return MonitoredValueChanged that only the value changed. -- We do that by checkin for file changes first. Otherwise it makes -- more sense to do the cheaper test first. checkForChanges | fileMonitorCheckIfOnlyValueChanged = checkFileChange cachedFileStatus cachedKey cachedResult `mplusMaybeT` checkValueChange cachedKey | otherwise = checkValueChange cachedKey `mplusMaybeT` checkFileChange cachedFileStatus cachedKey cachedResult mplusMaybeT :: Monad m => m (Maybe a) -> m (Maybe a) -> m (Maybe a) mplusMaybeT ma mb = do mx <- ma case mx of Nothing -> mb Just x -> return (Just x) -- Check if the guard value has changed checkValueChange cachedKey | not (fileMonitorKeyValid currentKey cachedKey) = return (Just (MonitoredValueChanged cachedKey)) | otherwise = return Nothing -- Check if any file has changed checkFileChange cachedFileStatus cachedKey cachedResult = do res <- probeFileSystem root cachedFileStatus case res of -- Some monitored file has changed Left changedPath -> return (Just (MonitoredFileChanged (normalise changedPath))) -- No monitored file has changed Right (cachedFileStatus', cacheStatus) -> do -- But we might still want to update the cache whenCacheChanged cacheStatus $ rewriteCacheFile monitor cachedFileStatus' cachedKey cachedResult return Nothing -- | Helper for reading the cache file. -- -- This determines the type and format of the binary cache file. -- readCacheFile :: (Binary a, Binary b) => FileMonitor a b -> IO (Either String (MonitorStateFileSet, a, b)) readCacheFile FileMonitor {fileMonitorCacheFile} = withBinaryFile fileMonitorCacheFile ReadMode $ \hnd -> Binary.decodeOrFailIO =<< BS.hGetContents hnd -- | Helper for writing the cache file. -- -- This determines the type and format of the binary cache file. -- rewriteCacheFile :: (Binary a, Binary b) => FileMonitor a b -> MonitorStateFileSet -> a -> b -> IO () rewriteCacheFile FileMonitor {fileMonitorCacheFile} fileset key result = writeFileAtomic fileMonitorCacheFile $ Binary.encode (fileset, key, result) -- | Probe the file system to see if any of the monitored files have changed. -- -- It returns Nothing if any file changed, or returns a possibly updated -- file 'MonitorStateFileSet' plus an indicator of whether it actually changed. -- -- We may need to update the cache since there may be changes in the filesystem -- state which don't change any of our affected files. -- -- Consider the glob @{proj1,proj2}\/\*.cabal@. Say we first run and find a -- @proj1@ directory containing @proj1.cabal@ yet no @proj2@. If we later run -- and find @proj2@ was created, yet contains no files matching @*.cabal@ then -- we want to update the cache despite no changes in our relevant file set. -- Specifically, we should add an mtime for this directory so we can avoid -- re-traversing the directory in future runs. -- probeFileSystem :: FilePath -> MonitorStateFileSet -> IO (Either FilePath (MonitorStateFileSet, CacheChanged)) probeFileSystem root (MonitorStateFileSet singlePaths globPaths) = runChangedM $ do sequence_ [ probeMonitorStateFileStatus root file status | MonitorStateFile _ _ file status <- singlePaths ] -- The glob monitors can require state changes globPaths' <- sequence [ probeMonitorStateGlob root globPath | globPath <- globPaths ] return (MonitorStateFileSet singlePaths globPaths') ----------------------------------------------- -- Monad for checking for file system changes -- -- We need to be able to bail out if we detect a change (using ExceptT), -- but if there's no change we need to be able to rebuild the monitor -- state. And we want to optimise that rebuilding by keeping track if -- anything actually changed (using StateT), so that in the typical case -- we can avoid rewriting the state file. newtype ChangedM a = ChangedM (StateT CacheChanged (ExceptT FilePath IO) a) deriving (Functor, Applicative, Monad, MonadIO) runChangedM :: ChangedM a -> IO (Either FilePath (a, CacheChanged)) runChangedM (ChangedM action) = runExceptT $ State.runStateT action CacheUnchanged somethingChanged :: FilePath -> ChangedM a somethingChanged path = ChangedM $ throwError path cacheChanged :: ChangedM () cacheChanged = ChangedM $ State.put CacheChanged mapChangedFile :: (FilePath -> FilePath) -> ChangedM a -> ChangedM a mapChangedFile adjust (ChangedM a) = ChangedM (mapStateT (withExceptT adjust) a) data CacheChanged = CacheChanged | CacheUnchanged whenCacheChanged :: Monad m => CacheChanged -> m () -> m () whenCacheChanged CacheChanged action = action whenCacheChanged CacheUnchanged _ = return () ---------------------- -- | Probe the file system to see if a single monitored file has changed. -- probeMonitorStateFileStatus :: FilePath -> FilePath -> MonitorStateFileStatus -> ChangedM () probeMonitorStateFileStatus root file status = case status of MonitorStateFileExists -> probeFileExistence root file MonitorStateFileModTime mtime -> probeFileModificationTime root file mtime MonitorStateFileHashed mtime hash -> probeFileModificationTimeAndHash root file mtime hash MonitorStateDirExists -> probeDirExistence root file MonitorStateDirModTime mtime -> probeFileModificationTime root file mtime MonitorStateNonExistent -> probeFileNonExistence root file MonitorStateAlreadyChanged -> somethingChanged file -- | Probe the file system to see if a monitored file glob has changed. -- probeMonitorStateGlob :: FilePath -- ^ root path -> MonitorStateGlob -> ChangedM MonitorStateGlob probeMonitorStateGlob relroot (MonitorStateGlob kindfile kinddir globroot glob) = do root <- liftIO $ getFilePathRootDirectory globroot relroot case globroot of FilePathRelative -> MonitorStateGlob kindfile kinddir globroot <$> probeMonitorStateGlobRel kindfile kinddir root "." glob -- for absolute cases, make the changed file we report absolute too _ -> mapChangedFile (root ) $ MonitorStateGlob kindfile kinddir globroot <$> probeMonitorStateGlobRel kindfile kinddir root "" glob probeMonitorStateGlobRel :: MonitorKindFile -> MonitorKindDir -> FilePath -- ^ root path -> FilePath -- ^ path of the directory we are -- looking in relative to @root@ -> MonitorStateGlobRel -> ChangedM MonitorStateGlobRel probeMonitorStateGlobRel kindfile kinddir root dirName (MonitorStateGlobDirs glob globPath mtime children) = do change <- liftIO $ checkDirectoryModificationTime (root dirName) mtime case change of Nothing -> do children' <- sequence [ do fstate' <- probeMonitorStateGlobRel kindfile kinddir root (dirName fname) fstate return (fname, fstate') | (fname, fstate) <- children ] return $! MonitorStateGlobDirs glob globPath mtime children' Just mtime' -> do -- directory modification time changed: -- a matching subdir may have been added or deleted matches <- filterM (\entry -> let subdir = root dirName entry in liftIO $ doesDirectoryExist subdir) . filter (matchGlob glob) =<< liftIO (getDirectoryContents (root dirName)) children' <- mapM probeMergeResult $ mergeBy (\(path1,_) path2 -> compare path1 path2) children (sort matches) return $! MonitorStateGlobDirs glob globPath mtime' children' -- Note that just because the directory has changed, we don't force -- a cache rewrite with 'cacheChanged' since that has some cost, and -- all we're saving is scanning the directory. But we do rebuild the -- cache with the new mtime', so that if the cache is rewritten for -- some other reason, we'll take advantage of that. where probeMergeResult :: MergeResult (FilePath, MonitorStateGlobRel) FilePath -> ChangedM (FilePath, MonitorStateGlobRel) -- Only in cached (directory deleted) probeMergeResult (OnlyInLeft (path, fstate)) = do case allMatchingFiles (dirName path) fstate of [] -> return (path, fstate) -- Strictly speaking we should be returning 'CacheChanged' above -- as we should prune the now-missing 'MonitorStateGlobRel'. However -- we currently just leave these now-redundant entries in the -- cache as they cost no IO and keeping them allows us to avoid -- rewriting the cache. (file:_) -> somethingChanged file -- Only in current filesystem state (directory added) probeMergeResult (OnlyInRight path) = do fstate <- liftIO $ buildMonitorStateGlobRel Nothing Map.empty kindfile kinddir root (dirName path) globPath case allMatchingFiles (dirName path) fstate of (file:_) -> somethingChanged file -- This is the only case where we use 'cacheChanged' because we can -- have a whole new dir subtree (of unbounded size and cost), so we -- need to save the state of that new subtree in the cache. [] -> cacheChanged >> return (path, fstate) -- Found in path probeMergeResult (InBoth (path, fstate) _) = do fstate' <- probeMonitorStateGlobRel kindfile kinddir root (dirName path) fstate return (path, fstate') -- | Does a 'MonitorStateGlob' have any relevant files within it? allMatchingFiles :: FilePath -> MonitorStateGlobRel -> [FilePath] allMatchingFiles dir (MonitorStateGlobFiles _ _ entries) = [ dir fname | (fname, _) <- entries ] allMatchingFiles dir (MonitorStateGlobDirs _ _ _ entries) = [ res | (subdir, fstate) <- entries , res <- allMatchingFiles (dir subdir) fstate ] allMatchingFiles dir MonitorStateGlobDirTrailing = [dir] probeMonitorStateGlobRel _ _ root dirName (MonitorStateGlobFiles glob mtime children) = do change <- liftIO $ checkDirectoryModificationTime (root dirName) mtime mtime' <- case change of Nothing -> return mtime Just mtime' -> do -- directory modification time changed: -- a matching file may have been added or deleted matches <- return . filter (matchGlob glob) =<< liftIO (getDirectoryContents (root dirName)) mapM_ probeMergeResult $ mergeBy (\(path1,_) path2 -> compare path1 path2) children (sort matches) return mtime' -- Check that none of the children have changed forM_ children $ \(file, status) -> probeMonitorStateFileStatus root (dirName file) status return (MonitorStateGlobFiles glob mtime' children) -- Again, we don't force a cache rewite with 'cacheChanged', but we do use -- the new mtime' if any. where probeMergeResult :: MergeResult (FilePath, MonitorStateFileStatus) FilePath -> ChangedM () probeMergeResult mr = case mr of InBoth _ _ -> return () -- this is just to be able to accurately report which file changed: OnlyInLeft (path, _) -> somethingChanged (dirName path) OnlyInRight path -> somethingChanged (dirName path) probeMonitorStateGlobRel _ _ _ _ MonitorStateGlobDirTrailing = return MonitorStateGlobDirTrailing ------------------------------------------------------------------------------ -- | Update the input value and the set of files monitored by the -- 'FileMonitor', plus the cached value that may be returned in future. -- -- This takes a snapshot of the state of the monitored files right now, so -- 'checkFileMonitorChanged' will look for file system changes relative to -- this snapshot. -- -- This is typically done once the action has been completed successfully and -- we have the action's result and we know what files it looked at. See -- 'FileMonitor' for a full explanation. -- -- If we do take the snapshot after the action has completed then we have a -- problem. The problem is that files might have changed /while/ the action was -- running but /after/ the action read them. If we take the snapshot after the -- action completes then we will miss these changes. The solution is to record -- a timestamp before beginning execution of the action and then we make the -- conservative assumption that any file that has changed since then has -- already changed, ie the file monitor state for these files will be such that -- 'checkFileMonitorChanged' will report that they have changed. -- -- So if you do use 'updateFileMonitor' after the action (so you can discover -- the files used rather than predicting them in advance) then use -- 'beginUpdateFileMonitor' to get a timestamp and pass that. Alternatively, -- if you take the snapshot in advance of the action, or you're not monitoring -- any files then you can use @Nothing@ for the timestamp parameter. -- updateFileMonitor :: (Binary a, Binary b) => FileMonitor a b -- ^ cache file path -> FilePath -- ^ root directory -> Maybe MonitorTimestamp -- ^ timestamp when the update action started -> [MonitorFilePath] -- ^ files of interest relative to root -> a -- ^ the current key value -> b -- ^ the current result value -> IO () updateFileMonitor monitor root startTime monitorFiles cachedKey cachedResult = do hashcache <- readCacheFileHashes monitor msfs <- buildMonitorStateFileSet startTime hashcache root monitorFiles rewriteCacheFile monitor msfs cachedKey cachedResult -- | A timestamp to help with the problem of file changes during actions. -- See 'updateFileMonitor' for details. -- newtype MonitorTimestamp = MonitorTimestamp ModTime -- | Record a timestamp at the beginning of an action, and when the action -- completes call 'updateFileMonitor' passing it the timestamp. -- See 'updateFileMonitor' for details. -- beginUpdateFileMonitor :: IO MonitorTimestamp beginUpdateFileMonitor = MonitorTimestamp <$> getCurTime -- | Take the snapshot of the monitored files. That is, given the -- specification of the set of files we need to monitor, inspect the state -- of the file system now and collect the information we'll need later to -- determine if anything has changed. -- buildMonitorStateFileSet :: Maybe MonitorTimestamp -- ^ optional: timestamp -- of the start of the action -> FileHashCache -- ^ existing file hashes -> FilePath -- ^ root directory -> [MonitorFilePath] -- ^ patterns of interest -- relative to root -> IO MonitorStateFileSet buildMonitorStateFileSet mstartTime hashcache root = go [] [] where go :: [MonitorStateFile] -> [MonitorStateGlob] -> [MonitorFilePath] -> IO MonitorStateFileSet go !singlePaths !globPaths [] = return (MonitorStateFileSet (reverse singlePaths) (reverse globPaths)) go !singlePaths !globPaths (MonitorFile kindfile kinddir path : monitors) = do monitorState <- MonitorStateFile kindfile kinddir path <$> buildMonitorStateFile mstartTime hashcache kindfile kinddir root path go (monitorState : singlePaths) globPaths monitors go !singlePaths !globPaths (MonitorFileGlob kindfile kinddir globPath : monitors) = do monitorState <- buildMonitorStateGlob mstartTime hashcache kindfile kinddir root globPath go singlePaths (monitorState : globPaths) monitors buildMonitorStateFile :: Maybe MonitorTimestamp -- ^ start time of update -> FileHashCache -- ^ existing file hashes -> MonitorKindFile -> MonitorKindDir -> FilePath -- ^ the root directory -> FilePath -> IO MonitorStateFileStatus buildMonitorStateFile mstartTime hashcache kindfile kinddir root path = do let abspath = root path isFile <- doesFileExist abspath isDir <- doesDirectoryExist abspath case (isFile, kindfile, isDir, kinddir) of (_, FileNotExists, _, DirNotExists) -> -- we don't need to care if it exists now, since we check at probe time return MonitorStateNonExistent (False, _, False, _) -> return MonitorStateAlreadyChanged (True, FileExists, _, _) -> return MonitorStateFileExists (True, FileModTime, _, _) -> handleIOException MonitorStateAlreadyChanged $ do mtime <- getModTime abspath if changedDuringUpdate mstartTime mtime then return MonitorStateAlreadyChanged else return (MonitorStateFileModTime mtime) (True, FileHashed, _, _) -> handleIOException MonitorStateAlreadyChanged $ do mtime <- getModTime abspath if changedDuringUpdate mstartTime mtime then return MonitorStateAlreadyChanged else do hash <- getFileHash hashcache abspath abspath mtime return (MonitorStateFileHashed mtime hash) (_, _, True, DirExists) -> return MonitorStateDirExists (_, _, True, DirModTime) -> handleIOException MonitorStateAlreadyChanged $ do mtime <- getModTime abspath if changedDuringUpdate mstartTime mtime then return MonitorStateAlreadyChanged else return (MonitorStateDirModTime mtime) (False, _, True, DirNotExists) -> return MonitorStateAlreadyChanged (True, FileNotExists, False, _) -> return MonitorStateAlreadyChanged -- | If we have a timestamp for the beginning of the update, then any file -- mtime later than this means that it changed during the update and we ought -- to consider the file as already changed. -- changedDuringUpdate :: Maybe MonitorTimestamp -> ModTime -> Bool changedDuringUpdate (Just (MonitorTimestamp startTime)) mtime = mtime > startTime changedDuringUpdate _ _ = False -- | Much like 'buildMonitorStateFileSet' but for the somewhat complicated case -- of a file glob. -- -- This gets used both by 'buildMonitorStateFileSet' when we're taking the -- file system snapshot, but also by 'probeGlobStatus' as part of checking -- the monitored (globed) files for changes when we find a whole new subtree. -- buildMonitorStateGlob :: Maybe MonitorTimestamp -- ^ start time of update -> FileHashCache -- ^ existing file hashes -> MonitorKindFile -> MonitorKindDir -> FilePath -- ^ the root directory -> FilePathGlob -- ^ the matching glob -> IO MonitorStateGlob buildMonitorStateGlob mstartTime hashcache kindfile kinddir relroot (FilePathGlob globroot globPath) = do root <- liftIO $ getFilePathRootDirectory globroot relroot MonitorStateGlob kindfile kinddir globroot <$> buildMonitorStateGlobRel mstartTime hashcache kindfile kinddir root "." globPath buildMonitorStateGlobRel :: Maybe MonitorTimestamp -- ^ start time of update -> FileHashCache -- ^ existing file hashes -> MonitorKindFile -> MonitorKindDir -> FilePath -- ^ the root directory -> FilePath -- ^ directory we are examining -- relative to the root -> FilePathGlobRel -- ^ the matching glob -> IO MonitorStateGlobRel buildMonitorStateGlobRel mstartTime hashcache kindfile kinddir root dir globPath = do let absdir = root dir dirEntries <- getDirectoryContents absdir dirMTime <- getModTime absdir case globPath of GlobDir glob globPath' -> do subdirs <- filterM (\subdir -> doesDirectoryExist (absdir subdir)) $ filter (matchGlob glob) dirEntries subdirStates <- forM (sort subdirs) $ \subdir -> do fstate <- buildMonitorStateGlobRel mstartTime hashcache kindfile kinddir root (dir subdir) globPath' return (subdir, fstate) return $! MonitorStateGlobDirs glob globPath' dirMTime subdirStates GlobFile glob -> do let files = filter (matchGlob glob) dirEntries filesStates <- forM (sort files) $ \file -> do fstate <- buildMonitorStateFile mstartTime hashcache kindfile kinddir root (dir file) return (file, fstate) return $! MonitorStateGlobFiles glob dirMTime filesStates GlobDirTrailing -> return MonitorStateGlobDirTrailing -- | We really want to avoid re-hashing files all the time. We already make -- the assumption that if a file mtime has not changed then we don't need to -- bother checking if the content hash has changed. We can apply the same -- assumption when updating the file monitor state. In the typical case of -- updating a file monitor the set of files is the same or largely the same so -- we can grab the previously known content hashes with their corresponding -- mtimes. -- type FileHashCache = Map FilePath (ModTime, Hash) -- | We declare it a cache hit if the mtime of a file is the same as before. -- lookupFileHashCache :: FileHashCache -> FilePath -> ModTime -> Maybe Hash lookupFileHashCache hashcache file mtime = do (mtime', hash) <- Map.lookup file hashcache guard (mtime' == mtime) return hash -- | Either get it from the cache or go read the file getFileHash :: FileHashCache -> FilePath -> FilePath -> ModTime -> IO Hash getFileHash hashcache relfile absfile mtime = case lookupFileHashCache hashcache relfile mtime of Just hash -> return hash Nothing -> readFileHash absfile -- | Build a 'FileHashCache' from the previous 'MonitorStateFileSet'. While -- in principle we could preserve the structure of the previous state, given -- that the set of files to monitor can change then it's simpler just to throw -- away the structure and use a finite map. -- readCacheFileHashes :: (Binary a, Binary b) => FileMonitor a b -> IO FileHashCache readCacheFileHashes monitor = handleDoesNotExist Map.empty $ handleErrorCall Map.empty $ do res <- readCacheFile monitor case res of Left _ -> return Map.empty Right (msfs, _, _) -> return (mkFileHashCache msfs) where mkFileHashCache :: MonitorStateFileSet -> FileHashCache mkFileHashCache (MonitorStateFileSet singlePaths globPaths) = collectAllFileHashes singlePaths `Map.union` collectAllGlobHashes globPaths collectAllFileHashes singlePaths = Map.fromList [ (fpath, (mtime, hash)) | MonitorStateFile _ _ fpath (MonitorStateFileHashed mtime hash) <- singlePaths ] collectAllGlobHashes globPaths = Map.fromList [ (fpath, (mtime, hash)) | MonitorStateGlob _ _ _ gstate <- globPaths , (fpath, (mtime, hash)) <- collectGlobHashes "" gstate ] collectGlobHashes dir (MonitorStateGlobDirs _ _ _ entries) = [ res | (subdir, fstate) <- entries , res <- collectGlobHashes (dir subdir) fstate ] collectGlobHashes dir (MonitorStateGlobFiles _ _ entries) = [ (dir fname, (mtime, hash)) | (fname, MonitorStateFileHashed mtime hash) <- entries ] collectGlobHashes _dir MonitorStateGlobDirTrailing = [] ------------------------------------------------------------------------------ -- Utils -- -- | Within the @root@ directory, check if @file@ has its 'ModTime' is -- the same as @mtime@, short-circuiting if it is different. probeFileModificationTime :: FilePath -> FilePath -> ModTime -> ChangedM () probeFileModificationTime root file mtime = do unchanged <- liftIO $ checkModificationTimeUnchanged root file mtime unless unchanged (somethingChanged file) -- | Within the @root@ directory, check if @file@ has its 'ModTime' and -- 'Hash' is the same as @mtime@ and @hash@, short-circuiting if it is -- different. probeFileModificationTimeAndHash :: FilePath -> FilePath -> ModTime -> Hash -> ChangedM () probeFileModificationTimeAndHash root file mtime hash = do unchanged <- liftIO $ checkFileModificationTimeAndHashUnchanged root file mtime hash unless unchanged (somethingChanged file) -- | Within the @root@ directory, check if @file@ still exists as a file. -- If it *does not* exist, short-circuit. probeFileExistence :: FilePath -> FilePath -> ChangedM () probeFileExistence root file = do existsFile <- liftIO $ doesFileExist (root file) unless existsFile (somethingChanged file) -- | Within the @root@ directory, check if @dir@ still exists. -- If it *does not* exist, short-circuit. probeDirExistence :: FilePath -> FilePath -> ChangedM () probeDirExistence root dir = do existsDir <- liftIO $ doesDirectoryExist (root dir) unless existsDir (somethingChanged dir) -- | Within the @root@ directory, check if @file@ still does not exist. -- If it *does* exist, short-circuit. probeFileNonExistence :: FilePath -> FilePath -> ChangedM () probeFileNonExistence root file = do existsFile <- liftIO $ doesFileExist (root file) existsDir <- liftIO $ doesDirectoryExist (root file) when (existsFile || existsDir) (somethingChanged file) -- | Returns @True@ if, inside the @root@ directory, @file@ has the same -- 'ModTime' as @mtime@. checkModificationTimeUnchanged :: FilePath -> FilePath -> ModTime -> IO Bool checkModificationTimeUnchanged root file mtime = handleIOException False $ do mtime' <- getModTime (root file) return (mtime == mtime') -- | Returns @True@ if, inside the @root@ directory, @file@ has the -- same 'ModTime' and 'Hash' as @mtime and @chash@. checkFileModificationTimeAndHashUnchanged :: FilePath -> FilePath -> ModTime -> Hash -> IO Bool checkFileModificationTimeAndHashUnchanged root file mtime chash = handleIOException False $ do mtime' <- getModTime (root file) if mtime == mtime' then return True else do chash' <- readFileHash (root file) return (chash == chash') -- | Read a non-cryptographic hash of a @file@. readFileHash :: FilePath -> IO Hash readFileHash file = withBinaryFile file ReadMode $ \hnd -> evaluate . Hashable.hash =<< BS.hGetContents hnd -- | Given a directory @dir@, return @Nothing@ if its 'ModTime' -- is the same as @mtime@, and the new 'ModTime' if it is not. checkDirectoryModificationTime :: FilePath -> ModTime -> IO (Maybe ModTime) checkDirectoryModificationTime dir mtime = handleIOException Nothing $ do mtime' <- getModTime dir if mtime == mtime' then return Nothing else return (Just mtime') -- | Run an IO computation, returning @e@ if there is an 'error' -- call. ('ErrorCall') handleErrorCall :: a -> IO a -> IO a handleErrorCall e = handle (\(ErrorCall _) -> return e) -- | Run an IO computation, returning @e@ if there is any 'IOException'. -- -- This policy is OK in the file monitor code because it just causes the -- monitor to report that something changed, and then code reacting to that -- will normally encounter the same IO exception when it re-runs the action -- that uses the file. -- handleIOException :: a -> IO a -> IO a handleIOException e = handle (anyIOException e) where anyIOException :: a -> IOException -> IO a anyIOException x _ = return x ------------------------------------------------------------------------------ -- Instances -- instance Binary MonitorStateFileSet where put (MonitorStateFileSet singlePaths globPaths) = do put (1 :: Int) -- version put singlePaths put globPaths get = do ver <- get if ver == (1 :: Int) then do singlePaths <- get globPaths <- get return $! MonitorStateFileSet singlePaths globPaths else fail "MonitorStateFileSet: wrong version"