{-# LANGUAGE RankNTypes #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ViewPatterns #-} module Data.Conduit.Find ( FileEntry(..) , Predicate , HasFilePath(..) , sourceFileEntries , matchAll , ignoreVcs , regexMatcher , regex , glob , stat , lstat , getPath , regular , executable , prune , test , find , find' , lfind , lfind' , findWithPreFilter , readPaths , or_ , and_ , not_ ) where import Conduit import Control.Applicative import Control.Arrow import Control.Monad import Data.Attoparsec.Text import Data.Bits import Data.Conduit.Find.Looped import Data.Foldable (for_) import Data.Monoid import Data.Text (Text, unpack, pack) import Filesystem.Path.CurrentOS (FilePath, encodeString, filename) import Prelude hiding (FilePath) import System.Posix.Files import Text.Regex.Posix ((=~)) data FileEntry = FileEntry { entryPath :: FilePath , entryStatus :: FileStatus } instance Show FileEntry where show entry = "FileEntry " ++ show (entryPath entry) class HasFilePath a where getFilePath :: a -> FilePath instance HasFilePath FilePath where getFilePath = id instance HasFilePath FileEntry where getFilePath = entryPath type Predicate m a = Looped m a a -- | Walk through the entries of a directory tree, allowing the user to -- specify a 'Predicate' which may decides not only which entries to yield -- from the conduit, but also which directories to follow, and how to -- recurse into that directory by permitting the use of a subsequent -- 'Predicate'. -- -- Note that the 'followSymlinks' parameter to this function has a different -- meaning than it does for 'sourceDirectoryDeep': if @True@, symlinks are -- never passed to the predicate, only what they point to; if @False@, -- symlinks are never read at all. For 'sourceDirectoryDeep', if -- 'followSymlinks' is @False@ it only prevents directory symlinks from -- being read. sourceFileEntries :: MonadResource m => Looped m FilePath FileEntry -> FilePath -> Producer m FileEntry sourceFileEntries matcher dir = sourceDirectory dir =$= go matcher where go m = do mfp <- await for_ mfp $ \fp -> do applyPredicate m fp yield (`sourceFileEntries` fp) go m -- | Return all entries. This is the same as 'sourceDirectoryDeep', except -- that the 'FileStatus' structure for each entry is also provided. As a -- result, only one stat call is ever made per entry, compared to two per -- directory in the current version of 'sourceDirectoryDeep'. matchAll :: Monad m => Predicate m a matchAll = Looped $ \entry -> return $ KeepAndRecurse entry matchAll -- | Return all entries, except for those within version-control metadata -- directories (and not including the version control directory itself either). ignoreVcs :: (MonadIO m, HasFilePath e) => Predicate m e ignoreVcs = Looped $ \entry -> return $ if filename (getFilePath entry) `elem` vcsDirs then Ignore else KeepAndRecurse entry ignoreVcs where vcsDirs = [ ".git", "CVS", "RCS", "SCCS", ".svn", ".hg", "_darcs" ] -- | The 'regexMatcher' predicate builder matches some part of every path -- against a given regex. Use the simpler 'regex' if you just want to apply -- a regex to every file name. regexMatcher :: (Monad m, HasFilePath e) => (FilePath -> FilePath) -- ^ Function that specifies which part of the pathname to -- match against. Use this to match against only filenames, -- or to relativize the path against the search root before -- comparing. -> Text -- ^ The regular expression search pattern. -> Predicate m e regexMatcher accessor (unpack -> pat) = go where go = Looped $ \entry -> return $ if encodeString (accessor (getFilePath entry)) =~ pat then KeepAndRecurse entry go else Recurse go -- | Find every entry whose filename part matching the given regular expression. regex :: (Monad m, HasFilePath e) => Text -> Predicate m e regex = regexMatcher filename -- | Find every entry whose filename part matching the given filename globbing -- expression. For example: @glob "*.hs"@. glob :: (Monad m, HasFilePath e) => Text -> Predicate m e glob g = case parseOnly globParser g of Left e -> error $ "Failed to parse glob: " ++ e Right x -> regex ("^" <> x <> "$") where globParser :: Parser Text globParser = fmap mconcat $ many $ char '*' *> return ".*" <|> char '?' *> return "." <|> string "[]]" *> return "[]]" <|> (\x y z -> pack ((x:y) ++ [z])) <$> char '[' <*> manyTill anyChar (try (char ']')) <*> char ']' <|> do x <- anyChar return . pack $ if x `elem` ".()^$" then ['\\', x] else [x] doStat :: MonadIO m => (String -> IO FileStatus) -> Looped m FilePath FileEntry doStat getstatus = Looped $ \path -> do s <- liftIO $ getstatus (encodeString path) let entry = FileEntry path s return $ if isDirectory s then KeepAndRecurse entry (doStat getstatus) else Keep entry lstat :: MonadIO m => Looped m FilePath FileEntry lstat = doStat getSymbolicLinkStatus stat :: MonadIO m => Looped m FilePath FileEntry stat = doStat getFileStatus getPath :: MonadIO m => Looped m FileEntry FilePath getPath = liftLooped (return . entryPath) status :: Monad m => (FileStatus -> Bool) -> Predicate m FileEntry status f = if_ (f . entryStatus) regular :: Monad m => Predicate m FileEntry regular = status isRegularFile executable :: Monad m => Predicate m FileEntry executable = status (\s -> fileMode s .&. ownerExecuteMode /= 0) prune :: (Monad m, HasFilePath e) => FilePath -> Predicate m e prune path = Looped $ \entry -> return $ if getFilePath entry == path then Ignore else KeepAndRecurse entry (prune path) test :: MonadIO m => Predicate m FileEntry -> FilePath -> m Bool test matcher path = getAny `liftM` testSingle (stat >>> matcher) path alwaysTrue where alwaysTrue = const (return (Any True)) find :: (MonadIO m, MonadResource m) => FilePath -> Predicate m FileEntry -> Producer m FilePath find path pr = sourceFileEntries (stat >>> pr) path =$= mapC entryPath data FindFilter = IgnoreFile | ConsiderFile | MaybeRecurse deriving (Show, Eq) -- | Run a find, but using a pre-pass filter on the FilePaths, to eliminates -- files from consideration early and avoid calling stat on them. findWithPreFilter :: (MonadIO m, MonadResource m) => FilePath -> Bool -> Predicate m FilePath -> Predicate m FileEntry -> Producer m FileEntry findWithPreFilter path follow filt pr = sourceDirectory path =$= go pr where go m = do mfp <- await for_ mfp $ \fp -> do r <- lift $ runLooped filt fp let candidate = case r of Ignore -> IgnoreFile Keep _ -> ConsiderFile Recurse _ -> MaybeRecurse KeepAndRecurse _ _ -> ConsiderFile unless (candidate == IgnoreFile) $ do st <- liftIO $ (if follow then getFileStatus else getSymbolicLinkStatus) (encodeString fp) let next = when (isDirectory st) . findWithPreFilter fp follow filt case candidate of IgnoreFile -> return () MaybeRecurse -> next pr ConsiderFile -> applyPredicate m (FileEntry fp st) yield next go m find' :: (MonadIO m, MonadResource m) => FilePath -> Predicate m FileEntry -> Producer m FileEntry find' path pr = sourceFileEntries (stat >>> pr) path lfind :: (MonadIO m, MonadResource m) => FilePath -> Predicate m FileEntry -> Producer m FilePath lfind path pr = sourceFileEntries (lstat >>> pr) path =$= mapC entryPath lfind' :: (MonadIO m, MonadResource m) => FilePath -> Predicate m FileEntry -> Producer m FileEntry lfind' path pr = sourceFileEntries (lstat >>> pr) path readPaths :: (MonadIO m, MonadResource m) => FilePath -> Predicate m FilePath -> Producer m FilePath readPaths path pr = sourceDirectory path =$= do mfp <- await for_ mfp $ \fp -> do r <- lift $ runLooped pr fp case r of Ignore -> return () Keep a -> yield a Recurse _ -> return () KeepAndRecurse a _ -> yield a