{-# LANGUAGE RankNTypes #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE GADTs #-} module Data.Conduit.Find ( -- * Introduction -- $intro -- ** Basic comparison with GNU find -- $gnufind -- ** Performance -- $performance -- ** Other notes -- $notes -- * Finding functions find , find' , lfind , lfind' , stat , lstat , test , findRaw -- * File path predicates , ignoreVcs , regex , glob , filename_ , filenameS_ , filepath_ , filepathS_ , withPath -- * File entry predicates (uses stat information) , regular , hasMode , executable , depth , lastAccessed , lastModified , withFileStatus -- * Predicate combinators , module Cond , (=~) -- * Types and type classes , FileEntry(..) ) where import Conduit import Control.Applicative import Control.Monad import Control.Monad.State.Class import Data.Attoparsec.Text import Data.Bits import qualified Data.Cond as Cond import Data.Cond hiding (test) import Data.Maybe (fromMaybe) import Data.Monoid import Data.Text (Text, unpack, pack) import Data.Time import Data.Time.Clock.POSIX import Filesystem.Path.CurrentOS (FilePath, encodeString, filename) import Prelude hiding (FilePath) import System.Posix.Files import System.Posix.Types import qualified Text.Regex.Posix as R ((=~)) {- $intro **find-conduit** is essentially a souped version of GNU find for Haskell, using a DSL to provide both ease of us, and extensive flexbility. In its simplest form, let's compare some uses of find to find-conduit. Bear in mind that the result of the find function is a conduit, so you're expected to either sink it to a list, or operate on the file paths as they are yielded. -} {- $gnufind A typical find command: @ find src -name '*.hs' -type f -print @ Would in find-conduit be: @ find "src" (glob \"*.hs\" \<\> regular) $$ mapM_C (liftIO . print) @ The 'glob' predicate matches the file basename against the globbing pattern, while the 'regular' predicate matches plain files. A more complicated example: @ find . -size +100M -perm 644 -mtime 1 @ Now in find-conduit: @ let megs = 1024 * 1024 days = 86400 now <- liftIO getCurrentTime find \".\" ( fileSize (> 100*megs) \<\> hasMode 0o644 \<\> lastModified (> addUTCTime now (-(1*days))) ) @ Appending predicates like this expressing an "and" relationship. Use '<|>' to express "or". You can also negate any predicate: @ find \".\" (not_ (hasMode 0o644)) @ By default, predicates, whether matching or not, will allow recursion into directories. In order to express that matching predicate should disallow recursion, use 'prune': @ find \".\" (prune (depth (> 2))) @ This is the same as using '-maxdepth 2' in find. @ find \".\" (prune (filename_ (== \"dist\"))) @ This is the same as: @ find . \\( -name dist -prune \\) -o -print @ -} {- $performance find-conduit strives to make file-finding a well performing operation. To this end, a composed Predicate will only call stat once per entry being considered; and if you prune a directory, it is not traversed at all. By default, 'find' calls stat for every file before it applies the predicate, in order to ensure that only one such call is needed. Sometimes, however, you know just from the FilePath that you don't want to consider a certain file, or you want to prune a directory tree. To support these types of optimized queries, a variant of find is provided called 'findWithPreFilter'. This takes two predicates: one that is applied to only the FilePath, before stat (or lstat) is called; and one that is applied to the full file information after the stat. -} {- $notes See 'Data.Cond' for more details on the Monad used to build predicates. -} type Predicate m a = CondT a m () data FileEntry = FileEntry { entryPath :: FilePath , entryDepth :: Int , entryStatus :: Maybe FileStatus -- ^ This is Nothing until we determine stat should be called. } instance Show FileEntry where show entry = "FileEntry " ++ show (entryPath entry) ++ " " ++ show (entryDepth entry) newFileEntry :: FilePath -> Int -> FileEntry newFileEntry p d = FileEntry p d Nothing -- | Return all entries, except for those within version-control metadata -- directories (and not including the version control directory itself either). ignoreVcs :: Monad m => Predicate m FileEntry ignoreVcs = prune (filename_ (`elem` vcsDirs)) where vcsDirs = [ ".git", "CVS", "RCS", "SCCS", ".svn", ".hg", "_darcs" ] regex :: Monad m => Text -> Predicate m FileEntry regex pat = filename_ (=~ pat) -- | This is a re-export of 'Text.Regex.Posix.=~', with the types changed for -- ease of use with this module. For example, you can simply say: -- -- @ -- filename_ (=~ \"\\\\.hs$\") -- @ -- -- Which is the same thing as: -- -- @ -- regex \"\\\\.hs$\" -- @ (=~) :: FilePath -> Text -> Bool str =~ pat = encodeString str R.=~ unpack pat -- | Find every entry whose filename part matching the given filename globbing -- expression. For example: @glob "*.hs"@. glob :: Monad m => Text -> Predicate m FileEntry glob g = case parseOnly globParser g of Left e -> error $ "Failed to parse glob: " ++ e Right x -> regex ("^" <> x <> "$") where globParser :: Parser Text globParser = fmap mconcat $ many $ char '*' *> return ".*" <|> char '?' *> return "." <|> string "[]]" *> return "[]]" <|> (\x y z -> pack ((x:y) ++ [z])) <$> char '[' <*> manyTill anyChar (try (char ']')) <*> char ']' <|> do x <- anyChar return . pack $ if x `elem` ".()^$" then ['\\', x] else [x] doStat :: MonadIO m => (String -> IO FileStatus) -> Predicate m FileEntry doStat getstatus = do entry <- get s <- liftIO $ getstatus (encodeString (entryPath entry)) put $ entry { entryStatus = Just s } lstat :: MonadIO m => Predicate m FileEntry lstat = doStat getSymbolicLinkStatus stat :: MonadIO m => Predicate m FileEntry stat = doStat getFileStatus getStatus :: FileEntry -> FileStatus getStatus e = fromMaybe (error $ "FileStatus has not been determined for: " ++ show (entryPath e)) (entryStatus e) withFileStatus :: Monad m => (FileStatus -> m Bool) -> Predicate m FileEntry withFileStatus f = ifM_ (f . getStatus) status :: Monad m => (FileStatus -> Bool) -> Predicate m FileEntry status f = withFileStatus (return . f) regular :: Monad m => Predicate m FileEntry regular = status isRegularFile directory :: Monad m => Predicate m FileEntry directory = status isDirectory hasMode :: Monad m => FileMode -> Predicate m FileEntry hasMode m = status (\s -> fileMode s .&. m /= 0) executable :: Monad m => Predicate m FileEntry executable = hasMode ownerExecuteMode withPath :: Monad m => (FilePath -> m Bool) -> Predicate m FileEntry withPath f = ifM_ (f . entryPath) filename_ :: Monad m => (FilePath -> Bool) -> Predicate m FileEntry filename_ f = withPath (return . f . filename) filenameS_ :: Monad m => (String -> Bool) -> Predicate m FileEntry filenameS_ f = withPath (return . f . encodeString . filename) filepath_ :: Monad m => (FilePath -> Bool) -> Predicate m FileEntry filepath_ f = withPath (return . f) filepathS_ :: Monad m => (String -> Bool) -> Predicate m FileEntry filepathS_ f = withPath (return . f . encodeString) depth :: Monad m => (Int -> Bool) -> Predicate m FileEntry depth f = if_ (f . entryDepth) withStatusTime :: Monad m => (UTCTime -> Bool) -> (FileStatus -> POSIXTime) -> Predicate m FileEntry withStatusTime f g = status (f . posixSecondsToUTCTime . g) lastAccessed :: Monad m => (UTCTime -> Bool) -> Predicate m FileEntry lastAccessed = flip withStatusTime accessTimeHiRes lastModified :: Monad m => (UTCTime -> Bool) -> Predicate m FileEntry lastModified = flip withStatusTime modificationTimeHiRes -- | A raw find does no processing on the FileEntry, leaving it up to the user -- to determine when and if stat should be called. Note that unless you -- take care to indicate when recursion should happen, an error will result -- when the raw finder attempts to recurse on a non-directory. The bare -- minimum for a proper finder should look like this for non-recursion: -- -- @ -- findRaw \ $ do -- \ -- localM stat $ do -- directory ||: norecurse -- \ -- @ -- -- To apply predicates only to a single directory, without recursing, simply -- start (or end) the predicate with 'norecurse', and use @localM stat@ or -- @localM lstat@ at the point where you need 'FileStatus' information. findRaw :: (MonadIO m, MonadResource m) => FilePath -> Bool -> Predicate m FileEntry -> Source m FileEntry findRaw path follow predicate = traverseRecursively (newFileEntry path 0) predicate (const . yield) lift readDirectory where readDirectory (FileEntry p d mst) go = do -- If no status has been determined yet, we must now in order to know -- whether to traverse or not. recurse <- isDirectory <$> case mst of Nothing -> liftIO $ (if follow then getFileStatus else getSymbolicLinkStatus) $ encodeString p Just st -> return st when recurse $ (sourceDirectory p =$) $ awaitForever $ \fp -> mapInput (const ()) (const Nothing) $ go $ newFileEntry fp (succ d) basicFind :: (MonadIO m, MonadResource m) => Predicate m FileEntry -> Bool -> FilePath -> Predicate m FileEntry -> Source m FileEntry basicFind f follow path pr = findRaw path follow $ f >> (directory ||: norecurse) >> pr find' :: (MonadIO m, MonadResource m) => FilePath -> Predicate m FileEntry -> Source m FileEntry find' = basicFind stat True find :: (MonadIO m, MonadResource m) => FilePath -> Predicate m FileEntry -> Source m FilePath find path pr = find' path pr =$= mapC entryPath lfind' :: (MonadIO m, MonadResource m) => FilePath -> Predicate m FileEntry -> Source m FileEntry lfind' = basicFind lstat False lfind :: (MonadIO m, MonadResource m) => FilePath -> Predicate m FileEntry -> Source m FilePath lfind path pr = lfind' path pr =$= mapC entryPath -- | Test a file path using the same type of 'Predicate' that is accepted by -- 'find'. test :: MonadIO m => Predicate m FileEntry -> FilePath -> m Bool test matcher path = Cond.test (stat >> matcher) (newFileEntry path 0)