{-# LANGUAGE RankNTypes #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE ViewPatterns #-} module Data.Conduit.Find ( FileEntry(..) , Predicate(..) , sourceFileEntries , matchAll , ignoreVcs , regexMatcher , regex , glob ) where import Conduit import Control.Applicative import Control.Monad (when) import Data.Attoparsec.Text import Data.Foldable (for_) import Data.Monoid ((<>), mconcat) import Data.Text (Text, unpack, pack) import Filesystem.Path.CurrentOS (FilePath, encodeString, filename) import Prelude hiding (FilePath) import System.Posix.Files (FileStatus, getFileStatus, getSymbolicLinkStatus, isDirectory) import Text.Regex.Posix ((=~)) data FileEntry = FileEntry { entryPath :: FilePath , entryStatus :: FileStatus } newtype Predicate m = Predicate (FileEntry -> m (Maybe FileEntry, Maybe (Predicate m))) -- | Walk through the entries of a directory tree, allowing the user to -- specify a 'Predicate' which may decides not only which entries to yield -- from the conduit, but also which directories to follow, and how to -- recurse into that directory by permitting the use of a subsequent -- 'Predicate'. -- -- Note that the 'followSymlinks' parameter to this function has a different -- meaning than it does for 'sourceDirectoryDeep': if @True@, symlinks are -- never passed to the predicate, only what they point to; if @False@, -- symlinks are never read at all. For 'sourceDirectoryDeep', if -- 'followSymlinks' is @False@ it only prevents directory symlinks from -- being read. sourceFileEntries :: MonadResource m => Bool -> Predicate m -> FilePath -> Producer m FileEntry sourceFileEntries followSymlinks (Predicate matcher) dir = sourceDirectory dir =$= go where go = do mfp <- await for_ mfp $ \fp -> do stat <- liftIO $ (if followSymlinks then getFileStatus else getSymbolicLinkStatus) (encodeString fp) let entry = FileEntry fp stat res <- lift $ matcher entry for_ (fst res) yield when (isDirectory stat) $ for_ (snd res) $ flip (sourceFileEntries followSymlinks) fp go -- | Return all entries. This is the same as 'sourceDirectoryDeep', except -- that the 'FileStatus' structure for each entry is also provided. As a -- result, only one stat call is ever made per entry, compared to two per -- directory in the current version of 'sourceDirectoryDeep'. matchAll :: Monad m => Predicate m matchAll = Predicate $ \entry -> return (Just entry, Just matchAll) -- | Return all entries, except for those within version-control metadata -- directories (and not including the version control directory itself either). ignoreVcs :: MonadIO m => Predicate m ignoreVcs = Predicate $ \entry -> return $ if filename (entryPath entry) `elem` vcsDirs then (Nothing, Nothing) else (Just entry, Just ignoreVcs) where vcsDirs = [ ".git", "CVS", "RCS", "SCCS", ".svn", ".hg" ] -- | The 'regexMatcher' predicate builder matches some part of every path -- against a given regex. Use the simpler 'regex' if you just want to apply -- a regex to every file name. regexMatcher :: Monad m => (FilePath -> FilePath) -- ^ Function that specifies which part of the pathname to -- match against. Use this to match against only filenames, -- or to relativize the path against the search root before -- comparing. -> Bool -- ^ If True, prune directories from the search that do not -- match. -> Text -- ^ The regular expression search pattern. -> Predicate m regexMatcher accessor pruneNonMatching (unpack -> pat) = go where go = Predicate $ \entry -> return $ if encodeString (accessor (entryPath entry)) =~ pat then (Just entry, Just go) else (Nothing, if pruneNonMatching then Nothing else Just go) -- | Find every entry whose filename part matching the given regular expression. regex :: Monad m => Text -> Predicate m regex = regexMatcher filename False -- | Find every entry whose filename part matching the given filename globbing -- expression. For example: @glob "*.hs"@. glob :: Monad m => Text -> Predicate m glob g = case parseOnly globParser g of Left e -> error $ "Failed to parse glob: " ++ e Right x -> regex ("^" <> x <> "$") where globParser :: Parser Text globParser = fmap mconcat $ many $ char '*' *> return ".*" <|> char '?' *> return "." <|> (\x y z -> pack ((x:y) ++ [z])) <$> char '[' -- jww (2014-04-23): This does not yet handle the pattern []], -- which is legal. <*> manyTill anyChar (try (char ']')) <*> char ']' <|> do x <- anyChar return . pack $ if x `elem` ['.', '(', ')', '^', '$'] then ['\\', x] else [x]