{-
    Copyright (C) 2010-2016 Dr. Alistair Ward

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program. If not, see <http://www.gnu.org/licenses/>.
-}
{- |
 [@AUTHOR@] Dr. Alistair Ward

 [@DESCRIPTION@] Defines file-related type-synonyms, and associated operations.
-}

module Squeeze.Data.File(
-- * Types
-- ** Type-synonyms
    FilePathList,
    FileSize,
    FileSizeAndPath,
-- * Functions
    accumulateSize,
    aggregateSize,
--  expandDirectory,
    findDuplicates,
--  findSize,
    findSizes,
    orderByIncreasingSize,
    orderByDecreasingSize,
    getFileSizeStatistics,
    selectSuitableFileSizes,
-- ** Accessors
    getSize,
    getPath,
-- ** Predicates
    hasSizeBy
) where

import Control.Arrow((&&&))
import qualified Control.Monad
import qualified Control.Monad.Writer
import qualified Data.List
import qualified Data.Ord
import qualified Factory.Math.Statistics
import qualified System.Directory
import System.FilePath((</>))
import qualified System.IO
import qualified ToolShed.Data.Foldable

-- | A type suitable for containing an arbitrary set of file-paths.
type FilePathList = [System.IO.FilePath]

-- | A type-synonym specifically to hold file-sizes (in bytes).
type FileSize = Integer -- Matches the return-type of 'System.IO.hFileSize'.

-- | A type suitable for containing a file-path, qualified by the corresponding 'FileSize'.
type FileSizeAndPath = (FileSize, System.IO.FilePath)

-- | Accessor: the size-component of the pair.
getSize :: FileSizeAndPath -> FileSize
getSize = fst

-- | Accessor: the path-component of the pair.
getPath :: FileSizeAndPath -> System.IO.FilePath
getPath = snd

-- | Sum the 'FileSize's in the specified list, using a strict left fold.
aggregateSize :: [FileSizeAndPath] -> FileSize
aggregateSize = Data.List.foldl' (\acc (fileSize, _) -> acc + fileSize) 0

{- |
    * Returns the cumulative sequence of sizes, as each file is prepended to the specified list;
    i.e. element /i/ is the total size of the files from position /i/ to the end of the list.

    * CAVEAT: the list-length is one greater than that supplied, since the last element represents the size with zero files.
-}
accumulateSize :: [FileSizeAndPath] -> [FileSize]
accumulateSize = scanr ((+) . getSize) 0

{- |
    * Recursively descend the specified path, accumulating a list of files.

    * CAVEAT: all non-directory files are returned; devices, pipes, sockets, symlinks ...
-}
expandDirectory :: System.IO.FilePath -> IO FilePathList
expandDirectory filePath = do
    directoryExists <- System.Directory.doesDirectoryExist filePath

    if directoryExists
        then do
            entries <- System.Directory.getDirectoryContents filePath

            -- Qualify each entry with its parent directory, then recurse into it.
            fmap concat . mapM (expandDirectory . (filePath </>)) $ filter (
                `notElem` [".", ".."] -- Prevent infinite recursion.
             ) entries
        else {-non-directory-} return {-to IO-monad-} [filePath] -- CAVEAT: this could include non-existent paths, devices, pipes, sockets, symlinks ...

{- |
    * Finds any file-paths which have been specified more than once.

    * This includes files which have been implicitly specified via a directory.

    * Each duplicated path is reported once, regardless of how many times it was specified.
-}
findDuplicates :: FilePathList -> IO FilePathList
findDuplicates = fmap (selectRepeated . ToolShed.Data.Foldable.gather . concat) . mapM expandDirectory
  where
    -- Keep one representative from each group of identical paths which has at least two members.
    -- Matching on the shape is total, & avoids measuring the whole group merely to compare its length with one.
    selectRepeated :: [FilePathList] -> FilePathList
    selectRepeated groups = [filePath | filePath : _ : _ <- groups]

{- |
    * Get the size of a file, treating a directory as an atomic unit; i.e. the size of a directory is the sum of the sizes of the files found beneath it.

    * CAVEAT: the size of a symlink, is that of the file to which it refers.

    * CAVEAT: each file is opened for reading in order to measure it, so an unreadable or non-existent path results in an I/O-exception.
-}
findSize :: System.IO.FilePath -> IO FileSize
findSize filePath = expandDirectory filePath >>= fmap aggregateSize . mapM sizeAndPath
  where
    -- Pair the path with the size reported for a handle opened on it.
    sizeAndPath :: System.IO.FilePath -> IO FileSizeAndPath
    sizeAndPath f = flip (,) f `fmap` System.IO.withFile f System.IO.ReadMode System.IO.hFileSize

{- |
    * Finds file-sizes.

    * The result has one element per specified path, in the order specified; each pairs the size found by 'findSize' with the original path.
-}
findSizes :: FilePathList -> IO [FileSizeAndPath]
findSizes = uncurry fmap . (flip zip &&& mapM findSize) -- Equivalent to @\\paths -> fmap (`zip` paths) (mapM findSize paths)@.

-- | Sorts a list of 'FileSizeAndPath' by increasing size; ie. smallest first.
orderByIncreasingSize :: [FileSizeAndPath] -> [FileSizeAndPath]
orderByIncreasingSize = Data.List.sortBy $ Data.Ord.comparing getSize

{- |
    * Sorts a list of 'FileSizeAndPath' by decreasing size; ie. largest first.

    * CAVEAT: since this reverses the result of 'orderByIncreasingSize', files of equal size appear in the opposite order to that in which they were supplied.
-}
orderByDecreasingSize :: [FileSizeAndPath] -> [FileSizeAndPath]
orderByDecreasingSize = reverse . orderByIncreasingSize

-- | True if the specified file has the required size according to the specified predicate.
hasSizeBy
    :: (FileSize -> Bool) -- ^ The predicate.
    -> FileSizeAndPath    -- ^ The file-parameters to be tested.
    -> Bool
hasSizeBy predicate = predicate . getSize

{- |
    * Acquire statistics related to a list of files.

    * NOTE(review): the mean & standard-deviation are delegated to "Factory.Math.Statistics"; their behaviour for an empty list isn't visible here & should be confirmed.
-}
getFileSizeStatistics
    :: (Fractional mean, Floating standardDeviation)
    => [FileSizeAndPath]
    -> (Int, FileSize, mean, standardDeviation) -- ^ (Number of components, Aggregate size, Mean size, Standard-deviation).
getFileSizeStatistics l = (
    length l,
    aggregateSize l, -- Re-use the module's own strict summation.
    Factory.Math.Statistics.getMean sizes,
    Factory.Math.Statistics.getStandardDeviation sizes
 ) where
    sizes = map getSize l

{- |
    * Partitions the specified list of file-sizes & paths, into those whose size is suitable according to the specified predicate & those which are unsuitable.

    * Returns the suitable ones, in their original order.

    * Logs a single warning listing the rejected files, but only when at least one was rejected.
-}
selectSuitableFileSizes
    :: (FileSize -> Bool) -- ^ The predicate which a suitable size satisfies.
    -> [FileSizeAndPath]  -- ^ The candidates.
    -> Control.Monad.Writer.Writer [String] [FileSizeAndPath]
selectSuitableFileSizes predicate fileSizeAndPathList = do
    Control.Monad.unless (null rejected) $ Control.Monad.Writer.tell ["WARNING: rejecting files of unsuitable size; " ++ show rejected ++ "."]

    return {-to Writer-monad-} accepted
  where
    (accepted, rejected) = Data.List.partition (hasSizeBy predicate) fileSizeAndPathList