{-# LANGUAGE CPP #-} {- Copyright (C) 2010 Dr. Alistair Ward This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . -} {- | [@AUTHOR@] Dr. Alistair Ward [@DESCRIPTION@] * Returns combinations of the specified files, which fit into the available space, without wasting more than the specified ratio. * Any directory-names are treated as atomic units, rather than individual files. * Because of the explosion of possible combinations, an /exact/ match for the available space is frequently found with a surprisingly small set of files. [@CAVEATS@] Though it runs in constant space, the algorithm has @O(2^n)@ time-complexity, & may take an excessive time to calculate all possibilities. Because of this, it prints progressively better matches as they're found, rather than waiting to find the best. -} module Squeeze.Squeeze( -- * Functions distribute, findBestFit, findCombinations, -- risingFilter, squeeze ) where import Control.Applicative((<$>)) import qualified Control.Arrow import qualified Data.List import qualified Squeeze.CommandOptions as CommandOptions import qualified Squeeze.File as File import qualified Squeeze.FileCombination as FileCombination import qualified Squeeze.FileSizeBounds as FileSizeBounds #if MIN_VERSION_parallel(3,0,0) import qualified Control.Parallel.Strategies #endif -- | Progressively raises the selection-criterion, as each match is found. risingFilter :: File.FileSize -- ^ The initial minimum byte-size of file to accept. -> [FileCombination.FileCombination] -- ^ The input list of files to filter. -> [FileCombination.FileCombination] -- ^ The resulting list of files, which have met rising criterion. risingFilter minimumBytes (x@(size, _) : xs) | size >= minimumBytes = x : risingFilter size xs | otherwise = risingFilter minimumBytes xs risingFilter _ [] = [] {- | * Checks that the total aggregate 'File.FileSize', meets or exceeds 'minimumBytes'. * Drops excessively large files, assuming that the file-list has been sorted by size, largest first. * Generates up to @2^n@ combinations of the @n@ specified files; the algorithm is similar to 'Data.List.subsequences', except that unproductive lines are immediately terminated. This is the performance bottle-neck, and though there may be simpler and faster algorithms, the key attribute is that it operates in constant space. * The algorithm is stable, in that it maintains the specified file-order within each combination; though the order in which the combinations are concatenated is rather arbitrary. -} findCombinations :: FileSizeBounds.FileSizeBounds -- ^ The bounds within which to find file-combinations. -> [File.FileSizeAndPath] -- ^ The input list of file-names and sizes. -> [FileCombination.FileCombination] -- ^ The resulting unordered list of suitable file-combinations. findCombinations (minimumBytes, maximumBytes) = filter ( FileCombination.hasSize (>= minimumBytes) ) . ( FileCombination.nullFileCombination : ) . nonEmptyCombinations minimumBytes . dropWhile ( File.hasSize (> maximumBytes) ) where nonEmptyCombinations :: File.FileSize -> [File.FileSizeAndPath] -> [FileCombination.FileCombination] nonEmptyCombinations minimumBytes' fileSizeAndPathList@(x : xs) | minimumBytes' > 0 && File.aggregateSize fileSizeAndPathList < minimumBytes' = [] | otherwise = FileCombination.singleton x : foldr ( \y -> (y :) . let combination = FileCombination.prepend x y in if FileCombination.hasSize (<= maximumBytes) combination then (combination :) else id ) [] (nonEmptyCombinations (minimumBytes' - File.getSize x) xs {-recurse-}) nonEmptyCombinations _ [] = [] -- | Splits up the task, to facilitate parallelization, before calling 'findCombinations'. distribute :: Double -- ^ The ratio at which to bisect the list of files, in a divide and conquer strategy. -> FileSizeBounds.FileSizeBounds -- ^ The bounds within which to find file-combinations. -> [File.FileSizeAndPath] -- ^ The input list of file-names and sizes. -> [FileCombination.FileCombination] -- ^ The complete unordered list of suitable file-combinations. distribute bisectionRatio fileSizeBounds fileSizeAndPathList | bisectionIndex <= 0 || bisectionIndex >= fileSizeAndPathListLength = findCombinations fileSizeBounds fileSizeAndPathList | otherwise = distribute' `uncurry` splitAt bisectionIndex fileSizeAndPathList where fileSizeAndPathListLength, bisectionIndex :: Int fileSizeAndPathListLength = length fileSizeAndPathList bisectionIndex = round $ bisectionRatio * fromIntegral fileSizeAndPathListLength distribute' :: [File.FileSizeAndPath] -> [File.FileSizeAndPath] -> [FileCombination.FileCombination] distribute' fileSizeAndPathListL fileSizeAndPathListR = #if MIN_VERSION_parallel(3,0,0) concat $ Control.Parallel.Strategies.parMap ( Control.Parallel.Strategies.evalList $ Control.Parallel.Strategies.evalTuple2 Control.Parallel.Strategies.rdeepseq Control.Parallel.Strategies.r0 --Evaluate the aggregate size, but not the path-list. ) #else concatMap #endif ( \combinationL@(fileSize, _) -> FileCombination.concatenate combinationL `map` findCombinations (negate fileSize `FileSizeBounds.shift` fileSizeBounds) fileSizeAndPathListR ) $ Control.Arrow.first (\minimumBytes -> minimumBytes - File.aggregateSize fileSizeAndPathListR) fileSizeBounds `findCombinations` fileSizeAndPathListL {- | * Calls 'File.order' to sort the files by size (largest first), on the empirical basis that the generated file-combinations, will more quickly result in a good match for the available space. * Calls 'distribute' to split the job, to facilitate parallelization. * Calls the private function 'risingFilter' to progressively select better file-combinations from those returned by 'distribute'. -} findBestFit :: Double -- ^ The ratio at which to bisect the list of files, in a divide and conquer strategy. -> FileSizeBounds.FileSizeBounds -- ^ The bounds within which to find file-combinations. -> [File.FileSizeAndPath] -- ^ The input list of file-names and sizes. -> [FileCombination.FileCombination] -- ^ A reduced list of suitable file-combinations, sorted best first. findBestFit bisectionRatio fileSizeBounds@(minimumBytes, _) = risingFilter minimumBytes . distribute bisectionRatio fileSizeBounds . File.order {- | * Removes duplicate file-names. * Determines the size of each specified file. * Unpacks the command-line options. * Calls 'findBestFit' to solve the problem. -} squeeze :: CommandOptions.CommandOptions -- ^ The the caller's selection-criteria. -> File.FilePathList -- ^ The list of file-names from which to select. -> IO [FileCombination.FileCombination] squeeze commandOptions = (CommandOptions.bisectionRatio commandOptions `findBestFit` CommandOptions.fileSizeBounds commandOptions <$>) . File.findSizes . Data.List.nub