{-
	Copyright (C) 2010-2016 Dr. Alistair Ward

	This program is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program.  If not, see <http://www.gnu.org/licenses/>.
-}
{- |
 [@AUTHOR@]	Dr. Alistair Ward

 [@DESCRIPTION@]	Defines file-related type-synonyms, and associated operations.
-}

module Squeeze.Data.File(
-- * Types
-- ** Type-synonyms
        FilePathList,
        FileSize,
        FileSizeAndPath,
-- * Functions
        accumulateSize,
        aggregateSize,
--	expandDirectory,
        findDuplicates,
--	findSize,
        findSizes,
        orderByIncreasingSize,
        orderByDecreasingSize,
        getFileSizeStatistics,
        selectSuitableFileSizes,
-- ** Accessors
        getSize,
        getPath,
-- ** Predicates
        hasSizeBy
) where

import                  Control.Arrow((&&&))
import qualified        Control.Monad
import qualified        Control.Monad.Writer
import qualified        Data.List
import qualified        Data.Ord
import qualified        Factory.Math.Statistics
import qualified        System.Directory
import                  System.FilePath((</>))
import qualified        System.IO
import qualified        ToolShed.Data.Foldable

-- | A type suitable for containing an arbitrary set of file-paths.
type FilePathList       = [System.IO.FilePath]

-- | A type-synonym specifically to hold file-sizes (in bytes).
type FileSize           = Integer       -- Matches the return-type of 'IO.hFileSize'.

-- | A type suitable for containing a file-path, qualified by the corresponding 'FileSize'.
type FileSizeAndPath    = (FileSize, System.IO.FilePath)

-- | Accessor.
getSize :: FileSizeAndPath -> FileSize
getSize = fst

-- | Accessor.
getPath :: FileSizeAndPath -> System.IO.FilePath
getPath = snd

-- | Sum the 'FileSize's in the specified list.
aggregateSize :: [FileSizeAndPath] -> FileSize
aggregateSize   = Data.List.foldl' (\acc (fileSize, _) -> acc + fileSize) 0

{- |
	* Returns the cumulative sequence of sizes, as each file is prepended to the specified list.

	* CAVEAT: the list-length is one greater than that supplied, since the last element represents the size with zero files.
-}
accumulateSize :: [FileSizeAndPath] -> [FileSize]
accumulateSize  = scanr ((+) . getSize) 0

{- |
	* Recursively descend the specified path, accumulating a list of files.

	* CAVEAT: all non-directory files are returned; devices, pipes, sockets, symlinks ...
-}
expandDirectory :: System.IO.FilePath -> IO FilePathList
expandDirectory filePath        = do
        directoryExists <- System.Directory.doesDirectoryExist filePath

        if directoryExists
                then System.Directory.getDirectoryContents filePath >>= fmap concat . mapM (
                        expandDirectory {-recurse-} . (filePath </>) {-qualify the path-}
                ) . filter (
                        `notElem` [".", ".."]   -- Prevent infinite recursion.
                )
                else {-non-directory-} return {-to IO-monad-} [filePath]        -- CAVEAT: this could include non-existent paths, devices, pipes, sockets, symlinks ...

{- |
	* Finds any file-paths which have been specified more than once.

	* This includes files which have been implicitly specified via a directory.
-}
findDuplicates :: FilePathList -> IO FilePathList
findDuplicates  = fmap (map head . filter ((> 1) . length) . ToolShed.Data.Foldable.gather . concat) . mapM expandDirectory

{- |
	* Get the size of a file, treating a directory as an atomic unit.

	* CAVEAT: the size of a symlink, is that of the file to which it refers.
-}
findSize :: System.IO.FilePath -> IO FileSize
findSize filePath       = expandDirectory filePath >>= fmap aggregateSize . mapM (\f -> flip (,) f `fmap` System.IO.withFile f System.IO.ReadMode System.IO.hFileSize)

-- | Finds file-sizes.
findSizes :: FilePathList -> IO [FileSizeAndPath]
findSizes       = uncurry fmap . (flip zip &&& mapM findSize)

-- | Sorts a list of 'FileSizeAndPath' by increasing size; ie. smallest first.
orderByIncreasingSize :: [FileSizeAndPath] -> [FileSizeAndPath]
orderByIncreasingSize   = Data.List.sortBy $ Data.Ord.comparing getSize

-- | Sorts a list of 'FileSizeAndPath' by decreasing size; ie. smallest first.
orderByDecreasingSize :: [FileSizeAndPath] -> [FileSizeAndPath]
orderByDecreasingSize   = reverse . orderByIncreasingSize

-- | True if the specified file has the required size according to the specified predicate.
hasSizeBy
        :: (FileSize -> Bool)   -- ^ The predicate.
        -> FileSizeAndPath      -- ^ The file-parameters to be tested.
        -> Bool
hasSizeBy predicate     = predicate . getSize

-- | Acquire statistics related to a list of files.
getFileSizeStatistics
        :: (Fractional mean, Floating standardDeviation)
        => [FileSizeAndPath]
        -> (Int, FileSize, mean, standardDeviation)     -- ^ (Number of components, Aggregate size, Mean size, Standard-deviation).
getFileSizeStatistics l = (
        length l,
        sum sizes,
        Factory.Math.Statistics.getMean sizes,
        Factory.Math.Statistics.getStandardDeviation sizes
 ) where
        sizes   = map getSize l

{- |
	* Partitions the specified list of file-sizes & paths, into those whose size is suitable according to the specified predicate & those which are unsuitable.

	* Logs the results.
-}
selectSuitableFileSizes :: (FileSize -> Bool) -> [FileSizeAndPath] -> Control.Monad.Writer.Writer [String] [FileSizeAndPath]
selectSuitableFileSizes predicate fileSizeAndPathList   = let
        (accepted, rejected)    = Data.List.partition (hasSizeBy predicate) fileSizeAndPathList
 in do
        Control.Monad.unless (null rejected) $ Control.Monad.Writer.tell ["WARNING: rejecting files of unsuitable size; " ++ show rejected ++ "."]

        return {-to Writer-monad-} accepted