-- | Profiles a collection of file-sizes: bins them into either a frequency-distribution or a probability mass function,
-- and renders the resulting distribution as text.
module FishFood.Profiler(
calculateFileSizeDistribution,
formatFileSizeDistribution,
getFileSize,
getValue
) where
import Control.Arrow((&&&),(***))
import qualified Control.Monad.Writer
import qualified Data.Default
import qualified Data.List
import qualified Data.Map
import qualified Data.Maybe
import qualified FishFood.Data.CommandOptions as Data.CommandOptions
import qualified FishFood.Data.File as Data.File
import FishFood.Data.Verbosity()
import qualified Text.Printf
-- | The fraction of the files which fall into one bin; see 'calculateFileSizeDistribution'.
type Probability = Double
-- | One bin of the distribution: the bin's file-size, paired with either the number of files in it (@Left@) or their probability (@Right@).
type Result = (Data.File.FileSize, Either Int Probability)
-- | Accessor: the file-size which labels the bin.
getFileSize :: Result -> Data.File.FileSize
getFileSize (fileSize, _) = fileSize
-- | Accessor: the frequency (@Left@) or probability (@Right@) recorded for the bin.
getValue :: Result -> Either Int Probability
getValue (_, value) = value
-- | The 'Result's for all the bins; 'calculateFileSizeDistribution' produces them in ascending order of bin-size.
type FileSizeDistribution = [Result]
-- | Bin the specified file-sizes, & count the files (or derive their probability) in each bin.
--
-- * The bin-size delta selects the binning: @Left@ requests bins growing by a constant increment
--   (which, when unspecified, defaults to the rounded standard-deviation, but never less than one),
--   whereas @Right@ requests bins growing by a constant ratio.
--
-- * Writes one log-line containing the number of files, together with the mean & standard-deviation of their sizes.
--
-- * Returns one 'Result' per bin, in ascending order of bin-size;
--   the value is @Right probability@ when the probability mass function was requested, otherwise @Left frequency@.
--
-- * When all the files have the same size (zero standard-deviation), a single bin labelled with that size is returned.
--
-- * When no file-sizes are supplied, an empty distribution is returned, rather than failing in 'head', 'minimum' or 'maximum'.
--   NOTE(review): the log-line is still derived from 'Data.File.getFileSizeStatistics', whose behaviour for an empty list isn't visible here.
calculateFileSizeDistribution :: (Floating ratio, RealFrac ratio) => Data.CommandOptions.CommandOptions ratio -> [Data.File.FileSize] -> Control.Monad.Writer.Writer [String] FileSizeDistribution
calculateFileSizeDistribution commandOptions fileSizes = do
    Control.Monad.Writer.tell [Text.Printf.printf "Files=%d, mean=%.*f, standard-deviation=%.*f" nFiles nDecimalDigits mean nDecimalDigits standardDeviation]

    return $ case fileSizes of
        [] -> [] -- There's nothing to profile.
        firstFileSize : _
            | standardDeviation == 0 -> [ -- All the files share one size, so there's exactly one bin.
                (firstFileSize, if deriveProbabilityMassFunction then Right 1 else Left nFiles)
              ]
            | otherwise -> binnedDistribution
  where
    binSizeDelta = Data.CommandOptions.getBinSizeDelta commandOptions
    deriveProbabilityMassFunction = Data.CommandOptions.getDeriveProbabilityMassFunction commandOptions
    nDecimalDigits = Data.CommandOptions.getNDecimalDigits commandOptions

    mean, standardDeviation :: Double
    (nFiles, mean, standardDeviation) = Data.File.getFileSizeStatistics fileSizes

    -- The increment between linear bins; defaults to the rounded standard-deviation, but is at least one.
    getDefaultedBinSizeIncrement :: Maybe Data.File.FileSize -> Data.File.FileSize
    getDefaultedBinSizeIncrement = Data.Maybe.fromMaybe $ round standardDeviation `max` 1

    -- The bin into which a file falls, identified by an index (linear bins) or an exponent (geometric bins).
    -- NOTE(review): a zero file-size makes the logarithm -Infinity; confirm how 'floor' then behaves for the file-size type.
    toBinIndex :: Data.File.FileSize -> Data.File.FileSize
    toBinIndex fileSize = either
        (div fileSize . getDefaultedBinSizeIncrement)
        (floor . (`logBase` fromIntegral fileSize))
        binSizeDelta

    calculatedBinSizes :: [Data.File.FileSize]
    calculatedBinSizes = map toBinIndex fileSizes

    -- Seeds the frequency-distribution with zero counts, so that some unpopulated bins are still reported.
    -- NOTE(review): the candidate keys are generated as multiples of the increment (or rounded powers of the ratio),
    -- yet they're bounded by, & later merged with, the bin indices (or exponents) from 'calculatedBinSizes';
    -- confirm that this is the intended set of zero-count bins.
    initialFrequencyDistribution :: Data.Map.Map Data.File.FileSize Int
    initialFrequencyDistribution = Data.Map.fromAscList
        . (`zip` repeat 0)
        . takeWhile (<= maximum calculatedBinSizes)
        . dropWhile (< minimum calculatedBinSizes)
        $ either
            (\maybeBinSizeIncrement -> iterate (+ getDefaultedBinSizeIncrement maybeBinSizeIncrement) 0)
            (\binRatio -> map round $ iterate (* binRatio) 1)
            binSizeDelta

    -- Converts each key from a bin index (or exponent) back into the file-size which labels that bin.
    mapBinSizeToFileSize :: Data.Map.Map Data.File.FileSize value -> Data.Map.Map Data.File.FileSize value
    mapBinSizeToFileSize = Data.Map.mapKeys $ \binSize -> either
        ((* binSize) . getDefaultedBinSizeIncrement)
        (ceiling . (^^ binSize))
        binSizeDelta

    -- Counts the files in each bin, then presents the counts either as frequencies or as probabilities.
    binnedDistribution :: FileSizeDistribution
    binnedDistribution = Data.Map.toList . (
        if deriveProbabilityMassFunction
            then Data.Map.map Right . mapBinSizeToFileSize . Data.Map.map ((/ fromIntegral nFiles) . fromIntegral)
            else Data.Map.map Left . mapBinSizeToFileSize
     ) $ foldr (
        \binIndex -> Data.Map.insertWith (+) binIndex 1
     ) initialFrequencyDistribution calculatedBinSizes
-- | Render the distribution as text, one bin per line, with the bin-size & its value separated by a space.
--
-- * The bin-size is right-justified in a column at least ten characters wide.
--
-- * A frequency is right-justified to the width of the value-column's header,
--   whereas a probability is printed to the configured number of decimal digits, without any padding.
--
-- * When the verbosity exceeds its default, the rows are preceded by a header-row & a row of @=@ underlining it.
formatFileSizeDistribution :: Data.CommandOptions.CommandOptions ratio -> FileSizeDistribution -> String
formatFileSizeDistribution commandOptions distribution = Data.List.intercalate "\n" [
    fileSizeColumn ++ " " ++ valueColumn |
        (fileSizeColumn, valueColumn) <- headingRows ++ map formatRow distribution
 ]
  where
    nDecimalDigits = Data.CommandOptions.getNDecimalDigits commandOptions

    fileSizeHeader, valueHeader :: String
    fileSizeHeader = "Bin-size"
    valueHeader
        | Data.CommandOptions.getDeriveProbabilityMassFunction commandOptions = "Probability"
        | otherwise = "Frequency"

    -- Each column is as wide as its header, though the bin-size column is never narrower than ten characters.
    fileSizeWidth, valueWidth :: Int
    fileSizeWidth = length fileSizeHeader `max` 10
    valueWidth = length valueHeader

    -- The optional header & its underline, each already split into the two columns.
    headingRows :: [(String, String)]
    headingRows
        | Data.CommandOptions.getVerbosity commandOptions > Data.Default.def = [
            (Text.Printf.printf "%*s" fileSizeWidth fileSizeHeader, Text.Printf.printf "%*s" valueWidth valueHeader),
            (replicate fileSizeWidth '=', replicate valueWidth '=')
          ]
        | otherwise = []

    -- Format the two columns of one bin.
    formatRow :: Result -> (String, String)
    formatRow (fileSize, value) = (
        Text.Printf.printf "%*d" fileSizeWidth fileSize,
        either (Text.Printf.printf "%*d" valueWidth) (Text.Printf.printf "%.*f" nDecimalDigits) value
     )