{-# LANGUAGE OverloadedStrings #-}

-- |
-- Module      : HaskDeep.HashSet
-- Copyright   : Mauro Taraborelli 2012
-- License     : BSD3
--
-- Maintainer  : maurotaraborelli@gmail.com
-- Stability   : experimental
-- Portability : unknown
--
-- Computes hashes traversing recursively through a directory structure.
-- Uses a list of known hashes to audit a set of files.
--
-- Internal module.

module HaskDeep.HashSet
    (
     -- * The @HashSet@ type
     HashSet(..)
    ,setSymbol    -- Text -> HashSet -> HashSet
    ,empty        -- HashSet
    ,insert       -- HashInfo -> HashSet -> HashSet
    ,fromList     -- [HashInfo] -> HashSet
    ,toAscList    -- HashSet -> [HashInfo]
    ,audit        -- HashSet -> HashSet -> (HashSet, HashSet)

     -- * The @HashInfo@ type
    ,HashInfo(..)
    ,toByteString -- HashInfo -> ByteString
    )
where

import           Data.List (intercalate)

import           Data.ByteString (ByteString)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as B8
import           Data.Set (Set)
import qualified Data.Set as Set
import           Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE

-- | Information about the hashed file.
data HashInfo = HashInfo
    { file :: Text       -- ^ Relative file path
    , size :: Integer    -- ^ File size in bytes
    , hash :: ByteString -- ^ File hash
    } deriving (Eq, Ord)

-- | Renders the entry as @size,hash,file@ (comma separated, in that order).
instance Show HashInfo where
    show (HashInfo f s h) = intercalate "," [ show s
                                            , B8.unpack h
                                            , T.unpack f
                                            ]

-- | Serialize a @HashInfo@ as @size,hash,file@.
--
-- Same field order as the 'Show' instance; the file path is encoded as
-- UTF-8 and the hash bytes are emitted unchanged.
toByteString :: HashInfo -> ByteString
toByteString (HashInfo f s h) = BS.intercalate commaBS [ B8.pack (show s)
                                                       , h
                                                       , TE.encodeUtf8 f
                                                       ]

-- | Field separator used by 'toByteString'.
commaBS :: ByteString
commaBS = B8.singleton ','

-- | Hashed files.
data HashSet = HashSet
    { filesCount :: Integer      -- ^ Number of hashed files
    , sizeSum    :: Integer      -- ^ Total size in bytes of hashed files
    , compSymbol :: Text         -- ^ Computation mode symbol
    , hashSet    :: Set HashInfo -- ^ Hashes
    } deriving (Eq, Ord)

-- | Renders @"No files."@ when the files count is zero; otherwise a short
-- header (count, total size, computation symbol) followed by one line per
-- entry in ascending order.
instance Show HashSet where
    show (HashSet 0  _  _  _ ) = "No files."
    show (HashSet fc ss cs hs) = unlines [ "Files count  : " ++ show fc
                                         , "Files size   : " ++ show ss
                                         , "Computed with: " ++ T.unpack cs
                                         , "--"
                                         , (unlines . map show . Set.toAscList) hs
                                         ]

-- | Set computation mode symbol.
setSymbol :: Text -> HashSet -> HashSet
setSymbol cs hs = hs { compSymbol = cs }

-- | Create an empty @HashSet@.
empty :: HashSet
empty = HashSet 0 0 T.empty Set.empty

-- | Insert a @HashInfo@ into a @HashSet@.
--
-- Inserting an entry that is already present leaves the @HashSet@
-- unchanged, so 'filesCount' and 'sizeSum' always describe the contents of
-- 'hashSet' (the same result 'fromList' computes for a list with
-- duplicates).
insert :: HashInfo -> HashSet -> HashSet
insert hi hs
    -- The underlying set would silently drop the duplicate; the counters
    -- must not advance either.
    | hi `Set.member` hashSet hs = hs
    | otherwise                  = hs { filesCount = filesCount hs + 1
                                      , sizeSum    = sizeSum hs + size hi
                                      , hashSet    = Set.insert hi (hashSet hs)
                                      }

-- | Create a @HashSet@ from a list of @HashInfo@.
--
-- Duplicated entries are collapsed by the underlying set before the files
-- count and the total size are computed. The computation symbol is left
-- empty.
fromList :: [HashInfo] -> HashSet
fromList his = HashSet fc ss T.empty hs
    where
      hs = Set.fromList his
      fc = countFiles hs
      ss = sumSize hs

-- | Create an ordered list of @HashInfo@ from a @HashSet@.
toAscList :: HashSet -> [HashInfo]
toAscList (HashSet _ _ _ hs) = Set.toAscList hs

-- | Get the number of hashed files.
countFiles :: Set HashInfo -> Integer
countFiles = fromIntegral . Set.size

-- | Get the total size in bytes of hashed files.
sumSize :: Set HashInfo -> Integer
sumSize = Set.foldl' addSize 0
    where
      -- Strict left fold: the running total is forced at every step instead
      -- of building a chain of suspended additions.
      addSize acc hi = acc + size hi

-- | Compare two @HashSet@ and return the not matching @HashInfo@.
--
-- Not matching means all the HashInfo of the first HashSet not present in the second HashSet
-- and all the HashInfo of the second HashSet not present in the first HashSet.
audit
    :: HashSet            -- ^ First set; its computation symbol is kept on the first result
    -> HashSet            -- ^ Second set; its computation symbol is kept on the second result
    -> (HashSet, HashSet) -- ^ Entries only in the first set, entries only in the second set
audit (HashSet _ _ cs1 hs1) (HashSet _ _ cs2 hs2) =
    ( unmatched cs1 (hs1 `Set.difference` hs2)
    , unmatched cs2 (hs2 `Set.difference` hs1)
    )
    where
      -- Subtracting the other set directly gives the same result as first
      -- computing the intersection and subtracting that, without building
      -- the intermediate set.
      --
      -- The files count and total size are recomputed from the remaining
      -- entries so that each result describes only what did not match.
      unmatched cs hs = HashSet (countFiles hs) (sumSize hs) cs hs