module Storage.Hashed.Packed
( Format(..), Block, OS
, hatch, compact, repack, lookup
, create, load
, format, blockLookup, live, hatchery, mature, roots, references, rootdir
) where
import Prelude hiding ( lookup, read )
import Storage.Hashed.AnchoredPath( )
import Storage.Hashed.Tree ( )
import Storage.Hashed.Utils
import Storage.Hashed.Hash
import Control.Monad( forM, forM_, unless )
import Control.Applicative( (<$>) )
import System.FilePath( (</>), (<.>) )
import System.Directory( createDirectoryIfMissing, removeFile
, getDirectoryContents )
import Bundled.Posix( fileExists, isDirectory, getFileStatus )
import qualified Data.ByteString.Lazy.Char8 as BL
import qualified Data.ByteString.Char8 as BS
import Data.Maybe( listToMaybe, catMaybes, isNothing )
import Data.Binary( encode, decode )
import qualified Data.Set as S
import qualified Data.Map as M
import Data.List( sort )
import Data.Int( Int64 )
data Format = Loose | Compact | Pack deriving (Show, Eq)
loose_dirs :: [[Char]]
loose_dirs = let chars = ['0'..'9'] ++ ['a'..'f']
in [ [a,b] | a <- chars, b <- chars ]
loosePath :: OS -> Hash -> FilePath
loosePath _ NoHash = error "No path for NoHash!"
loosePath os hash =
let hash' = BS.unpack (encodeBase16 hash)
in rootdir os </> "hatchery" </> take 2 hash' </> drop 2 hash'
looseLookup :: OS -> Hash -> IO (Maybe FileSegment)
looseLookup _ NoHash = return Nothing
looseLookup os hash = do
let path = loosePath os hash
exist <- fileExists <$> getFileStatus path
return $ if exist then Just (path, Nothing)
else Nothing
data Block = Block { blockLookup :: Hash -> IO (Maybe FileSegment)
, size :: Int64
, format :: Format }
data OS = OS { hatchery :: Block
, mature :: [Block]
, roots :: [Hash]
, references :: FileSegment -> IO [Hash]
, rootdir :: FilePath }
repack :: OS -> IO OS
repack _ = error "repack undefined"
hatch :: OS -> [BL.ByteString] -> IO OS
hatch os blobs =
do processed <- mapM sieve blobs
write [ (h, b) | (True, h, b) <- processed ]
where write bits =
case format (hatchery os) of
Loose ->
do forM bits $ \(hash, blob) -> do
BL.writeFile (loosePath os hash) blob
return os
Compact -> error "hatch/compact undefined"
_ -> fail "Hatchery must be either Loose or Compact."
sieve blob = do let hash = sha256 blob
absent <- isNothing <$> lookup os hash
return (absent, hash, blob)
compact :: OS -> IO OS
compact os = do objects <- live os [hatchery os]
block <- createPack os (M.toList objects)
cleanup
return $ os { mature = block:mature os }
where cleanup =
case format (hatchery os) of
Loose -> forM_ loose_dirs $ nuke . ((rootdir os </> "hatchery") </>)
Compact -> removeFile (rootdir os </> "hatchery") >> return ()
_ -> fail "Hatchery must be either Loose or Compact."
nuke dir = mapM (removeFile . (dir </>)) =<<
(Prelude.filter (`notElem` [".", ".."]) `fmap`
getDirectoryContents dir)
blocksLookup :: [Block] -> Hash -> IO (Maybe (Hash, FileSegment))
blocksLookup blocks hash =
do segment <- cat `fmap` mapM (flip blockLookup hash) blocks
return $ case segment of
Nothing -> Nothing
Just seg -> Just (hash, seg)
where cat = listToMaybe . catMaybes
lookup :: OS -> Hash -> IO (Maybe FileSegment)
lookup os hash =
do res <- blocksLookup (hatchery os : mature os) hash
return $ case res of
Nothing -> Nothing
Just (_, seg) -> Just seg
create :: FilePath -> Format -> IO OS
create path fmt = do createDirectoryIfMissing True path
initHatchery
load path
where initHatchery | fmt == Loose =
do mkdir hatchpath
forM loose_dirs $ mkdir . (hatchpath </>)
| fmt == Compact =
error "create/mkHatchery Compact undefined"
mkdir = createDirectoryIfMissing False
hatchpath = path </> "hatchery"
load :: FilePath -> IO OS
load path =
do hatch_stat <- getFileStatus $ path </> "hatchery"
let is_os = fileExists hatch_stat
is_dir = isDirectory hatch_stat
unless is_os $ fail $ path ++ " is not an object storage!"
let _hatchery = Block { blockLookup = look os
, format = if is_dir then Loose else Compact
, size = undefined }
os = OS { hatchery = _hatchery
, rootdir = path
, mature = packs
, roots = _roots
, references = undefined }
look | format _hatchery == Loose = looseLookup
| otherwise = undefined
packs = []
_roots = []
return os
readPack :: FilePath -> IO Block
readPack file = do bits <- readSegment (file, Nothing)
let count = decode (BL.take 8 $ bits)
_lookup NoHash _ _ = return Nothing
_lookup hash@(SHA256 rawhash) first final = do
let middle = first + ((final first) `div` 2)
res <- case ( compare rawhash (hashof first)
, compare rawhash (hashof middle)
, compare rawhash (hashof final) ) of
(LT, _, _) -> return Nothing
( _, _, GT) -> return Nothing
(EQ, _, _) -> return $ Just (segof first)
( _, _, EQ) -> return $ Just (segof final)
(GT, EQ, LT) -> return $ Just (segof middle)
(GT, GT, LT) | middle /= final -> _lookup hash middle final
(GT, LT, LT) | first /= middle -> _lookup hash first middle
( _, _, _) -> return Nothing
return res
headerof i = BL.take 51 $ BL.drop (8 + i * 51) bits
hashof i = BS.concat $ BL.toChunks $ BL.take 32 $ headerof i
segof i = (file, Just (count * 51 + 8 + from, sz))
where from = decode (BL.take 8 $ BL.drop 33 $ headerof i)
sz = decode (BL.take 8 $ BL.drop 42 $ headerof i)
return $ Block { size = BL.length bits
, format = Pack
, blockLookup = \h -> _lookup h 0 (count 1) }
createPack :: OS -> [(Hash, FileSegment)] -> IO Block
createPack os bits =
do contents <- mapM readSegment (map snd bits)
let offsets = scanl (+) 0 $ map BL.length contents
headerbits = [ BL.concat [ BL.fromChunks [rawhash]
, BL.pack "@"
, encode offset
, BL.pack "!"
, encode $ BL.length string
, BL.pack "\n" ]
| (SHA256 rawhash, _) <- bits
| string <- contents
| offset <- offsets ]
header = BL.concat $ (encode $ length bits) : sort headerbits
blob = BL.concat $ header:contents
hash = sha256 blob
path = rootdir os </> BS.unpack (encodeBase16 hash) <.> "bin"
BL.writeFile path blob
readPack path
live :: OS -> [Block] -> IO (M.Map Hash FileSegment)
live os blocks =
reachable (references os)
(blocksLookup blocks)
(S.fromList $ roots os)