{-# LANGUAGE CPP, MultiParamTypeClasses #-}
module Darcs.Util.Index
( readIndex
, updateIndexFrom
, indexFormatValid
, updateIndex
, listFileIDs
, Index
, filter
, getFileID
, align
) where
import Darcs.Prelude hiding ( readFile, writeFile, filter )
import Darcs.Util.ByteString ( readSegment, decodeLocale )
import qualified Darcs.Util.File ( getFileStatus )
import Darcs.Util.Hash( sha256, rawHash )
import Darcs.Util.Tree
import Darcs.Util.Path
( AnchoredPath
, anchorPath
, anchoredRoot
, Name
, rawMakeName
, appendPath
, flatten
)
import Control.Monad( when )
import Control.Exception( catch, throw, SomeException, Exception )
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as BC
import Data.ByteString.Unsafe( unsafeHead, unsafeDrop )
import Data.ByteString.Internal
( c2w
, fromForeignPtr
, memcpy
, nullForeignPtr
, toForeignPtr
)
import Data.Int( Int64, Int32 )
import Data.IORef( )
import Data.Maybe( fromJust, isJust, fromMaybe )
import Data.Typeable( Typeable )
import Foreign.Storable
import Foreign.ForeignPtr( ForeignPtr, withForeignPtr, castForeignPtr )
import Foreign.Ptr( Ptr, plusPtr )
import System.IO ( hPutStrLn, stderr )
import System.IO.MMap( mmapFileForeignPtr, Mode(..) )
import System.Directory( doesFileExist, getCurrentDirectory, doesDirectoryExist )
#if mingw32_HOST_OS
import System.Directory( renameFile )
import System.FilePath( (<.>) )
#else
import System.Directory( removeFile )
#endif
#ifdef WIN32
import System.Win32.File
( BY_HANDLE_FILE_INFORMATION(..)
, closeHandle
, createFile
, fILE_FLAG_BACKUP_SEMANTICS
, fILE_SHARE_NONE
, gENERIC_NONE
, getFileInformationByHandle
, oPEN_EXISTING
)
#else
import qualified System.Posix.Files as F ( getSymbolicLinkStatus, fileID )
#endif
import System.FilePath ( (</>) )
import qualified System.Posix.Files as F
( modificationTime, fileSize, isDirectory, isSymbolicLink
, FileStatus
)
import System.Posix.Types ( FileID, EpochTime, FileOffset )
data Item = Item { iBase :: !(Ptr ())
, iHashAndDescriptor :: !B.ByteString
} deriving Show
index_version :: B.ByteString
index_version = BC.pack "HSI6"
index_endianness_indicator :: Int32
index_endianness_indicator = 1
size_header, size_magic, size_endianness_indicator :: Int
size_magic = 4
size_endianness_indicator = 4
size_header = size_magic + size_endianness_indicator
size_dsclen, size_hash, size_size, size_aux, size_fileid :: Int
size_size = 8
size_aux = 8
size_fileid = 8
size_dsclen = 4
size_hash = 32
size_type, size_null :: Int
size_type = 1
size_null = 1
off_size, off_aux, off_hash, off_dsc, off_dsclen, off_fileid :: Int
off_size = 0
off_aux = off_size + size_size
off_fileid = off_aux + size_aux
off_dsclen = off_fileid + size_fileid
off_hash = off_dsclen + size_dsclen
off_dsc = off_hash + size_hash
itemAllocSize :: AnchoredPath -> Int
itemAllocSize apath = align 4 $
size_size + size_aux + size_fileid + size_dsclen + size_hash +
size_type + B.length (flatten apath) + size_null
itemSize, itemNext :: Item -> Int
itemSize i =
size_size + size_aux + size_fileid + size_dsclen +
(B.length $ iHashAndDescriptor i)
itemNext i = align 4 (itemSize i + 1)
iHash, iDescriptor :: Item -> B.ByteString
iDescriptor = unsafeDrop size_hash . iHashAndDescriptor
iHash = B.take size_hash . iHashAndDescriptor
iPath :: Item -> FilePath
iPath = decodeLocale . unsafeDrop 1 . iDescriptor
iSize, iAux :: Item -> Ptr Int64
iSize i = plusPtr (iBase i) off_size
iAux i = plusPtr (iBase i) off_aux
iFileID :: Item -> Ptr FileID
iFileID i = plusPtr (iBase i) off_fileid
itemIsDir :: Item -> Bool
itemIsDir i = unsafeHead (iDescriptor i) == c2w 'D'
type FileStatus = Maybe F.FileStatus
modificationTime :: FileStatus -> EpochTime
modificationTime = maybe 0 F.modificationTime
fileSize :: FileStatus -> FileOffset
fileSize = maybe 0 F.fileSize
fileExists :: FileStatus -> Bool
fileExists = maybe False (const True)
isDirectory :: FileStatus -> Bool
isDirectory = maybe False F.isDirectory
createItem :: ItemType -> AnchoredPath -> ForeignPtr () -> Int -> IO Item
createItem typ apath fp off = do
let dsc =
B.concat
[ BC.singleton $ if typ == TreeType then 'D' else 'F'
, flatten apath
, B.singleton 0
]
(dsc_fp, dsc_start, dsc_len) = toForeignPtr dsc
withForeignPtr fp $ \p ->
withForeignPtr dsc_fp $ \dsc_p -> do
fileid <- fromMaybe 0 <$> getFileID apath
pokeByteOff p (off + off_fileid) (fromIntegral fileid :: Int64)
pokeByteOff p (off + off_dsclen) (fromIntegral dsc_len :: Int32)
memcpy
(plusPtr p $ off + off_dsc)
(plusPtr dsc_p dsc_start)
(fromIntegral dsc_len)
peekItem fp off
peekItem :: ForeignPtr () -> Int -> IO Item
peekItem fp off =
withForeignPtr fp $ \p -> do
nl' :: Int32 <- peekByteOff p (off + off_dsclen)
when (nl' <= 2) $ fail "Descriptor too short in peekItem!"
let nl = fromIntegral nl'
dsc =
fromForeignPtr
(castForeignPtr fp)
(off + off_hash)
(size_hash + nl - 1)
return $! Item {iBase = plusPtr p off, iHashAndDescriptor = dsc}
updateItem :: Item -> Int64 -> Hash -> IO ()
updateItem item _ NoHash =
fail $ "Index.update NoHash: " ++ iPath item
updateItem item size hash =
do poke (iSize item) size
unsafePokeBS (iHash item) (rawHash hash)
updateFileID :: Item -> FileID -> IO ()
updateFileID item fileid = poke (iFileID item) $ fromIntegral fileid
updateAux :: Item -> Int64 -> IO ()
updateAux item aux = poke (iAux item) $ aux
updateTime :: forall a.(Enum a) => Item -> a -> IO ()
updateTime item mtime = updateAux item (fromIntegral $ fromEnum mtime)
iHash' :: Item -> Hash
iHash' i = SHA256 (iHash i)
mmapIndex :: forall a. FilePath -> Int -> IO (ForeignPtr a, Int)
mmapIndex indexpath req_size = do
act_size <- fromIntegral . fileSize <$> Darcs.Util.File.getFileStatus indexpath
let size = case req_size > 0 of
True -> req_size
False | act_size >= size_header -> act_size - size_header
| otherwise -> 0
case size of
0 -> return (castForeignPtr nullForeignPtr, size)
_ -> do (x, _, _) <- mmapFileForeignPtr indexpath
ReadWriteEx (Just (0, size + size_header))
return (x, size)
data IndexM m = Index { mmap :: (ForeignPtr ())
, basedir :: FilePath
, hashtree :: Tree m -> Hash
, predicate :: AnchoredPath -> TreeItem m -> Bool }
| EmptyIndex
type Index = IndexM IO
data State = State
{ dirlength :: !Int
, path :: !AnchoredPath
, start :: !Int
}
data Result = Result
{ changed :: !Bool
, next :: !Int
, treeitem :: !(Maybe (TreeItem IO))
, resitem :: !Item
}
readItem :: Index -> State -> IO Result
readItem index state = do
item <- peekItem (mmap index) (start state)
res' <- if itemIsDir item
then readDir index state item
else readFile index state item
return res'
data CorruptIndex = CorruptIndex String deriving (Eq, Typeable)
instance Exception CorruptIndex
instance Show CorruptIndex where show (CorruptIndex s) = s
nameof :: Item -> State -> Maybe Name
nameof item state
| iDescriptor item == BC.pack "D." = Nothing
| otherwise =
case rawMakeName $ B.drop (dirlength state + 1) $ iDescriptor item of
Left msg -> throw (CorruptIndex msg)
Right name -> Just name
maybeAppendName :: AnchoredPath -> Maybe Name -> AnchoredPath
maybeAppendName parent = maybe parent (parent `appendPath`)
substateof :: Item -> State -> State
substateof item state =
state
{ start = start state + itemNext item
, path = path state `maybeAppendName` myname
, dirlength =
case myname of
Nothing ->
dirlength state
Just _ ->
B.length (iDescriptor item)
}
where
myname = nameof item state
readDir :: Index -> State -> Item -> IO Result
readDir index state item = do
following <- fromIntegral <$> peek (iAux item)
st <- getFileStatus (iPath item)
let exists = fileExists st && isDirectory st
fileid <- fromIntegral <$> (peek $ iFileID item)
fileid' <- fromMaybe fileid <$> (getFileID' $ iPath item)
when (fileid == 0) $ updateFileID item fileid'
let substate = substateof item state
want = exists && (predicate index) (path substate) (Stub undefined NoHash)
oldhash = iHash' item
subs off =
case compare off following of
LT -> do
result <- readItem index $ substate { start = off }
rest <- subs $ next result
return $! (nameof (resitem result) substate, result) : rest
EQ -> return []
GT ->
fail $
"Offset mismatch at " ++ show off ++
" (ends at " ++ show following ++ ")"
inferiors <- if want then subs $ start substate
else return []
let we_changed = or [ changed x | (_, x) <- inferiors ] || nullleaf
nullleaf = null inferiors && oldhash == nullsha
nullsha = SHA256 (B.replicate 32 0)
tree' =
makeTree
[ (n, fromJust $ treeitem s)
| (Just n, s) <- inferiors, isJust $ treeitem s ]
treehash = if we_changed then hashtree index tree' else oldhash
tree = tree' { treeHash = treehash }
when (exists && we_changed) $ updateItem item 0 treehash
return $ Result { changed = not exists || we_changed
, next = following
, treeitem = if want then Just $ SubTree tree
else Nothing
, resitem = item }
readFile :: Index -> State -> Item -> IO Result
readFile index state item = do
st <- getFileStatus (iPath item)
mtime <- fromIntegral <$> (peek $ iAux item)
size <- peek $ iSize item
fileid <- fromIntegral <$> (peek $ iFileID item)
fileid' <- fromMaybe fileid <$> (getFileID' $ iPath item)
let mtime' = modificationTime st
size' = fromIntegral $ fileSize st
readblob = readSegment (basedir index </> (iPath item), Nothing)
exists = fileExists st && not (isDirectory st)
we_changed = mtime /= mtime' || size /= size'
hash = iHash' item
when (exists && we_changed) $
do hash' <- sha256 `fmap` readblob
updateItem item size' hash'
updateTime item mtime'
when (fileid == 0) $ updateFileID item fileid'
return $ Result { changed = not exists || we_changed
, next = start state + itemNext item
, treeitem = if exists then Just $ File $ Blob readblob hash else Nothing
, resitem = item }
data ResultF = ResultF
{ nextF :: !Int
, resitemF :: !Item
, _fileIDs :: [((AnchoredPath, ItemType), FileID)]
}
listFileIDs :: Index -> IO ([((AnchoredPath, ItemType), FileID)])
listFileIDs EmptyIndex = return []
listFileIDs index =
do let initial = State { start = size_header
, dirlength = 0
, path = anchoredRoot }
res <- readItemFileIDs index initial
return $ _fileIDs res
readItemFileIDs :: Index -> State -> IO ResultF
readItemFileIDs index state = do
item <- peekItem (mmap index) (start state)
res' <- if itemIsDir item
then readDirFileIDs index state item
else readFileFileID index state item
return res'
readDirFileIDs :: Index -> State -> Item -> IO ResultF
readDirFileIDs index state item =
do fileid <- fromIntegral <$> (peek $ iFileID item)
following <- fromIntegral <$> peek (iAux item)
let substate = substateof item state
subs off =
case compare off following of
LT -> do
result <- readItemFileIDs index $ substate {start = off}
rest <- subs $ nextF result
return $! (nameof (resitemF result) substate, result) : rest
EQ -> return []
GT ->
fail $
"Offset mismatch at " ++ show off ++
" (ends at " ++ show following ++ ")"
inferiors <- subs $ start substate
return $ ResultF { nextF = following
, resitemF = item
, _fileIDs = (((path substate, TreeType), fileid):concatMap (_fileIDs . snd) inferiors) }
readFileFileID :: Index -> State -> Item -> IO ResultF
readFileFileID _ state item =
do fileid' <- fromIntegral <$> (peek $ iFileID item)
let myname = nameof item state
return $ ResultF { nextF = start state + itemNext item
, resitemF = item
, _fileIDs = [((path state `maybeAppendName` myname, BlobType), fileid')] }
readIndex :: FilePath -> (Tree IO -> Hash) -> IO Index
readIndex indexpath ht = do
(mmap_ptr, mmap_size) <- mmapIndex indexpath 0
base <- getCurrentDirectory
return $ if mmap_size == 0 then EmptyIndex
else Index { mmap = mmap_ptr
, basedir = base
, hashtree = ht
, predicate = \_ _ -> True }
formatIndex :: ForeignPtr () -> Tree IO -> Tree IO -> IO ()
formatIndex mmap_ptr old reference =
do _ <- create (SubTree reference) (anchoredRoot) size_header
unsafePokeBS magic index_version
withForeignPtr mmap_ptr $ \ptr ->
pokeByteOff ptr size_magic index_endianness_indicator
where magic = fromForeignPtr (castForeignPtr mmap_ptr) 0 4
create (File _) path' off =
do i <- createItem BlobType path' mmap_ptr off
let flatpath = anchorPath "" path'
case find old path' of
Nothing -> return ()
Just ti -> do st <- getFileStatus flatpath
let hash = itemHash ti
mtime = modificationTime st
size = fileSize st
updateItem i (fromIntegral size) hash
updateTime i mtime
return $ off + itemNext i
create (SubTree s) path' off =
do i <- createItem TreeType path' mmap_ptr off
case find old path' of
Nothing -> return ()
Just ti | itemHash ti == NoHash -> return ()
| otherwise -> updateItem i 0 $ itemHash ti
let subs [] = return $ off + itemNext i
subs ((name,x):xs) = do
let path'' = path' `appendPath` name
noff <- subs xs
create x path'' noff
lastOff <- subs (listImmediate s)
poke (iAux i) (fromIntegral lastOff)
return lastOff
create (Stub _ _) path' _ =
fail $ "Cannot create index from stubbed Tree at " ++ show path'
updateIndexFrom :: FilePath -> (Tree IO -> Hash) -> Tree IO -> IO Index
updateIndexFrom indexpath hashtree' ref =
do old_idx <- updateIndex =<< readIndex indexpath hashtree'
reference <- expand ref
let len_root = itemAllocSize anchoredRoot
len = len_root + sum [ itemAllocSize p | (p, _) <- list reference ]
exist <- doesFileExist indexpath
#if mingw32_HOST_OS
when exist $ renameFile indexpath (indexpath <.> "old")
#else
when exist $ removeFile indexpath
#endif
(mmap_ptr, _) <- mmapIndex indexpath len
formatIndex mmap_ptr old_idx reference
readIndex indexpath hashtree'
updateIndex :: Index -> IO (Tree IO)
updateIndex EmptyIndex = return emptyTree
updateIndex index =
do let initial = State { start = size_header
, dirlength = 0
, path = anchoredRoot }
res <- readItem index initial
case treeitem res of
Just (SubTree tree) -> return $ filter (predicate index) tree
_ -> fail "Unexpected failure in updateIndex!"
indexFormatValid :: FilePath -> IO Bool
indexFormatValid path' =
do
(start, _, _) <- mmapFileForeignPtr path' ReadOnly (Just (0, size_header))
let magic = fromForeignPtr (castForeignPtr start) 0 4
endianness_indicator <- withForeignPtr start $ \ptr -> peekByteOff ptr 4
return $
index_version == magic && index_endianness_indicator == endianness_indicator
`catch` \(_::SomeException) -> return False
instance FilterTree IndexM IO where
filter _ EmptyIndex = EmptyIndex
filter p index = index { predicate = \a b -> predicate index a b && p a b }
getFileID :: AnchoredPath -> IO (Maybe FileID)
getFileID = getFileID' . anchorPath ""
getFileID' :: FilePath -> IO (Maybe FileID)
getFileID' fp = do
file_exists <- doesFileExist fp
dir_exists <- doesDirectoryExist fp
if file_exists || dir_exists
#ifdef WIN32
then do
h <-
createFile fp gENERIC_NONE fILE_SHARE_NONE Nothing
oPEN_EXISTING fILE_FLAG_BACKUP_SEMANTICS Nothing
fhnumber <-
(Just . fromIntegral . bhfiFileIndex) <$> getFileInformationByHandle h
closeHandle h
return fhnumber
#else
then (Just . F.fileID) <$> F.getSymbolicLinkStatus fp
#endif
else return Nothing
unsafePokeBS :: BC.ByteString -> BC.ByteString -> IO ()
unsafePokeBS to from =
do let (fp_to, off_to, len_to) = toForeignPtr to
(fp_from, off_from, len_from) = toForeignPtr from
when (len_to /= len_from) $ fail $ "Length mismatch in unsafePokeBS: from = "
++ show len_from ++ " /= to = " ++ show len_to
withForeignPtr fp_from $ \p_from ->
withForeignPtr fp_to $ \p_to ->
memcpy (plusPtr p_to off_to)
(plusPtr p_from off_from)
(fromIntegral len_to)
align :: Integral a => a -> a -> a
align boundary i = case i `rem` boundary of
0 -> i
x -> i + boundary - x
{-# INLINE align #-}
getFileStatus :: FilePath -> IO FileStatus
getFileStatus path = do
mst <- Darcs.Util.File.getFileStatus path
case mst of
Just st
| F.isSymbolicLink st -> do
hPutStrLn stderr $ "Warning: ignoring symbolic link " ++ path
return Nothing
_ -> return mst