{-# LANGUAGE LambdaCase #-}

module Data.Acid.Repair
  ( repairFile
  , repairEvents
  , repairCheckpoints
  ) where

import qualified Data.Acid.Archive as Archive
import Data.Acid.Local (mkEventsLogKey, mkCheckpointsLogKey)
import Data.Acid.Log (LogKey)
import qualified Data.Acid.Log as Log
import qualified Data.ByteString.Lazy as Lazy
import Data.List (sort)
import System.Directory
import System.FilePath.Posix
import System.IO (hClose, openTempFile)

repairEntries :: Lazy.ByteString -> Lazy.ByteString
repairEntries :: ByteString -> ByteString
repairEntries =
  [ByteString] -> ByteString
Archive.packEntries ([ByteString] -> ByteString)
-> (ByteString -> [ByteString]) -> ByteString -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Entries -> [ByteString]
Archive.entriesToListNoFail (Entries -> [ByteString])
-> (ByteString -> Entries) -> ByteString -> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Entries
Archive.readEntries

-- | @'repairFile' path@ will truncate the entries in @file@ until there are
-- only valid entries (if a corrupted entry is found, then the rest of the file
-- is truncated).
--
-- The old file will be copied to @path.bak@ (or @path.bak.1@, etc… if the file
-- already exists).
--
-- 'repairFile' tries very hard to avoid leaving files in an inconsistent state:
-- the truncated file is written in a temporary file, which is then moved into
-- place, similarly copies are performed with moves instead. Still this is not
-- fully atomic: there are two consecutive moves, so 'repairFile' may, in case
-- of crash, yield a state where the @path.bak@ file is there but no @path@ is
-- there anymore, this would require manual intervention.
repairFile :: FilePath -> IO ()
repairFile :: FilePath -> IO ()
repairFile FilePath
fp = do
    ByteString
broken <- FilePath -> IO ByteString
Lazy.readFile FilePath
fp
    let repaired :: ByteString
repaired = ByteString -> ByteString
repairEntries ByteString
broken
    (FilePath
tmp, Handle
temph) <- FilePath -> FilePath -> IO (FilePath, Handle)
openTempFile (FilePath -> FilePath
takeDirectory FilePath
fp) (FilePath -> FilePath
takeFileName FilePath
fp)
      -- We use `openTempFile`, here, rather than `findNext` because we want to
      -- make extra-sure that we are not overriding an important file.
    Handle -> IO ()
hClose Handle
temph
      -- Closing immediately to benefit from the bracket guarantees of
      -- `writeFile`. A more elegant solution would be to use a `withTempFile`
      -- function, such as that from package `temporary`.
    FilePath -> ByteString -> IO ()
Lazy.writeFile FilePath
tmp ByteString
repaired
    FilePath -> IO ()
dropFile FilePath
fp
    FilePath -> FilePath -> IO ()
renameFile FilePath
tmp FilePath
fp

-- Repairs the files corresponding to the given 'LogKey'. It implements the
-- logic described in 'repairEvents'.
repairLogs :: LogKey object -> IO ()
repairLogs :: forall object. LogKey object -> IO ()
repairLogs LogKey object
identifier = do
    [(Int, FilePath)]
logFiles <- LogKey object -> IO [(Int, FilePath)]
forall object. LogKey object -> IO [(Int, FilePath)]
Log.findLogFiles LogKey object
identifier
    let sorted :: [(Int, FilePath)]
sorted = [(Int, FilePath)] -> [(Int, FilePath)]
forall a. Ord a => [a] -> [a]
sort [(Int, FilePath)]
logFiles
        ([Int]
_eventIds, [FilePath]
files) = [(Int, FilePath)] -> ([Int], [FilePath])
forall a b. [(a, b)] -> ([a], [b])
unzip [(Int, FilePath)]
sorted
    [Bool]
broken_files <- (FilePath -> IO Bool) -> [FilePath] -> IO [Bool]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM FilePath -> IO Bool
needsRepair [FilePath]
files
      -- We're doing a second deserialisation of the files here (see
      -- 'needsRepair'). It would be better, computation-time-wise to make as
      -- single pass and let `repairEntries`, for instance, return whether a fix
      -- is needed. But it's a lot of complication and requires loading the
      -- entire base in memory, rather than streaming files one-by-one. So it's
      -- better to just do the second pass.
    [FilePath] -> IO ()
repair ([FilePath] -> IO ()) -> [FilePath] -> IO ()
forall a b. (a -> b) -> a -> b
$ ((Bool, FilePath) -> FilePath) -> [(Bool, FilePath)] -> [FilePath]
forall a b. (a -> b) -> [a] -> [b]
map (Bool, FilePath) -> FilePath
forall a b. (a, b) -> b
snd ([(Bool, FilePath)] -> [FilePath])
-> [(Bool, FilePath)] -> [FilePath]
forall a b. (a -> b) -> a -> b
$ ((Bool, FilePath) -> Bool)
-> [(Bool, FilePath)] -> [(Bool, FilePath)]
forall a. (a -> Bool) -> [a] -> [a]
dropWhile (\(Bool
b,FilePath
_) -> Bool -> Bool
not Bool
b) ([(Bool, FilePath)] -> [(Bool, FilePath)])
-> [(Bool, FilePath)] -> [(Bool, FilePath)]
forall a b. (a -> b) -> a -> b
$ [Bool] -> [FilePath] -> [(Bool, FilePath)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Bool]
broken_files [FilePath]
files
  where
    repair :: [FilePath] -> IO ()
repair [] = () -> IO ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
    repair (FilePath
file:[FilePath]
rest) = do
      (FilePath -> IO ()) -> [FilePath] -> IO ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ FilePath -> IO ()
dropFile ([FilePath] -> [FilePath]
forall a. [a] -> [a]
reverse [FilePath]
rest)
      FilePath -> IO ()
repairFile FilePath
file

-- Moves (atomically) a file `path` to `path.bak` (or `path.bak.1`, etc… if the
-- file already exists).
dropFile :: FilePath -> IO ()
dropFile :: FilePath -> IO ()
dropFile FilePath
fp = do
    FilePath
bak <- FilePath -> IO FilePath
findNext (FilePath
fp FilePath -> FilePath -> FilePath
forall a. [a] -> [a] -> [a]
++ FilePath
".bak")
      -- We're using `findNext` rather than `openTempFile`, here, because we
      -- want predictable names
    FilePath -> FilePath -> IO ()
renameFile FilePath
fp FilePath
bak

-- | Repairs the WAL files with the following strategy:
--
-- * Let `f` be the oldest corrupted file.
-- * All files older than `f` is left untouched
-- * `f` is repaired with `repairFile`
-- * Old files younger than `f` is dropped (and saved to `path.bak`, or
--   `path.bak.1`, etc…)
--
-- In other words, all the log entries after the first corrupted entry is
-- dropped. The reasoning is that newer entries are likely not to make sense
-- after some entries have been removed from the log. This strategy guarantees a
-- consistent state, albeit a potentially old one.
repairEvents
  :: FilePath -- ^ Directory in which the events files can be found.
  -> IO ()
repairEvents :: FilePath -> IO ()
repairEvents FilePath
directory =
    LogKey (Tagged ByteString) -> IO ()
forall object. LogKey object -> IO ()
repairLogs (FilePath -> SerialisationLayer Any -> LogKey (Tagged ByteString)
forall object.
FilePath -> SerialisationLayer object -> LogKey (Tagged ByteString)
mkEventsLogKey FilePath
directory SerialisationLayer Any
forall {a}. a
noserialisation)
  where
    noserialisation :: a
noserialisation =
      FilePath -> a
forall a. HasCallStack => FilePath -> a
error FilePath
"Repair.repairEvents: the serialisation layer shouldn't be forced"

-- | Repairs the checkpoints file using the following strategy:
--
-- * Every checkpoints file is repaired with `repairFile`
--
-- Checkpoints are mostly independent. Contrary to 'repairEvents', dropping a
-- checkpoint doesn't affect the consistency of later checkpoints.
repairCheckpoints
  :: FilePath -- ^ Directory in which the checkpoints files can be found.
  -> IO ()
repairCheckpoints :: FilePath -> IO ()
repairCheckpoints FilePath
directory = do
    let checkpointLogKey :: LogKey (Checkpoint object)
checkpointLogKey = FilePath -> SerialisationLayer object -> LogKey (Checkpoint object)
forall object.
FilePath -> SerialisationLayer object -> LogKey (Checkpoint object)
mkCheckpointsLogKey FilePath
directory SerialisationLayer object
forall {a}. a
noserialisation
    [(Int, FilePath)]
checkpointFiles <- LogKey (Checkpoint Any) -> IO [(Int, FilePath)]
forall object. LogKey object -> IO [(Int, FilePath)]
Log.findLogFiles LogKey (Checkpoint Any)
forall {object}. LogKey (Checkpoint object)
checkpointLogKey
    let ([Int]
_eventIds, [FilePath]
files) = [(Int, FilePath)] -> ([Int], [FilePath])
forall a b. [(a, b)] -> ([a], [b])
unzip [(Int, FilePath)]
checkpointFiles
    (FilePath -> IO ()) -> [FilePath] -> IO ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ FilePath -> IO ()
repairFile [FilePath]
files
  where
    noserialisation :: a
noserialisation =
      FilePath -> a
forall a. HasCallStack => FilePath -> a
error FilePath
"Repair.repairCheckpoints: the serialisation layer shouldn't be forced"

needsRepair :: FilePath -> IO Bool
needsRepair :: FilePath -> IO Bool
needsRepair FilePath
fp = do
    ByteString
contents <- FilePath -> IO ByteString
Lazy.readFile FilePath
fp
    let entries :: Entries
entries = ByteString -> Entries
Archive.readEntries ByteString
contents
    Bool -> IO Bool
forall (m :: * -> *) a. Monad m => a -> m a
return (Bool -> IO Bool) -> Bool -> IO Bool
forall a b. (a -> b) -> a -> b
$ Entries -> Bool
entriesNeedRepair Entries
entries
  where
    entriesNeedRepair :: Entries -> Bool
entriesNeedRepair Archive.Fail{} = Bool
True
    entriesNeedRepair Entries
Archive.Done = Bool
False
    entriesNeedRepair (Archive.Next ByteString
_ Entries
rest) = Entries -> Bool
entriesNeedRepair Entries
rest

findNext :: FilePath -> IO (FilePath)
findNext :: FilePath -> IO FilePath
findNext FilePath
fp = Int -> IO FilePath
go Int
0
  where
    go :: Int -> IO FilePath
go Int
n =
      let next :: FilePath
next = FilePath -> Int -> FilePath
fileWithSuffix FilePath
fp Int
n in
      FilePath -> IO Bool
doesFileExist FilePath
next IO Bool -> (Bool -> IO FilePath) -> IO FilePath
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
        Bool
False -> FilePath -> IO FilePath
forall (m :: * -> *) a. Monad m => a -> m a
return FilePath
next
        Bool
True -> Int -> IO FilePath
go (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)

fileWithSuffix :: FilePath -> Int -> FilePath
fileWithSuffix :: FilePath -> Int -> FilePath
fileWithSuffix FilePath
fp Int
i =
  if Int
i Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 then FilePath
fp
  else FilePath
fp FilePath -> FilePath -> FilePath
forall a. [a] -> [a] -> [a]
++ FilePath
"." FilePath -> FilePath -> FilePath
forall a. [a] -> [a] -> [a]
++ Int -> FilePath
forall a. Show a => a -> FilePath
show Int
i