{-# LANGUAGE DoAndIfThenElse #-}
{-
Format:
 |content length| crc16   | content |
 |8 bytes       | 2 bytes | n bytes |
-}
module Data.Acid.Archive
    ( Entry
    , Entries(..)
    , putEntries
    , packEntries
    , readEntries
    , entriesToList
    , entriesToListNoFail
    , Archiver(..)
    , defaultArchiver
    ) where

import           Data.Acid.CRC

import qualified Data.ByteString        as Strict
import qualified Data.ByteString.Lazy   as Lazy
import           Data.ByteString.Builder
import           Data.Monoid
import           Data.Serialize.Get     hiding (Result (..))
import qualified Data.Serialize.Get     as Serialize

-- | A bytestring that represents an entry in an archive.
type Entry = Lazy.ByteString

-- | Result of unpacking an archive.  This is essentially a list of
-- 'Entry', but may terminate in 'Fail' if the archive format is
-- incorrect.
data Entries = Done | Next Entry Entries | Fail String
    deriving (Show)

-- | Convert 'Entries' to a normal list, calling 'error' if there was
-- a failure in unpacking the archive.
entriesToList :: Entries -> [Entry]
entriesToList Done              = []
entriesToList (Next entry next) = entry : entriesToList next
entriesToList (Fail msg)        = error $ "Data.Acid.Archive: " <> msg

-- | Convert 'Entries' to a normal list, silently ignoring a failure
-- to unpack the archive and instead returning a truncated list.
entriesToListNoFail :: Entries -> [Entry]
entriesToListNoFail Done              = []
entriesToListNoFail (Next entry next) = entry : entriesToListNoFail next
entriesToListNoFail Fail{}            = []


-- | Interface for the lowest level of the serialisation layer, which
-- handles packing lists of 'Entry' elements (essentially just
-- bytestrings) into a single bytestring, perhaps with error-checking.
--
-- Any @'Archiver'{'archiveWrite', 'archiveRead'}@ must satisfy the
-- round-trip property:
--
-- > forall xs . entriesToList (archiveRead (archiveWrite xs)) == xs
--
-- Moreover, 'archiveWrite' must be a monoid homomorphism, so that
-- concatenating archives is equivalent to concatenating the lists of
-- entries that they represent:
--
-- > archiveWrite [] == empty
-- > forall xs ys . archiveWrite xs <> archiveWrite ys == archiveWrite (xs ++ ys)
data Archiver
    = Archiver
      { archiveWrite :: [Entry] -> Lazy.ByteString
        -- ^ Pack a list of entries into a bytestring.

      , archiveRead  :: Lazy.ByteString -> Entries
        -- ^ Unpack a bytestring as a list of 'Entries', including the
        -- possibility of failure if the format is invalid.
      }

-- | Standard (and historically the only) implementation of the
-- 'Archiver' interface.  This represents each entry in the following
-- format:
--
-- > | entry length | crc16   | entry   |
-- > | 8 bytes      | 2 bytes | n bytes |
defaultArchiver :: Archiver
defaultArchiver = Archiver packEntries readEntries


putEntry :: Entry -> Builder
putEntry content
    = word64LE contentLength !<>
      word16LE contentHash !<>
      lazyByteString content
    where contentLength = fromIntegral $ Lazy.length content
          contentHash   = crc16 content
          a !<> b = let c = a <> b in c `seq` c

putEntries :: [Entry] -> Builder
putEntries = mconcat . map putEntry

packEntries :: [Entry] -> Lazy.ByteString
packEntries = toLazyByteString . putEntries

readEntries :: Lazy.ByteString -> Entries
readEntries bs
    = worker (Lazy.toChunks bs)
    where worker [] = Done
          worker (x:xs)
              = check (runGetPartial readEntry x) xs
          check result more
              = case result of
                  Serialize.Done entry rest
                      | Strict.null rest    -> Next entry (worker more)
                      | otherwise           -> Next entry (worker (rest:more))
                  Serialize.Fail msg _      -> Fail msg
                  Serialize.Partial cont    -> case more of
                                                 []     -> check (cont Strict.empty) []
                                                 (x:xs) -> check (cont x) xs

readEntry :: Get Entry
readEntry
    = do contentLength <- getWord64le
         contentChecksum <-getWord16le
         content <- getLazyByteString_fast (fromIntegral contentLength)
         if crc16 content /= contentChecksum
           then fail "Invalid hash"
           else return content

-- | Read a lazy bytestring WITHOUT any copying or concatenation.
getLazyByteString_fast :: Int -> Get Lazy.ByteString
getLazyByteString_fast = worker 0 []
  where
    worker counter acc n = do
      rem <- remaining
      if n > rem then do
         chunk <- getBytes rem
         _ <- ensure 1
         worker (counter + rem) (chunk:acc) (n-rem)
      else do
         chunk <- getBytes n
         return $ Lazy.fromChunks (reverse $ chunk:acc)