{-# LANGUAGE BangPatterns               #-}
{-# LANGUAGE FlexibleContexts           #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE OverloadedStrings          #-}
{-# LANGUAGE TupleSections              #-}

-- | Pack file support.

module Data.Git.Internal.Pack where

import           Codec.Compression.Zlib.Internal  hiding (Raw)
import           Control.Applicative
import           Control.Concurrent.MVar
import           Control.Exception
import           Control.Monad
import           Control.Monad.Fail
import           Control.Monad.Reader
import           Control.Monad.RWS
import           Control.Monad.ST
import           Data.Attoparsec.ByteString       as A
import           Data.Attoparsec.Combinator       (lookAhead)
import           Data.Bits
import qualified Data.ByteString                  as B
import           Data.ByteString.Builder          (Builder)
import qualified Data.ByteString.Builder          as BB
import qualified Data.ByteString.Lazy             as BL
import           Data.Digest.CRC32
import           Data.Map                         (Map)
import qualified Data.Map                         as M
import           Data.STRef
import qualified Data.Vector                      as V
import qualified Data.Vector.Mutable              as VM
import qualified Data.Vector.Unboxed              as UV
import qualified Data.Vector.Unboxed.Mutable      as UM
import           Data.Word
import           System.IO                        (SeekMode (..), hSeek, hTell)
import           System.Posix.FilePath

import Data.Git.Hash
import Data.Git.Internal.FileUtil
import Data.Git.Internal.Object (parseBlob, parseCommit, parseTag, parseTree)
import Data.Git.Internal.Parsers
import Data.Git.Internal.Types  (PackFile (..), PackIndex (..), Crc32)
import Data.Git.Object

data PackObject = PackObject ObjectType
                | DeltaOff Int
                | DeltaRef Sha1
                  deriving (Eq, Ord, Show)

data Raw = Raw {
      rawType :: !PackObject
    , rawData :: !BL.ByteString
    } deriving (Eq, Ord, Show)

parsePackedRaw' :: Parser Raw
parsePackedRaw' = do (ty, sz) <- parseCompactHeader
                     rest     <- lookAhead takeLazyByteString
                     let (_, dat) = decompressTo sz rest
                     return $ Raw ty dat

base128le :: Parser Word
base128le = do b <- fromIntegral <$> anyWord8
               if b `testBit` 7
               then base128le >>= return . \n -> n `unsafeShiftL` 7 .|. (0x7f .&. b)
               else return b

parseCompactHeader :: Parser (PackObject, Word)
parseCompactHeader = do h  <- anyWord8
                        sz <- if h `testBit` 7 then base128le else return 0
                        ty <- parseType $ 0x70 .&. h
                        return (ty, (sz `unsafeShiftL` 4) .|. (0x0f .&. fromIntegral h))
    where parseType 0x10 = pure $ PackObject CommitType
          parseType 0x20 = pure $ PackObject TreeType
          parseType 0x30 = pure $ PackObject BlobType
          parseType 0x40 = pure $ PackObject TagType
          parseType 0x60 = DeltaOff <$> parseOffset
          parseType 0x70 = DeltaRef . Sha1 <$> A.take 20
          parseType n    = error $ "no type for " ++ show (n `shiftR` 4)

          -- Parse according to the following (cargo-culted) recommended C:
          --        #       byte = *data++;
          --        #       number = byte & 0x7f;
          --        #       while (byte & 0x80) {
          --        #               byte = *data++;
          --        #               number = ((number + 1) << 7) | (byte & 0x7f);
          --        #       }
          parseOffset :: Parser Int
          parseOffset = do byte <- anyWord8
                           let number = byte .&. 0x7f
                           loop byte (fromIntegral number)
              where
                loop byte number | byte `testBit` 7 = do
                  byte' <- anyWord8
                  loop byte' $ (number + 1) `unsafeShiftL` 7 .|. (fromIntegral byte' .&. 0x7f)
                loop _    number | otherwise        = return number

decompressTo :: Word -> BL.ByteString -> (BL.ByteString, BL.ByteString)
decompressTo sz = fmap BL.fromStrict . foldDecompressStreamWithInput ((<>) . ("",)) (,"") throw go
    where go = decompressST zlibFormat defaultDecompressParams { decompressBufferSize = fromIntegral sz }

type Patch a = RWST BL.ByteString BB.Builder Word64 Parser a

-- see: https://github.com/git/git/blob/master/patch-delta.c

parsePatch :: Patch ()
parsePatch = do op <- lift anyWord8
                b  <- if   op `testBit` 7
                     then copy op
                     else BL.fromStrict <$> lift (A.take $ fromIntegral op)
                tell . BB.lazyByteString $ b
    where copy op = do put 0
                       when (op `testBit` 0) $ orShift 0
                       when (op `testBit` 1) $ orShift 8
                       when (op `testBit` 2) $ orShift 16
                       when (op `testBit` 3) $ orShift 24
                       cp_off <- fromIntegral <$> get
                       put 0
                       when (op `testBit` 4) $ orShift 0
                       when (op `testBit` 5) $ orShift 8
                       when (op `testBit` 6) $ orShift 16
                       gets (==0) >>= flip when (put 0x10000)
                       cp_size <- fromIntegral <$> get
                       asks $ BL.take cp_size . BL.drop cp_off

          orShift n = do byte <- lift anyWord8
                         modify ((fromIntegral byte `unsafeShiftL` n) .|.)

applyPatch :: BL.ByteString -> B.ByteString -> Either String BL.ByteString
applyPatch base = fmap (BB.toLazyByteString . snd) . parseOnly (execRWST go base 0)
    where go = lift base128le >> lift base128le >> many parsePatch

resolveDelta :: Int -> PackFile -> Raw -> Maybe Raw
resolveDelta off pf@(PackFile pb _) (Raw (DeltaOff o) b) = do
  lu <- parseMaybe parsePackedRaw' $ BL.drop (fromIntegral $ off - o) pb
  mr <- resolveDelta (off - o) pf lu
  let patched = either error id (applyPatch (rawData mr) $ BL.toStrict b)
  return $ mr { rawData = patched }
resolveDelta _ pf@(PackFile pb ind) (Raw (DeltaRef s) b) = do
  o <- getShaOffset ind s
  lu <- parseMaybe parsePackedRaw' $ BL.drop (fromIntegral o) pb
  mr <- resolveDelta (fromIntegral o) pf lu
  let patched = either error id (applyPatch (rawData mr) $ BL.toStrict b)
  return $ mr { rawData = patched }
resolveDelta _ _ r = return r

parseFanout :: Parser (UV.Vector Word32)
parseFanout = UV.replicateM 256 word32

parseIndexShas :: Word32 -> Parser (V.Vector Sha1)
parseIndexShas n = V.replicateM (fromIntegral n) (A.take 20 >>= return . Sha1)

parseShaCrcs :: Word32 -> Parser (UV.Vector Crc32)
parseShaCrcs n = UV.replicateM (fromIntegral n) word32

parseShaOffsets :: Word32 -> Parser (UV.Vector Word32)
parseShaOffsets n = UV.replicateM (fromIntegral n) word32

parseBigOffsets :: UV.Vector Word32 -> Parser (UV.Vector Word64)
parseBigOffsets os = UV.replicateM (fromIntegral n) word64
    where n = UV.foldl' (\a b -> a + (b `unsafeShiftL` 63)) 0 os

-- FIXME: Handle large packfile offsets
parsePackIndex :: Parser PackIndex
parsePackIndex = do void "\255tOc"
                    2  <- word32
                    fo <- parseFanout
                    let size = fo UV.! 255
                    ss <- parseIndexShas  size
                    cs <- parseShaCrcs    size
                    os <- parseShaOffsets size
                    bs <- parseBigOffsets os
                    -- ps <- parseSha1
                    return $ PackIndex fo ss cs os bs

-- I'm not proud of this function.
getShaOffset :: PackIndex -> Sha1 -> Maybe Word64
getShaOffset pidx s = offset'
    where fb, ub, lb :: Int
          -- what the Sha starts with, which is our index into the fanout table.
          fb = fromIntegral . B.head . getSha1 $ s
          -- The last value position that can start with this byte.
          ub = fromIntegral $ (fanout pidx) UV.! fb
          -- Our search starts at the highest value in the value below our sha1 initial byte.
          lb | fb == 0    = 0
             | otherwise = fromIntegral $ (fanout pidx) UV.! pred fb
          offset = ((shaOffsets pidx) UV.!) . (lb+) <$>
                   V.elemIndex s (V.slice lb (ub - lb) (indexShas pidx))
          offset' = case offset of
                      Nothing -> Nothing
                      Just off | off `testBit` 31 ->
                                   Just ((shaBigOffsets pidx) UV.! fromIntegral (clearBit off 31))
                               | otherwise        ->
                                   Just $ fromIntegral off

readIndexFile :: RawFilePath -> IO PackIndex
readIndexFile p = (either error id . parseOnly parsePackIndex) <$> readRawFileS (p <.> "idx")

isPackIndex :: RawFilePath -> Bool
isPackIndex = (==".idx") . takeExtension

-- | Read a 'PackFile' off the filesystem.
readPackFile :: RawFilePath -> IO PackFile
readPackFile p = PackFile <$> readRawFileL (p <.> "pack") <*> readIndexFile p

-- | Try to get the 'Object' for a 'Sha1' in a 'PackFile'.
findPackSha :: PackFile -> Sha1 -> Maybe Object
findPackSha pf@(PackFile pb ix) s = do
  offset <- getShaOffset ix s
  raw    <- parseMaybe parsePackedRaw' (BL.drop (fromIntegral offset) pb)
  Raw (PackObject t) d <- resolveDelta (fromIntegral offset) pf raw
  case t of -- FIXME: don't do the stupid "blob 1234\NUL" thing here
    BlobType   -> BlobObj   <$> parseMaybe parseBlob   ("blob 1234\NUL"   <> d)
    TreeType   -> TreeObj   <$> parseMaybe parseTree   ("tree 1234\NUL"   <> d)
    CommitType -> CommitObj <$> parseMaybe parseCommit ("commit 1234\NUL" <> d)
    TagType    -> TagObj    <$> parseMaybe parseTag    ("tag 1234\NUL"    <> d)

buildPackedObject :: Object -> Builder
buildPackedObject o = buildCompactHeader ty sz <> BB.lazyByteString (compress zlibFormat defaultCompressParams b)
    where b  = BB.toLazyByteString $ buildObject o
          ty = compactTag o
          sz = fromIntegral . BL.length $ b -- TODO: suck less

compactTag :: Object -> Word8
compactTag CommitObj {} = 0x10
compactTag TreeObj   {} = 0x20
compactTag BlobObj   {} = 0x30
compactTag TagObj    {} = 0x40

buildCompactHeader :: Word8 -> Word64 -> Builder
buildCompactHeader t sz = go (t .|. 0x0f .&. fromIntegral sz) (sz `unsafeShiftR` 4)
    where go c 0 = BB.word8 c
          go c n = BB.word8 (c .|. 0x80) <> go (fromIntegral $ 0x7f .&. n) (n `unsafeShiftR` 7)

data IndexData = IndexData
    { idxCrc    :: !Word32
    , idxOffset :: !Word64
    } deriving (Eq, Ord, Show)

type PackIndexer = Map Sha1 IndexData

data PackingState = PackingState
    { psTempFile :: TempFile
    , psCount    :: Word32
    , psIndexer  :: PackIndexer
    , psOffset   :: Word64
    }

makePackIndex :: PackIndexer -> PackIndex
makePackIndex idx | sz == 0    =
                      -- The empty PackIndex
                      PackIndex (UV.replicate 256 0) mempty mempty mempty mempty
                  | otherwise = runST $ do
  fan  <- UM.replicate 256 0
  shas <- VM.unsafeNew sz
  crcs <- UM.unsafeNew sz
  offs <- UM.unsafeNew sz
  bos  <- newSTRef (0, [])
  forM_ (zip [0..] $ M.toAscList idx) $ \(i, (h@(Sha1 bs), IndexData crc off)) -> do
    UM.unsafeWrite fan (fromIntegral $ B.head bs) (fromIntegral $ i + 1)
    VM.unsafeWrite shas i h
    UM.unsafeWrite crcs i crc
    if off < bit 31
    then UM.unsafeWrite offs i $ fromIntegral off
    else do
      (boc, bol) <- readSTRef bos
      UM.unsafeWrite offs i (boc `setBit` 31)
      writeSTRef bos (boc + 1, (boc, off) : bol)
  (boc, bol) <- readSTRef bos
  let fillFan lst fi = do
        cur <- UM.unsafeRead fan fi
        if cur < lst
        then UM.unsafeWrite fan fi lst >> return lst
        else return cur
  foldM_ fillFan 0 [0..255]
  PackIndex <$> UV.unsafeFreeze fan
            <*> V.unsafeFreeze  shas
            <*> UV.unsafeFreeze crcs
            <*> UV.unsafeFreeze offs
            <*> pure (UV.create $ do
                        v <- UM.unsafeNew (fromIntegral boc)
                        sequence_ [UM.unsafeWrite v (fromIntegral bi) bo | (bi, bo) <- bol]
                        return v)
    where sz = M.size idx

buildPackIndex :: PackIndex -> Builder
buildPackIndex (PackIndex fan shas crcs offs boffs) =
    BB.byteString (B.pack [0xff, 0x74, 0x4f, 0x63])
              <> BB.word32BE 2
              <> foldMap BB.word32BE (UV.toList fan)
              <> foldMap (BB.byteString . getSha1) shas
              <> foldMap BB.word32BE (UV.toList crcs)
              <> foldMap BB.word32BE (UV.toList offs)
              <> foldMap BB.word64BE (UV.toList boffs)

-- | A monad transformer for writing packfiles.
newtype PackingT m a = PackingT { unPackingT :: ReaderT (MVar PackingState) m a }
    deriving (Functor, Applicative, Monad, MonadIO, MonadReader (MVar PackingState)
             ,MonadTrans, MonadFail)

-- | Run a 'PackingT' computation, using the given path as a template for the tempfile.  The pack
--   and the index will be written to the same directory as the tempfile.
runPackingT :: MonadIO m => (PackFile -> m ()) -> RawFilePath -> PackingT m a -> m a
runPackingT reg p pma = do
  mvar <- liftIO $ newPackFile p >>= newMVar
  runReaderT (unPackingT $ pma >>= \(!a) -> finishPacking >> return a) mvar
    where finishPacking = ask >>= liftIO . (`withMVar` finishPackFile) >>= lift . maybe (pure ()) reg

-- | Write out the currently accumulating packfile, and start writing a new one.
flushPackFile :: MonadIO m => (PackFile -> m ()) -> PackingT m ()
flushPackFile reg = ask >>= liftIO . (`modifyMVar` go) >>= lift . maybe (pure ()) reg
    where go ps = do
            p <- finishPackFile ps
            n <- newPackFile (tempTemplate (psTempFile ps))
            pure (n, p)

-- | Write an object to the packfile.
packObject :: MonadIO m => Object -> PackingT m Sha1
packObject o = ask >>= liftIO . (`modifyMVar` go)
    where go !ps@(PackingState tmp n idx off) | hash `M.member` idx = pure (ps, hash)
                                              | otherwise           = do
            let objd = BB.toLazyByteString $ buildPackedObject o
                off' = off + fromIntegral (BL.length objd)
                crc  = crc32 objd
                idx' = M.insert hash (IndexData crc off) idx
            BL.hPut (tempHandle tmp) objd
            return $ (PackingState tmp (n + 1) idx' off', hash)
          hash = sha1 o

-- | Write the given objects to a packfile using the given path as a template for the tempfile.  The
--   pack will be written to the same directory as the tempfile.  Also writes the pack index.
writePackFile :: MonadIO m => (PackFile -> m ()) -> RawFilePath -> [Object] -> m ()
writePackFile reg p os = runPackingT reg p $ mapM_ packObject os

finishPackFile :: MonadIO m => PackingState -> m (Maybe PackFile)
finishPackFile (PackingState tmp 0 _ _  ) =
  -- don't write empty files
  liftIO (closeTempFile tmp Nothing) >> pure Nothing
finishPackFile (PackingState tmp n idx _) = liftIO $ do
  let h = tempHandle tmp
  len <- fromIntegral <$> hTell h
  hSeek h AbsoluteSeek 8             -- rewind to the length field...
  BB.hPutBuilder h $ BB.word32BE n   -- ...and scribble it in
  hSeek h AbsoluteSeek 0             -- now we go back to the beginning...
  !hash <- sha1 <$> BL.hGet h len     -- ...to hash the whole thing
  hSeek h SeekFromEnd 0              -- and be paranoid about appending...
  B.hPut h $ getSha1 hash            -- ...the hash to the end of the pack
  let !baseFilename = takeDirectory (tempFileName tmp) </> ("pack-" <> getSha1Hex (toHex hash))
  closeTempFile tmp (Just $ baseFilename <.> "pack")
  pnm <- withHandleAtomic (tempFileName tmp) $ \ih -> do
    let built   = buildPackIndex $ makePackIndex idx
        idxbs   = BB.toLazyByteString $ built <> (BB.byteString . getSha1 $ hash)
        idxhash = sha1 idxbs
    BL.hPut ih idxbs
    B.hPut  ih $ getSha1 idxhash
    return (Just $ baseFilename <.> ".idx", baseFilename)
  Just <$> readPackFile pnm

newPackFile :: RawFilePath -> IO PackingState
newPackFile p = do
  !tmp <- tempFile p
  BB.hPutBuilder (tempHandle tmp) $ "PACK" <> BB.word32BE 2 <> BB.word32BE 0
  return $ PackingState tmp 0 mempty 12