{-# LANGUAGE NumericUnderscores #-}
{-# LANGUAGE OverloadedStrings #-}

module Streamly.External.Archive.Tests (tests) where

import qualified Codec.Archive.Tar as Tar
import Codec.Compression.GZip (compress)
import Control.Monad (forM)
import Crypto.Random.Entropy (getEntropy)
import Data.Bifunctor (first)
import Data.ByteString (ByteString, append)
import qualified Data.ByteString as B
import Data.ByteString.Char8 (unpack)
import qualified Data.ByteString.Lazy as LB
import Data.Char (chr, ord)
import Data.Function ((&))
import Data.List (nub, sort)
import Data.Maybe (fromJust)
import qualified Streamly.Data.Stream.Prelude as S
import Streamly.External.Archive
import Streamly.External.Archive.Internal.Foreign (blockSize)
import Streamly.Internal.Data.Fold.Type (Fold (Fold), Step (Partial))
import System.Directory (createDirectoryIfMissing)
import System.FilePath (addTrailingPathSeparator, hasTrailingPathSeparator, joinPath, takeDirectory)
import System.IO.Temp (withSystemTempDirectory)
import Test.QuickCheck (Gen, choose, frequency, vectorOf)
import Test.QuickCheck.Monadic (monadicIO, pick, run)
import Test.Tasty (TestTree)
import Test.Tasty.HUnit (assertBool, assertEqual, testCase)
import Test.Tasty.QuickCheck (testProperty)

tests :: [TestTree]
tests =
  [ testTar False,
    testTar True,
    testSparse
  ]

-- | Use other libraries to create a tar (or tar.gz) file containing random data,
-- read the file back using our library, and check if the results are as expected.
testTar :: Bool -> TestTree
testTar gz =
  testProperty ("tar (" ++ (if gz then "gz" else "no gz") ++ ")") $
    monadicIO $ do
      -- Generate a random file system hierarchy.
      hierarchy <- pick $ randomHierarchy "" 4 4 5

      -- Create a new temporary directory, write our hierarchy into a "files" subdirectory of the
      -- temporary directory, use other libraries to create files.tar (or files.tar.gz), read the
      -- file back using our library, and check if the results are as expected.
      run . withSystemTempDirectory "archive-streaming-testZip" $ \tmpDir -> do
        let filesDir = joinPath [tmpDir, "files"]
        createDirectoryIfMissing True filesDir
        pathsAndByteStrings <- writeHierarchy filesDir hierarchy

        let archFile = joinPath [tmpDir, "files.tar" ++ (if gz then ".gz" else "")]
        LB.writeFile archFile . (if gz then compress else id) . Tar.write
          =<< Tar.pack tmpDir ["files"]

        let fileFold =
              Fold
                ( \(mfp, mtyp, msz, mbs) e ->
                    case e of
                      Left h -> do
                        mfp_ <- headerPathName h
                        mtyp_ <- headerFileType h
                        msz_ <- headerSize h
                        return $ Partial (unpack <$> mfp_, mtyp_, msz_, mbs)
                      Right bs ->
                        return $
                          Partial
                            ( mfp,
                              mtyp,
                              msz,
                              case mbs of
                                Nothing -> Just bs
                                Just bs' -> Just $ bs' `append` bs
                            )
                )
                (return $ Partial (Nothing, Nothing, Nothing, Nothing))
                return

        pathsFileTypesSizesAndByteStrings <-
          S.unfold (readArchive archFile) undefined
            & groupByHeader fileFold
            & fmap (\(mfp, mtyp, msz, mbs) -> (fromJust mfp, fromJust mtyp, msz, mbs))
            & S.toList

        -- Make the file paths and ByteStrings comparable and compare them.
        let pathsAndByteStrings_ =
              sort $ map (first ("files/" ++)) (("", Nothing) : pathsAndByteStrings)
        let pathAndByteStrings2_ =
              sort . map (\(x, _, _, y) -> (x, y)) $ pathsFileTypesSizesAndByteStrings
        let samePathsAndByteStrings = pathsAndByteStrings_ == pathAndByteStrings2_

        -- Check FileType.
        let fileTypesCorrect =
              all
                ( \(fp, typ, _, _) ->
                    if hasTrailingPathSeparator fp
                      then typ == FileTypeDirectory
                      else typ == FileTypeRegular
                )
                pathsFileTypesSizesAndByteStrings

        -- Check header file size.
        let fileSizeCorrect =
              all
                ( \(_, _, msz, mbs) ->
                    case (msz, mbs) of
                      (Nothing, _) -> False -- The size is always available.
                      (Just sz, Nothing) -> sz == 0 -- Empty file or directory.
                      (Just sz, Just bs) -> fromIntegral sz == B.length bs
                )
                pathsFileTypesSizesAndByteStrings

        return $ samePathsAndByteStrings && fileTypesCorrect && fileSizeCorrect

-- | Read a fixed sparse file (sparse.tar) using our library and make sure the results are as
-- expected. (The file was created manually on Linux with "cp --sparse=always" to create the sparse
-- files and "tar -Scvf" to create the archive. We were unable to do the equivalent thing on macOS
-- Mojave / APFS.)
testSparse :: TestTree
testSparse = testCase "sparse" $ do
  let fileFold =
        Fold
          ( \(mfp, mbs) e ->
              case e of
                Left h -> do
                  mfp_ <- headerPathName h
                  return $ Partial (unpack <$> mfp_, mbs)
                Right bs ->
                  return $
                    Partial
                      ( mfp,
                        case mbs of
                          Nothing -> Just bs
                          Just bs' -> Just $ bs' `append` bs
                      )
          )
          (return $ Partial (Nothing, Nothing))
          return

  archive <-
    S.unfold (readArchive "test/data/sparse.tar") undefined
      & groupByHeader fileFold
      & fmap (\(mfp, mbs) -> (fromJust mfp, fromJust mbs))
      & S.toList

  assertEqual "" (map fst archive) ["zero", "zeroZero", "zeroAsdf", "asdfZero"]

  let tenMb = 10_000_000
  let zero = B.replicate tenMb 0
  let asdf = "asdf"
  assertBool "unexpected bytestring (1)" $ snd (head archive) == zero
  assertBool "unexpected bytestring (2)" $ snd (archive !! 1) == zero `B.append` zero
  assertBool "unexpected bytestring (3)" $ snd (archive !! 2) == zero `B.append` asdf
  assertBool "unexpected bytestring (4)" $ snd (archive !! 3) == asdf `B.append` zero

-- | Writes a given hierarchy of relative paths (created with 'randomHierarchy') to disk in the
-- specified directory and returns the same hierarchy, except with actual ByteStrings instead of
-- lengths. Note: The original relative paths are returned unaltered.
writeHierarchy :: FilePath -> [(FilePath, Maybe Int)] -> IO [(FilePath, Maybe ByteString)]
writeHierarchy writeDir =
  mapM $ \(p, mBsLen) ->
    let fullp = joinPath [writeDir, p]
     in case mBsLen of
          Just bsLen -> do
            createDirectoryIfMissing True (takeDirectory fullp)
            bs <- getEntropy (fromIntegral bsLen)
            B.writeFile fullp bs
            return
              ( p,
                if bsLen == 0
                  then Nothing -- Our library yields no ByteString at all for empty files.
                  else Just bs
              )
          Nothing ->
            createDirectoryIfMissing True fullp >> return (p, Nothing)

-- | Recursively generates a random hierarchy of relative paths to files and directories. (Nothing
-- is written to disk; only the paths are returned.) The initial dirPath should be "". A random
-- ByteString length is provided in the case of a file; 'Nothing' in the case of a directory.
randomHierarchy :: FilePath -> Int -> Int -> Int -> Gen [(FilePath, Maybe Int)]
randomHierarchy dirPath maxFiles maxDirs maxDepth = do
  numFiles <- choose (0, maxFiles)
  fileComps <- nub <$> vectorOf numFiles pathComponent
  let filePaths = map (\c -> joinPath [dirPath, c]) fileComps
  bsLengths <-
    map Just
      <$> vectorOf
        (length filePaths)
        ( frequency
            [ (1, choose (0, 5)),
              (1, choose (blockSize - 5, blockSize + 5)),
              (1, choose (0, 3 * blockSize))
            ]
        )

  numDirs <- choose (0, maxDirs)
  dirComps <-
    nub . filter (not . (`elem` fileComps))
      <$> vectorOf (if maxDepth <= 0 then 0 else numDirs) pathComponent

  -- libarchive reads back directory paths with a trailing separator.
  let dirPaths = map (\c -> addTrailingPathSeparator $ joinPath [dirPath, c]) dirComps

  recursion <-
    concat
      <$> forM
        dirPaths
        ( \dirPath' ->
            randomHierarchy dirPath' (maxFiles `div` 2) (maxDirs `div` 2) (maxDepth - 1)
        )

  return $ zip filePaths bsLengths ++ zip dirPaths (repeat Nothing) ++ recursion

-- | Generates a random path component of length between 1 and 10, e.g., "HO53UVKQ". For
-- compatibility with case-insensitive file systems, uses only one case.
pathComponent :: Gen String
pathComponent = do
  len <- choose (1, 10)
  vectorOf len $
    chr
      <$> frequency
        [ (1, choose (ord 'A', ord 'Z')),
          (1, choose (ord '0', ord '9'))
        ]
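
-- A minimal usage sketch (not exercised by the tests above, and the name 'archivePathNames' is
-- purely illustrative): it shows the same readArchive/groupByHeader pattern the tests rely on,
-- collecting only the path name of each archive entry and ignoring the data chunks. It uses no
-- identifiers beyond those already imported in this module.
archivePathNames :: FilePath -> IO [Maybe String]
archivePathNames file =
  S.unfold (readArchive file) undefined
    & groupByHeader
      ( Fold
          -- Keep the path from the header; data chunks leave the accumulator unchanged.
          ( \mfp e -> case e of
              Left h -> do
                mfp_ <- headerPathName h
                return $ Partial (unpack <$> mfp_)
              Right _ -> return $ Partial mfp
          )
          (return $ Partial Nothing)
          return
      )
    & S.toList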