{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Pantry.Archive
( getArchivePackage
, getArchive
, getArchiveKey
, fetchArchivesRaw
, fetchArchives
, findCabalOrHpackFile
) where
import RIO
import qualified Pantry.SHA256 as SHA256
import Pantry.Storage hiding (Tree, TreeEntry)
import Pantry.Tree
import Pantry.Types
import RIO.Process
import Pantry.Internal (normalizeParents, makeTarRelative)
import qualified RIO.Text as T
import qualified RIO.Text.Partial as T
import qualified RIO.List as List
import qualified RIO.ByteString.Lazy as BL
import qualified RIO.Map as Map
import qualified RIO.Set as Set
import qualified Hpack.Config as Hpack
import Pantry.HPack (hpackVersion)
import Data.Bits ((.&.), shiftR)
import Path (toFilePath)
import qualified Codec.Archive.Zip as Zip
import qualified Data.Digest.CRC32 as CRC32
import Distribution.PackageDescription (packageDescription, package)
import Conduit
import Data.Conduit.Zlib (ungzip)
import qualified Data.Conduit.Tar as Tar
import Pantry.HTTP
fetchArchivesRaw
:: (HasPantryConfig env, HasLogFunc env, HasProcessContext env)
=> [(RawArchive, RawPackageMetadata)]
-> RIO env ()
fetchArchivesRaw pairs =
for_ pairs $ \(ra, rpm) ->
getArchive (RPLIArchive ra rpm) ra rpm
fetchArchives
:: (HasPantryConfig env, HasLogFunc env, HasProcessContext env)
=> [(Archive, PackageMetadata)]
-> RIO env ()
fetchArchives pairs =
fetchArchivesRaw [(toRawArchive a, toRawPM pm) | (a, pm) <- pairs]
getArchiveKey
:: forall env. (HasPantryConfig env, HasLogFunc env, HasProcessContext env)
=> RawPackageLocationImmutable
-> RawArchive
-> RawPackageMetadata
-> RIO env TreeKey
getArchiveKey rpli archive rpm =
packageTreeKey <$> getArchivePackage rpli archive rpm
thd3 :: (a, b, c) -> c
thd3 (_, _, z) = z
getArchivePackage
:: forall env. (HasPantryConfig env, HasLogFunc env, HasProcessContext env, HasCallStack)
=> RawPackageLocationImmutable
-> RawArchive
-> RawPackageMetadata
-> RIO env Package
getArchivePackage rpli archive rpm = thd3 <$> getArchive rpli archive rpm
getArchive
:: forall env. (HasPantryConfig env, HasLogFunc env, HasProcessContext env, HasCallStack)
=> RawPackageLocationImmutable
-> RawArchive
-> RawPackageMetadata
-> RIO env (SHA256, FileSize, Package)
getArchive rpli archive rpm = do
mcached <- loadCache rpli archive
cached@(_, _, pa) <-
case mcached of
Just stored -> pure stored
Nothing -> withArchiveLoc archive $ \fp sha size -> do
pa <- parseArchive rpli archive fp
storeCache archive sha size pa
pure (sha, size, pa)
either throwIO (\_ -> pure cached) $ checkPackageMetadata rpli rpm pa
storeCache
:: forall env. (HasPantryConfig env, HasLogFunc env)
=> RawArchive
-> SHA256
-> FileSize
-> Package
-> RIO env ()
storeCache archive sha size pa =
case raLocation archive of
ALUrl url -> withStorage $ storeArchiveCache url (raSubdir archive) sha size (packageTreeKey pa)
ALFilePath _ -> pure ()
loadCache
:: forall env. (HasPantryConfig env, HasLogFunc env, HasProcessContext env)
=> RawPackageLocationImmutable
-> RawArchive
-> RIO env (Maybe (SHA256, FileSize, Package))
loadCache rpli archive =
case loc of
ALFilePath _ -> pure Nothing
ALUrl url -> withStorage (loadArchiveCache url (raSubdir archive)) >>= loop
where
loc = raLocation archive
msha = raHash archive
msize = raSize archive
loadFromCache :: TreeId -> RIO env (Maybe Package)
loadFromCache tid = fmap Just $ withStorage $ loadPackageById rpli tid
loop [] = pure Nothing
loop ((sha, size, tid):rest) =
case msha of
Nothing -> do
case msize of
Just size' | size /= size' -> loop rest
_ -> do
case loc of
ALUrl url -> do
logDebug $ "Using archive from " <> display url <> " without a specified cryptographic hash"
logDebug $ "Cached hash is " <> display sha <> ", file size " <> display size
ALFilePath _ -> pure ()
fmap (sha, size,) <$> loadFromCache tid
Just sha'
| sha == sha' ->
case msize of
Nothing -> do
case loc of
ALUrl url -> logDebug $ "Archive from " <> display url <> " does not specify a size"
ALFilePath _ -> pure ()
fmap (sha, size,) <$> loadFromCache tid
Just size'
| size == size' -> fmap (sha, size,) <$> loadFromCache tid
| otherwise -> do
logWarn $ "Archive from " <> display loc <> " has a matching hash but mismatched size"
logWarn "Please verify that your configuration provides the correct size"
loop rest
| otherwise -> loop rest
checkPackageMetadata
:: RawPackageLocationImmutable
-> RawPackageMetadata
-> Package
-> Either PantryException Package
checkPackageMetadata pl pm pa = do
let
err = MismatchedPackageMetadata
pl
pm
(Just (packageTreeKey pa))
(packageIdent pa)
test :: Eq a => Maybe a -> a -> Bool
test (Just x) y = x == y
test Nothing _ = True
tests =
[ test (rpmTreeKey pm) (packageTreeKey pa)
, test (rpmName pm) (pkgName $ packageIdent pa)
, test (rpmVersion pm) (pkgVersion $ packageIdent pa)
]
in if and tests then Right pa else Left err
withArchiveLoc
:: HasLogFunc env
=> RawArchive
-> (FilePath -> SHA256 -> FileSize -> RIO env a)
-> RIO env a
withArchiveLoc (RawArchive (ALFilePath resolved) msha msize _subdir) f = do
let abs' = resolvedAbsolute resolved
fp = toFilePath abs'
(sha, size) <- withBinaryFile fp ReadMode $ \h -> do
size <- FileSize . fromIntegral <$> hFileSize h
for_ msize $ \size' -> when (size /= size') $ throwIO $ LocalInvalidSize abs' Mismatch
{ mismatchExpected = size'
, mismatchActual = size
}
sha <- runConduit (sourceHandle h .| SHA256.sinkHash)
for_ msha $ \sha' -> when (sha /= sha') $ throwIO $ LocalInvalidSHA256 abs' Mismatch
{ mismatchExpected = sha'
, mismatchActual = sha
}
pure (sha, size)
f fp sha size
withArchiveLoc (RawArchive (ALUrl url) msha msize _subdir) f =
withSystemTempFile "archive" $ \fp hout -> do
logDebug $ "Downloading archive from " <> display url
(sha, size, ()) <- httpSinkChecked url msha msize (sinkHandle hout)
hClose hout
f fp sha size
data ArchiveType = ATTarGz | ATTar | ATZip
deriving (Enum, Bounded)
instance Display ArchiveType where
display ATTarGz = "GZIP-ed tar file"
display ATTar = "Uncompressed tar file"
display ATZip = "Zip file"
data METype
= METNormal
| METExecutable
| METLink !FilePath
deriving Show
data MetaEntry = MetaEntry
{ mePath :: !FilePath
, meType :: !METype
}
deriving Show
foldArchive
:: (HasPantryConfig env, HasLogFunc env)
=> ArchiveLocation
-> FilePath
-> ArchiveType
-> a
-> (a -> MetaEntry -> ConduitT ByteString Void (RIO env) a)
-> RIO env a
foldArchive loc fp ATTarGz accum f =
withSourceFile fp $ \src -> runConduit $ src .| ungzip .| foldTar loc accum f
foldArchive loc fp ATTar accum f =
withSourceFile fp $ \src -> runConduit $ src .| foldTar loc accum f
foldArchive loc fp ATZip accum0 f = withBinaryFile fp ReadMode $ \h -> do
let go accum entry = do
let me = MetaEntry (Zip.eRelativePath entry) met
met = fromMaybe METNormal $ do
let modes = shiftR (Zip.eExternalFileAttributes entry) 16
guard $ Zip.eVersionMadeBy entry .&. 0xFF00 == 0x0300
guard $ modes /= 0
Just $
if (modes .&. 0o100) == 0
then METNormal
else METExecutable
lbs = Zip.fromEntry entry
let crcExpected = Zip.eCRC32 entry
crcActual = CRC32.crc32 lbs
when (crcExpected /= crcActual)
$ throwIO $ CRC32Mismatch loc (Zip.eRelativePath entry) Mismatch
{ mismatchExpected = crcExpected
, mismatchActual = crcActual
}
runConduit $ sourceLazy lbs .| f accum me
isDir entry =
case reverse $ Zip.eRelativePath entry of
'/':_ -> True
_ -> False
lbs <- BL.hGetContents h
foldM go accum0 (filter (not . isDir) $ Zip.zEntries $ Zip.toArchive lbs)
foldTar
:: (HasPantryConfig env, HasLogFunc env)
=> ArchiveLocation
-> a
-> (a -> MetaEntry -> ConduitT ByteString o (RIO env) a)
-> ConduitT ByteString o (RIO env) a
foldTar loc accum0 f = do
ref <- newIORef accum0
Tar.untar $ \fi -> toME fi >>= traverse_ (\me -> do
accum <- readIORef ref
accum' <- f accum me
writeIORef ref $! accum')
readIORef ref
where
toME :: MonadIO m => Tar.FileInfo -> m (Maybe MetaEntry)
toME fi = do
let exc = InvalidTarFileType loc (Tar.getFileInfoPath fi) (Tar.fileType fi)
mmet <-
case Tar.fileType fi of
Tar.FTSymbolicLink bs ->
case decodeUtf8' bs of
Left _ -> throwIO exc
Right text -> pure $ Just $ METLink $ T.unpack text
Tar.FTNormal -> pure $ Just $
if Tar.fileMode fi .&. 0o100 /= 0
then METExecutable
else METNormal
Tar.FTDirectory -> pure Nothing
_ -> throwIO exc
pure $
(\met -> MetaEntry
{ mePath = Tar.getFileInfoPath fi
, meType = met
})
<$> mmet
data SimpleEntry = SimpleEntry
{ seSource :: !FilePath
, seType :: !FileType
}
deriving Show
parseArchive
:: (HasPantryConfig env, HasLogFunc env, HasProcessContext env)
=> RawPackageLocationImmutable
-> RawArchive
-> FilePath
-> RIO env Package
parseArchive rpli archive fp = do
let loc = raLocation archive
getFiles [] = throwIO $ UnknownArchiveType loc
getFiles (at:ats) = do
eres <- tryAny $ foldArchive loc fp at id $ \m me -> pure $ m . (me:)
case eres of
Left e -> do
logDebug $ "parseArchive of " <> display at <> ": " <> displayShow e
getFiles ats
Right files -> pure (at, Map.fromList $ map (mePath &&& id) $ files [])
(at :: ArchiveType, files :: Map FilePath MetaEntry) <- getFiles [minBound..maxBound]
let toSimple :: FilePath -> MetaEntry -> Either String (Map FilePath SimpleEntry)
toSimple key me =
case meType me of
METNormal -> Right $ Map.singleton key $ SimpleEntry (mePath me) FTNormal
METExecutable -> Right $ Map.singleton key $ SimpleEntry (mePath me) FTExecutable
METLink relDest -> do
case relDest of
'/':_ -> Left $ concat
[ "File located at "
, show $ mePath me
, " is a symbolic link to absolute path "
, relDest
]
_ -> Right ()
dest0 <-
case makeTarRelative (mePath me) relDest of
Left e -> Left $ concat
[ "Error resolving relative path "
, relDest
, " from symlink at "
, mePath me
, ": "
, e
]
Right x -> Right x
dest <-
case normalizeParents dest0 of
Left e -> Left $ concat
[ "Invalid symbolic link from "
, mePath me
, " to "
, relDest
, ", tried parsing "
, dest0
, ": "
, e
]
Right x -> Right x
case Map.lookup dest files of
Nothing ->
case findWithPrefix dest files of
[] -> Left $ "Symbolic link dest not found from " ++ mePath me ++ " to " ++ relDest ++ ", looking for " ++ dest ++ ".\n"
++ "This may indicate that the source is a git archive which uses git-annex.\n"
++ "See https://github.com/commercialhaskell/stack/issues/4579 for further information."
pairs -> fmap fold $ for pairs $ \(suffix, me') -> toSimple (key ++ '/' : suffix) me'
Just me' ->
case meType me' of
METNormal -> Right $ Map.singleton key $ SimpleEntry dest FTNormal
METExecutable -> Right $ Map.singleton key $ SimpleEntry dest FTExecutable
METLink _ -> Left $ "Symbolic link dest cannot be a symbolic link, from " ++ mePath me ++ " to " ++ relDest
case fold <$> Map.traverseWithKey toSimple files of
Left e -> throwIO $ UnsupportedTarball loc $ T.pack e
Right files1 -> do
let files2 = stripCommonPrefix $ Map.toList files1
files3 = takeSubdir (raSubdir archive) files2
toSafe (fp', a) =
case mkSafeFilePath fp' of
Nothing -> Left $ "Not a safe file path: " ++ show fp'
Just sfp -> Right (sfp, a)
case traverse toSafe files3 of
Left e -> throwIO $ UnsupportedTarball loc $ T.pack e
Right safeFiles -> do
let toSave = Set.fromList $ map (seSource . snd) safeFiles
(blobs :: Map FilePath BlobKey) <-
foldArchive loc fp at mempty $ \m me ->
if mePath me `Set.member` toSave
then do
bs <- mconcat <$> sinkList
(_, blobKey) <- lift $ withStorage $ storeBlob bs
pure $ Map.insert (mePath me) blobKey m
else pure m
tree <- fmap (TreeMap . Map.fromList) $ for safeFiles $ \(sfp, se) ->
case Map.lookup (seSource se) blobs of
Nothing -> error $ "Impossible: blob not found for: " ++ seSource se
Just blobKey -> pure (sfp, TreeEntry blobKey (seType se))
buildFile <- findCabalOrHpackFile rpli tree
(buildFilePath, buildFileBlobKey, buildFileEntry) <- case buildFile of
BFCabal fpath te@(TreeEntry key _) -> pure (fpath, key, te)
BFHpack te@(TreeEntry key _) -> pure (hpackSafeFilePath, key, te)
mbs <- withStorage $ loadBlob buildFileBlobKey
bs <-
case mbs of
Nothing -> throwIO $ TreeReferencesMissingBlob rpli buildFilePath buildFileBlobKey
Just bs -> pure bs
cabalBs <- case buildFile of
BFCabal _ _ -> pure bs
BFHpack _ -> snd <$> hpackToCabal rpli tree
(_warnings, gpd) <- rawParseGPD (Left rpli) cabalBs
let ident@(PackageIdentifier name _) = package $ packageDescription gpd
case buildFile of
BFCabal _ _ -> when (buildFilePath /= cabalFileName name) $ throwIO $ WrongCabalFileName rpli buildFilePath name
_ -> return ()
(tid, treeKey') <- withStorage $ storeTree rpli ident tree buildFile
packageCabal <- case buildFile of
BFCabal _ _ -> pure $ PCCabalFile buildFileEntry
BFHpack _ -> do
cabalKey <- withStorage $ do
hpackId <- storeHPack rpli tid
loadCabalBlobKey hpackId
hpackSoftwareVersion <- hpackVersion
let cabalTreeEntry = TreeEntry cabalKey (teType buildFileEntry)
pure $ PCHpack $ PHpack { phOriginal = buildFileEntry, phGenerated = cabalTreeEntry, phVersion = hpackSoftwareVersion}
pure Package
{ packageTreeKey = treeKey'
, packageTree = tree
, packageCabalEntry = packageCabal
, packageIdent = ident
}
findWithPrefix :: FilePath -> Map FilePath MetaEntry -> [(FilePath, MetaEntry)]
findWithPrefix dir = mapMaybe go . Map.toList
where
prefix = dir ++ "/"
go (x, y) = (, y) <$> List.stripPrefix prefix x
findCabalOrHpackFile
:: MonadThrow m
=> RawPackageLocationImmutable
-> Tree
-> m BuildFile
findCabalOrHpackFile loc (TreeMap m) = do
let isCabalFile (sfp, _) =
let txt = unSafeFilePath sfp
in not ("/" `T.isInfixOf` txt) && (".cabal" `T.isSuffixOf` txt)
isHpackFile (sfp, _) =
let txt = unSafeFilePath sfp
in T.pack (Hpack.packageConfig) == txt
isBFCabal (BFCabal _ _) = True
isBFCabal _ = False
sfpBuildFile (BFCabal sfp _) = sfp
sfpBuildFile (BFHpack _) = hpackSafeFilePath
toBuildFile xs@(sfp, te) = let cbFile = if (isCabalFile xs)
then Just $ BFCabal sfp te
else Nothing
hpFile = if (isHpackFile xs)
then Just $ BFHpack te
else Nothing
in cbFile <|> hpFile
case mapMaybe toBuildFile $ Map.toList m of
[] -> throwM $ TreeWithoutCabalFile loc
[bfile] -> pure bfile
xs -> case (filter isBFCabal xs) of
[] -> throwM $ TreeWithoutCabalFile loc
[bfile] -> pure bfile
xs' -> throwM $ TreeWithMultipleCabalFiles loc $ map sfpBuildFile xs'
stripCommonPrefix :: [(FilePath, a)] -> [(FilePath, a)]
stripCommonPrefix [] = []
stripCommonPrefix pairs@((firstFP, _):_) = fromMaybe pairs $ do
let firstDir = takeWhile (/= '/') firstFP
guard $ not $ null firstDir
let strip (fp, a) = (, a) <$> List.stripPrefix (firstDir ++ "/") fp
stripCommonPrefix <$> traverse strip pairs
takeSubdir
:: Text
-> [(FilePath, a)]
-> [(Text, a)]
takeSubdir subdir = mapMaybe $ \(fp, a) -> do
stripped <- List.stripPrefix subdirs $ splitDirs $ T.pack fp
Just (T.intercalate "/" stripped, a)
where
splitDirs = List.dropWhile (== ".") . filter (/= "") . T.splitOn "/"
subdirs = splitDirs subdir