module Codec.Archive.Zip
(
Archive (..)
, Entry (..)
, CompressionMethod (..)
, ZipOption (..)
, ZipException (..)
, emptyArchive
, toArchive
, toArchiveOrFail
, fromArchive
, filesInArchive
, addEntryToArchive
, deleteEntryFromArchive
, findEntryByPath
, fromEntry
, toEntry
, readEntry
, writeEntry
, addFilesToArchive
, extractFilesFromArchive
) where
import System.Time ( toUTCTime, addToClockTime, CalendarTime (..), ClockTime (..), TimeDiff (..) )
#if MIN_VERSION_directory(1,2,0)
import Data.Time.Clock.POSIX ( utcTimeToPOSIXSeconds )
#endif
import Data.Bits ( shiftL, shiftR, (.&.) )
import Data.Binary
import Data.Binary.Get
import Data.Binary.Put
import Data.List ( nub, find, intercalate )
import Data.Data (Data)
import Data.Typeable (Typeable)
import Text.Printf
import System.FilePath
import System.Directory ( doesDirectoryExist, getDirectoryContents, createDirectoryIfMissing )
import Control.Monad ( when, unless, zipWithM )
import qualified Control.Exception as E
import System.Directory ( getModificationTime )
import System.IO ( stderr, hPutStrLn )
import qualified Data.Digest.CRC32 as CRC32
import qualified Data.Map as M
#if MIN_VERSION_binary(0,6,0)
import Control.Applicative
#endif
#ifndef _WINDOWS
import System.Posix.Files ( setFileTimes, setFileMode, fileMode, getFileStatus )
#endif
import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as B
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TL
import qualified Codec.Compression.Zlib.Raw as Zlib
versionMadeBy :: Word16
#ifdef _WINDOWS
versionMadeBy = 0x0000
#else
versionMadeBy = 0x0300
#endif
#if !MIN_VERSION_binary(0, 6, 0)
manySig :: Word32 -> Get a -> Get [a]
manySig sig p = do
sig' <- lookAhead getWord32le
if sig == sig'
then do
r <- p
rs <- manySig sig p
return $ r : rs
else return []
#endif
data Archive = Archive
{ zEntries :: [Entry]
, zSignature :: Maybe B.ByteString
, zComment :: B.ByteString
} deriving (Read, Show)
instance Binary Archive where
put = putArchive
get = getArchive
data Entry = Entry
{ eRelativePath :: FilePath
, eCompressionMethod :: CompressionMethod
, eLastModified :: Integer
, eCRC32 :: Word32
, eCompressedSize :: Word32
, eUncompressedSize :: Word32
, eExtraField :: B.ByteString
, eFileComment :: B.ByteString
, eVersionMadeBy :: Word16
, eInternalFileAttributes :: Word16
, eExternalFileAttributes :: Word32
, eCompressedData :: B.ByteString
} deriving (Read, Show, Eq)
data CompressionMethod = Deflate
| NoCompression
deriving (Read, Show, Eq)
data ZipOption = OptRecursive
| OptVerbose
| OptDestination FilePath
| OptLocation FilePath Bool
deriving (Read, Show, Eq)
data ZipException =
CRC32Mismatch FilePath
deriving (Show, Typeable, Data)
instance E.Exception ZipException
emptyArchive :: Archive
emptyArchive = Archive
{ zEntries = []
, zSignature = Nothing
, zComment = B.empty }
toArchive :: B.ByteString -> Archive
toArchive = decode
toArchiveOrFail :: B.ByteString -> Either String Archive
#if MIN_VERSION_binary(0,7,0)
toArchiveOrFail bs = case decodeOrFail bs of
Left (_,_,e) -> Left e
Right (_,_,x) -> Right x
#else
toArchiveOrFail bs = Right $ toArchive bs
#endif
fromArchive :: Archive -> B.ByteString
fromArchive = encode
filesInArchive :: Archive -> [FilePath]
filesInArchive = (map eRelativePath) . zEntries
addEntryToArchive :: Entry -> Archive -> Archive
addEntryToArchive entry archive =
let archive' = deleteEntryFromArchive (eRelativePath entry) archive
oldEntries = zEntries archive'
in archive' { zEntries = entry : oldEntries }
deleteEntryFromArchive :: FilePath -> Archive -> Archive
deleteEntryFromArchive path archive =
archive { zEntries = [e | e <- zEntries archive
, not (eRelativePath e `matches` path)] }
findEntryByPath :: FilePath -> Archive -> Maybe Entry
findEntryByPath path archive =
find (\e -> path `matches` eRelativePath e) (zEntries archive)
fromEntry :: Entry -> B.ByteString
fromEntry entry =
decompressData (eCompressionMethod entry) (eCompressedData entry)
toEntry :: FilePath
-> Integer
-> B.ByteString
-> Entry
toEntry path modtime contents =
let uncompressedSize = B.length contents
compressedData = compressData Deflate contents
compressedSize = B.length compressedData
(compressionMethod, finalData, finalSize) =
if uncompressedSize <= compressedSize
then (NoCompression, contents, uncompressedSize)
else (Deflate, compressedData, compressedSize)
crc32 = CRC32.crc32 contents
in Entry { eRelativePath = normalizePath path
, eCompressionMethod = compressionMethod
, eLastModified = modtime
, eCRC32 = crc32
, eCompressedSize = fromIntegral finalSize
, eUncompressedSize = fromIntegral uncompressedSize
, eExtraField = B.empty
, eFileComment = B.empty
, eVersionMadeBy = 0
, eInternalFileAttributes = 0
, eExternalFileAttributes = 0
, eCompressedData = finalData
}
readEntry :: [ZipOption] -> FilePath -> IO Entry
readEntry opts path = do
isDir <- doesDirectoryExist path
let path' = let p = path ++ (case reverse path of
('/':_) -> ""
_ | isDir -> "/"
| otherwise -> "") in
(case [(l,a) | OptLocation l a <- opts] of
((l,a):_) -> if a then l </> p else l </> takeFileName p
_ -> p)
contents <- if isDir
then return B.empty
else B.fromStrict <$> S.readFile path
#if MIN_VERSION_directory(1,2,0)
modEpochTime <- fmap (floor . utcTimeToPOSIXSeconds)
$ getModificationTime path
#else
(TOD modEpochTime _) <- getModificationTime path
#endif
let entry = toEntry path' modEpochTime contents
entryE <-
#ifdef _WINDOWS
return $ entry
#else
do fm <- fmap fileMode $ getFileStatus path
let modes = fromIntegral $ shiftL (toInteger fm) 16
return $ entry { eExternalFileAttributes = modes,
eVersionMadeBy = versionMadeBy }
#endif
when (OptVerbose `elem` opts) $ do
let compmethod = case eCompressionMethod entryE of
Deflate -> "deflated"
NoCompression -> "stored"
hPutStrLn stderr $
printf " adding: %s (%s %.f%%)" (eRelativePath entryE)
compmethod (100 (100 * compressionRatio entryE))
return entryE
writeEntry :: [ZipOption] -> Entry -> IO ()
writeEntry opts entry = do
let path = case [d | OptDestination d <- opts] of
(x:_) -> x </> eRelativePath entry
_ -> eRelativePath entry
let dir = takeDirectory path
exists <- doesDirectoryExist dir
unless exists $ do
createDirectoryIfMissing True dir
when (OptVerbose `elem` opts) $
hPutStrLn stderr $ " creating: " ++ dir
if length path > 0 && last path == '/'
then return ()
else do
when (OptVerbose `elem` opts) $ do
hPutStrLn stderr $ case eCompressionMethod entry of
Deflate -> " inflating: " ++ path
NoCompression -> "extracting: " ++ path
let uncompressedData = fromEntry entry
if eCRC32 entry == CRC32.crc32 uncompressedData
then B.writeFile path uncompressedData
else E.throwIO $ CRC32Mismatch path
#ifndef _WINDOWS
let modes = fromIntegral $ shiftR (eExternalFileAttributes entry) 16
when (eVersionMadeBy entry .&. 0xFF00 == 0x0300 &&
modes /= 0) $ setFileMode path modes
#endif
setFileTimeStamp path (eLastModified entry)
addFilesToArchive :: [ZipOption] -> Archive -> [FilePath] -> IO Archive
addFilesToArchive opts archive files = do
filesAndChildren <- if OptRecursive `elem` opts
then mapM getDirectoryContentsRecursive files >>= return . nub . concat
else return files
entries <- mapM (readEntry opts) filesAndChildren
return $ foldr addEntryToArchive archive entries
extractFilesFromArchive :: [ZipOption] -> Archive -> IO ()
extractFilesFromArchive opts archive =
mapM_ (writeEntry opts) $ zEntries archive
normalizePath :: FilePath -> String
normalizePath path =
let dir = takeDirectory path
fn = takeFileName path
(_drive, dir') = splitDrive dir
dirParts = filter (/=".") $ splitDirectories dir'
in intercalate "/" (dirParts ++ [fn])
matches :: FilePath -> FilePath -> Bool
matches fp1 fp2 = normalizePath fp1 == normalizePath fp2
compressData :: CompressionMethod -> B.ByteString -> B.ByteString
compressData Deflate = Zlib.compress
compressData NoCompression = id
decompressData :: CompressionMethod -> B.ByteString -> B.ByteString
decompressData Deflate = Zlib.decompress
decompressData NoCompression = id
compressionRatio :: Entry -> Float
compressionRatio entry =
if eUncompressedSize entry == 0
then 1
else fromIntegral (eCompressedSize entry) / fromIntegral (eUncompressedSize entry)
data MSDOSDateTime = MSDOSDateTime { msDOSDate :: Word16
, msDOSTime :: Word16
} deriving (Read, Show, Eq)
minMSDOSDateTime :: Integer
minMSDOSDateTime = 315532800
epochTimeToMSDOSDateTime :: Integer -> MSDOSDateTime
epochTimeToMSDOSDateTime epochtime | epochtime < minMSDOSDateTime =
epochTimeToMSDOSDateTime minMSDOSDateTime
epochTimeToMSDOSDateTime epochtime =
let ut = toUTCTime (TOD epochtime 0)
dosTime = toEnum $ (ctSec ut `div` 2) + shiftL (ctMin ut) 5 + shiftL (ctHour ut) 11
dosDate = toEnum $ ctDay ut + shiftL (fromEnum (ctMonth ut) + 1) 5 + shiftL (ctYear ut 1980) 9
in MSDOSDateTime { msDOSDate = dosDate, msDOSTime = dosTime }
msDOSDateTimeToEpochTime :: MSDOSDateTime -> Integer
msDOSDateTimeToEpochTime (MSDOSDateTime {msDOSDate = dosDate, msDOSTime = dosTime}) =
let seconds = fromIntegral $ 2 * (dosTime .&. 0O37)
minutes = fromIntegral $ (shiftR dosTime 5) .&. 0O77
hour = fromIntegral $ shiftR dosTime 11
day = fromIntegral $ dosDate .&. 0O37
month = fromIntegral $ ((shiftR dosDate 5) .&. 0O17) 1
year = fromIntegral $ shiftR dosDate 9
timeSinceEpoch = TimeDiff
{ tdYear = year + 10,
tdMonth = month,
tdDay = day 1,
tdHour = hour,
tdMin = minutes,
tdSec = seconds,
tdPicosec = 0 }
(TOD epochsecs _) = addToClockTime timeSinceEpoch (TOD 0 0)
in epochsecs
getDirectoryContentsRecursive :: FilePath -> IO [FilePath]
getDirectoryContentsRecursive path = do
isDir <- doesDirectoryExist path
if isDir
then do
contents <- getDirectoryContents path
let contents' = map (path </>) $ filter (`notElem` ["..","."]) contents
children <- mapM getDirectoryContentsRecursive contents'
if path == "."
then return (concat children)
else return (path : concat children)
else return [path]
setFileTimeStamp :: FilePath -> Integer -> IO ()
setFileTimeStamp file epochtime = do
#ifdef _WINDOWS
return ()
#else
let epochtime' = fromInteger epochtime
setFileTimes file epochtime' epochtime'
#endif
getArchive :: Get Archive
getArchive = do
#if MIN_VERSION_binary(0,6,0)
locals <- many getLocalFile
files <- many (getFileHeader (M.fromList locals))
digSig <- Just `fmap` getDigitalSignature <|> return Nothing
#else
locals <- manySig 0x04034b50 getLocalFile
files <- manySig 0x02014b50 (getFileHeader (M.fromList locals))
digSig <- lookAheadM getDigitalSignature
#endif
endSig <- getWord32le
unless (endSig == 0x06054b50)
$ fail "Did not find end of central directory signature"
skip 2
skip 2
skip 2
skip 2
skip 4
skip 4
commentLength <- getWord16le
zipComment <- getLazyByteString (toEnum $ fromEnum commentLength)
return $ Archive
{ zEntries = files
, zSignature = digSig
, zComment = zipComment
}
putArchive :: Archive -> Put
putArchive archive = do
mapM_ putLocalFile $ zEntries archive
let localFileSizes = map localFileSize $ zEntries archive
let offsets = scanl (+) 0 localFileSizes
let cdOffset = last offsets
_ <- zipWithM putFileHeader offsets (zEntries archive)
putDigitalSignature $ zSignature archive
putWord32le 0x06054b50
putWord16le 0
putWord16le 0
putWord16le $ fromIntegral $ length $ zEntries archive
putWord16le $ fromIntegral $ length $ zEntries archive
putWord32le $ sum $ map fileHeaderSize $ zEntries archive
putWord32le $ fromIntegral cdOffset
putWord16le $ fromIntegral $ B.length $ zComment archive
putLazyByteString $ zComment archive
fileHeaderSize :: Entry -> Word32
fileHeaderSize f =
fromIntegral $ 4 + 2 + 2 + 2 + 2 + 2 + 2 + 4 + 4 + 4 + 2 + 2 + 2 + 2 + 2 + 4 + 4 +
fromIntegral (B.length $ fromString $ normalizePath $ eRelativePath f) +
B.length (eExtraField f) + B.length (eFileComment f)
localFileSize :: Entry -> Word32
localFileSize f =
fromIntegral $ 4 + 2 + 2 + 2 + 2 + 2 + 4 + 4 + 4 + 2 + 2 +
fromIntegral (B.length $ fromString $ normalizePath $ eRelativePath f) +
B.length (eExtraField f) + B.length (eCompressedData f)
getLocalFile :: Get (Word32, B.ByteString)
getLocalFile = do
offset <- bytesRead
getWord32le >>= ensure (== 0x04034b50)
skip 2
bitflag <- getWord16le
skip 2
skip 2
skip 2
skip 4
compressedSize <- getWord32le
when (compressedSize == 0xFFFFFFFF) $
fail "Can't read ZIP64 archive."
skip 4
fileNameLength <- getWord16le
extraFieldLength <- getWord16le
skip (fromIntegral fileNameLength)
skip (fromIntegral extraFieldLength)
compressedData <- if bitflag .&. 0O10 == 0
then getLazyByteString (fromIntegral compressedSize)
else
do raw <- getWordsTilSig 0x08074b50
skip 4
cs <- getWord32le
skip 4
if fromIntegral cs == B.length raw
then return $ raw
else fail "Content size mismatch in data descriptor record"
return (fromIntegral offset, compressedData)
getWordsTilSig :: Word32 -> Get B.ByteString
#if MIN_VERSION_binary(0, 6, 0)
getWordsTilSig sig = (B.fromChunks . reverse) `fmap` go Nothing []
where
sig' = S.pack [fromIntegral $ sig .&. 0xFF,
fromIntegral $ sig `shiftR` 8 .&. 0xFF,
fromIntegral $ sig `shiftR` 16 .&. 0xFF,
fromIntegral $ sig `shiftR` 24 .&. 0xFF]
chunkSize = 16384
checkChunk chunk = do
let (prefix, start) = S.breakSubstring sig' chunk
if S.null start
then return $ Right chunk
else return $ Left $ S.length prefix
go :: Maybe (Word8, Word8, Word8) -> [S.ByteString] -> Get [S.ByteString]
go prefixes acc = do
eitherChunkOrIndex <- lookAheadE $ do
chunk <- getByteString chunkSize <|> B.toStrict `fmap` getRemainingLazyByteString
case prefixes of
Just (byte3,byte2,byte1) ->
let len = S.length chunk in
if len >= 1 &&
S.pack [byte3,byte2,byte1,S.index chunk 0] == sig'
then return $ Left $ 3
else if len >= 2 &&
S.pack [byte2,byte1,S.index chunk 0,S.index chunk 1] == sig'
then return $ Left $ 2
else if len >= 3 &&
S.pack [byte1,S.index chunk 0,S.index chunk 1,S.index chunk 2] == sig'
then return $ Left $ 1
else checkChunk chunk
Nothing -> checkChunk chunk
case eitherChunkOrIndex of
Left index -> if index < 0
then do
skip (4 + index)
return $ (S.take (S.length (head acc) + index) (head acc)) : (tail acc)
else do
lastchunk <- getByteString index
skip 4
return (lastchunk:acc)
Right chunk -> if len == chunkSize
then go prefixes' (chunk:acc)
else fail $ "getWordsTilSig: signature not found before EOF"
where
len = S.length chunk
prefixes' = Just $ (S.index chunk (len 3), S.index chunk (len 2), S.index chunk (len 1))
#else
getWordsTilSig sig = B.pack `fmap` go []
where
go acc = do
sig' <- lookAhead getWord32le
if sig == sig'
then skip 4 >> return (reverse acc)
else do
w <- getWord8
go (w:acc)
#endif
putLocalFile :: Entry -> Put
putLocalFile f = do
putWord32le 0x04034b50
putWord16le 20
putWord16le 0x802
putWord16le $ case eCompressionMethod f of
NoCompression -> 0
Deflate -> 8
let modTime = epochTimeToMSDOSDateTime $ eLastModified f
putWord16le $ msDOSTime modTime
putWord16le $ msDOSDate modTime
putWord32le $ eCRC32 f
putWord32le $ eCompressedSize f
putWord32le $ eUncompressedSize f
putWord16le $ fromIntegral $ B.length $ fromString
$ normalizePath $ eRelativePath f
putWord16le $ fromIntegral $ B.length $ eExtraField f
putLazyByteString $ fromString $ normalizePath $ eRelativePath f
putLazyByteString $ eExtraField f
putLazyByteString $ eCompressedData f
getFileHeader :: M.Map Word32 B.ByteString
-> Get Entry
getFileHeader locals = do
getWord32le >>= ensure (== 0x02014b50)
vmb <- getWord16le
versionNeededToExtract <- getWord8
skip 1
unless (versionNeededToExtract <= 20) $
fail "This archive requires zip >= 2.0 to extract."
skip 2
rawCompressionMethod <- getWord16le
compressionMethod <- case rawCompressionMethod of
0 -> return NoCompression
8 -> return Deflate
_ -> fail $ "Unknown compression method " ++ show rawCompressionMethod
lastModFileTime <- getWord16le
lastModFileDate <- getWord16le
crc32 <- getWord32le
compressedSize <- getWord32le
uncompressedSize <- getWord32le
fileNameLength <- getWord16le
extraFieldLength <- getWord16le
fileCommentLength <- getWord16le
skip 2
internalFileAttributes <- getWord16le
externalFileAttributes <- getWord32le
relativeOffset <- getWord32le
fileName <- getLazyByteString (toEnum $ fromEnum fileNameLength)
extraField <- getLazyByteString (toEnum $ fromEnum extraFieldLength)
fileComment <- getLazyByteString (toEnum $ fromEnum fileCommentLength)
compressedData <- case (M.lookup relativeOffset locals) of
Just x -> return x
Nothing -> fail $ "Unable to find data at offset " ++
show relativeOffset
return $ Entry
{ eRelativePath = toString fileName
, eCompressionMethod = compressionMethod
, eLastModified = msDOSDateTimeToEpochTime $
MSDOSDateTime { msDOSDate = lastModFileDate,
msDOSTime = lastModFileTime }
, eCRC32 = crc32
, eCompressedSize = compressedSize
, eUncompressedSize = uncompressedSize
, eExtraField = extraField
, eFileComment = fileComment
, eVersionMadeBy = vmb
, eInternalFileAttributes = internalFileAttributes
, eExternalFileAttributes = externalFileAttributes
, eCompressedData = compressedData
}
putFileHeader :: Word32
-> Entry
-> Put
putFileHeader offset local = do
putWord32le 0x02014b50
putWord16le $ eVersionMadeBy local
putWord16le 20
putWord16le 0x802
putWord16le $ case eCompressionMethod local of
NoCompression -> 0
Deflate -> 8
let modTime = epochTimeToMSDOSDateTime $ eLastModified local
putWord16le $ msDOSTime modTime
putWord16le $ msDOSDate modTime
putWord32le $ eCRC32 local
putWord32le $ eCompressedSize local
putWord32le $ eUncompressedSize local
putWord16le $ fromIntegral $ B.length $ fromString
$ normalizePath $ eRelativePath local
putWord16le $ fromIntegral $ B.length $ eExtraField local
putWord16le $ fromIntegral $ B.length $ eFileComment local
putWord16le 0
putWord16le $ eInternalFileAttributes local
putWord32le $ eExternalFileAttributes local
putWord32le offset
putLazyByteString $ fromString $ normalizePath $ eRelativePath local
putLazyByteString $ eExtraField local
putLazyByteString $ eFileComment local
#if MIN_VERSION_binary(0,6,0)
getDigitalSignature :: Get B.ByteString
getDigitalSignature = do
getWord32le >>= ensure (== 0x05054b50)
sigSize <- getWord16le
getLazyByteString (toEnum $ fromEnum sigSize)
#else
getDigitalSignature :: Get (Maybe B.ByteString)
getDigitalSignature = do
hdrSig <- getWord32le
if hdrSig /= 0x05054b50
then return Nothing
else do
sigSize <- getWord16le
getLazyByteString (toEnum $ fromEnum sigSize) >>= return . Just
#endif
putDigitalSignature :: Maybe B.ByteString -> Put
putDigitalSignature Nothing = return ()
putDigitalSignature (Just sig) = do
putWord32le 0x05054b50
putWord16le $ fromIntegral $ B.length sig
putLazyByteString sig
ensure :: (a -> Bool) -> a -> Get ()
ensure p val =
if p val
then return ()
else fail "ensure not satisfied"
toString :: B.ByteString -> String
toString = TL.unpack . TL.decodeUtf8
fromString :: String -> B.ByteString
fromString = TL.encodeUtf8 . TL.pack