module Codec.Archive.Zip
(
Archive (..)
, Entry (..)
, CompressionMethod (..)
, ZipOption (..)
, emptyArchive
, toArchive
, toArchiveOrFail
, fromArchive
, filesInArchive
, addEntryToArchive
, deleteEntryFromArchive
, findEntryByPath
, fromEntry
, toEntry
, readEntry
, writeEntry
, addFilesToArchive
, extractFilesFromArchive
) where
import System.Time ( toUTCTime, addToClockTime, CalendarTime (..), ClockTime (..), TimeDiff (..) )
#if MIN_VERSION_directory(1,2,0)
import Data.Time.Clock.POSIX ( utcTimeToPOSIXSeconds )
#endif
import Data.Bits ( shiftL, shiftR, (.&.) )
import Data.Binary
import Data.Binary.Get
import Data.Binary.Put
import Data.List ( nub, find, intercalate )
import Text.Printf
import System.FilePath
import System.Directory ( doesDirectoryExist, getDirectoryContents, createDirectoryIfMissing )
import Control.Monad ( when, unless, zipWithM )
import System.Directory ( getModificationTime )
import System.IO ( stderr, hPutStrLn )
import qualified Data.Digest.CRC32 as CRC32
import qualified Data.Map as M
#if MIN_VERSION_binary(0,6,0)
import Control.Applicative
#endif
#ifndef _WINDOWS
import System.Posix.Files ( setFileTimes )
#endif
import qualified Data.ByteString.Lazy as B
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TL
import qualified Codec.Compression.Zlib.Raw as Zlib
#if !MIN_VERSION_binary(0, 6, 0)
manySig :: Word32 -> Get a -> Get [a]
manySig sig p = do
sig' <- lookAhead getWord32le
if sig == sig'
then do
r <- p
rs <- manySig sig p
return $ r : rs
else return []
#endif
data Archive = Archive
{ zEntries :: [Entry]
, zSignature :: Maybe B.ByteString
, zComment :: B.ByteString
} deriving (Read, Show)
instance Binary Archive where
put = putArchive
get = getArchive
data Entry = Entry
{ eRelativePath :: FilePath
, eCompressionMethod :: CompressionMethod
, eLastModified :: Integer
, eCRC32 :: Word32
, eCompressedSize :: Word32
, eUncompressedSize :: Word32
, eExtraField :: B.ByteString
, eFileComment :: B.ByteString
, eInternalFileAttributes :: Word16
, eExternalFileAttributes :: Word32
, eCompressedData :: B.ByteString
} deriving (Read, Show, Eq)
data CompressionMethod = Deflate
| NoCompression
deriving (Read, Show, Eq)
data ZipOption = OptRecursive
| OptVerbose
| OptDestination FilePath
| OptLocation FilePath Bool
deriving (Read, Show, Eq)
emptyArchive :: Archive
emptyArchive = Archive
{ zEntries = []
, zSignature = Nothing
, zComment = B.empty }
toArchive :: B.ByteString -> Archive
toArchive = decode
toArchiveOrFail :: B.ByteString -> Either String Archive
#if MIN_VERSION_binary(0,7,0)
toArchiveOrFail bs = case decodeOrFail bs of
Left (_,_,e) -> Left e
Right (_,_,x) -> Right x
#else
toArchiveOrFail bs = Right $ toArchive bs
#endif
fromArchive :: Archive -> B.ByteString
fromArchive = encode
filesInArchive :: Archive -> [FilePath]
filesInArchive = (map eRelativePath) . zEntries
addEntryToArchive :: Entry -> Archive -> Archive
addEntryToArchive entry archive =
let archive' = deleteEntryFromArchive (eRelativePath entry) archive
oldEntries = zEntries archive'
in archive' { zEntries = entry : oldEntries }
deleteEntryFromArchive :: FilePath -> Archive -> Archive
deleteEntryFromArchive path archive =
archive { zEntries = [e | e <- zEntries archive
, not (eRelativePath e `matches` path)] }
findEntryByPath :: FilePath -> Archive -> Maybe Entry
findEntryByPath path archive =
find (\e -> path `matches` eRelativePath e) (zEntries archive)
fromEntry :: Entry -> B.ByteString
fromEntry entry =
let uncompressedData = decompressData (eCompressionMethod entry) (eCompressedData entry)
in if eCRC32 entry == CRC32.crc32 uncompressedData
then uncompressedData
else error "CRC32 mismatch"
toEntry :: FilePath
-> Integer
-> B.ByteString
-> Entry
toEntry path modtime contents =
let uncompressedSize = B.length contents
compressedData = compressData Deflate contents
compressedSize = B.length compressedData
(compressionMethod, finalData, finalSize) =
if uncompressedSize <= compressedSize
then (NoCompression, contents, uncompressedSize)
else (Deflate, compressedData, compressedSize)
crc32 = CRC32.crc32 contents
in Entry { eRelativePath = normalizePath path
, eCompressionMethod = compressionMethod
, eLastModified = modtime
, eCRC32 = crc32
, eCompressedSize = fromIntegral finalSize
, eUncompressedSize = fromIntegral uncompressedSize
, eExtraField = B.empty
, eFileComment = B.empty
, eInternalFileAttributes = 0
, eExternalFileAttributes = 0
, eCompressedData = finalData
}
readEntry :: [ZipOption] -> FilePath -> IO Entry
readEntry opts path = do
isDir <- doesDirectoryExist path
let path' = let p = path ++ (case reverse path of
('/':_) -> ""
_ | isDir -> "/"
| otherwise -> "") in
(case [(l,a) | OptLocation l a <- opts] of
((l,a):_) -> if a then l </> p else l
_ -> p)
contents <- if isDir
then return B.empty
else B.readFile path
#if MIN_VERSION_directory(1,2,0)
modEpochTime <- fmap (floor . utcTimeToPOSIXSeconds)
$ getModificationTime path
#else
(TOD modEpochTime _) <- getModificationTime path
#endif
let entry = toEntry path' modEpochTime contents
when (OptVerbose `elem` opts) $ do
let compmethod = case eCompressionMethod entry of
Deflate -> "deflated"
NoCompression -> "stored"
hPutStrLn stderr $
printf " adding: %s (%s %.f%%)" (eRelativePath entry)
compmethod (100 (100 * compressionRatio entry))
return entry
writeEntry :: [ZipOption] -> Entry -> IO ()
writeEntry opts entry = do
let path = case [d | OptDestination d <- opts] of
(x:_) -> x </> eRelativePath entry
_ -> eRelativePath entry
let dir = takeDirectory path
exists <- doesDirectoryExist dir
unless exists $ do
createDirectoryIfMissing True dir
when (OptVerbose `elem` opts) $
hPutStrLn stderr $ " creating: " ++ dir
if length path > 0 && last path == '/'
then return ()
else do
when (OptVerbose `elem` opts) $ do
hPutStrLn stderr $ case eCompressionMethod entry of
Deflate -> " inflating: " ++ path
NoCompression -> "extracting: " ++ path
B.writeFile path (fromEntry entry)
setFileTimeStamp path (eLastModified entry)
addFilesToArchive :: [ZipOption] -> Archive -> [FilePath] -> IO Archive
addFilesToArchive opts archive files = do
filesAndChildren <- if OptRecursive `elem` opts
then mapM getDirectoryContentsRecursive files >>= return . nub . concat
else return files
entries <- mapM (readEntry opts) filesAndChildren
return $ foldr addEntryToArchive archive entries
extractFilesFromArchive :: [ZipOption] -> Archive -> IO ()
extractFilesFromArchive opts archive =
mapM_ (writeEntry opts) $ zEntries archive
normalizePath :: FilePath -> String
normalizePath path =
let dir = takeDirectory path
fn = takeFileName path
(_drive, dir') = splitDrive dir
dirParts = filter (/=".") $ splitDirectories dir'
in intercalate "/" (dirParts ++ [fn])
matches :: FilePath -> FilePath -> Bool
matches fp1 fp2 = normalizePath fp1 == normalizePath fp2
compressData :: CompressionMethod -> B.ByteString -> B.ByteString
compressData Deflate = Zlib.compress
compressData NoCompression = id
decompressData :: CompressionMethod -> B.ByteString -> B.ByteString
decompressData Deflate = Zlib.decompress
decompressData NoCompression = id
compressionRatio :: Entry -> Float
compressionRatio entry =
if eUncompressedSize entry == 0
then 1
else fromIntegral (eCompressedSize entry) / fromIntegral (eUncompressedSize entry)
data MSDOSDateTime = MSDOSDateTime { msDOSDate :: Word16
, msDOSTime :: Word16
} deriving (Read, Show, Eq)
minMSDOSDateTime :: Integer
minMSDOSDateTime = 315532800
epochTimeToMSDOSDateTime :: Integer -> MSDOSDateTime
epochTimeToMSDOSDateTime epochtime | epochtime < minMSDOSDateTime =
epochTimeToMSDOSDateTime minMSDOSDateTime
epochTimeToMSDOSDateTime epochtime =
let ut = toUTCTime (TOD epochtime 0)
dosTime = toEnum $ (ctSec ut `div` 2) + shiftL (ctMin ut) 5 + shiftL (ctHour ut) 11
dosDate = toEnum $ ctDay ut + shiftL (fromEnum (ctMonth ut) + 1) 5 + shiftL (ctYear ut 1980) 9
in MSDOSDateTime { msDOSDate = dosDate, msDOSTime = dosTime }
msDOSDateTimeToEpochTime :: MSDOSDateTime -> Integer
msDOSDateTimeToEpochTime (MSDOSDateTime {msDOSDate = dosDate, msDOSTime = dosTime}) =
let seconds = fromIntegral $ 2 * (dosTime .&. 0O37)
minutes = fromIntegral $ (shiftR dosTime 5) .&. 0O77
hour = fromIntegral $ shiftR dosTime 11
day = fromIntegral $ dosDate .&. 0O37
month = fromIntegral $ ((shiftR dosDate 5) .&. 0O17) 1
year = fromIntegral $ shiftR dosDate 9
timeSinceEpoch = TimeDiff
{ tdYear = year + 10,
tdMonth = month,
tdDay = day 1,
tdHour = hour,
tdMin = minutes,
tdSec = seconds,
tdPicosec = 0 }
(TOD epochsecs _) = addToClockTime timeSinceEpoch (TOD 0 0)
in epochsecs
getDirectoryContentsRecursive :: FilePath -> IO [FilePath]
getDirectoryContentsRecursive path = do
isDir <- doesDirectoryExist path
if isDir
then do
contents <- getDirectoryContents path
let contents' = map (path </>) $ filter (`notElem` ["..","."]) contents
children <- mapM getDirectoryContentsRecursive contents'
if path == "."
then return (concat children)
else return (path : concat children)
else return [path]
setFileTimeStamp :: FilePath -> Integer -> IO ()
setFileTimeStamp file epochtime = do
#ifdef _WINDOWS
return ()
#else
let epochtime' = fromInteger epochtime
setFileTimes file epochtime' epochtime'
#endif
getArchive :: Get Archive
getArchive = do
#if MIN_VERSION_binary(0,6,0)
locals <- many getLocalFile
files <- many (getFileHeader (M.fromList locals))
digSig <- Just `fmap` getDigitalSignature <|> return Nothing
#else
locals <- manySig 0x04034b50 getLocalFile
files <- manySig 0x02014b50 (getFileHeader (M.fromList locals))
digSig <- lookAheadM getDigitalSignature
#endif
endSig <- getWord32le
unless (endSig == 0x06054b50)
$ fail "Did not find end of central directory signature"
skip 2
skip 2
skip 2
skip 2
skip 4
skip 4
commentLength <- getWord16le
zipComment <- getLazyByteString (toEnum $ fromEnum commentLength)
return $ Archive
{ zEntries = files
, zSignature = digSig
, zComment = zipComment
}
putArchive :: Archive -> Put
putArchive archive = do
mapM_ putLocalFile $ zEntries archive
let localFileSizes = map localFileSize $ zEntries archive
let offsets = scanl (+) 0 localFileSizes
let cdOffset = last offsets
_ <- zipWithM putFileHeader offsets (zEntries archive)
putDigitalSignature $ zSignature archive
putWord32le 0x06054b50
putWord16le 0
putWord16le 0
putWord16le $ fromIntegral $ length $ zEntries archive
putWord16le $ fromIntegral $ length $ zEntries archive
putWord32le $ sum $ map fileHeaderSize $ zEntries archive
putWord32le $ fromIntegral cdOffset
putWord16le $ fromIntegral $ B.length $ zComment archive
putLazyByteString $ zComment archive
fileHeaderSize :: Entry -> Word32
fileHeaderSize f =
fromIntegral $ 4 + 2 + 2 + 2 + 2 + 2 + 2 + 4 + 4 + 4 + 2 + 2 + 2 + 2 + 2 + 4 + 4 +
fromIntegral (B.length $ fromString $ normalizePath $ eRelativePath f) +
B.length (eExtraField f) + B.length (eFileComment f)
localFileSize :: Entry -> Word32
localFileSize f =
fromIntegral $ 4 + 2 + 2 + 2 + 2 + 2 + 4 + 4 + 4 + 2 + 2 +
fromIntegral (B.length $ fromString $ normalizePath $ eRelativePath f) +
B.length (eExtraField f) + B.length (eCompressedData f)
getLocalFile :: Get (Word32, B.ByteString)
getLocalFile = do
offset <- bytesRead
getWord32le >>= ensure (== 0x04034b50)
skip 2
bitflag <- getWord16le
skip 2
skip 2
skip 2
skip 4
compressedSize <- getWord32le
when (compressedSize == 0xFFFFFFFF) $
fail "Can't read ZIP64 archive."
skip 4
fileNameLength <- getWord16le
extraFieldLength <- getWord16le
skip (fromIntegral fileNameLength)
skip (fromIntegral extraFieldLength)
compressedData <- if bitflag .&. 0O10 == 0
then getLazyByteString (fromIntegral compressedSize)
else
do raw <- getWordsTilSig 0x08074b50
skip 4
cs <- getWord32le
skip 4
if fromIntegral cs == length raw
then return $ B.pack raw
else fail "Content size mismatch in data descriptor record"
return (fromIntegral offset, compressedData)
getWordsTilSig :: Word32 -> Get [Word8]
getWordsTilSig sig = go []
where
go acc = do
#if MIN_VERSION_binary(0, 6, 0)
(getWord32le >>= ensure (== sig) >> return (reverse acc)) <|>
do w <- getWord8
go (w:acc)
#else
sig' <- lookAhead getWord32le
if sig == sig'
then skip 4 >> return (reverse acc)
else do
w <- getWord8
go (w:acc)
#endif
putLocalFile :: Entry -> Put
putLocalFile f = do
putWord32le 0x04034b50
putWord16le 20
putWord16le 0x802
putWord16le $ case eCompressionMethod f of
NoCompression -> 0
Deflate -> 8
let modTime = epochTimeToMSDOSDateTime $ eLastModified f
putWord16le $ msDOSTime modTime
putWord16le $ msDOSDate modTime
putWord32le $ eCRC32 f
putWord32le $ eCompressedSize f
putWord32le $ eUncompressedSize f
putWord16le $ fromIntegral $ B.length $ fromString
$ normalizePath $ eRelativePath f
putWord16le $ fromIntegral $ B.length $ eExtraField f
putLazyByteString $ fromString $ normalizePath $ eRelativePath f
putLazyByteString $ eExtraField f
putLazyByteString $ eCompressedData f
getFileHeader :: M.Map Word32 B.ByteString
-> Get Entry
getFileHeader locals = do
getWord32le >>= ensure (== 0x02014b50)
skip 2
versionNeededToExtract <- getWord8
skip 1
unless (versionNeededToExtract <= 20) $
fail "This archive requires zip >= 2.0 to extract."
skip 2
rawCompressionMethod <- getWord16le
compressionMethod <- case rawCompressionMethod of
0 -> return NoCompression
8 -> return Deflate
_ -> fail $ "Unknown compression method " ++ show rawCompressionMethod
lastModFileTime <- getWord16le
lastModFileDate <- getWord16le
crc32 <- getWord32le
compressedSize <- getWord32le
uncompressedSize <- getWord32le
fileNameLength <- getWord16le
extraFieldLength <- getWord16le
fileCommentLength <- getWord16le
skip 2
internalFileAttributes <- getWord16le
externalFileAttributes <- getWord32le
relativeOffset <- getWord32le
fileName <- getLazyByteString (toEnum $ fromEnum fileNameLength)
extraField <- getLazyByteString (toEnum $ fromEnum extraFieldLength)
fileComment <- getLazyByteString (toEnum $ fromEnum fileCommentLength)
compressedData <- case (M.lookup relativeOffset locals) of
Just x -> return x
Nothing -> fail $ "Unable to find data at offset " ++
show relativeOffset
return $ Entry
{ eRelativePath = toString fileName
, eCompressionMethod = compressionMethod
, eLastModified = msDOSDateTimeToEpochTime $
MSDOSDateTime { msDOSDate = lastModFileDate,
msDOSTime = lastModFileTime }
, eCRC32 = crc32
, eCompressedSize = compressedSize
, eUncompressedSize = uncompressedSize
, eExtraField = extraField
, eFileComment = fileComment
, eInternalFileAttributes = internalFileAttributes
, eExternalFileAttributes = externalFileAttributes
, eCompressedData = compressedData
}
putFileHeader :: Word32
-> Entry
-> Put
putFileHeader offset local = do
putWord32le 0x02014b50
putWord16le 0
putWord16le 20
putWord16le 0x802
putWord16le $ case eCompressionMethod local of
NoCompression -> 0
Deflate -> 8
let modTime = epochTimeToMSDOSDateTime $ eLastModified local
putWord16le $ msDOSTime modTime
putWord16le $ msDOSDate modTime
putWord32le $ eCRC32 local
putWord32le $ eCompressedSize local
putWord32le $ eUncompressedSize local
putWord16le $ fromIntegral $ B.length $ fromString
$ normalizePath $ eRelativePath local
putWord16le $ fromIntegral $ B.length $ eExtraField local
putWord16le $ fromIntegral $ B.length $ eFileComment local
putWord16le 0
putWord16le $ eInternalFileAttributes local
putWord32le $ eExternalFileAttributes local
putWord32le offset
putLazyByteString $ fromString $ normalizePath $ eRelativePath local
putLazyByteString $ eExtraField local
putLazyByteString $ eFileComment local
#if MIN_VERSION_binary(0,6,0)
getDigitalSignature :: Get B.ByteString
getDigitalSignature = do
getWord32le >>= ensure (== 0x05054b50)
sigSize <- getWord16le
getLazyByteString (toEnum $ fromEnum sigSize)
#else
getDigitalSignature :: Get (Maybe B.ByteString)
getDigitalSignature = do
hdrSig <- getWord32le
if hdrSig /= 0x05054b50
then return Nothing
else do
sigSize <- getWord16le
getLazyByteString (toEnum $ fromEnum sigSize) >>= return . Just
#endif
putDigitalSignature :: Maybe B.ByteString -> Put
putDigitalSignature Nothing = return ()
putDigitalSignature (Just sig) = do
putWord32le 0x05054b50
putWord16le $ fromIntegral $ B.length sig
putLazyByteString sig
ensure :: (a -> Bool) -> a -> Get ()
ensure p val =
if p val
then return ()
else fail "ensure not satisfied"
toString :: B.ByteString -> String
toString = TL.unpack . TL.decodeUtf8
fromString :: String -> B.ByteString
fromString = TL.encodeUtf8 . TL.pack