{-# LANGUAGE Safe #-}
{- arch-tag: GZip file support in Haskell
Copyright (c) 2004-2011 John Goerzen <jgoerzen@complete.org>

All rights reserved.

For license and copyright information, see the file LICENSE
-}

{- |
   Module     : System.FileArchive.GZip
   Copyright  : Copyright (C) 2004-2011 John Goerzen
   SPDX-License-Identifier: BSD-3-Clause

   Stability  : stable
   Portability: portable

GZip file decompression

Copyright (c) 2004 John Goerzen, jgoerzen\@complete.org

The GZip format is described in RFC1952.
-}
module System.FileArchive.GZip (
                                  -- * GZip Files
                                  -- $gzipfiles

                                  -- * Types
                                  Header(..), Section, GZipError(..),
                                  Footer(..),
                                  -- * Whole-File Processing
                                  decompress,
                                  hDecompress,
                                  read_sections,
                                  -- * Section Processing
                                  read_header,
                                  read_section
                                 )
    where

import           Control.Monad.Error
import           Data.Bits                ((.&.))
import           Data.Bits.Utils          (fromBytes)
import           Data.Char                (ord)
import           Data.Compression.Inflate (inflate_string_remainder)
import           Data.Hash.CRC32.GZip     (update_crc)
import           Data.Word                (Word32)
import           System.IO                (Handle, hGetContents, hPutStr)

data GZipError = CRCError               -- ^ CRC-32 check failed
               | NotGZIPFile            -- ^ Couldn't find a GZip header
               | UnknownMethod          -- ^ Compressed with something other than method 8 (deflate)
               | UnknownError String    -- ^ Other problem arose
               deriving (GZipError -> GZipError -> Bool
(GZipError -> GZipError -> Bool)
-> (GZipError -> GZipError -> Bool) -> Eq GZipError
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: GZipError -> GZipError -> Bool
$c/= :: GZipError -> GZipError -> Bool
== :: GZipError -> GZipError -> Bool
$c== :: GZipError -> GZipError -> Bool
Eq, Int -> GZipError -> ShowS
[GZipError] -> ShowS
GZipError -> [Char]
(Int -> GZipError -> ShowS)
-> (GZipError -> [Char])
-> ([GZipError] -> ShowS)
-> Show GZipError
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
showList :: [GZipError] -> ShowS
$cshowList :: [GZipError] -> ShowS
show :: GZipError -> [Char]
$cshow :: GZipError -> [Char]
showsPrec :: Int -> GZipError -> ShowS
$cshowsPrec :: Int -> GZipError -> ShowS
Show)

instance Error GZipError where
    noMsg :: GZipError
noMsg = [Char] -> GZipError
UnknownError [Char]
""
    strMsg :: [Char] -> GZipError
strMsg = [Char] -> GZipError
UnknownError

-- | First two bytes of file
magic :: String
magic :: [Char]
magic = [Char]
"\x1f\x8b"

-- | Flags
fFHCRC, fFEXTRA, fFNAME, fFCOMMENT :: Int
-- fFTEXT = 1 :: Int
fFHCRC :: Int
fFHCRC = Int
2
fFEXTRA :: Int
fFEXTRA = Int
4
fFNAME :: Int
fFNAME = Int
8
fFCOMMENT :: Int
fFCOMMENT = Int
16

{- | The data structure representing the GZip header.  This occurs
at the beginning of each 'Section' on disk. -}
data Header = Header {
                      Header -> Int
method   :: Int,    -- ^ Compression method.  Only 8 is defined at present.
                      Header -> Int
flags    :: Int,
                      Header -> Maybe [Char]
extra    :: Maybe String,
                      Header -> Maybe [Char]
filename :: Maybe String,
                      Header -> Maybe [Char]
comment  :: Maybe String,
                      Header -> Word32
mtime    :: Word32,  -- ^ Modification time of the original file
                      Header -> Int
xfl      :: Int,       -- ^ Extra flags
                      Header -> Int
os       :: Int         -- ^ Creating operating system
                     } deriving (Header -> Header -> Bool
(Header -> Header -> Bool)
-> (Header -> Header -> Bool) -> Eq Header
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Header -> Header -> Bool
$c/= :: Header -> Header -> Bool
== :: Header -> Header -> Bool
$c== :: Header -> Header -> Bool
Eq, Int -> Header -> ShowS
[Header] -> ShowS
Header -> [Char]
(Int -> Header -> ShowS)
-> (Header -> [Char]) -> ([Header] -> ShowS) -> Show Header
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
showList :: [Header] -> ShowS
$cshowList :: [Header] -> ShowS
show :: Header -> [Char]
$cshow :: Header -> [Char]
showsPrec :: Int -> Header -> ShowS
$cshowsPrec :: Int -> Header -> ShowS
Show)

{- | Stored on-disk at the end of each section. -}
data Footer = Footer {
                      Footer -> Word32
size       :: Word32,   -- ^ The size of the original, decompressed data
                      Footer -> Word32
crc32      :: Word32,  -- ^ The stored GZip CRC-32 of the original, decompressed data
                      Footer -> Bool
crc32valid :: Bool -- ^ Whether or not the stored CRC-32 matches the calculated CRC-32 of the data
                     }

{- | A section represents a compressed component in a GZip file.
Every GZip file has at least one. -}
type Section = (Header, String, Footer)

split1 :: String -> (Char, String)
split1 :: [Char] -> (Char, [Char])
split1 [Char]
s = ([Char] -> Char
forall a. [a] -> a
head [Char]
s, ShowS
forall a. [a] -> [a]
tail [Char]
s)

{- | Read a GZip file, decompressing all sections found.

Writes the decompressed data stream to the given output handle.

Returns Nothing if the action was successful, or Just GZipError if there
was a problem.  If there was a problem, the data written to the output
handle should be discarded.
-}

hDecompress :: Handle                   -- ^ Input handle
            -> Handle                   -- ^ Output handle
            -> IO (Maybe GZipError)
hDecompress :: Handle -> Handle -> IO (Maybe GZipError)
hDecompress Handle
infd Handle
outfd =
    do [Char]
inc <- Handle -> IO [Char]
hGetContents Handle
infd
       let ([Char]
outstr, Maybe GZipError
err) = [Char] -> ([Char], Maybe GZipError)
decompress [Char]
inc
       Handle -> [Char] -> IO ()
hPutStr Handle
outfd [Char]
outstr
       Maybe GZipError -> IO (Maybe GZipError)
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe GZipError
err

{- | Read a GZip file, decompressing all sections that are found.

Returns a decompresed data stream and Nothing, or an unreliable string
and Just (error).  If you get anything other than Nothing, the String
returned should be discarded.
-}
decompress :: String -> (String, Maybe GZipError)
{-
decompress s =
    do x <- read_header s
       let rem = snd x
       return $ inflate_string rem
-}
decompress :: [Char] -> ([Char], Maybe GZipError)
decompress [Char]
s =
    let procs :: [Section] -> (String, Bool)
        procs :: [Section] -> ([Char], Bool)
procs [] = ([], Bool
True)
        procs ((Header
_, [Char]
content, Footer
foot):[Section]
xs) =
            let ([Char]
nexth, Bool
nextb) = [Section] -> ([Char], Bool)
procs [Section]
xs in
                ([Char]
content [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
nexth, (Footer -> Bool
crc32valid Footer
foot) Bool -> Bool -> Bool
&& Bool
nextb)
        in case [Char] -> Either GZipError [Section]
read_sections [Char]
s of
           Left GZipError
x -> ([Char]
"", GZipError -> Maybe GZipError
forall a. a -> Maybe a
Just GZipError
x)
           Right [Section]
x -> let ([Char]
decomp, Bool
iscrcok) = [Section] -> ([Char], Bool)
procs [Section]
x
                          in ([Char]
decomp, if Bool
iscrcok then Maybe GZipError
forall a. Maybe a
Nothing else GZipError -> Maybe GZipError
forall a. a -> Maybe a
Just GZipError
CRCError)

{-
decompress s = do x <- read_sections s
                  return $ concatMap (\(_, x, _) -> x) x
-}

-- | Read all sections.
read_sections :: String -> Either GZipError [Section]
read_sections :: [Char] -> Either GZipError [Section]
read_sections [] = [Section] -> Either GZipError [Section]
forall a b. b -> Either a b
Right []
read_sections [Char]
s =
    do (Section, [Char])
x <- [Char] -> Either GZipError (Section, [Char])
read_section [Char]
s
       case (Section, [Char])
x of
           (Section
sect, [Char]
remain) ->
               do [Section]
next <- [Char] -> Either GZipError [Section]
read_sections [Char]
remain
                  [Section] -> Either GZipError [Section]
forall (m :: * -> *) a. Monad m => a -> m a
return ([Section] -> Either GZipError [Section])
-> [Section] -> Either GZipError [Section]
forall a b. (a -> b) -> a -> b
$ Section
sect Section -> [Section] -> [Section]
forall a. a -> [a] -> [a]
: [Section]
next

parseword :: String -> Word32
parseword :: [Char] -> Word32
parseword [Char]
s = [Word32] -> Word32
forall a. (Bits a, Num a) => [a] -> a
fromBytes ([Word32] -> Word32) -> [Word32] -> Word32
forall a b. (a -> b) -> a -> b
$ (Char -> Word32) -> [Char] -> [Word32]
forall a b. (a -> b) -> [a] -> [b]
map (Int -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word32) -> (Char -> Int) -> Char -> Word32
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Int
ord) ([Char] -> [Word32]) -> [Char] -> [Word32]
forall a b. (a -> b) -> a -> b
$ ShowS
forall a. [a] -> [a]
reverse [Char]
s

-- | Read one section, returning (ThisSection, Remainder)
read_section :: String -> Either GZipError (Section, String)
read_section :: [Char] -> Either GZipError (Section, [Char])
read_section [Char]
s =
        do (Header, [Char])
x <- [Char] -> Either GZipError (Header, [Char])
read_header [Char]
s
           let headerrem :: [Char]
headerrem = (Header, [Char]) -> [Char]
forall a b. (a, b) -> b
snd (Header, [Char])
x
           let ([Char]
decompressed, Word32
crc, [Char]
remainder) = [Char] -> ([Char], Word32, [Char])
read_data [Char]
headerrem
           let ([Char]
crc32str, [Char]
rm) = Int -> [Char] -> ([Char], [Char])
forall a. Int -> [a] -> ([a], [a])
splitAt Int
4 [Char]
remainder
           let ([Char]
sizestr, [Char]
rem2) = Int -> [Char] -> ([Char], [Char])
forall a. Int -> [a] -> ([a], [a])
splitAt Int
4 [Char]
rm
           let filecrc32 :: Word32
filecrc32 = [Char] -> Word32
parseword [Char]
crc32str
           let filesize :: Word32
filesize = [Char] -> Word32
parseword [Char]
sizestr
           (Section, [Char]) -> Either GZipError (Section, [Char])
forall (m :: * -> *) a. Monad m => a -> m a
return (((Header, [Char]) -> Header
forall a b. (a, b) -> a
fst (Header, [Char])
x, [Char]
decompressed,
                   Footer {size :: Word32
size = Word32
filesize, crc32 :: Word32
crc32 = Word32
filecrc32,
                           crc32valid :: Bool
crc32valid = Word32
filecrc32 Word32 -> Word32 -> Bool
forall a. Eq a => a -> a -> Bool
== Word32
crc})
                   ,[Char]
rem2)

-- | Read the file's compressed data, returning
-- (Decompressed, Calculated CRC32, Remainder)
read_data :: String -> (String, Word32, String)
read_data :: [Char] -> ([Char], Word32, [Char])
read_data [Char]
x =
    let ([Char]
decompressed1, [Char]
remainder) = [Char] -> ([Char], [Char])
inflate_string_remainder [Char]
x
        ([Char]
decompressed, Word32
crc32) = [Char] -> Word32 -> ([Char], Word32)
read_data_internal [Char]
decompressed1 Word32
0
        in
          ([Char]
decompressed, Word32
crc32, [Char]
remainder)
    where
      read_data_internal :: [Char] -> Word32 -> ([Char], Word32)
read_data_internal [] Word32
ck = ([], Word32
ck)
      read_data_internal (Char
y:[Char]
ys) Word32
ck =
        let newcrc :: Word32
newcrc = Word32 -> Char -> Word32
update_crc Word32
ck Char
y
            n :: ([Char], Word32)
n = Word32
newcrc Word32 -> ([Char], Word32) -> ([Char], Word32)
`seq` [Char] -> Word32 -> ([Char], Word32)
read_data_internal [Char]
ys Word32
newcrc
            in
            (Char
y Char -> ShowS
forall a. a -> [a] -> [a]
: ([Char], Word32) -> [Char]
forall a b. (a, b) -> a
fst ([Char], Word32)
n, ([Char], Word32) -> Word32
forall a b. (a, b) -> b
snd ([Char], Word32)
n)



{- | Read the GZip header.  Return (Header, Remainder).
-}
read_header :: String -> Either GZipError (Header, String)
read_header :: [Char] -> Either GZipError (Header, [Char])
read_header [Char]
s =
    let ok :: Either a [Char]
ok = [Char] -> Either a [Char]
forall a b. b -> Either a b
Right [Char]
"ok" in
    do let ([Char]
mag, [Char]
rem) = Int -> [Char] -> ([Char], [Char])
forall a. Int -> [a] -> ([a], [a])
splitAt Int
2 [Char]
s
       if [Char]
mag [Char] -> [Char] -> Bool
forall a. Eq a => a -> a -> Bool
/= [Char]
magic
          then GZipError -> Either GZipError [Char]
forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError GZipError
NotGZIPFile
          else Either GZipError [Char]
forall {a}. Either a [Char]
ok
       let (Char
method, [Char]
rem2) = [Char] -> (Char, [Char])
split1 [Char]
rem
       if (Char -> Int
ord(Char
method) Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= Int
8)
          then GZipError -> Either GZipError [Char]
forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError GZipError
UnknownMethod
          else Either GZipError [Char]
forall {a}. Either a [Char]
ok
       let (Char
flag_S, [Char]
rem3) = [Char] -> (Char, [Char])
split1 [Char]
rem2
       let flag :: Int
flag = Char -> Int
ord Char
flag_S
       let ([Char]
mtimea, [Char]
rem3a) = Int -> [Char] -> ([Char], [Char])
forall a. Int -> [a] -> ([a], [a])
splitAt Int
4 [Char]
rem3
       let mtime :: Word32
mtime = [Char] -> Word32
parseword [Char]
mtimea
       let (Char
xfla, [Char]
rem3b) = [Char] -> (Char, [Char])
split1 [Char]
rem3a
       let xfl :: Int
xfl = Char -> Int
ord Char
xfla
       let (Char
osa, [Char]
_) = [Char] -> (Char, [Char])
split1 [Char]
rem3b
       let os :: Int
os = Char -> Int
ord Char
osa
       -- skip modtime (4), extraflag (1), and os (1)
       let rem4 :: [Char]
rem4 = Int -> ShowS
forall a. Int -> [a] -> [a]
drop Int
6 [Char]
rem3

       let (Maybe [Char]
extra, [Char]
rem5) =
               if (Int
flag Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
fFEXTRA Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= Int
0)
               -- Skip past the extra field if we have it.
                  then let (Char
xlen_S, [Char]
_) = [Char] -> (Char, [Char])
split1 [Char]
rem4
                           (Char
xlen2_S, [Char]
rem4b) = [Char] -> (Char, [Char])
split1 [Char]
rem4
                           xlen :: Int
xlen = (Char -> Int
ord Char
xlen_S) Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
256 Int -> Int -> Int
forall a. Num a => a -> a -> a
* (Char -> Int
ord Char
xlen2_S)
                           ([Char]
ex, [Char]
rrem) = Int -> [Char] -> ([Char], [Char])
forall a. Int -> [a] -> ([a], [a])
splitAt Int
xlen [Char]
rem4b
                           in ([Char] -> Maybe [Char]
forall a. a -> Maybe a
Just [Char]
ex, [Char]
rrem)
                  else (Maybe [Char]
forall a. Maybe a
Nothing, [Char]
rem4)

       let (Maybe [Char]
filename, [Char]
rem6) =
               if (Int
flag Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
fFNAME Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= Int
0)
               -- Skip past the null-terminated filename
                  then let fn :: [Char]
fn = (Char -> Bool) -> ShowS
forall a. (a -> Bool) -> [a] -> [a]
takeWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'\x00') [Char]
rem5
                                in ([Char] -> Maybe [Char]
forall a. a -> Maybe a
Just [Char]
fn, Int -> ShowS
forall a. Int -> [a] -> [a]
drop (([Char] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Char]
fn) Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) [Char]
rem5)
                  else (Maybe [Char]
forall a. Maybe a
Nothing, [Char]
rem5)

       let (Maybe [Char]
comment, [Char]
rem7) =
               if (Int
flag Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
fFCOMMENT Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= Int
0)
                  -- Skip past the null-terminated comment
                  then let cm :: [Char]
cm = (Char -> Bool) -> ShowS
forall a. (a -> Bool) -> [a] -> [a]
takeWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'\x00') [Char]
rem6
                           in ([Char] -> Maybe [Char]
forall a. a -> Maybe a
Just [Char]
cm, Int -> ShowS
forall a. Int -> [a] -> [a]
drop (([Char] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Char]
cm) Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) [Char]
rem6)
                  else (Maybe [Char]
forall a. Maybe a
Nothing, [Char]
rem6)

       [Char]
rem8 <- if (Int
flag Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
fFHCRC Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= Int
0)
                  -- Skip past the header CRC
                  then [Char] -> Either GZipError [Char]
forall (m :: * -> *) a. Monad m => a -> m a
return ([Char] -> Either GZipError [Char])
-> [Char] -> Either GZipError [Char]
forall a b. (a -> b) -> a -> b
$ Int -> ShowS
forall a. Int -> [a] -> [a]
drop Int
2 [Char]
rem7
                  else [Char] -> Either GZipError [Char]
forall (m :: * -> *) a. Monad m => a -> m a
return [Char]
rem7

       (Header, [Char]) -> Either GZipError (Header, [Char])
forall (m :: * -> *) a. Monad m => a -> m a
return (Header {method :: Int
method = Char -> Int
ord Char
method,
                      flags :: Int
flags = Int
flag,
                      extra :: Maybe [Char]
extra = Maybe [Char]
extra,
                      filename :: Maybe [Char]
filename = Maybe [Char]
filename,
                      comment :: Maybe [Char]
comment = Maybe [Char]
comment,
                      mtime :: Word32
mtime = Word32
mtime,
                      xfl :: Int
xfl = Int
xfl,
                      os :: Int
os = Int
os}, [Char]
rem8)

----------------------------------------------------------------------
-- Documentation
----------------------------------------------------------------------

{- $gzipfiles

GZip files contain one or more 'Section's.  Each 'Section', on disk, begins
with a GZip 'Header', then stores the compressed data itself, and finally
stores a GZip 'Footer'.

The 'Header' identifies the file as a GZip file, records the original
modification date and time, and, in some cases, also records the original
filename and comments.

The 'Footer' contains a GZip CRC32 checksum over the decompressed data as
well as a 32-bit length of the decompressed data.  The module
'Data.Hash.CRC32.GZip' is used to validate stored CRC32 values.

The vast majority of GZip files contain only one 'Section'.  Standard tools
that work with GZip files create single-section files by default.

Multi-section files can be created by simply concatenating two existing
GZip files together.  The standard gunzip and zcat tools will simply
concatenate the decompressed data when reading these files back.  The
'decompress' function in this module will do the same.

When reading data from this module, please use caution regarding how you access
it.  For instance, if you are wanting to write the decompressed stream
to disk and validate its CRC32 value, you could use the 'decompress'
function.  However, you should process the entire stream before you check
the value of the Bool it returns.  Otherwise, you will force Haskell to buffer
the entire file in memory just so it can check the CRC32.
-}