{-# LANGUAGE Safe #-}
{-# OPTIONS_GHC -fno-warn-name-shadowing #-}
{- arch-tag: GZip file support in Haskell
Copyright (c) 2004-2011 John Goerzen <jgoerzen@complete.org>

All rights reserved.

For license and copyright information, see the file LICENSE
-}

{- |
   Module     : System.FileArchive.GZip
   Copyright  : Copyright (C) 2004-2011 John Goerzen
   SPDX-License-Identifier: BSD-3-Clause

   Stability  : stable
   Portability: portable

GZip file decompression

Copyright (c) 2004 John Goerzen, jgoerzen\@complete.org

The GZip format is described in RFC1952.
-}
module System.FileArchive.GZip (
                                  -- * GZip Files
                                  -- $gzipfiles

                                  -- * Types
                                  Header(..), Section, GZipError(..),
                                  Footer(..),
                                  -- * Whole-File Processing
                                  decompress,
                                  hDecompress,
                                  read_sections,
                                  -- * Section Processing
                                  read_header,
                                  read_section
                                 )
    where

import Control.Monad.Except     (MonadError(..))
import Data.Bits                ((.&.))
import Data.Bits.Utils          (fromBytes)
import Data.Char                (ord)
import Data.Compression.Inflate (inflate_string_remainder)
import Data.Hash.CRC32.GZip     (update_crc)
import Data.Word                (Word32)
import System.IO                (Handle, hGetContents, hPutStr)

data GZipError = CRCError               -- ^ CRC-32 check failed
               | NotGZIPFile            -- ^ Couldn't find a GZip header
               | UnknownMethod          -- ^ Compressed with something other than method 8 (deflate)
               | UnknownError String    -- ^ Other problem arose
               deriving (GZipError -> GZipError -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: GZipError -> GZipError -> Bool
$c/= :: GZipError -> GZipError -> Bool
== :: GZipError -> GZipError -> Bool
$c== :: GZipError -> GZipError -> Bool
Eq, Int -> GZipError -> ShowS
[GZipError] -> ShowS
GZipError -> [Char]
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
showList :: [GZipError] -> ShowS
$cshowList :: [GZipError] -> ShowS
show :: GZipError -> [Char]
$cshow :: GZipError -> [Char]
showsPrec :: Int -> GZipError -> ShowS
$cshowsPrec :: Int -> GZipError -> ShowS
Show)

-- | First two bytes of file
magic :: String
magic :: [Char]
magic = [Char]
"\x1f\x8b"

-- | Flags
fFHCRC, fFEXTRA, fFNAME, fFCOMMENT :: Int
-- fFTEXT = 1 :: Int
fFHCRC :: Int
fFHCRC = Int
2
fFEXTRA :: Int
fFEXTRA = Int
4
fFNAME :: Int
fFNAME = Int
8
fFCOMMENT :: Int
fFCOMMENT = Int
16

{- | The data structure representing the GZip header.  This occurs
at the beginning of each 'Section' on disk. -}
data Header = Header {
                      Header -> Int
method   :: Int,    -- ^ Compression method.  Only 8 is defined at present.
                      Header -> Int
flags    :: Int,
                      Header -> Maybe [Char]
extra    :: Maybe String,
                      Header -> Maybe [Char]
filename :: Maybe String,
                      Header -> Maybe [Char]
comment  :: Maybe String,
                      Header -> Word32
mtime    :: Word32,  -- ^ Modification time of the original file
                      Header -> Int
xfl      :: Int,       -- ^ Extra flags
                      Header -> Int
os       :: Int         -- ^ Creating operating system
                     } deriving (Header -> Header -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Header -> Header -> Bool
$c/= :: Header -> Header -> Bool
== :: Header -> Header -> Bool
$c== :: Header -> Header -> Bool
Eq, Int -> Header -> ShowS
[Header] -> ShowS
Header -> [Char]
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
showList :: [Header] -> ShowS
$cshowList :: [Header] -> ShowS
show :: Header -> [Char]
$cshow :: Header -> [Char]
showsPrec :: Int -> Header -> ShowS
$cshowsPrec :: Int -> Header -> ShowS
Show)

{- | Stored on-disk at the end of each section. -}
data Footer = Footer {
                      Footer -> Word32
size       :: Word32,   -- ^ The size of the original, decompressed data
                      Footer -> Word32
crc32      :: Word32,  -- ^ The stored GZip CRC-32 of the original, decompressed data
                      Footer -> Bool
crc32valid :: Bool -- ^ Whether or not the stored CRC-32 matches the calculated CRC-32 of the data
                     }

{- | A section represents a compressed component in a GZip file.
Every GZip file has at least one. -}
type Section = (Header, String, Footer)

split1 :: String -> (Char, String)
split1 :: [Char] -> (Char, [Char])
split1 [Char]
s = (forall a. [a] -> a
head [Char]
s, forall a. [a] -> [a]
tail [Char]
s)

{- | Read a GZip file, decompressing all sections found.

Writes the decompressed data stream to the given output handle.

Returns Nothing if the action was successful, or Just GZipError if there
was a problem.  If there was a problem, the data written to the output
handle should be discarded.
-}

hDecompress :: Handle                   -- ^ Input handle
            -> Handle                   -- ^ Output handle
            -> IO (Maybe GZipError)
hDecompress :: Handle -> Handle -> IO (Maybe GZipError)
hDecompress Handle
infd Handle
outfd =
    do [Char]
inc <- Handle -> IO [Char]
hGetContents Handle
infd
       let ([Char]
outstr, Maybe GZipError
err) = [Char] -> ([Char], Maybe GZipError)
decompress [Char]
inc
       Handle -> [Char] -> IO ()
hPutStr Handle
outfd [Char]
outstr
       forall (m :: * -> *) a. Monad m => a -> m a
return Maybe GZipError
err

{- | Read a GZip file, decompressing all sections that are found.

Returns a decompresed data stream and Nothing, or an unreliable string
and Just (error).  If you get anything other than Nothing, the String
returned should be discarded.
-}
decompress :: String -> (String, Maybe GZipError)
{-
decompress s =
    do x <- read_header s
       let rem = snd x
       return $ inflate_string rem
-}
decompress :: [Char] -> ([Char], Maybe GZipError)
decompress [Char]
s =
    let procs :: [Section] -> (String, Bool)
        procs :: [Section] -> ([Char], Bool)
procs [] = ([], Bool
True)
        procs ((Header
_, [Char]
content, Footer
foot):[Section]
xs) =
            let ([Char]
nexth, Bool
nextb) = [Section] -> ([Char], Bool)
procs [Section]
xs in
                ([Char]
content forall a. [a] -> [a] -> [a]
++ [Char]
nexth, (Footer -> Bool
crc32valid Footer
foot) Bool -> Bool -> Bool
&& Bool
nextb)
        in case [Char] -> Either GZipError [Section]
read_sections [Char]
s of
           Left GZipError
x -> ([Char]
"", forall a. a -> Maybe a
Just GZipError
x)
           Right [Section]
x -> let ([Char]
decomp, Bool
iscrcok) = [Section] -> ([Char], Bool)
procs [Section]
x
                          in ([Char]
decomp, if Bool
iscrcok then forall a. Maybe a
Nothing else forall a. a -> Maybe a
Just GZipError
CRCError)

{-
decompress s = do x <- read_sections s
                  return $ concatMap (\(_, x, _) -> x) x
-}

-- | Read all sections.
read_sections :: String -> Either GZipError [Section]
read_sections :: [Char] -> Either GZipError [Section]
read_sections [] = forall a b. b -> Either a b
Right []
read_sections [Char]
s =
    do (Section, [Char])
x <- [Char] -> Either GZipError (Section, [Char])
read_section [Char]
s
       case (Section, [Char])
x of
           (Section
sect, [Char]
remain) ->
               do [Section]
next <- [Char] -> Either GZipError [Section]
read_sections [Char]
remain
                  forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Section
sect forall a. a -> [a] -> [a]
: [Section]
next

parseword :: String -> Word32
parseword :: [Char] -> Word32
parseword [Char]
s = forall a. (Bits a, Num a) => [a] -> a
fromBytes forall a b. (a -> b) -> a -> b
$ forall a b. (a -> b) -> [a] -> [b]
map (forall a b. (Integral a, Num b) => a -> b
fromIntegral forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Int
ord) forall a b. (a -> b) -> a -> b
$ forall a. [a] -> [a]
reverse [Char]
s

-- | Read one section, returning (ThisSection, Remainder)
read_section :: String -> Either GZipError (Section, String)
read_section :: [Char] -> Either GZipError (Section, [Char])
read_section [Char]
s =
        do (Header, [Char])
x <- [Char] -> Either GZipError (Header, [Char])
read_header [Char]
s
           let headerrem :: [Char]
headerrem = forall a b. (a, b) -> b
snd (Header, [Char])
x
           let ([Char]
decompressed, Word32
crc, [Char]
remainder) = [Char] -> ([Char], Word32, [Char])
read_data [Char]
headerrem
           let ([Char]
crc32str, [Char]
rm) = forall a. Int -> [a] -> ([a], [a])
splitAt Int
4 [Char]
remainder
           let ([Char]
sizestr, [Char]
rem2) = forall a. Int -> [a] -> ([a], [a])
splitAt Int
4 [Char]
rm
           let filecrc32 :: Word32
filecrc32 = [Char] -> Word32
parseword [Char]
crc32str
           let filesize :: Word32
filesize = [Char] -> Word32
parseword [Char]
sizestr
           forall (m :: * -> *) a. Monad m => a -> m a
return ((forall a b. (a, b) -> a
fst (Header, [Char])
x, [Char]
decompressed,
                   Footer {size :: Word32
size = Word32
filesize, crc32 :: Word32
crc32 = Word32
filecrc32,
                           crc32valid :: Bool
crc32valid = Word32
filecrc32 forall a. Eq a => a -> a -> Bool
== Word32
crc})
                   ,[Char]
rem2)

-- | Read the file's compressed data, returning
-- (Decompressed, Calculated CRC32, Remainder)
read_data :: String -> (String, Word32, String)
read_data :: [Char] -> ([Char], Word32, [Char])
read_data [Char]
x =
    let ([Char]
decompressed1, [Char]
remainder) = [Char] -> ([Char], [Char])
inflate_string_remainder [Char]
x
        ([Char]
decompressed, Word32
crc32) = [Char] -> Word32 -> ([Char], Word32)
read_data_internal [Char]
decompressed1 Word32
0
        in
          ([Char]
decompressed, Word32
crc32, [Char]
remainder)
    where
      read_data_internal :: [Char] -> Word32 -> ([Char], Word32)
read_data_internal [] Word32
ck = ([], Word32
ck)
      read_data_internal (Char
y:[Char]
ys) Word32
ck =
        let newcrc :: Word32
newcrc = Word32 -> Char -> Word32
update_crc Word32
ck Char
y
            n :: ([Char], Word32)
n = Word32
newcrc seq :: forall a b. a -> b -> b
`seq` [Char] -> Word32 -> ([Char], Word32)
read_data_internal [Char]
ys Word32
newcrc
            in
            (Char
y forall a. a -> [a] -> [a]
: forall a b. (a, b) -> a
fst ([Char], Word32)
n, forall a b. (a, b) -> b
snd ([Char], Word32)
n)



{- | Read the GZip header.  Return (Header, Remainder).
-}
read_header :: String -> Either GZipError (Header, String)
read_header :: [Char] -> Either GZipError (Header, [Char])
read_header [Char]
s =
    let ok :: Either a [Char]
ok = forall a b. b -> Either a b
Right [Char]
"ok" in
    do let ([Char]
mag, [Char]
rem1) = forall a. Int -> [a] -> ([a], [a])
splitAt Int
2 [Char]
s
       [Char]
_ <- if [Char]
mag forall a. Eq a => a -> a -> Bool
/= [Char]
magic
          then forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError GZipError
NotGZIPFile
          else forall {a}. Either a [Char]
ok
       let (Char
method, [Char]
rem2) = [Char] -> (Char, [Char])
split1 [Char]
rem1
       [Char]
_ <- if (Char -> Int
ord(Char
method) forall a. Eq a => a -> a -> Bool
/= Int
8)
          then forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError GZipError
UnknownMethod
          else forall {a}. Either a [Char]
ok
       let (Char
flag_S, [Char]
rem3) = [Char] -> (Char, [Char])
split1 [Char]
rem2
       let flag :: Int
flag = Char -> Int
ord Char
flag_S
       let ([Char]
mtimea, [Char]
rem3a) = forall a. Int -> [a] -> ([a], [a])
splitAt Int
4 [Char]
rem3
       let mtime :: Word32
mtime = [Char] -> Word32
parseword [Char]
mtimea
       let (Char
xfla, [Char]
rem3b) = [Char] -> (Char, [Char])
split1 [Char]
rem3a
       let xfl :: Int
xfl = Char -> Int
ord Char
xfla
       let (Char
osa, [Char]
_) = [Char] -> (Char, [Char])
split1 [Char]
rem3b
       let os :: Int
os = Char -> Int
ord Char
osa
       -- skip modtime (4), extraflag (1), and os (1)
       let rem4 :: [Char]
rem4 = forall a. Int -> [a] -> [a]
drop Int
6 [Char]
rem3

       let (Maybe [Char]
extra, [Char]
rem5) =
               if (Int
flag forall a. Bits a => a -> a -> a
.&. Int
fFEXTRA forall a. Eq a => a -> a -> Bool
/= Int
0)
               -- Skip past the extra field if we have it.
                  then let (Char
xlen_S, [Char]
_) = [Char] -> (Char, [Char])
split1 [Char]
rem4
                           (Char
xlen2_S, [Char]
rem4b) = [Char] -> (Char, [Char])
split1 [Char]
rem4
                           xlen :: Int
xlen = (Char -> Int
ord Char
xlen_S) forall a. Num a => a -> a -> a
+ Int
256 forall a. Num a => a -> a -> a
* (Char -> Int
ord Char
xlen2_S)
                           ([Char]
ex, [Char]
rrem) = forall a. Int -> [a] -> ([a], [a])
splitAt Int
xlen [Char]
rem4b
                           in (forall a. a -> Maybe a
Just [Char]
ex, [Char]
rrem)
                  else (forall a. Maybe a
Nothing, [Char]
rem4)

       let (Maybe [Char]
filename, [Char]
rem6) =
               if (Int
flag forall a. Bits a => a -> a -> a
.&. Int
fFNAME forall a. Eq a => a -> a -> Bool
/= Int
0)
               -- Skip past the null-terminated filename
                  then let fn :: [Char]
fn = forall a. (a -> Bool) -> [a] -> [a]
takeWhile (forall a. Eq a => a -> a -> Bool
/= Char
'\x00') [Char]
rem5
                                in (forall a. a -> Maybe a
Just [Char]
fn, forall a. Int -> [a] -> [a]
drop ((forall (t :: * -> *) a. Foldable t => t a -> Int
length [Char]
fn) forall a. Num a => a -> a -> a
+ Int
1) [Char]
rem5)
                  else (forall a. Maybe a
Nothing, [Char]
rem5)

       let (Maybe [Char]
comment, [Char]
rem7) =
               if (Int
flag forall a. Bits a => a -> a -> a
.&. Int
fFCOMMENT forall a. Eq a => a -> a -> Bool
/= Int
0)
                  -- Skip past the null-terminated comment
                  then let cm :: [Char]
cm = forall a. (a -> Bool) -> [a] -> [a]
takeWhile (forall a. Eq a => a -> a -> Bool
/= Char
'\x00') [Char]
rem6
                           in (forall a. a -> Maybe a
Just [Char]
cm, forall a. Int -> [a] -> [a]
drop ((forall (t :: * -> *) a. Foldable t => t a -> Int
length [Char]
cm) forall a. Num a => a -> a -> a
+ Int
1) [Char]
rem6)
                  else (forall a. Maybe a
Nothing, [Char]
rem6)

       [Char]
rem8 <- if (Int
flag forall a. Bits a => a -> a -> a
.&. Int
fFHCRC forall a. Eq a => a -> a -> Bool
/= Int
0)
                  -- Skip past the header CRC
                  then forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ forall a. Int -> [a] -> [a]
drop Int
2 [Char]
rem7
                  else forall (m :: * -> *) a. Monad m => a -> m a
return [Char]
rem7

       forall (m :: * -> *) a. Monad m => a -> m a
return (Header {method :: Int
method = Char -> Int
ord Char
method,
                      flags :: Int
flags = Int
flag,
                      extra :: Maybe [Char]
extra = Maybe [Char]
extra,
                      filename :: Maybe [Char]
filename = Maybe [Char]
filename,
                      comment :: Maybe [Char]
comment = Maybe [Char]
comment,
                      mtime :: Word32
mtime = Word32
mtime,
                      xfl :: Int
xfl = Int
xfl,
                      os :: Int
os = Int
os}, [Char]
rem8)

----------------------------------------------------------------------
-- Documentation
----------------------------------------------------------------------

{- $gzipfiles

GZip files contain one or more 'Section's.  Each 'Section', on disk, begins
with a GZip 'Header', then stores the compressed data itself, and finally
stores a GZip 'Footer'.

The 'Header' identifies the file as a GZip file, records the original
modification date and time, and, in some cases, also records the original
filename and comments.

The 'Footer' contains a GZip CRC32 checksum over the decompressed data as
well as a 32-bit length of the decompressed data.  The module
'Data.Hash.CRC32.GZip' is used to validate stored CRC32 values.

The vast majority of GZip files contain only one 'Section'.  Standard tools
that work with GZip files create single-section files by default.

Multi-section files can be created by simply concatenating two existing
GZip files together.  The standard gunzip and zcat tools will simply
concatenate the decompressed data when reading these files back.  The
'decompress' function in this module will do the same.

When reading data from this module, please use caution regarding how you access
it.  For instance, if you are wanting to write the decompressed stream
to disk and validate its CRC32 value, you could use the 'decompress'
function.  However, you should process the entire stream before you check
the value of the Bool it returns.  Otherwise, you will force Haskell to buffer
the entire file in memory just so it can check the CRC32.
-}