{-# LANGUAGE CPP #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE FlexibleContexts #-}

-- |
-- Module: Codec.RPM.Parse
-- Copyright: (c) 2016-2017 Red Hat, Inc.
-- License: LGPL
--
-- Maintainer: https://github.com/weldr
-- Stability: stable
-- Portability: portable
--
-- A module for creating 'RPM' records from various data sources.

module Codec.RPM.Parse(
#ifdef TEST
                       parseLead,
                       parseSectionHeader,
                       parseOneTag,
                       parseSection,
#endif
                       parseRPM,
                       parseRPMC)
 where

#if !MIN_VERSION_base(4,8,0)
import           Control.Applicative((<$>))
#endif
import           Control.Monad(void)
import           Control.Monad.Except(MonadError, throwError)
import           Conduit((.|), Conduit, awaitForever, yield)
import           Data.Attoparsec.Binary
import           Data.Attoparsec.ByteString(Parser, anyWord8, count, take, takeByteString, word8)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as C
import           Data.Conduit.Attoparsec(ParseError(..), conduitParserEither)
import           Data.Maybe(mapMaybe)
import           Prelude hiding(take)

import Codec.RPM.Internal.Numbers(asWord32)
import Codec.RPM.Tags(Tag, mkTag)
import Codec.RPM.Types(Header(..), Lead(..), RPM(..), SectionHeader(..))

-- "a <$> b <$> c" looks better than "a . b <$> c"
{-# ANN parseLead "HLint: ignore Functor law" #-}
parseLead :: Parser Lead
parseLead = do
    -- Verify this is an RPM by checking the first four bytes.
    void $ word32be 0xedabeedb

    rpmMajor <- anyWord8
    rpmMinor <- anyWord8
    rpmType  <- anyWord16be
    rpmArchNum <- anyWord16be
    rpmName <- C.unpack <$> BS.takeWhile (/= 0) <$> take 66
    rpmOSNum <- anyWord16be
    rpmSigType <- anyWord16be

    -- Skip 16 reserved bytes at the end of the lead.
    void $ take 16
    
    return Lead { rpmMajor,
                  rpmMinor,
                  rpmType,
                  rpmArchNum,
                  rpmName,
                  rpmOSNum,
                  rpmSigType }

parseSectionHeader :: Parser SectionHeader
parseSectionHeader = do
    -- Verify this is a header section header by checking the first three bytes.
    void $ word8 0x8e >> word8 0xad >> word8 0xe8

    sectionVersion <- anyWord8
    -- Skip four reserved bytes.
    void $ take 4
    sectionCount <- anyWord32be
    sectionSize <- anyWord32be

    return SectionHeader { sectionVersion,
                           sectionCount,
                           sectionSize }

parseOneTag :: C.ByteString -> C.ByteString -> Maybe Tag
parseOneTag store bs | BS.length bs < 16 = Nothing
                     | otherwise = let
    tag = fromIntegral . asWord32 $ BS.take 4 bs
    ty  = fromIntegral . asWord32 $ BS.take 4 (BS.drop 4 bs)
    off = fromIntegral . asWord32 $ BS.take 4 (BS.drop 8 bs)
    cnt = fromIntegral . asWord32 $ BS.take 4 (BS.drop 12 bs)
 in
    mkTag store tag ty off cnt

parseSection :: Parser Header
parseSection = do
    headerSectionHeader <- parseSectionHeader
    -- Grab the tags as a list of bytestrings.  We need the store before we can process the tags, as
    -- that's where all the values for the tags are kept.  However, grabbing each individual tag here
    -- makes it a lot easier to process them later.
    rawTags <- count (fromIntegral $ sectionCount headerSectionHeader) (take 16)
    headerStore <- take (fromIntegral $ sectionSize headerSectionHeader)

    -- Now that we've got the store, process each tag by looking up its values in the store.
    -- NOTE: mapMaybe will reject tags which are Nothing
    let headerTags = mapMaybe (parseOneTag headerStore) rawTags

    return Header { headerSectionHeader,
                    headerTags,
                    headerStore }

-- | A parser (in the attoparsec sense of the term) that constructs 'Codec.RPM.Types.RPM' records.
-- The parser can be run against a 'Data.ByteString.ByteString' of RPM data using any of the usual
-- functions.  'Data.Attoparsec.ByteString.parse' and 'Data.Attoparsec.ByteString.parseOnly' are
-- especially useful:
--
-- > import Data.Attoparsec.ByteString(parse)
-- > import qualified Data.ByteString as BS
-- > s <- BS.readFile "some.rpm"
-- > result <- parse parseRPM s
--
-- The 'Data.Attoparsec.ByteString.Result' can then be examined directly or converted using
-- 'Data.Attoparsec.ByteString.maybeResult' (for converting it into a 'Maybe' 'RPM') or
-- 'Data.Attoparsec.ByteString.eitherResult' (for converting it into an 'Either' 'String' 'RPM').
-- In the latter case, the String contains any parse error that occurred when reading the
-- RPM data.
parseRPM :: Parser RPM
parseRPM = do
    -- First comes the (mostly useless) lead.
    rpmLead <- parseLead
    -- Then comes the signature, which is like a regular section except it's also padded.
    sig <- parseSection
    void $ take (signaturePadding sig)
    -- And then comes the real header.  There could be several, but for now there's only ever one.
    hdr <- parseSection
    rpmArchive <- takeByteString
    return RPM { rpmLead,
                 rpmSignatures=[sig],
                 rpmHeaders=[hdr],
                 rpmArchive }
 where
    signaturePadding :: Header -> Int
    signaturePadding hdr = let
        remainder = (sectionSize . headerSectionHeader) hdr `mod` 8
     in
        if remainder > 0 then fromIntegral $ 8 - remainder else 0

-- | Like 'parseRPM', but puts the result into a 'Conduit' as an 'Either', containing either a
-- 'ParseError' or an 'RPM'.  The result can be extracted with 'Control.Monad.Except.runExceptT',
-- like so:
--
-- > import Conduit((.|), runConduitRes, sourceFile)
-- > import Control.Monad.Except(runExceptT)
-- > result <- runExceptT $ runConduitRes $ sourceFile "some.rpm" .| parseRPMC .| someConsumer
--
-- On success, the 'RPM' record will be passed down the conduit for futher processing or
-- consumption.  Functions can be written to extract just one element out of the 'RPM' and
-- pass it along.  For instance:
--
-- > payloadC :: MonadError e m => Conduit RPM m BS.ByteStrin
-- > payloadC = awaitForever (yield . rpmArchive)
--
-- On error, the rest of the conduit will be skipped and the 'ParseError' will be returned
-- as the result to be dealt with.
parseRPMC :: MonadError String m => Conduit C.ByteString m RPM
parseRPMC =
    conduitParserEither parseRPM .| consumer
 where
    consumer = awaitForever $ either (throwError . errorMessage) (yield . snd)