{-# LANGUAGE CPP, MagicHash, BangPatterns, ScopedTypeVariables #-}

{-|

Module      : GHC.Packing.Type
Copyright   : (c) Jost Berthold, 2010-2015,
License     : BSD3
Maintainer  : Jost Berthold <jost.berthold@gmail.com>
Stability   : experimental
Portability : no (depends on GHC internals)

= Serialized type for the packman library, instances and helpers

The data type @'Serialized' a@ includes a phantom type @a@ to ensure
type safety within one and the same program run. Type @a@ can be
polymorphic (at compile time, that is) when @'Serialized' a@ is not used
apart from being argument to @'deserialize'@.

The @Show@, @Read@, and @Binary@ instances of @Serialized a@ require an
additional @Typeable@ context (which requires @a@ to be monomorphic)
in order to implement dynamic type checks when parsing and deserialising
data from external sources.

-}

module GHC.Packing.Type
--    ( Serialized(..)
-- TOOD assemble export list with structure and headings/text blocks
--    , ...    ) 
        where

import GHC.Prim -- ByteArray#
import GHC.Exts ( Int(..)) -- I#

-- Read and Show instances
import Text.Printf ( printf )
import Text.ParserCombinators.ReadP (sepBy1, many1, ReadP, munch,
    munch1, pfail, readP_to_S, satisfy, skipSpaces, string )
import Data.Char ( isDigit )

-- Binary instance
import Data.Binary ( Get, Binary(..), encode, decode, encodeFile, decodeFile )

-- we use UArrays of machine word size (TargetWord)
import Data.Word( Word, Word64, Word32 )
import Data.Array.Base ( UArray(..), elems, listArray )
import Foreign.Storable ( sizeOf )

-- for dynamic type checks when parsing
import Data.Typeable (Typeable(..), typeOf)
#if MIN_VERSION_base(4,8,0)
import Data.Typeable.Internal (TypeRep(..), typeRepFingerprint)
#else
import Data.Typeable.Internal (TypeRep(..))
#endif
import qualified GHC.Fingerprint

-- for a hash of the executable. Using GHC.Fingerprint.getFileHash
import GHC.Fingerprint(getFileHash)
import System.Environment
import System.IO.Unsafe

-- for control flow and exceptions
import Control.Monad(when)
import Control.Exception(throw)

import GHC.Packing.PackException

-- | The type of Serialized data. Phantom type 'a' ensures that we
-- unpack data as the expected type.
data Serialized a = Serialized { packetData :: ByteArray# }

{- $ShowReadBinary

The power of evaluation-orthogonal serialisation is that one can
/externalise/ partially evaluated data (containing thunks), for
instance write it to disk or send it over a network.
Therefore, the module defines a 'Binary' instance for 'Serialized a',
as well as instances for 'Read' and 'Show'@ which satisfy 

> read . show == id :: 'Serialized' a -> 'Serialized' a

The phantom type is enough to ensure type-correctness when serialised
data remain in one single program run. However, when data from
previous runs are read  from an external source, their type needs to
be checked at runtime. Type information must be stored together with
the (binary) serialisation data.

The serialised data contain pointers to static data in the generating
program (top-level functions and constants) and very likely to
additional library code. Therefore, the /exact same binary/ must be
used when reading in serialised data from an external source. A hash
of the executable is included in the representation to ensure this.

-}

-- | prints packet as Word array in 4 columns (/Word/ meaning the
-- machine word size), and additionally includes Fingerprint hash
-- values for executable binary and type.
instance Typeable a => Show (Serialized a) where
    show p = unlines [ "Serialization Packet, size " ++ show size,
                       ", program " ++ show prgHash,
                       ", type fingerprint" ++ show t,
                       showWArray (UArray 0 (size-1) size dat) ]
        where size = case sizeofByteArray# dat of
                          sz# -> (I# sz# ) `div` sizeOf(undefined::TargetWord)
              t    = typeFP ( undefined :: a )
              dat  = packetData p

-- | Helper to show a serialized structure as a packet (Word Array)
showWArray :: UArray Int TargetWord -> String
showWArray arr = unlines [ show i ++ ":" ++ unwords (map showH row)
                         | (i,row) <- zip  [0,4..] elRows ]
    where showH w = -- "\t0x" ++ showHex w " "
                    printf ('\t':hexWordFmt) w
          elRows = takeEach4 (elems arr)
          
          takeEach4 :: [a] -> [[a]]
          takeEach4 [] = []
          takeEach4 xs = first:takeEach4 rest
            where (first,rest) = splitAt 4 xs

-----------------------------------------------
-- | Reads the format generated by the 'Show' instance, checks
--   hash values for executable and type and parses exactly as much as
--   the included data size announces.
instance Typeable a => Read (Serialized a)
    -- using ReadP parser (base-4.x)
    where readsPrec _ input
           = case parseP input of
              []  -> throw P_ParseError -- no parse
              [((sz,tp,dat),r)]
                  -> let !(UArray _ _ _ arr# ) = listArray (0,sz-1) dat
                         t = typeFP (undefined::a)
                     in if t == tp
                              then [(Serialized arr# , r)]
                              else throw P_TypeMismatch
              other-> throw P_ParseError
                       -- ambiguous parse for packet

-- | Packet Parser, reads the format generated by the @Read@ instance.
-- Could also consume other formats of the array (not implemented).
-- Returns: (data size in words, type fingerprint, array values)
parseP :: ReadS (Int, FP, [TargetWord]) 
parseP = readP_to_S $
-- read header with size and type, then iterate over array values,
-- reading several hex words in one row, separated by
-- tab and space. Packet size needed to avoid returning a prefix.
         do string "Serialization Packet, size "
            sz_str <- munch1 isDigit
            let sz = read sz_str::Int
            string ", program "
            h  <- munch1 (not . (== '\n'))
            when (read h /= prgHash) (throw P_BinaryMismatch)
              -- executables do not match. No ambiguous parses here,
              -- so just throw; otherwise we would only pfail.
            newline
            string ", type "
            tp <- munch1 (not . (== '\n'))
            newline
            let startRow = do { many1 digit; colon; tabSpace }
                row = do { startRow; sepBy1 hexNum tabSpace }
            valss <- sepBy1 row newline
            skipSpaces -- eat remaining spaces
            let vals = concat valss
                l    = length vals
            -- filter out wrong lengths:
            if (sz /= length vals) then pfail
                                   else return (sz, read tp, vals)

digit = satisfy isDigit
colon = satisfy (==':')
tabSpace = munch1 ( \x -> x `elem` " \t" )
newline = munch1 (\x -> x `elem` " \n")

hexNum :: ReadP TargetWord
hexNum = do string "0x"
            ds <- munch hexDigit
            return (read ("0x" ++ ds))
  where hexDigit = (\x -> x `elem` "0123456789abcdefABCDEF")

------------------------------------------------------------------

-- | The binary format of @'Serialized' a@ data includes FingerPrint
--   hash values for type and executable binary, which are checked
--   when reading Serialized data back in using @get@.
instance Typeable a => Binary (Serialized a) where
    -- We make our life simple and construct/deconstruct Word
    -- (U)Arrays, quite as we did in the Show/Read instances.
    put (Serialized bArr#)
        = do put prgHash
             put (typeFP (undefined :: a))
             let arr    = UArray 0 (sz-1) sz bArr# :: UArray Int TargetWord
                 sz     = case sizeofByteArray# bArr# of
                            sz# -> (I# sz# ) `div` sizeOf(undefined::TargetWord)
             put arr
    get = do hash   <- get :: Get FP
             when (hash /= prgHash) 
               (throw P_BinaryMismatch) 
             -- executables do not match
             tp <- get :: Get FP
             when (tp /= typeFP (undefined :: a))
               (throw P_TypeMismatch)
                -- Type error during packet parse
             uarr   <- get :: Get (UArray Int TargetWord)
             let !(UArray _ _ sz bArr#) = uarr
             return ( Serialized bArr# )

------------------------------------------------------------------
-- $ComparingTypes
-----------------------------------------------
-- Helper functions to compare types at runtime:
--   We use type "fingerprints" defined in 'GHC.Fingerprint.Type'

-- This should ensure (as of GHC.7.8) that types with the same name
-- but different definition get different hashes.  (however, we also
-- require the executable to be exactly the same, so this is not
-- strictly necessary anyway).

-- Typeable context for dynamic type checks. 

-- | The module uses a custom GHC fingerprint type with its two Word64
--   fields, to be able to /read/ fingerprints
data FP = FP Word64 Word64 deriving (Read, Show, Eq)

-- | checks whether the type of the given expression matches the given Fingerprint
matches :: Typeable a => a -> FP -> Bool
matches x (FP c1 c2) = f1 == c1 && f2 == c2
  where  (GHC.Fingerprint.Fingerprint f1 f2) = typeRepFingerprint (typeOf x)

#if ! MIN_VERSION_base(4,8,0)
-- typeRepFingerprint is provided since base-4.8.0.0
typeRepFingerprint typeRep = ghcFP
    where TypeRep ghcFP _ _ = typeRep
#endif

-- | creates an 'FP' from a GHC 'Fingerprint'
toFP :: GHC.Fingerprint.Fingerprint -> FP
toFP (GHC.Fingerprint.Fingerprint f1 f2) = FP f1 f2

-- | returns the type fingerprint of an expression
typeFP :: Typeable a => a -> FP
typeFP = toFP . typeRepFingerprint . typeOf

-- | Binary instance for fingerprint data (encoding TypeRep and
--   executable in binary-encoded @Serialized a@)
instance Binary FP where
  put (FP f1 f2) = do put f1
                      put f2
  get            = do f1 <- get :: Get Word64
                      f2 <- get :: Get Word64
                      return (FP f1 f2)

-----------------------------------------------

-- | To check that the program (executable) is identical when packing
-- and unpacking, the fingerprint type from above is used (Read/Show
-- instances required). An 'FP' fingerprint of the executable is
-- computed once, by unsafePerformIO inside this CAF (safe to inline,
-- just inefficient).

{-# NOINLINE prgHash #-}
prgHash :: FP
prgHash = unsafePerformIO $ 
          getExecutablePath >>= getFileHash >>= return . toFP

-----------------------------------------------

-- | The target word size is the size of a machine word on the
-- platform we run on.
--
-- This type is only used in Binary, Read and Show instances, where
-- packets are stored as 'UArrays' of 'TargetWord'.
--
-- Actually, GHC uses machine word size (as Haskell 2010 spec. does
-- not fix it) so we could just use Word. See
-- <http://www.haskell.org/ghc/docs/7.8.3/html/users_guide/bugs-and-infelicities.html#haskell-98-2010-undefined>

-- We'd rather just import 'GHC.Constants.TargetWord' but it was
-- removed. This code here is a cheap and incomplete hack, as the
-- package would otherwise need a configure script.

#if x86_64_BUILD_ARCH
type TargetWord = Word64
hexWordFmt = "0x%016x"
#elif i386_BUILD_ARCH
type TargetWord = Word32
hexWordFmt = "0x%08x"
#elif powerpc_BUILD_ARCH
#error Don't know word size of your Power-PC model
#else
#warning Don't know the word size on your machine.
type TargetWord = Word
#endif