{-# LANGUAGE GADTs #-}
--------------------------------------------------------------------------------
-- |
-- Module: Data.IDX.Internal
-- Copyright: Christof Schramm
-- License: GPLv3
--
-- Maintainer: Christof Schramm <christof.schramm@campus.lmu.de>
-- Stability: Experimental
-- Portability: Should work with all major haskell implementations
--
-- An internal module; the API contained here should not be used
-- and is subject to possibly breaking changes. Use these functions
-- and types at your own risk.
--
-- The safe interface is in "Data.IDX"
--------------------------------------------------------------------------------
module Data.IDX.Internal where

import           Control.Monad (replicateM)
import           Data.Binary
import qualified Data.Vector.Unboxed as V
import           Data.Vector.Unboxed ((!))

-- | Tag describing the element type of the content, following the
-- naming used by the IDX specification.
data IDXContentType where
  IDXUnsignedByte :: IDXContentType
  IDXSignedByte   :: IDXContentType
  IDXShort        :: IDXContentType
  IDXInt          :: IDXContentType
  IDXFloat        :: IDXContentType
  IDXDouble       :: IDXContentType
  deriving (Show)

-- | Datatype for storing IDXData. Internally, data is always stored either
-- as an unboxed vector of 'Int' or as an unboxed vector of 'Double'.
-- However, when binary serialization is used, the data is serialized
-- according to the 'IDXContentType'.
data IDXData
  = IDXInts IDXContentType (V.Vector Int) (V.Vector Int)
    -- ^ Content type, dimensions, and content held as 'Int' values
  | IDXDoubles IDXContentType (V.Vector Int) (V.Vector Double)
    -- ^ Content type, dimensions, and content held as 'Double' values
  deriving Show

-- | Newtype around an unboxed vector of 'Int' values, used for labels.
newtype IDXLabels = IDXLabels (V.Vector Int)

-- | Return the content type tag stored alongside the data
idxType :: IDXData -> IDXContentType
idxType (IDXInts    t _ _) = t
idxType (IDXDoubles t _ _) = t

-- | Return an unboxed Vector of Int dimensions
idxDimensions :: IDXData -> V.Vector Int
idxDimensions (IDXInts    _ ds _) = ds
idxDimensions (IDXDoubles _ ds _) = ds

-- | Return whether the data in this IDXData value is
-- stored as integral values
isIDXIntegral :: IDXData -> Bool
isIDXIntegral (IDXInts _ _ _) = True
isIDXIntegral _               = False

-- | Return whether the data in this IDXData value is
-- stored as double values
isIDXReal :: IDXData -> Bool
isIDXReal (IDXDoubles _ _ _) = True
isIDXReal _                  = False

-- | Return contained ints. If the data is stored as doubles, every
-- element is converted with 'round'. Data is stored like in a C-array,
-- i.e. the last index changes first.
idxIntContent :: IDXData -> V.Vector Int
idxIntContent (IDXInts    _ _ v) = v
-- Map over the vector itself instead of indexing by position: an index
-- range that includes 'V.length' reads one element past the end.
idxIntContent (IDXDoubles _ _ v) = V.map round v

-- | Return contained doubles. If the data is stored as ints, every
-- element is converted with 'fromIntegral'. Data is stored like in a
-- C-array, i.e. the last index changes first.
idxDoubleContent :: IDXData -> V.Vector Double
idxDoubleContent (IDXDoubles _ _ v) = v
-- Same reasoning as in 'idxIntContent': mapping visits exactly the
-- elements that exist, so no out-of-bounds index can occur.
idxDoubleContent (IDXInts    _ _ v) = V.map fromIntegral v

-- | Helper function to read a (possibly big) vector of binary
-- values as chunks. Strictly evaluates each chunk and then
-- concatenates the chunks, does not leak space.
--
-- A chunk size that is not positive disables chunking: the whole input
-- is then requested with a single call to the chunk reader.
readContent
  :: (V.Unbox a)
  => (Int -> Get (V.Vector a)) -- ^ To Get a chunk of size n
  -> Int                       -- ^ Chunk size
  -> Int                       -- ^ Expected input
  -> Get (V.Vector a)
readContent readEntries chunkSize n
  -- A zero chunk size would divide by zero below, and a negative one
  -- would make 'replicateM' read nothing, so both fall back to one read.
  | chunkSize <= 0 || n <= chunkSize = readForced n
  | otherwise = do
      let (nChunks, leftover) = n `divMod` chunkSize
      -- The partial chunk comes first, followed by the full-size chunks.
      headChunk <- readForced leftover
      chunkList <- replicateM nChunks (readForced chunkSize)
      return $! V.concat (headChunk : chunkList)
  where
    -- Read one chunk and force it to WHNF before handing it on.
    readForced k = do
      chunk <- readEntries k
      return $! chunk