{-# LANGUAGE ForeignFunctionInterface #-}
module Bio.Iteratee.Builder (
BB(..),
newBuffer,
fillBuffer,
expandBuffer,
encodeBgzf,
BgzfTokens(..),
BclArgs(..),
BclSpecialType(..),
int_loop,
loop_bcl_special
) where
import Bio.Iteratee
import Bio.Iteratee.Bgzf ( compressChunk, maxBlockSize, bgzfEofMarker )
import Bio.Prelude
import Foreign.Marshal.Utils ( copyBytes )
import qualified Data.ByteString as B
import qualified Data.ByteString.Unsafe as B
import qualified Data.Vector.Storable as V
data BB = BB { buffer :: {-# UNPACK #-} !(ForeignPtr Word8)
, size :: {-# UNPACK #-} !Int
, off :: {-# UNPACK #-} !Int
, used :: {-# UNPACK #-} !Int
, mark :: {-# UNPACK #-} !Int
, mark2 :: {-# UNPACK #-} !Int }
data BgzfTokens = TkWord32 {-# UNPACK #-} !Word32 BgzfTokens
| TkWord16 {-# UNPACK #-} !Word16 BgzfTokens
| TkWord8 {-# UNPACK #-} !Word8 BgzfTokens
| TkFloat {-# UNPACK #-} !Float BgzfTokens
| TkDouble {-# UNPACK #-} !Double BgzfTokens
| TkString {-# UNPACK #-} !B.ByteString BgzfTokens
| TkDecimal {-# UNPACK #-} !Int BgzfTokens
| TkSetMark BgzfTokens
| TkEndRecord BgzfTokens
| TkEndRecordPart1 BgzfTokens
| TkEndRecordPart2 BgzfTokens
| TkEnd
| TkBclSpecial !BclArgs BgzfTokens
| TkLowLevel {-# UNPACK #-} !Int (BB -> IO BB) BgzfTokens
data BclSpecialType = BclNucsBin | BclNucsWide | BclNucsAsc | BclNucsAscRev | BclQualsBin | BclQualsAsc | BclQualsAscRev
data BclArgs = BclArgs BclSpecialType
{-# UNPACK #-} !(V.Vector Word8)
{-# UNPACK #-} !Int
{-# UNPACK #-} !Int
{-# UNPACK #-} !Int
{-# UNPACK #-} !Int
newBuffer :: Int -> IO BB
newBuffer sz = mallocForeignPtrBytes sz >>= \ar -> return $ BB ar sz 0 0 maxBound maxBound
expandBuffer :: Int -> BB -> IO BB
expandBuffer minsz b = do
let sz' = max (2 * (size b - used b)) minsz
arr1 <- mallocForeignPtrBytes sz'
withForeignPtr arr1 $ \d ->
withForeignPtr (buffer b) $ \s ->
copyBytes d (plusPtr s (off b)) (used b - off b)
return BB{ buffer = arr1
, size = sz'
, off = 0
, used = used b - off b
, mark = if mark b == maxBound then maxBound else mark b - off b
, mark2 = if mark2 b == maxBound then maxBound else mark2 b - off b }
compressChunk' :: Int -> ForeignPtr Word8 -> Int -> Int -> IO B.ByteString
compressChunk' lv fptr off len =
withForeignPtr fptr $ \ptr ->
compressChunk lv (plusPtr ptr off) (fromIntegral len)
instance Nullable (Endo BgzfTokens) where
nullC f = case appEndo f TkEnd of TkEnd -> True ; _ -> False
encodeBgzf :: MonadIO m => Int -> Enumeratee (Endo BgzfTokens) B.ByteString m b
encodeBgzf lv = (\out -> newBuffer (1024*1024) `ioBind` \bb -> eneeCheckIfDone (liftI . go bb) out)
><> parRunIO (2*numCapabilities)
where
go bb0 k (EOF mx) = final_flush bb0 mx k
go bb0 k (Chunk f)
| size bb0 - used bb0 < 1024 = expandBuffer (1024*1024) bb0 `ioBind` \bb' -> go' bb' k (appEndo f TkEnd)
| otherwise = go' bb0 k (appEndo f TkEnd)
go' bb0 k tk = fillBuffer bb0 tk `ioBind` \(bb',tk') -> flush_blocks tk' bb' k
flush_blocks tk bb k
| min (mark bb) (used bb) - off bb < maxBlockSize =
case tk of TkEnd -> liftI $ go bb k
_ ->
expandBuffer (1024*1024) bb `ioBind` \bb' -> go' bb' k tk
| otherwise =
eneeCheckIfDone (flush_blocks tk bb { off = off bb + maxBlockSize }) $
k $ Chunk [compressChunk' lv (buffer bb) (off bb) maxBlockSize]
final_flush bb mx k
| used bb > off bb =
idone (k $ Chunk [ compressChunk' lv (buffer bb) (off bb) (used bb - off bb)
, return bgzfEofMarker ]) (EOF mx)
| otherwise =
idone (k $ Chunk [ return bgzfEofMarker ]) (EOF mx)
fillBuffer :: BB -> BgzfTokens -> IO (BB, BgzfTokens)
fillBuffer bb0 tk = withForeignPtr (buffer bb0) (\p -> go_slowish p bb0 tk)
where
go_slowish p bb = go_fast p bb (used bb)
go_fast p bb use tk1 = case tk1 of
_ | size bb - use < 1024 -> return (bb { used = use },tk1)
TkEnd -> return (bb { used = use },tk1)
TkWord32 x tk' -> do pokeByteOff p use x
go_fast p bb (use + 4) tk'
TkWord16 x tk' -> do pokeByteOff p use x
go_fast p bb (use + 2) tk'
TkWord8 x tk' -> do pokeByteOff p use x
go_fast p bb (use + 1) tk'
TkFloat x tk' -> do pokeByteOff p use x
go_fast p bb (use + 4) tk'
TkDouble x tk' -> do pokeByteOff p use x
go_fast p bb (use + 8) tk'
TkString s tk'
| B.length s > size bb - use -> return (bb { used = use },tk1)
| otherwise -> do let ln = B.length s
B.unsafeUseAsCString s $ \q ->
copyBytes (p `plusPtr` use) q ln
go_fast p bb (use + ln) tk'
TkDecimal x tk' -> do ln <- int_loop (p `plusPtr` use) x
go_fast p bb (use + ln) tk'
TkSetMark tk' -> go_slowish p bb { used = use + 4, mark = use } tk'
TkEndRecord tk' -> do let !l = use - mark bb - 4
pokeByteOff p (mark bb) (fromIntegral l :: Word32)
go_slowish p bb { used = use, mark = maxBound } tk'
TkEndRecordPart1 tk' -> do let !l = use - mark bb - 4
pokeByteOff p (mark bb - 4) (fromIntegral l :: Word32)
go_slowish p bb { used = use, mark2 = use } tk'
TkEndRecordPart2 tk' -> do let !l = use - mark2 bb
pokeByteOff p (mark bb) (fromIntegral l :: Word32)
go_slowish p bb { used = use, mark = maxBound } tk'
TkBclSpecial special_args tk' -> do
l <- loop_bcl_special (p `plusPtr` use) special_args
go_fast p bb (use + l) tk'
TkLowLevel minsize proc tk'
| size bb - use < minsize -> return (bb { used = use },tk1)
| otherwise -> do bb' <- proc bb { used = use }
go_slowish p bb' tk'
loop_bcl_special :: Ptr Word8 -> BclArgs -> IO Int
loop_bcl_special p (BclArgs tp vec stride u v i) =
V.unsafeWith vec $ \q -> case tp of
BclNucsBin -> do
nuc_loop p stride (plusPtr q i) u v
return $ (v - u + 2) `div` 2
BclNucsWide -> do
nuc_loop_wide p stride (plusPtr q i) u v
return $ v - u + 1
BclNucsAsc -> do
nuc_loop_asc p stride (plusPtr q i) u v
return $ v - u + 1
BclNucsAscRev -> do
nuc_loop_asc_rev p stride (plusPtr q i) u v
return $ v - u + 1
BclQualsBin -> do
qual_loop p stride (plusPtr q i) u v
return $ v - u + 1
BclQualsAsc -> do
qual_loop_asc p stride (plusPtr q i) u v
return $ v - u + 1
BclQualsAscRev -> do
qual_loop_asc_rev p stride (plusPtr q i) u v
return $ v - u + 1
foreign import ccall unsafe "nuc_loop"
nuc_loop :: Ptr Word8 -> Int -> Ptr Word8 -> Int -> Int -> IO ()
foreign import ccall unsafe "nuc_loop_wide"
nuc_loop_wide :: Ptr Word8 -> Int -> Ptr Word8 -> Int -> Int -> IO ()
foreign import ccall unsafe "nuc_loop_asc"
nuc_loop_asc :: Ptr Word8 -> Int -> Ptr Word8 -> Int -> Int -> IO ()
foreign import ccall unsafe "nuc_loop_asc_rev"
nuc_loop_asc_rev :: Ptr Word8 -> Int -> Ptr Word8 -> Int -> Int -> IO ()
foreign import ccall unsafe "qual_loop"
qual_loop :: Ptr Word8 -> Int -> Ptr Word8 -> Int -> Int -> IO ()
foreign import ccall unsafe "qual_loop_asc"
qual_loop_asc :: Ptr Word8 -> Int -> Ptr Word8 -> Int -> Int -> IO ()
foreign import ccall unsafe "qual_loop_asc_rev"
qual_loop_asc_rev :: Ptr Word8 -> Int -> Ptr Word8 -> Int -> Int -> IO ()
foreign import ccall unsafe "int_loop"
int_loop :: Ptr Word8 -> Int -> IO Int