module Bio.Iteratee.Builder (
BB(..),
newBuffer,
fillBuffer,
expandBuffer,
encodeBgzf,
BgzfTokens(..),
BclArgs(..),
BclSpecialType(..),
int_loop,
loop_bcl_special
) where
import Bio.Iteratee
import Bio.Iteratee.Bgzf ( compressChunk, maxBlockSize, bgzfEofMarker )
import Bio.Prelude
import Foreign.ForeignPtr ( ForeignPtr, withForeignPtr, mallocForeignPtrBytes )
import Foreign.Marshal.Utils ( copyBytes )
import Foreign.Ptr ( Ptr, plusPtr )
import Foreign.Storable ( pokeByteOff )
import qualified Data.ByteString as B
import qualified Data.ByteString.Unsafe as B
import qualified Data.Vector.Storable as VS
data BB = BB { buffer :: !(ForeignPtr Word8)
, size :: !Int
, off :: !Int
, used :: !Int
, mark :: !Int
, mark2 :: !Int }
instance Show BB where
show bb = show (size bb, off bb, used bb, mark bb, mark2 bb)
data BgzfTokens = TkWord32 !Word32 BgzfTokens
| TkWord16 !Word16 BgzfTokens
| TkWord8 !Word8 BgzfTokens
| TkFloat !Float BgzfTokens
| TkDouble !Double BgzfTokens
| TkString !B.ByteString BgzfTokens
| TkDecimal !Int BgzfTokens
| TkLnString !B.ByteString BgzfTokens
| TkSetMark BgzfTokens
| TkEndRecord BgzfTokens
| TkEndRecordPart1 BgzfTokens
| TkEndRecordPart2 BgzfTokens
| TkEnd
| TkBclSpecial !BclArgs BgzfTokens
| TkLowLevel !Int (BB -> IO BB) BgzfTokens
data BclSpecialType = BclNucsBin | BclNucsAsc | BclNucsAscRev | BclQualsBin | BclQualsAsc | BclQualsAscRev
data BclArgs = BclArgs BclSpecialType
!(VS.Vector Word8)
!Int
!Int
!Int
!Int
newBuffer :: Int -> IO BB
newBuffer sz = mallocForeignPtrBytes sz >>= \ar -> return $ BB ar sz 0 0 maxBound maxBound
expandBuffer :: Int -> BB -> IO BB
expandBuffer minsz b = do
let sz' = max (2 * (size b used b)) minsz
arr1 <- mallocForeignPtrBytes sz'
withForeignPtr arr1 $ \d ->
withForeignPtr (buffer b) $ \s ->
copyBytes d (plusPtr s (off b)) (used b off b)
return $ BB { buffer = arr1
, size = sz'
, off = 0
, used = used b off b
, mark = if mark b == maxBound then maxBound else mark b off b
, mark2 = if mark2 b == maxBound then maxBound else mark2 b off b }
compressChunk' :: Int -> ForeignPtr Word8 -> Int -> Int -> IO B.ByteString
compressChunk' lv fptr off len =
withForeignPtr fptr $ \ptr ->
compressChunk lv (plusPtr ptr off) (fromIntegral len)
instance Nullable (Endo BgzfTokens) where
nullC f = case appEndo f TkEnd of TkEnd -> True ; _ -> False
encodeBgzf :: MonadIO m => Int -> Enumeratee (Endo BgzfTokens) B.ByteString m b
encodeBgzf lv = (\out -> newBuffer (1024*1024) `ioBind` \bb -> eneeCheckIfDone (liftI . go bb) out)
><> parRunIO (2*numCapabilities)
where
go bb0 k (EOF mx) = final_flush bb0 mx k
go bb0 k (Chunk f)
| size bb0 used bb0 < 1024 = expandBuffer (1024*1024) bb0 `ioBind` \bb' -> go' bb' k (appEndo f TkEnd)
| otherwise = go' bb0 k (appEndo f TkEnd)
go1 bb0 k tk = expandBuffer (1024*1024) bb0 `ioBind` \bb' -> go' bb' k tk
go' bb0 k tk = fillBuffer bb0 tk `ioBind` \(bb',tk') -> flush_blocks tk' bb' k
flush_blocks tk bb k
| min (mark bb) (used bb) off bb < maxBlockSize =
case tk of TkEnd -> liftI $ go bb k
_ -> go1 bb k tk
| otherwise = do
eneeCheckIfDone (flush_blocks tk bb { off = off bb + maxBlockSize }) $
k $ Chunk [compressChunk' lv (buffer bb) (off bb) maxBlockSize]
final_flush bb mx k
| used bb > off bb =
idone (k $ Chunk [ compressChunk' lv (buffer bb) (off bb) (used bb off bb)
, return bgzfEofMarker ]) (EOF mx)
| otherwise =
idone (k $ Chunk [ return bgzfEofMarker ]) (EOF mx)
fillBuffer :: BB -> BgzfTokens -> IO (BB, BgzfTokens)
fillBuffer bb0 tk = withForeignPtr (buffer bb0) (\p -> go_slowish p bb0 tk)
where
go_slowish p bb tk1 = go_fast p bb (used bb) tk1
go_fast p bb use tk1 = case tk1 of
_ | size bb use < 1024 -> return (bb { used = use },tk1)
TkEnd -> return (bb { used = use },tk1)
TkWord32 x tk' -> do pokeByteOff p use x
go_fast p bb (use + 4) tk'
TkWord16 x tk' -> do pokeByteOff p use x
go_fast p bb (use + 2) tk'
TkWord8 x tk' -> do pokeByteOff p use x
go_fast p bb (use + 1) tk'
TkFloat x tk' -> do pokeByteOff p use x
go_fast p bb (use + 4) tk'
TkDouble x tk' -> do pokeByteOff p use x
go_fast p bb (use + 8) tk'
TkString s tk'
| B.length s > size bb use -> return (bb { used = use },tk')
| otherwise -> do let ln = B.length s
B.unsafeUseAsCString s $ \q ->
copyBytes (p `plusPtr` use) q ln
go_fast p bb (use + ln) tk'
TkDecimal x tk' -> do ln <- int_loop (p `plusPtr` use) x
go_fast p bb (use + ln) tk'
TkLnString s tk'
| B.length s > size bb use 4 -> return (bb { used = use },tk')
| otherwise -> do let ln = B.length s
pokeByteOff p use (fromIntegral ln :: Word32)
B.unsafeUseAsCString s $ \q ->
copyBytes (p `plusPtr` (use + 4)) q ln
go_fast p bb (use + ln + 4) tk'
TkSetMark tk' -> go_slowish p bb { used = use + 4, mark = use } tk'
TkEndRecord tk' -> do let !l = use mark bb 4
pokeByteOff p (mark bb) (fromIntegral l :: Word32)
go_slowish p bb { used = use, mark = maxBound } tk'
TkEndRecordPart1 tk' -> do let !l = use mark bb 4
pokeByteOff p (mark bb 4) (fromIntegral l :: Word32)
go_slowish p bb { used = use, mark2 = use } tk'
TkEndRecordPart2 tk' -> do let !l = use mark2 bb
pokeByteOff p (mark bb) (fromIntegral l :: Word32)
go_slowish p bb { used = use, mark = maxBound } tk'
TkBclSpecial special_args tk' -> do
l <- loop_bcl_special (p `plusPtr` use) special_args
go_fast p bb (use + l) tk'
TkLowLevel minsize proc tk'
| size bb use < minsize -> return (bb { used = use },tk1)
| otherwise -> do bb' <- proc bb { used = use }
go_slowish p bb' tk'
loop_bcl_special :: Ptr Word8 -> BclArgs -> IO Int
loop_bcl_special p (BclArgs tp vec stride u v i) =
VS.unsafeWith vec $ \q -> case tp of
BclNucsBin -> do
nuc_loop p stride (plusPtr q i) u v
return $ (v u + 2) `div` 2
BclNucsAsc -> do
nuc_loop_asc p stride (plusPtr q i) u v
return $ v u + 1
BclNucsAscRev -> do
nuc_loop_asc_rev p stride (plusPtr q i) u v
return $ v u + 1
BclQualsBin -> do
qual_loop p stride (plusPtr q i) u v
return $ v u + 1
BclQualsAsc -> do
qual_loop_asc p stride (plusPtr q i) u v
return $ v u + 1
BclQualsAscRev -> do
qual_loop_asc_rev p stride (plusPtr q i) u v
return $ v u + 1
foreign import ccall unsafe "nuc_loop"
nuc_loop :: Ptr Word8 -> Int -> Ptr Word8 -> Int -> Int -> IO ()
foreign import ccall unsafe "nuc_loop_asc"
nuc_loop_asc :: Ptr Word8 -> Int -> Ptr Word8 -> Int -> Int -> IO ()
foreign import ccall unsafe "nuc_loop_asc_rev"
nuc_loop_asc_rev :: Ptr Word8 -> Int -> Ptr Word8 -> Int -> Int -> IO ()
foreign import ccall unsafe "qual_loop"
qual_loop :: Ptr Word8 -> Int -> Ptr Word8 -> Int -> Int -> IO ()
foreign import ccall unsafe "qual_loop_asc"
qual_loop_asc :: Ptr Word8 -> Int -> Ptr Word8 -> Int -> Int -> IO ()
foreign import ccall unsafe "qual_loop_asc_rev"
qual_loop_asc_rev :: Ptr Word8 -> Int -> Ptr Word8 -> Int -> Int -> IO ()
foreign import ccall unsafe "int_loop"
int_loop :: Ptr Word8 -> Int -> IO Int