{-# LANGUAGE BangPatterns        #-}
{-# LANGUAGE ScopedTypeVariables #-}

-- | Cursor-based navigation over delimiter-separated text held in a strict
-- 'BS.ByteString', using two 'CsPoppy' indexes (field markers and newlines)
-- queried with 'rank1' / 'select1'.
module HaskellWorks.Data.Dsv.Strict.Cursor
  ( DsvCursor(..)
  , snippet
  , nextField
  , nextPosition
  , nextRow
  , mmapCursor
  , toListVector
  , toVectorVector
  ) where

import Data.Word
import HaskellWorks.Data.Dsv.Strict.Cursor.Type
import HaskellWorks.Data.Product
import HaskellWorks.Data.RankSelect.Base.Rank1
import HaskellWorks.Data.RankSelect.Base.Select1
import HaskellWorks.Data.RankSelect.CsPoppy

import qualified Data.ByteString                              as BS
import qualified Data.Vector                                  as DV
import qualified Data.Vector.Storable                         as DVS
import qualified HaskellWorks.Data.Dsv.Strict.Cursor.Internal as SVS
import qualified HaskellWorks.Data.FromForeignRegion          as IO

-- | Build a cursor, positioned at offset 0, over the file at @filePath@.
--
-- The text and a 'Word64' vector are obtained together from a single call to
-- 'IO.mmapFromForeignRegion' on @filePath@.
--
-- * When @useIndex@ is 'True' the marker and newline indexes are loaded from
--   @filePath ++ ".markers.idx"@ and @filePath ++ ".newlines.idx"@.
-- * Otherwise both indexes are computed by 'SVS.makeIndexes' from
--   @delimiter@ and the 'Word64' vector.
mmapCursor :: Word8 -> Bool -> FilePath -> IO (DsvCursor BS.ByteString CsPoppy)
mmapCursor delimiter useIndex filePath = do
  (!bs) :*: (!v) <- IO.mmapFromForeignRegion filePath
  -- Pins the element type of @v@ (the mmap result is polymorphic) and forces it.
  let !_ = v :: DVS.Vector Word64
  (!markers, !newlines) <- if useIndex
    then (,)
      <$> IO.mmapFromForeignRegion (filePath ++ ".markers.idx")
      <*> IO.mmapFromForeignRegion (filePath ++ ".newlines.idx")
    else return $ SVS.makeIndexes delimiter v
  return DsvCursor
    { dsvCursorDelimiter = delimiter
    , dsvCursorText      = bs
    , dsvCursorMarkers   = makeCsPoppy markers
    , dsvCursorNewlines  = makeCsPoppy newlines
    , dsvCursorPosition  = 0
    }

-- | The bytes of the text starting at the cursor's position and extending to
-- the position reported by 'nextField' for the same cursor.
--
-- The length is clamped to zero, so an empty string is returned when
-- 'nextField' does not move the position forward.
snippet :: DsvCursor BS.ByteString CsPoppy -> BS.ByteString
snippet c = BS.take (len `max` 0) $ BS.drop posC $ dsvCursorText c
  where
    d    = nextField c
    posC = fromIntegral $ dsvCursorPosition c
    posD = fromIntegral $ dsvCursorPosition d
    len  = posD - posC
{-# INLINE snippet #-}

-- | 'True' when no text remains at or after the cursor's position.
atEnd :: DsvCursor BS.ByteString CsPoppy -> Bool
atEnd c = BS.null (BS.drop (fromIntegral (dsvCursorPosition c)) (dsvCursorText c))
{-# INLINE atEnd #-}

-- | Move the cursor using the marker index: the new position is
-- @select1 markers (rank1 markers position + 1) - 1@.
--
-- NOTE(review): presumably this lands on the next field delimiter or newline
-- after the current position -- confirm against the 'rank1' / 'select1'
-- indexing conventions and 'SVS.makeIndexes'.
nextField :: DsvCursor BS.ByteString CsPoppy -> DsvCursor BS.ByteString CsPoppy
nextField cursor = cursor
  { dsvCursorPosition = newPos
  }
  where
    currentRank = rank1   (dsvCursorMarkers cursor) (dsvCursorPosition cursor)
    newPos      = select1 (dsvCursorMarkers cursor) (currentRank + 1) - 1
{-# INLINE nextField #-}

-- | Move the cursor using the newline index: the candidate position is
-- @select1 newlines (rank1 newlines position + 1) - 1@.
--
-- If the candidate is not strictly greater than the current position, the
-- cursor is moved to the length of the text instead.
nextRow :: DsvCursor BS.ByteString CsPoppy -> DsvCursor BS.ByteString CsPoppy
nextRow cursor = cursor
  { dsvCursorPosition = if newPos > dsvCursorPosition cursor
      then newPos
      else fromIntegral (BS.length (dsvCursorText cursor))
  }
  where
    currentRank = rank1   (dsvCursorNewlines cursor) (dsvCursorPosition cursor)
    newPos      = select1 (dsvCursorNewlines cursor) (currentRank + 1) - 1
{-# INLINE nextRow #-}

-- | Advance the cursor by one byte, never moving past the length of the text.
nextPosition :: DsvCursor BS.ByteString CsPoppy -> DsvCursor BS.ByteString CsPoppy
nextPosition cursor = cursor
  { dsvCursorPosition = if BS.null (BS.drop (fromIntegral newPos) (dsvCursorText cursor))
      then fromIntegral (BS.length (dsvCursorText cursor))
      else newPos
  }
  where
    newPos = dsvCursorPosition cursor + 1
{-# INLINE nextPosition #-}

-- | Collect the fields found between cursor @c@ and cursor @d@.
--
-- The number of fields produced is bounded by the difference between the
-- marker ranks at the two cursor positions. Each step records 'snippet' of
-- the current cursor and continues from 'nextPosition' of 'nextField'.
--
-- The rank difference is computed in 'Word64', so @d@ is expected not to
-- precede @c@ (the only caller in this module, 'toListVector', checks this).
getRowBetween :: DsvCursor BS.ByteString CsPoppy -> DsvCursor BS.ByteString CsPoppy -> DV.Vector BS.ByteString
getRowBetween c d = DV.unfoldrN c2d go c
  where
    cr  = rank1 (dsvCursorMarkers c) (dsvCursorPosition c)
    dr  = rank1 (dsvCursorMarkers d) (dsvCursorPosition d)
    c2d = fromIntegral (dr - cr)
    go :: DsvCursor BS.ByteString CsPoppy -> Maybe (BS.ByteString, DsvCursor BS.ByteString CsPoppy)
    go e = case nextField e of
      f -> case nextPosition f of
        g -> case snippet e of
          s -> Just (s, g)
    {-# INLINE go #-}
{-# INLINE getRowBetween #-}

-- | Split the text from the cursor onwards into rows, each row being a vector
-- of fields produced by 'getRowBetween'.
--
-- The start of the following row is @nextPosition (nextRow c)@. The list ends
-- when that position does not advance past the current one, or when the
-- current cursor is already at the end of the text.
toListVector :: DsvCursor BS.ByteString CsPoppy -> [DV.Vector BS.ByteString]
toListVector c = if dsvCursorPosition d > dsvCursorPosition c && not (atEnd c)
  then getRowBetween c d : toListVector d
  else []
  where
    d = nextPosition (nextRow c)
{-# INLINE toListVector #-}

-- | Same rows as 'toListVector', packed into a 'DV.Vector'.
toVectorVector :: DsvCursor BS.ByteString CsPoppy -> DV.Vector (DV.Vector BS.ByteString)
toVectorVector = DV.fromList . toListVector
{-# INLINE toVectorVector #-}