{-# LANGUAGE BangPatterns        #-}
{-# LANGUAGE ScopedTypeVariables #-}

module HaskellWorks.Data.Dsv.Strict.Cursor
  ( DsvCursor(..)
  , snippet
  , nextField
  , nextPosition
  , nextRow
  , mmapCursor
  , toListVector
  , toVectorVector
  ) where

import Data.Word
import HaskellWorks.Data.Dsv.Strict.Cursor.Type
import HaskellWorks.Data.Product
import HaskellWorks.Data.RankSelect.Base.Rank1
import HaskellWorks.Data.RankSelect.Base.Select1
import HaskellWorks.Data.RankSelect.CsPoppy

import qualified Data.ByteString                              as BS
import qualified Data.Vector                                  as DV
import qualified Data.Vector.Storable                         as DVS
import qualified HaskellWorks.Data.Dsv.Strict.Cursor.Internal as SVS
import qualified HaskellWorks.Data.FromForeignRegion          as IO

-- | Build a 'DsvCursor' positioned at offset 0 over the file at @filePath@.
--
-- The file is loaded via 'IO.mmapFromForeignRegion' as a pair: a
-- 'BS.ByteString' (which becomes the cursor text) and a 'DVS.Vector' of
-- 'Word64' (the input to index construction).
--
-- When @useIndex@ is 'True' the marker and newline vectors are loaded from
-- the side files @filePath ++ ".markers.idx"@ and
-- @filePath ++ ".newlines.idx"@; otherwise both are computed from the word
-- view of the file with 'SVS.makeIndexes' and @delimiter@.
mmapCursor :: Word8 -> Bool -> FilePath -> IO (DsvCursor BS.ByteString CsPoppy)
mmapCursor delimiter useIndex filePath = do
  (!bs) :*: (!v) <- IO.mmapFromForeignRegion filePath
  -- Fix the element type of the word view to 'Word64'.
  let !_ = v :: DVS.Vector Word64
  (!markers, !newlines) <-
    if useIndex
      then (,)
        <$> IO.mmapFromForeignRegion (filePath ++ ".markers.idx")
        <*> IO.mmapFromForeignRegion (filePath ++ ".newlines.idx")
      else return (SVS.makeIndexes delimiter v)
  return DsvCursor
    { dsvCursorDelimiter = delimiter
    , dsvCursorText      = bs
    , dsvCursorMarkers   = makeCsPoppy markers
    , dsvCursorNewlines  = makeCsPoppy newlines
    , dsvCursorPosition  = 0
    }

-- | Extract the bytes of the cursor text that lie between the cursor's
-- current position and the position 'nextField' would move it to.
--
-- The length is clamped to zero, so an empty 'BS.ByteString' is returned
-- when 'nextField' does not advance past the current position.
snippet :: DsvCursor BS.ByteString CsPoppy -> BS.ByteString
snippet c = BS.take (len `max` 0) (BS.drop posC (dsvCursorText c))
  where
    d    = nextField c
    posC = fromIntegral (dsvCursorPosition c) :: Int
    posD = fromIntegral (dsvCursorPosition d) :: Int
    len  = posD - posC
{-# INLINE snippet #-}

-- | 'True' when no bytes of the cursor text remain after dropping
-- 'dsvCursorPosition' bytes from its front.
atEnd :: DsvCursor BS.ByteString CsPoppy -> Bool
atEnd c = BS.null (BS.drop (fromIntegral (dsvCursorPosition c)) (dsvCursorText c))
{-# INLINE atEnd #-}

-- | Move the cursor using the marker index.
--
-- The new position is @select1 markers (rank1 markers pos + 1) - 1@, where
-- @pos@ is the current position.
--
-- NOTE(review): presumably this is the 0-based offset of the next field
-- marker at or after the current position; confirm against the 'Rank1' and
-- 'Select1' semantics for 'CsPoppy'. Behaviour when no further marker exists
-- is not established by this module.
nextField :: DsvCursor BS.ByteString CsPoppy -> DsvCursor BS.ByteString CsPoppy
nextField cursor = cursor { dsvCursorPosition = newPos }
  where
    markers     = dsvCursorMarkers cursor
    currentRank = rank1 markers (dsvCursorPosition cursor)
    newPos      = select1 markers (currentRank + 1) - 1
{-# INLINE nextField #-}

-- | Move the cursor using the newline index.
--
-- The candidate position is @select1 newlines (rank1 newlines pos + 1) - 1@,
-- where @pos@ is the current position. It is used only if it is strictly
-- greater than @pos@; otherwise the cursor is moved to the length of the
-- text, i.e. to the end of input.
--
-- NOTE(review): presumably the candidate is the 0-based offset of the next
-- newline at or after the current position; confirm against the 'Rank1' and
-- 'Select1' semantics for 'CsPoppy'.
nextRow :: DsvCursor BS.ByteString CsPoppy -> DsvCursor BS.ByteString CsPoppy
nextRow cursor = cursor
  { dsvCursorPosition =
      if newPos > dsvCursorPosition cursor
        then newPos
        else fromIntegral (BS.length (dsvCursorText cursor))
  }
  where
    newlines    = dsvCursorNewlines cursor
    currentRank = rank1 newlines (dsvCursorPosition cursor)
    newPos      = select1 newlines (currentRank + 1) - 1
{-# INLINE nextRow #-}

-- | Advance the cursor by one byte.
--
-- If dropping @position + 1@ bytes leaves the text empty, the position is
-- set to the length of the text instead, so the cursor never moves beyond
-- the end of input.
nextPosition :: DsvCursor BS.ByteString CsPoppy -> DsvCursor BS.ByteString CsPoppy
nextPosition cursor = cursor
  { dsvCursorPosition =
      if BS.null (BS.drop (fromIntegral newPos) text)
        then fromIntegral (BS.length text)
        else newPos
  }
  where
    text   = dsvCursorText cursor
    newPos = dsvCursorPosition cursor + 1
{-# INLINE nextPosition #-}

-- | Collect the fields found between cursor @c@ and cursor @d@.
--
-- The number of fields produced is bounded by the difference between the
-- marker ranks ('rank1' over 'dsvCursorMarkers') at the positions of @d@ and
-- @c@. Starting from @c@, each step yields 'snippet' of the current cursor
-- and continues from the position one byte past where 'nextField' lands.
getRowBetween :: DsvCursor BS.ByteString CsPoppy -> DsvCursor BS.ByteString CsPoppy -> DV.Vector BS.ByteString
getRowBetween c d = DV.unfoldrN c2d go c
  where
    cr  = rank1 (dsvCursorMarkers c) (dsvCursorPosition c)
    dr  = rank1 (dsvCursorMarkers d) (dsvCursorPosition d)
    c2d = fromIntegral (dr - cr) :: Int
    -- Emit the field at the cursor and step over the marker that ends it.
    go :: DsvCursor BS.ByteString CsPoppy -> Maybe (BS.ByteString, DsvCursor BS.ByteString CsPoppy)
    go e = Just (snippet e, nextPosition (nextField e))
    {-# INLINE go #-}
{-# INLINE getRowBetween #-}

-- | Lazily produce the rows from the cursor onwards, each row being a
-- vector of its fields.
--
-- A row is emitted while the cursor is not 'atEnd' and stepping to the
-- start of the following row (@nextPosition . nextRow@) strictly advances
-- the position; otherwise the list ends.
toListVector :: DsvCursor BS.ByteString CsPoppy -> [DV.Vector BS.ByteString]
toListVector c
  | dsvCursorPosition d > dsvCursorPosition c && not (atEnd c) = getRowBetween c d : toListVector d
  | otherwise                                                  = []
  where
    d = nextPosition (nextRow c)
{-# INLINE toListVector #-}

-- | Like 'toListVector', but with the rows gathered into a 'DV.Vector'.
toVectorVector :: DsvCursor BS.ByteString CsPoppy -> DV.Vector (DV.Vector BS.ByteString)
toVectorVector = DV.fromList . toListVector
{-# INLINE toVectorVector #-}