{-|

Module      : Data.CSV.Sip
Description : works with CSV files
Copyright   : (C) Jonathan Lamothe
License     : GPL-3.0-or-later
Maintainer  : jonathan@jlamothe.net
Stability   : experimental
Portability : POSIX

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or (at
your option) any later version.

This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <https://www.gnu.org/licenses/>.

-}

{-# LANGUAGE LambdaCase, OverloadedStrings #-}

module Data.CSV.Sip (
  -- * Working with Files
  -- ** Read an entire CSV file
  slurpCSV,
  slurpRawCSV,
  slurpLabelledCSV,
  slurpRawLabelledCSV,
  -- ** Write an entire CSV file
  writeCSV,
  writeRawCSV,
  -- * Conduits
  -- ** Producers
  readFromCSV,
  readFromCSVRaw,
  encodeCSV,
  encodeRawCSV,
  -- ** Consumers
  writeToCSV,
  writeToCSVRaw,
  -- ** Transformers
  -- *** Encoding
  encodeRows,
  encodeRawRows,
  -- *** Decoding
  labelFields,
  decodeRows,
  decodeRawRows,
  decodeUTF8,
  toBytes,
) where

import Conduit
  ( ConduitT
  , MonadResource
  , await
  , mapC
  , runConduit
  , sinkFile
  , sourceFile
  , yield
  , (.|)
  )
import Control.Monad (unless)
import Control.Monad.Trans.Class (lift)
import Control.Monad.Trans.State (StateT, evalStateT, get, gets, modify)
import qualified Data.ByteString as BS
import Data.Conduit.List (consume, sourceList)
import qualified Data.Map as M
import Data.Maybe (fromMaybe)
import qualified Data.Text as T
import Data.Text.Encoding (decodeUtf8', encodeUtf8)
import Data.Word (Word8)

-- | read an entire CSV file
slurpCSV
  :: MonadResource m
  => FilePath
  -- ^ the path to the file to read
  -> m [[T.Text]]
slurpCSV :: FilePath -> m [[Text]]
slurpCSV FilePath
file = ConduitT () Void m [[Text]] -> m [[Text]]
forall (m :: * -> *) r. Monad m => ConduitT () Void m r -> m r
runConduit (ConduitT () Void m [[Text]] -> m [[Text]])
-> ConduitT () Void m [[Text]] -> m [[Text]]
forall a b. (a -> b) -> a -> b
$ FilePath -> ConduitT () ByteString m ()
forall (m :: * -> *) i.
MonadResource m =>
FilePath -> ConduitT i ByteString m ()
sourceFile FilePath
file ConduitT () ByteString m ()
-> ConduitM ByteString Void m [[Text]]
-> ConduitT () Void m [[Text]]
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| ConduitT ByteString [Text] m ()
forall (m :: * -> *). Monad m => ConduitT ByteString [Text] m ()
decodeRows ConduitT ByteString [Text] m ()
-> ConduitM [Text] Void m [[Text]]
-> ConduitM ByteString Void m [[Text]]
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| ConduitM [Text] Void m [[Text]]
forall (m :: * -> *) a o. Monad m => ConduitT a o m [a]
consume

-- | read an entire CSV file in raw mode
slurpRawCSV
  :: MonadResource m
  => FilePath
  -- ^ the path to the file to read
  -> m [[BS.ByteString]]
slurpRawCSV :: FilePath -> m [[ByteString]]
slurpRawCSV FilePath
file = ConduitT () Void m [[ByteString]] -> m [[ByteString]]
forall (m :: * -> *) r. Monad m => ConduitT () Void m r -> m r
runConduit (ConduitT () Void m [[ByteString]] -> m [[ByteString]])
-> ConduitT () Void m [[ByteString]] -> m [[ByteString]]
forall a b. (a -> b) -> a -> b
$ FilePath -> ConduitT () ByteString m ()
forall (m :: * -> *) i.
MonadResource m =>
FilePath -> ConduitT i ByteString m ()
sourceFile FilePath
file ConduitT () ByteString m ()
-> ConduitM ByteString Void m [[ByteString]]
-> ConduitT () Void m [[ByteString]]
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| ConduitT ByteString [ByteString] m ()
forall (m :: * -> *).
Monad m =>
ConduitT ByteString [ByteString] m ()
decodeRawRows ConduitT ByteString [ByteString] m ()
-> ConduitM [ByteString] Void m [[ByteString]]
-> ConduitM ByteString Void m [[ByteString]]
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| ConduitM [ByteString] Void m [[ByteString]]
forall (m :: * -> *) a o. Monad m => ConduitT a o m [a]
consume

-- | read an entire CSV file with a header
slurpLabelledCSV
  :: MonadResource m
  => FilePath
  -- ^ the path to the file to read
  -> m [M.Map T.Text T.Text]
slurpLabelledCSV :: FilePath -> m [Map Text Text]
slurpLabelledCSV FilePath
file = ConduitT () Void m [Map Text Text] -> m [Map Text Text]
forall (m :: * -> *) r. Monad m => ConduitT () Void m r -> m r
runConduit (ConduitT () Void m [Map Text Text] -> m [Map Text Text])
-> ConduitT () Void m [Map Text Text] -> m [Map Text Text]
forall a b. (a -> b) -> a -> b
$
  FilePath -> ConduitT () ByteString m ()
forall (m :: * -> *) i.
MonadResource m =>
FilePath -> ConduitT i ByteString m ()
sourceFile FilePath
file ConduitT () ByteString m ()
-> ConduitM ByteString Void m [Map Text Text]
-> ConduitT () Void m [Map Text Text]
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| ConduitT ByteString [Text] m ()
forall (m :: * -> *). Monad m => ConduitT ByteString [Text] m ()
decodeRows ConduitT ByteString [Text] m ()
-> ConduitM [Text] Void m [Map Text Text]
-> ConduitM ByteString Void m [Map Text Text]
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| ConduitT [Text] (Map Text Text) m ()
forall (m :: * -> *) a.
(Monad m, Ord a) =>
ConduitT [a] (Map a a) m ()
labelFields ConduitT [Text] (Map Text Text) m ()
-> ConduitM (Map Text Text) Void m [Map Text Text]
-> ConduitM [Text] Void m [Map Text Text]
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.|ConduitM (Map Text Text) Void m [Map Text Text]
forall (m :: * -> *) a o. Monad m => ConduitT a o m [a]
consume

-- | read an entire CSV file with a header
slurpRawLabelledCSV
  :: MonadResource m
  => FilePath
  -- ^ the path to the file to read
  -> m [M.Map BS.ByteString BS.ByteString]
slurpRawLabelledCSV :: FilePath -> m [Map ByteString ByteString]
slurpRawLabelledCSV FilePath
file = ConduitT () Void m [Map ByteString ByteString]
-> m [Map ByteString ByteString]
forall (m :: * -> *) r. Monad m => ConduitT () Void m r -> m r
runConduit (ConduitT () Void m [Map ByteString ByteString]
 -> m [Map ByteString ByteString])
-> ConduitT () Void m [Map ByteString ByteString]
-> m [Map ByteString ByteString]
forall a b. (a -> b) -> a -> b
$
  FilePath -> ConduitT () ByteString m ()
forall (m :: * -> *) i.
MonadResource m =>
FilePath -> ConduitT i ByteString m ()
sourceFile FilePath
file ConduitT () ByteString m ()
-> ConduitM ByteString Void m [Map ByteString ByteString]
-> ConduitT () Void m [Map ByteString ByteString]
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| ConduitT ByteString [ByteString] m ()
forall (m :: * -> *).
Monad m =>
ConduitT ByteString [ByteString] m ()
decodeRawRows ConduitT ByteString [ByteString] m ()
-> ConduitM [ByteString] Void m [Map ByteString ByteString]
-> ConduitM ByteString Void m [Map ByteString ByteString]
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| ConduitT [ByteString] (Map ByteString ByteString) m ()
forall (m :: * -> *) a.
(Monad m, Ord a) =>
ConduitT [a] (Map a a) m ()
labelFields ConduitT [ByteString] (Map ByteString ByteString) m ()
-> ConduitM
     (Map ByteString ByteString) Void m [Map ByteString ByteString]
-> ConduitM [ByteString] Void m [Map ByteString ByteString]
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.|ConduitM
  (Map ByteString ByteString) Void m [Map ByteString ByteString]
forall (m :: * -> *) a o. Monad m => ConduitT a o m [a]
consume

-- | write a CSV file from Text-based rows
writeCSV
  :: MonadResource m
  => FilePath
  -- ^ the path to the file to write to
  -> [[T.Text]]
  -- ^ the fields/rows being written
  -> m ()
writeCSV :: FilePath -> [[Text]] -> m ()
writeCSV FilePath
file [[Text]]
csv = ConduitT () Void m () -> m ()
forall (m :: * -> *) r. Monad m => ConduitT () Void m r -> m r
runConduit (ConduitT () Void m () -> m ()) -> ConduitT () Void m () -> m ()
forall a b. (a -> b) -> a -> b
$ [[Text]] -> ConduitT () ByteString m ()
forall (m :: * -> *).
Monad m =>
[[Text]] -> ConduitT () ByteString m ()
encodeCSV [[Text]]
csv ConduitT () ByteString m ()
-> ConduitM ByteString Void m () -> ConduitT () Void m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| FilePath -> ConduitM ByteString Void m ()
forall (m :: * -> *) o.
MonadResource m =>
FilePath -> ConduitT ByteString o m ()
sinkFile FilePath
file

-- | write a CSV file from raw ByteString-based rows
writeRawCSV
  :: MonadResource m
  => FilePath
  -- ^ the path to the file to write to
  -> [[BS.ByteString]]
  -- ^ the fields/rows being written
  -> m ()
writeRawCSV :: FilePath -> [[ByteString]] -> m ()
writeRawCSV FilePath
file [[ByteString]]
csv = ConduitT () Void m () -> m ()
forall (m :: * -> *) r. Monad m => ConduitT () Void m r -> m r
runConduit (ConduitT () Void m () -> m ()) -> ConduitT () Void m () -> m ()
forall a b. (a -> b) -> a -> b
$ [[ByteString]] -> ConduitT () ByteString m ()
forall (m :: * -> *).
Monad m =>
[[ByteString]] -> ConduitT () ByteString m ()
encodeRawCSV [[ByteString]]
csv ConduitT () ByteString m ()
-> ConduitM ByteString Void m () -> ConduitT () Void m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| FilePath -> ConduitM ByteString Void m ()
forall (m :: * -> *) o.
MonadResource m =>
FilePath -> ConduitT ByteString o m ()
sinkFile FilePath
file

-- | reads a stream of Text-based rows from a CSV file
readFromCSV
  :: MonadResource m
  => FilePath
  -- ^ the path to the CSV file to read from
  -> ConduitT i [T.Text] m ()
readFromCSV :: FilePath -> ConduitT i [Text] m ()
readFromCSV FilePath
file = FilePath -> ConduitT i ByteString m ()
forall (m :: * -> *) i.
MonadResource m =>
FilePath -> ConduitT i ByteString m ()
sourceFile FilePath
file ConduitT i ByteString m ()
-> ConduitM ByteString [Text] m () -> ConduitT i [Text] m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| ConduitM ByteString [Text] m ()
forall (m :: * -> *). Monad m => ConduitT ByteString [Text] m ()
decodeRows

-- | reads a stream of ByteString-based rows from a CSV file
readFromCSVRaw
  :: MonadResource m
  => FilePath
  -- ^ the path to the CSV file to read from
  -> ConduitT i [BS.ByteString] m ()
readFromCSVRaw :: FilePath -> ConduitT i [ByteString] m ()
readFromCSVRaw FilePath
file = FilePath -> ConduitT i ByteString m ()
forall (m :: * -> *) i.
MonadResource m =>
FilePath -> ConduitT i ByteString m ()
sourceFile FilePath
file ConduitT i ByteString m ()
-> ConduitM ByteString [ByteString] m ()
-> ConduitT i [ByteString] m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| ConduitM ByteString [ByteString] m ()
forall (m :: * -> *).
Monad m =>
ConduitT ByteString [ByteString] m ()
decodeRawRows

-- | encode an entire CSV file
encodeCSV
  :: Monad m
  => [[T.Text]]
  -- ^ the data being encoded, organized into rows and fields
  -> ConduitT () BS.ByteString m ()
encodeCSV :: [[Text]] -> ConduitT () ByteString m ()
encodeCSV [[Text]]
csv = [[Text]] -> ConduitT () [Text] m ()
forall (m :: * -> *) a i. Monad m => [a] -> ConduitT i a m ()
sourceList [[Text]]
csv ConduitT () [Text] m ()
-> ConduitM [Text] ByteString m () -> ConduitT () ByteString m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| ConduitM [Text] ByteString m ()
forall (m :: * -> *). Monad m => ConduitT [Text] ByteString m ()
encodeRows

-- | encode an entire CSV file
encodeRawCSV
  :: Monad m
  => [[BS.ByteString]]
  -- ^ the data being encoded, organized into rows and fields
  -> ConduitT () BS.ByteString m ()
encodeRawCSV :: [[ByteString]] -> ConduitT () ByteString m ()
encodeRawCSV [[ByteString]]
csv = [[ByteString]] -> ConduitT () [ByteString] m ()
forall (m :: * -> *) a i. Monad m => [a] -> ConduitT i a m ()
sourceList [[ByteString]]
csv ConduitT () [ByteString] m ()
-> ConduitM [ByteString] ByteString m ()
-> ConduitT () ByteString m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| ConduitM [ByteString] ByteString m ()
forall (m :: * -> *).
Monad m =>
ConduitT [ByteString] ByteString m ()
encodeRawRows

-- | Writes a stream of Text-based rows to a CSV file
writeToCSV
  :: MonadResource m
  => FilePath
  -- ^ the path to the CSV file to write to
  -> ConduitT [T.Text] o m ()
writeToCSV :: FilePath -> ConduitT [Text] o m ()
writeToCSV FilePath
file = ConduitT [Text] ByteString m ()
forall (m :: * -> *). Monad m => ConduitT [Text] ByteString m ()
encodeRows ConduitT [Text] ByteString m ()
-> ConduitM ByteString o m () -> ConduitT [Text] o m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| FilePath -> ConduitM ByteString o m ()
forall (m :: * -> *) o.
MonadResource m =>
FilePath -> ConduitT ByteString o m ()
sinkFile FilePath
file

-- | Writes a stream of ByteString-based rows to a CSV file
writeToCSVRaw
  :: MonadResource m
  => FilePath
  -- ^ the path to the CSV file to write to
  -> ConduitT [BS.ByteString] o m ()
writeToCSVRaw :: FilePath -> ConduitT [ByteString] o m ()
writeToCSVRaw FilePath
file = ConduitT [ByteString] ByteString m ()
forall (m :: * -> *).
Monad m =>
ConduitT [ByteString] ByteString m ()
encodeRawRows ConduitT [ByteString] ByteString m ()
-> ConduitM ByteString o m () -> ConduitT [ByteString] o m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| FilePath -> ConduitM ByteString o m ()
forall (m :: * -> *) o.
MonadResource m =>
FilePath -> ConduitT ByteString o m ()
sinkFile FilePath
file

-- | encode a CSV stream row by row, each element in the list read
-- represents a field, with the entire list representing a row
encodeRows :: Monad m => ConduitT [T.Text] BS.ByteString m ()
encodeRows :: ConduitT [Text] ByteString m ()
encodeRows = ([Text] -> [ByteString]) -> ConduitT [Text] [ByteString] m ()
forall (m :: * -> *) a b. Monad m => (a -> b) -> ConduitT a b m ()
mapC ((Text -> ByteString) -> [Text] -> [ByteString]
forall a b. (a -> b) -> [a] -> [b]
map Text -> ByteString
encodeUtf8) ConduitT [Text] [ByteString] m ()
-> ConduitM [ByteString] ByteString m ()
-> ConduitT [Text] ByteString m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| ConduitM [ByteString] ByteString m ()
forall (m :: * -> *).
Monad m =>
ConduitT [ByteString] ByteString m ()
encodeRawRows

-- | encode raw CSV stream row by row, each element in the list read
-- represents a field, with the entire list representing a row
encodeRawRows :: Monad m => ConduitT [BS.ByteString] BS.ByteString m ()
encodeRawRows :: ConduitT [ByteString] ByteString m ()
encodeRawRows = ConduitT [ByteString] ByteString m (Maybe [ByteString])
forall (m :: * -> *) i. Monad m => Consumer i m (Maybe i)
await ConduitT [ByteString] ByteString m (Maybe [ByteString])
-> (Maybe [ByteString] -> ConduitT [ByteString] ByteString m ())
-> ConduitT [ByteString] ByteString m ()
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case

  Just [ByteString]
fs-> do
    [ByteString] -> ConduitT [ByteString] ByteString m ()
forall (m :: * -> *).
Monad m =>
[ByteString] -> ConduitT [ByteString] ByteString m ()
encodeFields [ByteString]
fs
    ConduitT [ByteString] ByteString m ()
forall (m :: * -> *).
Monad m =>
ConduitT [ByteString] ByteString m ()
encodeRawRows

  Maybe [ByteString]
Nothing -> () -> ConduitT [ByteString] ByteString m ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()

-- | read a CSV stream, using the first row as a header containing
-- field labels
labelFields :: (Monad m, Ord a) => ConduitT [a] (M.Map a a) m ()
labelFields :: ConduitT [a] (Map a a) m ()
labelFields = ConduitT [a] (Map a a) m (Maybe [a])
forall (m :: * -> *) i. Monad m => Consumer i m (Maybe i)
await ConduitT [a] (Map a a) m (Maybe [a])
-> (Maybe [a] -> ConduitT [a] (Map a a) m ())
-> ConduitT [a] (Map a a) m ()
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
  Just [a]
headers -> [a] -> ConduitT [a] (Map a a) m ()
forall (m :: * -> *) a.
(Monad m, Ord a) =>
[a] -> ConduitT [a] (Map a a) m ()
labelLoop [a]
headers
  Maybe [a]
Nothing      -> () -> ConduitT [a] (Map a a) m ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()

-- | decode the rows from a stream of ByteStrings
decodeRows :: Monad m => ConduitT BS.ByteString [T.Text] m ()
decodeRows :: ConduitT ByteString [Text] m ()
decodeRows = ConduitT ByteString [ByteString] m ()
forall (m :: * -> *).
Monad m =>
ConduitT ByteString [ByteString] m ()
decodeRawRows ConduitT ByteString [ByteString] m ()
-> ConduitM [ByteString] [Text] m ()
-> ConduitT ByteString [Text] m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| ([ByteString] -> [Text]) -> ConduitM [ByteString] [Text] m ()
forall (m :: * -> *) a b. Monad m => (a -> b) -> ConduitT a b m ()
mapC ((ByteString -> Text) -> [ByteString] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map ((ByteString -> Text) -> [ByteString] -> [Text])
-> (ByteString -> Text) -> [ByteString] -> [Text]
forall a b. (a -> b) -> a -> b
$ Text -> Maybe Text -> Text
forall a. a -> Maybe a -> a
fromMaybe Text
"" (Maybe Text -> Text)
-> (ByteString -> Maybe Text) -> ByteString -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Maybe Text
decodeUTF8)

-- | decode the rows returning raw ByteStrings instead of text
decodeRawRows :: Monad m => ConduitT BS.ByteString [BS.ByteString] m ()
decodeRawRows :: ConduitT ByteString [ByteString] m ()
decodeRawRows = ConduitT ByteString Word8 m ()
forall (m :: * -> *). Monad m => ConduitT ByteString Word8 m ()
toBytes ConduitT ByteString Word8 m ()
-> ConduitM Word8 [ByteString] m ()
-> ConduitT ByteString [ByteString] m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| StateT DecodeState (ConduitT Word8 [ByteString] m) ()
-> DecodeState -> ConduitM Word8 [ByteString] m ()
forall (m :: * -> *) s a. Monad m => StateT s m a -> s -> m a
evalStateT StateT DecodeState (ConduitT Word8 [ByteString] m) ()
forall (m :: * -> *). Monad m => Decoder m
decodeLoop DecodeState
newDecodeState

-- | decode a raw ByteString into Text (if possible)
decodeUTF8 :: BS.ByteString -> Maybe T.Text
decodeUTF8 :: ByteString -> Maybe Text
decodeUTF8 ByteString
bs = case ByteString -> Either UnicodeException Text
decodeUtf8' ByteString
bs of
  Left UnicodeException
_    -> Maybe Text
forall a. Maybe a
Nothing
  Right Text
txt -> Text -> Maybe Text
forall a. a -> Maybe a
Just Text
txt

-- | convert a stream to ByteStrings to a string of bytes
toBytes :: Monad m => ConduitT BS.ByteString Word8 m ()
toBytes :: ConduitT ByteString Word8 m ()
toBytes = ConduitT ByteString Word8 m (Maybe ByteString)
forall (m :: * -> *) i. Monad m => Consumer i m (Maybe i)
await ConduitT ByteString Word8 m (Maybe ByteString)
-> (Maybe ByteString -> ConduitT ByteString Word8 m ())
-> ConduitT ByteString Word8 m ()
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
  Just ByteString
bs -> do
    let bytes :: [Word8]
bytes = ByteString -> [Word8]
BS.unpack ByteString
bs
    (Word8 -> ConduitT ByteString Word8 m ())
-> [Word8] -> ConduitT ByteString Word8 m ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ Word8 -> ConduitT ByteString Word8 m ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield [Word8]
bytes
    ConduitT ByteString Word8 m ()
forall (m :: * -> *). Monad m => ConduitT ByteString Word8 m ()
toBytes
  Maybe ByteString
Nothing -> () -> ConduitT ByteString Word8 m ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()

-- Internal

data DecodeState = DecodeState
  { DecodeState -> Bool
isQuoted  :: Bool
  , DecodeState -> [ByteString]
fields    :: [BS.ByteString]
  , DecodeState -> ByteString
collected :: BS.ByteString
  } deriving (DecodeState -> DecodeState -> Bool
(DecodeState -> DecodeState -> Bool)
-> (DecodeState -> DecodeState -> Bool) -> Eq DecodeState
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: DecodeState -> DecodeState -> Bool
$c/= :: DecodeState -> DecodeState -> Bool
== :: DecodeState -> DecodeState -> Bool
$c== :: DecodeState -> DecodeState -> Bool
Eq, Int -> DecodeState -> ShowS
[DecodeState] -> ShowS
DecodeState -> FilePath
(Int -> DecodeState -> ShowS)
-> (DecodeState -> FilePath)
-> ([DecodeState] -> ShowS)
-> Show DecodeState
forall a.
(Int -> a -> ShowS) -> (a -> FilePath) -> ([a] -> ShowS) -> Show a
showList :: [DecodeState] -> ShowS
$cshowList :: [DecodeState] -> ShowS
show :: DecodeState -> FilePath
$cshow :: DecodeState -> FilePath
showsPrec :: Int -> DecodeState -> ShowS
$cshowsPrec :: Int -> DecodeState -> ShowS
Show)

type Decoder m = StateT DecodeState (ConduitT Word8 [BS.ByteString] m) ()
type Modifier = DecodeState -> DecodeState

newDecodeState :: DecodeState
newDecodeState :: DecodeState
newDecodeState = DecodeState :: Bool -> [ByteString] -> ByteString -> DecodeState
DecodeState
  { isQuoted :: Bool
isQuoted  = Bool
False
  , fields :: [ByteString]
fields    = []
  , collected :: ByteString
collected = ByteString
""
  }

-- Conduits

encodeFields
  :: Monad m
  => [BS.ByteString]
  -> ConduitT [BS.ByteString] BS.ByteString m ()
encodeFields :: [ByteString] -> ConduitT [ByteString] ByteString m ()
encodeFields []     = ByteString -> ConduitT [ByteString] ByteString m ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield ByteString
"\r\n"
encodeFields [ByteString
f]    = ByteString -> ConduitT [ByteString] ByteString m ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield (ByteString -> ConduitT [ByteString] ByteString m ())
-> ByteString -> ConduitT [ByteString] ByteString m ()
forall a b. (a -> b) -> a -> b
$ ByteString -> ByteString
escapeField ByteString
f ByteString -> ByteString -> ByteString
`BS.append` ByteString
"\r\n"
encodeFields (ByteString
f:[ByteString]
fs) = do
  ByteString -> ConduitT [ByteString] ByteString m ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield (ByteString -> ConduitT [ByteString] ByteString m ())
-> ByteString -> ConduitT [ByteString] ByteString m ()
forall a b. (a -> b) -> a -> b
$ ByteString -> ByteString
escapeField ByteString
f ByteString -> ByteString -> ByteString
`BS.append` ByteString
","
  [ByteString] -> ConduitT [ByteString] ByteString m ()
forall (m :: * -> *).
Monad m =>
[ByteString] -> ConduitT [ByteString] ByteString m ()
encodeFields [ByteString]
fs

labelLoop :: (Monad m, Ord a) => [a] -> ConduitT [a] (M.Map a a) m ()
labelLoop :: [a] -> ConduitT [a] (Map a a) m ()
labelLoop [a]
headers = ConduitT [a] (Map a a) m (Maybe [a])
forall (m :: * -> *) i. Monad m => Consumer i m (Maybe i)
await ConduitT [a] (Map a a) m (Maybe [a])
-> (Maybe [a] -> ConduitT [a] (Map a a) m ())
-> ConduitT [a] (Map a a) m ()
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
  Just [a]
values -> do
    Map a a -> ConduitT [a] (Map a a) m ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield (Map a a -> ConduitT [a] (Map a a) m ())
-> Map a a -> ConduitT [a] (Map a a) m ()
forall a b. (a -> b) -> a -> b
$ [(a, a)] -> Map a a
forall k a. Ord k => [(k, a)] -> Map k a
M.fromList ([(a, a)] -> Map a a) -> [(a, a)] -> Map a a
forall a b. (a -> b) -> a -> b
$ [a] -> [a] -> [(a, a)]
forall a b. [a] -> [b] -> [(a, b)]
zip [a]
headers [a]
values
    [a] -> ConduitT [a] (Map a a) m ()
forall (m :: * -> *) a.
(Monad m, Ord a) =>
[a] -> ConduitT [a] (Map a a) m ()
labelLoop [a]
headers
  Maybe [a]
Nothing     -> () -> ConduitT [a] (Map a a) m ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()

-- Decoders

decodeLoop :: Monad m => Decoder m
decodeLoop :: Decoder m
decodeLoop = ConduitT Word8 [ByteString] m (Maybe Word8)
-> StateT DecodeState (ConduitT Word8 [ByteString] m) (Maybe Word8)
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift ConduitT Word8 [ByteString] m (Maybe Word8)
forall (m :: * -> *) i. Monad m => Consumer i m (Maybe i)
await StateT DecodeState (ConduitT Word8 [ByteString] m) (Maybe Word8)
-> (Maybe Word8 -> Decoder m) -> Decoder m
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
  Just Word8
byte -> case Word8
byte of
    Word8
0x22 -> Decoder m
forall (m :: * -> *). Monad m => Decoder m
processQuote
    Word8
0x2c -> Decoder m
forall (m :: * -> *). Monad m => Decoder m
processComma
    Word8
0x0d -> Decoder m
forall (m :: * -> *). Monad m => Decoder m
processCR
    Word8
0x0a -> Decoder m
forall (m :: * -> *). Monad m => Decoder m
processLF
    Word8
_    -> Modifier -> Decoder m
forall (m :: * -> *). Monad m => Modifier -> Decoder m
performAction (Modifier -> Decoder m) -> Modifier -> Decoder m
forall a b. (a -> b) -> a -> b
$ Word8 -> Modifier
addByte Word8
byte
  Maybe Word8
Nothing -> Decoder m
forall (m :: * -> *). Monad m => Decoder m
cleanup

processQuote :: Monad m => Decoder m
processQuote :: Decoder m
processQuote = (DecodeState -> Bool)
-> StateT DecodeState (ConduitT Word8 [ByteString] m) Bool
forall (m :: * -> *) s a. Monad m => (s -> a) -> StateT s m a
gets DecodeState -> Bool
isQuoted StateT DecodeState (ConduitT Word8 [ByteString] m) Bool
-> (Bool -> Decoder m) -> Decoder m
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
  Bool
True  -> Decoder m
forall (m :: * -> *). Monad m => Decoder m
processQuotedQuote
  Bool
False -> Decoder m
forall (m :: * -> *). Monad m => Decoder m
processUnquotedQuote

processComma :: Monad m => Decoder m
processComma :: Decoder m
processComma = (DecodeState -> Bool)
-> StateT DecodeState (ConduitT Word8 [ByteString] m) Bool
forall (m :: * -> *) s a. Monad m => (s -> a) -> StateT s m a
gets DecodeState -> Bool
isQuoted StateT DecodeState (ConduitT Word8 [ByteString] m) Bool
-> (Bool -> Decoder m) -> Decoder m
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Modifier -> Decoder m
forall (m :: * -> *). Monad m => Modifier -> Decoder m
performAction (Modifier -> Decoder m) -> (Bool -> Modifier) -> Bool -> Decoder m
forall b c a. (b -> c) -> (a -> b) -> a -> c
. \case
  Bool
True  -> Word8 -> Modifier
addByte Word8
0x2c
  Bool
False -> Modifier
commitField

processCR :: Monad m => Decoder m
processCR :: Decoder m
processCR = (DecodeState -> Bool)
-> StateT DecodeState (ConduitT Word8 [ByteString] m) Bool
forall (m :: * -> *) s a. Monad m => (s -> a) -> StateT s m a
gets DecodeState -> Bool
isQuoted StateT DecodeState (ConduitT Word8 [ByteString] m) Bool
-> (Bool -> Decoder m) -> Decoder m
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
  Bool
True  -> Modifier -> Decoder m
forall (m :: * -> *). Monad m => Modifier -> Decoder m
performAction (Modifier -> Decoder m) -> Modifier -> Decoder m
forall a b. (a -> b) -> a -> b
$ Word8 -> Modifier
addByte Word8
0xd
  Bool
False -> Decoder m
forall (m :: * -> *). Monad m => Decoder m
endRow

processLF :: Monad m => Decoder m
processLF :: Decoder m
processLF = (DecodeState -> Bool)
-> StateT DecodeState (ConduitT Word8 [ByteString] m) Bool
forall (m :: * -> *) s a. Monad m => (s -> a) -> StateT s m a
gets DecodeState -> Bool
isQuoted StateT DecodeState (ConduitT Word8 [ByteString] m) Bool
-> (Bool -> Decoder m) -> Decoder m
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
  Bool
True  -> Modifier -> Decoder m
forall (m :: * -> *). Monad m => Modifier -> Decoder m
performAction (Modifier -> Decoder m) -> Modifier -> Decoder m
forall a b. (a -> b) -> a -> b
$ Word8 -> Modifier
addByte Word8
0xa
  Bool
False -> Decoder m
forall (m :: * -> *). Monad m => Decoder m
endRow

processQuotedQuote :: Monad m => Decoder m
processQuotedQuote :: Decoder m
processQuotedQuote = ConduitT Word8 [ByteString] m (Maybe Word8)
-> StateT DecodeState (ConduitT Word8 [ByteString] m) (Maybe Word8)
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift ConduitT Word8 [ByteString] m (Maybe Word8)
forall (m :: * -> *) i. Monad m => Consumer i m (Maybe i)
await StateT DecodeState (ConduitT Word8 [ByteString] m) (Maybe Word8)
-> (Maybe Word8 -> Decoder m) -> Decoder m
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
  Just Word8
byte -> case Word8
byte of
    Word8
0x22 -> Modifier -> Decoder m
forall (m :: * -> *). Monad m => Modifier -> Decoder m
performAction (Modifier -> Decoder m) -> Modifier -> Decoder m
forall a b. (a -> b) -> a -> b
$ Word8 -> Modifier
addByte Word8
0x22 -- quote
    Word8
0x2c -> Modifier -> Decoder m
forall (m :: * -> *). Monad m => Modifier -> Decoder m
performAction Modifier
commitField    -- comma
    Word8
0x0d -> Decoder m
forall (m :: * -> *). Monad m => Decoder m
commitRow                    -- carriage return
    Word8
0x0a -> Decoder m
forall (m :: * -> *). Monad m => Decoder m
commitRow                    -- line feed
    Word8
_    -> Decoder m
forall (m :: * -> *). Monad m => Decoder m
corruptedField
  Maybe Word8
Nothing -> Decoder m
forall (m :: * -> *). Monad m => Decoder m
cleanup

processUnquotedQuote :: Monad m => Decoder m
processUnquotedQuote :: Decoder m
processUnquotedQuote = (DecodeState -> Bool)
-> StateT DecodeState (ConduitT Word8 [ByteString] m) Bool
forall (m :: * -> *) s a. Monad m => (s -> a) -> StateT s m a
gets (ByteString -> Bool
BS.null (ByteString -> Bool)
-> (DecodeState -> ByteString) -> DecodeState -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. DecodeState -> ByteString
collected) StateT DecodeState (ConduitT Word8 [ByteString] m) Bool
-> (Bool -> Decoder m) -> Decoder m
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
  Bool
True  -> Modifier -> Decoder m
forall (m :: * -> *). Monad m => Modifier -> Decoder m
performAction Modifier
setQuoted
  Bool
False -> Decoder m
forall (m :: * -> *). Monad m => Decoder m
corruptedField

endRow :: Monad m => Decoder m
endRow :: Decoder m
endRow = do
  DecodeState
s <- StateT DecodeState (ConduitT Word8 [ByteString] m) DecodeState
forall (m :: * -> *) s. Monad m => StateT s m s
get
  if [ByteString] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null (DecodeState -> [ByteString]
fields DecodeState
s) Bool -> Bool -> Bool
&& ByteString -> Bool
BS.null (DecodeState -> ByteString
collected DecodeState
s)
    then Decoder m
forall (m :: * -> *). Monad m => Decoder m
decodeLoop
    else Decoder m
forall (m :: * -> *). Monad m => Decoder m
commitRow

commitRow :: Monad m => Decoder m
commitRow :: Decoder m
commitRow = do
  Modifier -> Decoder m
forall (m :: * -> *) s. Monad m => (s -> s) -> StateT s m ()
modify Modifier
commitField
  (DecodeState -> [ByteString])
-> StateT DecodeState (ConduitT Word8 [ByteString] m) [ByteString]
forall (m :: * -> *) s a. Monad m => (s -> a) -> StateT s m a
gets DecodeState -> [ByteString]
fields StateT DecodeState (ConduitT Word8 [ByteString] m) [ByteString]
-> ([ByteString] -> Decoder m) -> Decoder m
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= ConduitT Word8 [ByteString] m () -> Decoder m
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (ConduitT Word8 [ByteString] m () -> Decoder m)
-> ([ByteString] -> ConduitT Word8 [ByteString] m ())
-> [ByteString]
-> Decoder m
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [ByteString] -> ConduitT Word8 [ByteString] m ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield
  Modifier -> Decoder m
forall (m :: * -> *). Monad m => Modifier -> Decoder m
performAction Modifier
dropFields

corruptedField :: Monad m => Decoder m
corruptedField :: Decoder m
corruptedField = do
  Modifier -> Decoder m
forall (m :: * -> *) s. Monad m => (s -> s) -> StateT s m ()
modify Modifier
dropField
  Decoder m
forall (m :: * -> *). Monad m => Decoder m
ignoreField

ignoreField :: Monad m => Decoder m
ignoreField :: Decoder m
ignoreField = ConduitT Word8 [ByteString] m (Maybe Word8)
-> StateT DecodeState (ConduitT Word8 [ByteString] m) (Maybe Word8)
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift ConduitT Word8 [ByteString] m (Maybe Word8)
forall (m :: * -> *) i. Monad m => Consumer i m (Maybe i)
await StateT DecodeState (ConduitT Word8 [ByteString] m) (Maybe Word8)
-> (Maybe Word8 -> Decoder m) -> Decoder m
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
  Just Word8
byte -> case Word8
byte of
    Word8
0x2c -> Modifier -> Decoder m
forall (m :: * -> *). Monad m => Modifier -> Decoder m
performAction Modifier
commitField -- comma
    Word8
0x0d -> Decoder m
forall (m :: * -> *). Monad m => Decoder m
commitRow
    Word8
_    -> Decoder m
forall (m :: * -> *). Monad m => Decoder m
ignoreField
  Maybe Word8
Nothing -> Decoder m
forall (m :: * -> *). Monad m => Decoder m
cleanup

cleanup :: Monad m => Decoder m
cleanup :: Decoder m
cleanup = do
  (DecodeState -> Bool)
-> StateT DecodeState (ConduitT Word8 [ByteString] m) Bool
forall (m :: * -> *) s a. Monad m => (s -> a) -> StateT s m a
gets DecodeState -> Bool
isQuoted StateT DecodeState (ConduitT Word8 [ByteString] m) Bool
-> (Bool -> Decoder m) -> Decoder m
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
    Bool
True  -> Modifier -> Decoder m
forall (m :: * -> *) s. Monad m => (s -> s) -> StateT s m ()
modify (Modifier -> Decoder m) -> Modifier -> Decoder m
forall a b. (a -> b) -> a -> b
$ Modifier
commitField Modifier -> Modifier -> Modifier
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Modifier
dropField
    Bool
False -> (DecodeState -> Bool)
-> StateT DecodeState (ConduitT Word8 [ByteString] m) Bool
forall (m :: * -> *) s a. Monad m => (s -> a) -> StateT s m a
gets (ByteString -> Bool
BS.null (ByteString -> Bool)
-> (DecodeState -> ByteString) -> DecodeState -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. DecodeState -> ByteString
collected) StateT DecodeState (ConduitT Word8 [ByteString] m) Bool
-> (Bool -> Decoder m) -> Decoder m
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
      Bool
True  -> () -> Decoder m
forall (m :: * -> *) a. Monad m => a -> m a
return ()
      Bool
False -> Modifier -> Decoder m
forall (m :: * -> *) s. Monad m => (s -> s) -> StateT s m ()
modify Modifier
commitField
  [ByteString]
fs <- (DecodeState -> [ByteString])
-> StateT DecodeState (ConduitT Word8 [ByteString] m) [ByteString]
forall (m :: * -> *) s a. Monad m => (s -> a) -> StateT s m a
gets DecodeState -> [ByteString]
fields
  Bool -> Decoder m -> Decoder m
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless ([ByteString] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [ByteString]
fs) (Decoder m -> Decoder m) -> Decoder m -> Decoder m
forall a b. (a -> b) -> a -> b
$
    ConduitT Word8 [ByteString] m () -> Decoder m
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (ConduitT Word8 [ByteString] m () -> Decoder m)
-> ConduitT Word8 [ByteString] m () -> Decoder m
forall a b. (a -> b) -> a -> b
$ [ByteString] -> ConduitT Word8 [ByteString] m ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield [ByteString]
fs

performAction :: Monad m => Modifier -> Decoder m
performAction :: Modifier -> Decoder m
performAction Modifier
f = do
  Modifier -> Decoder m
forall (m :: * -> *) s. Monad m => (s -> s) -> StateT s m ()
modify Modifier
f
  Decoder m
forall (m :: * -> *). Monad m => Decoder m
decodeLoop

-- Modifiers

addByte :: Word8 -> Modifier
addByte :: Word8 -> Modifier
addByte Word8
b DecodeState
s = let
  collected' :: ByteString
collected' = ByteString -> Word8 -> ByteString
BS.snoc (DecodeState -> ByteString
collected DecodeState
s) Word8
b
  in DecodeState
s { collected :: ByteString
collected = ByteString
collected' }

commitField :: Modifier
commitField :: Modifier
commitField DecodeState
s = let
  isQuoted' :: Bool
isQuoted'  = Bool
False
  fields' :: [ByteString]
fields'    = DecodeState -> [ByteString]
fields DecodeState
s [ByteString] -> [ByteString] -> [ByteString]
forall a. [a] -> [a] -> [a]
++ [DecodeState -> ByteString
collected DecodeState
s]
  collected' :: ByteString
collected' = ByteString
""
  in DecodeState
s
  { isQuoted :: Bool
isQuoted  = Bool
isQuoted'
  , fields :: [ByteString]
fields    = [ByteString]
fields'
  , collected :: ByteString
collected = ByteString
collected'
  }

dropFields :: Modifier
dropFields :: Modifier
dropFields DecodeState
s = DecodeState
s { fields :: [ByteString]
fields = [] }

dropField :: Modifier
dropField :: Modifier
dropField DecodeState
s = DecodeState
s
  { isQuoted :: Bool
isQuoted  = Bool
False
  , collected :: ByteString
collected = ByteString
""
  }

setQuoted :: Modifier
setQuoted :: Modifier
setQuoted DecodeState
s = DecodeState
s { isQuoted :: Bool
isQuoted = Bool
True }

-- Helpers
escapeField :: BS.ByteString -> BS.ByteString
escapeField :: ByteString -> ByteString
escapeField ByteString
field = let
  bytes :: [Word8]
bytes = ByteString -> [Word8]
BS.unpack ByteString
field
  in [ByteString] -> ByteString
BS.concat
    [ ByteString
"\""
    , [Word8] -> ByteString
BS.pack ([Word8] -> ByteString) -> [Word8] -> ByteString
forall a b. (a -> b) -> a -> b
$ [Word8] -> [Word8]
escapeLoop [Word8]
bytes
    , ByteString
"\""
    ]

escapeLoop :: [Word8] -> [Word8]
escapeLoop :: [Word8] -> [Word8]
escapeLoop []        = []
escapeLoop (Word8
0x22:[Word8]
bs) = [Word8
0x22, Word8
0x22] [Word8] -> [Word8] -> [Word8]
forall a. [a] -> [a] -> [a]
++ [Word8] -> [Word8]
escapeLoop [Word8]
bs -- escape quote
escapeLoop (Word8
b:[Word8]
bs)    = Word8
b Word8 -> [Word8] -> [Word8]
forall a. a -> [a] -> [a]
: [Word8] -> [Word8]
escapeLoop [Word8]
bs

--jl