{-# LANGUAGE BangPatterns, OverloadedStrings, ScopedTypeVariables #-}

-- Module:      Data.Csv.Encoding
-- Copyright:   (c) 2011 MailRank, Inc.
--              (c) 2012 Johan Tibell
-- License:     BSD3
-- Maintainer:  Johan Tibell <johan.tibell@gmail.com>
-- Stability:   experimental
-- Portability: portable
--
-- Encoding and decoding of data types into CSV.
module Data.Csv.Encoding
    (
    -- * Encoding and decoding
      HasHeader(..)
    , decode
    , decodeByName
    , Quoting(..)
    , encode
    , encodeByName
    , encodeDefaultOrderedByName

    -- ** Encoding and decoding options
    , DecodeOptions(..)
    , defaultDecodeOptions
    , decodeWith
    , decodeWithP
    , decodeByNameWith
    , decodeByNameWithP
    , EncodeOptions(..)
    , defaultEncodeOptions
    , encodeWith
    , encodeByNameWith
    , encodeDefaultOrderedByNameWith

    -- ** Encoding and decoding single records
    , encodeRecord
    , encodeNamedRecord
    , recordSep
    ) where

import Data.ByteString.Builder
import Control.Applicative as AP (Applicative(..), (<|>))
import Data.Attoparsec.ByteString.Char8 (endOfInput)
import qualified Data.Attoparsec.ByteString.Lazy as AL
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as B8
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString.Lazy.Char8 as BL8
import qualified Data.HashMap.Strict as HM
import Data.Vector (Vector)
import qualified Data.Vector as V
import Data.Word (Word8)
import Data.Monoid
import Prelude hiding (unlines)

import qualified Data.Csv.Conversion as Conversion
import Data.Csv.Conversion (FromNamedRecord, FromRecord, ToNamedRecord,
                            ToRecord, parseNamedRecord, parseRecord, runParser,
                            toNamedRecord, toRecord)
import Data.Csv.Parser hiding (csv, csvWithHeader)
import qualified Data.Csv.Parser as Parser
import Data.Csv.Types hiding (toNamedRecord)
import qualified Data.Csv.Types as Types
import Data.Csv.Util (blankLine, endOfLine, toStrict)


-- TODO: 'encode' isn't as efficient as it could be.

------------------------------------------------------------------------
-- * Encoding and decoding

-- | Efficiently deserialize CSV records from a lazy 'L.ByteString'.
-- If this fails due to incomplete or invalid input, @'Left' msg@ is
-- returned. Equivalent to @'decodeWith' 'defaultDecodeOptions'@.
decode :: FromRecord a
       => HasHeader     -- ^ Data contains header that should be
                        -- skipped
       -> L.ByteString  -- ^ CSV data
       -> Either String (Vector a)
decode :: HasHeader -> ByteString -> Either String (Vector a)
decode = DecodeOptions
-> HasHeader -> ByteString -> Either String (Vector a)
forall a.
FromRecord a =>
DecodeOptions
-> HasHeader -> ByteString -> Either String (Vector a)
decodeWith DecodeOptions
defaultDecodeOptions
{-# INLINE decode #-}

-- | Efficiently deserialize CSV records from a lazy 'L.ByteString'.
-- If this fails due to incomplete or invalid input, @'Left' msg@ is
-- returned. The data is assumed to be preceded by a header.
-- Equivalent to @'decodeByNameWith' 'defaultDecodeOptions'@.
decodeByName :: FromNamedRecord a
             => L.ByteString  -- ^ CSV data
             -> Either String (Header, Vector a)
decodeByName :: ByteString -> Either String (Header, Vector a)
decodeByName = DecodeOptions -> ByteString -> Either String (Header, Vector a)
forall a.
FromNamedRecord a =>
DecodeOptions -> ByteString -> Either String (Header, Vector a)
decodeByNameWith DecodeOptions
defaultDecodeOptions
{-# INLINE decodeByName #-}

-- | Efficiently serialize CSV records as a lazy 'L.ByteString'.
encode :: ToRecord a => [a] -> L.ByteString
encode :: [a] -> ByteString
encode = EncodeOptions -> [a] -> ByteString
forall a. ToRecord a => EncodeOptions -> [a] -> ByteString
encodeWith EncodeOptions
defaultEncodeOptions
{-# INLINE encode #-}

-- | Efficiently serialize CSV records as a lazy 'L.ByteString'. The
-- header is written before any records and dictates the field order.
encodeByName :: ToNamedRecord a => Header -> [a] -> L.ByteString
encodeByName :: Header -> [a] -> ByteString
encodeByName = EncodeOptions -> Header -> [a] -> ByteString
forall a.
ToNamedRecord a =>
EncodeOptions -> Header -> [a] -> ByteString
encodeByNameWith EncodeOptions
defaultEncodeOptions
{-# INLINE encodeByName #-}

-- | Like 'encodeByName', but header and field order is dictated by
-- the 'Conversion.header' method.
encodeDefaultOrderedByName :: (Conversion.DefaultOrdered a, ToNamedRecord a) =>
                              [a] -> L.ByteString
encodeDefaultOrderedByName :: [a] -> ByteString
encodeDefaultOrderedByName = EncodeOptions -> [a] -> ByteString
forall a.
(DefaultOrdered a, ToNamedRecord a) =>
EncodeOptions -> [a] -> ByteString
encodeDefaultOrderedByNameWith EncodeOptions
defaultEncodeOptions
{-# INLINE encodeDefaultOrderedByName #-}

------------------------------------------------------------------------
-- ** Encoding and decoding options

-- | Like 'decode', but lets you customize how the CSV data is parsed.
decodeWith :: FromRecord a
           => DecodeOptions  -- ^ Decoding options
           -> HasHeader      -- ^ Data contains header that should be
                             -- skipped
           -> L.ByteString   -- ^ CSV data
           -> Either String (Vector a)
decodeWith :: DecodeOptions
-> HasHeader -> ByteString -> Either String (Vector a)
decodeWith = (DecodeOptions -> Parser (Vector a))
-> DecodeOptions
-> HasHeader
-> ByteString
-> Either String (Vector a)
forall a.
(DecodeOptions -> Parser a)
-> DecodeOptions -> HasHeader -> ByteString -> Either String a
decodeWithC ((Header -> Parser a) -> DecodeOptions -> Parser (Vector a)
forall a.
(Header -> Parser a) -> DecodeOptions -> Parser (Vector a)
csv Header -> Parser a
forall a. FromRecord a => Header -> Parser a
parseRecord)
{-# INLINE [1] decodeWith #-}

{-# RULES
    "idDecodeWith" decodeWith = idDecodeWith
 #-}

-- | Same as 'decodeWith', but more efficient as no type
-- conversion is performed.
idDecodeWith :: DecodeOptions -> HasHeader -> L.ByteString
             -> Either String (Vector (Vector B.ByteString))
idDecodeWith :: DecodeOptions
-> HasHeader -> ByteString -> Either String (Vector Header)
idDecodeWith = (DecodeOptions -> Parser (Vector Header))
-> DecodeOptions
-> HasHeader
-> ByteString
-> Either String (Vector Header)
forall a.
(DecodeOptions -> Parser a)
-> DecodeOptions -> HasHeader -> ByteString -> Either String a
decodeWithC DecodeOptions -> Parser (Vector Header)
Parser.csv

-- | Like 'decodeWith'', but lets you specify a parser function.
--
-- @since 0.5.2.0
decodeWithP :: (Record -> Conversion.Parser a)
            -- ^ Custom parser function
            -> DecodeOptions -- ^ Decoding options
            -> HasHeader     -- ^ Data contains header that should be
                             -- skipped
            -> L.ByteString  -- ^ CSV data
            -> Either String (Vector a)
decodeWithP :: (Header -> Parser a)
-> DecodeOptions
-> HasHeader
-> ByteString
-> Either String (Vector a)
decodeWithP Header -> Parser a
_parseRecord = (DecodeOptions -> Parser (Vector a))
-> DecodeOptions
-> HasHeader
-> ByteString
-> Either String (Vector a)
forall a.
(DecodeOptions -> Parser a)
-> DecodeOptions -> HasHeader -> ByteString -> Either String a
decodeWithC ((Header -> Parser a) -> DecodeOptions -> Parser (Vector a)
forall a.
(Header -> Parser a) -> DecodeOptions -> Parser (Vector a)
csv Header -> Parser a
_parseRecord)
{-# INLINE [1] decodeWithP #-}

-- | Decode CSV data using the provided parser, skipping a leading
-- header if 'hasHeader' is 'HasHeader'. Returns 'Left' @errMsg@ on
-- failure.
decodeWithC :: (DecodeOptions -> AL.Parser a) -> DecodeOptions -> HasHeader
            -> BL8.ByteString -> Either String a
decodeWithC :: (DecodeOptions -> Parser a)
-> DecodeOptions -> HasHeader -> ByteString -> Either String a
decodeWithC DecodeOptions -> Parser a
p !DecodeOptions
opts HasHeader
hasHeader = Parser a -> ByteString -> Either String a
forall a. Parser a -> ByteString -> Either String a
decodeWithP' Parser a
parser
  where parser :: Parser a
parser = case HasHeader
hasHeader of
            HasHeader
HasHeader -> Word8 -> Parser Header
header (DecodeOptions -> Word8
decDelimiter DecodeOptions
opts) Parser Header -> Parser a -> Parser a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> DecodeOptions -> Parser a
p DecodeOptions
opts
            HasHeader
NoHeader  -> DecodeOptions -> Parser a
p DecodeOptions
opts
{-# INLINE decodeWithC #-}

-- | Like 'decodeByName', but lets you customize how the CSV data is
-- parsed.
decodeByNameWith :: FromNamedRecord a
                 => DecodeOptions  -- ^ Decoding options
                 -> L.ByteString   -- ^ CSV data
                 -> Either String (Header, Vector a)
decodeByNameWith :: DecodeOptions -> ByteString -> Either String (Header, Vector a)
decodeByNameWith !DecodeOptions
opts = Parser (Header, Vector a)
-> ByteString -> Either String (Header, Vector a)
forall a. Parser a -> ByteString -> Either String a
decodeWithP' ((NamedRecord -> Parser a)
-> DecodeOptions -> Parser (Header, Vector a)
forall a.
(NamedRecord -> Parser a)
-> DecodeOptions -> Parser (Header, Vector a)
csvWithHeader NamedRecord -> Parser a
forall a. FromNamedRecord a => NamedRecord -> Parser a
parseNamedRecord DecodeOptions
opts)

-- | Like 'decodeByNameWith', but lets you specify a parser function.
--
-- @since 0.5.2.0
decodeByNameWithP :: (NamedRecord -> Conversion.Parser a)
                  -- ^ Custom parser function
                  -> DecodeOptions -- ^ Decoding options
                  -> L.ByteString  -- ^ CSV data
                  -> Either String (Header, Vector a)
decodeByNameWithP :: (NamedRecord -> Parser a)
-> DecodeOptions -> ByteString -> Either String (Header, Vector a)
decodeByNameWithP NamedRecord -> Parser a
_parseNamedRecord !DecodeOptions
opts =
  Parser (Header, Vector a)
-> ByteString -> Either String (Header, Vector a)
forall a. Parser a -> ByteString -> Either String a
decodeWithP' ((NamedRecord -> Parser a)
-> DecodeOptions -> Parser (Header, Vector a)
forall a.
(NamedRecord -> Parser a)
-> DecodeOptions -> Parser (Header, Vector a)
csvWithHeader NamedRecord -> Parser a
_parseNamedRecord DecodeOptions
opts)

-- | Should quoting be applied to fields, and at which level?
data Quoting
    = QuoteNone        -- ^ No quotes.
    | QuoteMinimal     -- ^ Quotes according to RFC 4180.
    | QuoteAll         -- ^ Always quote.
    deriving (Quoting -> Quoting -> Bool
(Quoting -> Quoting -> Bool)
-> (Quoting -> Quoting -> Bool) -> Eq Quoting
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Quoting -> Quoting -> Bool
$c/= :: Quoting -> Quoting -> Bool
== :: Quoting -> Quoting -> Bool
$c== :: Quoting -> Quoting -> Bool
Eq, Int -> Quoting -> ShowS
[Quoting] -> ShowS
Quoting -> String
(Int -> Quoting -> ShowS)
-> (Quoting -> String) -> ([Quoting] -> ShowS) -> Show Quoting
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Quoting] -> ShowS
$cshowList :: [Quoting] -> ShowS
show :: Quoting -> String
$cshow :: Quoting -> String
showsPrec :: Int -> Quoting -> ShowS
$cshowsPrec :: Int -> Quoting -> ShowS
Show)

-- | Options that controls how data is encoded. These options can be
-- used to e.g. encode data in a tab-separated format instead of in a
-- comma-separated format.
--
-- To avoid having your program stop compiling when new fields are
-- added to 'EncodeOptions', create option records by overriding
-- values in 'defaultEncodeOptions'. Example:
--
-- > myOptions = defaultEncodeOptions {
-- >       encDelimiter = fromIntegral (ord '\t')
-- >     }
--
-- /N.B./ The 'encDelimiter' must /not/ be the quote character (i.e.
-- @\"@) or one of the record separator characters (i.e. @\\n@ or
-- @\\r@).
data EncodeOptions = EncodeOptions
    { -- | Field delimiter.
      EncodeOptions -> Word8
encDelimiter  :: {-# UNPACK #-} !Word8

      -- | Record separator selection.  @True@ for CRLF (@\\r\\n@) and
      -- @False@ for LF (@\\n@).
    , EncodeOptions -> Bool
encUseCrLf :: !Bool

      -- | Include a header row when encoding @ToNamedRecord@
      -- instances.
    , EncodeOptions -> Bool
encIncludeHeader :: !Bool

      -- | What kind of quoting should be applied to text fields.
    , EncodeOptions -> Quoting
encQuoting :: !Quoting
    } deriving (EncodeOptions -> EncodeOptions -> Bool
(EncodeOptions -> EncodeOptions -> Bool)
-> (EncodeOptions -> EncodeOptions -> Bool) -> Eq EncodeOptions
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: EncodeOptions -> EncodeOptions -> Bool
$c/= :: EncodeOptions -> EncodeOptions -> Bool
== :: EncodeOptions -> EncodeOptions -> Bool
$c== :: EncodeOptions -> EncodeOptions -> Bool
Eq, Int -> EncodeOptions -> ShowS
[EncodeOptions] -> ShowS
EncodeOptions -> String
(Int -> EncodeOptions -> ShowS)
-> (EncodeOptions -> String)
-> ([EncodeOptions] -> ShowS)
-> Show EncodeOptions
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [EncodeOptions] -> ShowS
$cshowList :: [EncodeOptions] -> ShowS
show :: EncodeOptions -> String
$cshow :: EncodeOptions -> String
showsPrec :: Int -> EncodeOptions -> ShowS
$cshowsPrec :: Int -> EncodeOptions -> ShowS
Show)

-- | Encoding options for CSV files.
defaultEncodeOptions :: EncodeOptions
defaultEncodeOptions :: EncodeOptions
defaultEncodeOptions = EncodeOptions :: Word8 -> Bool -> Bool -> Quoting -> EncodeOptions
EncodeOptions
    { encDelimiter :: Word8
encDelimiter     = Word8
44  -- comma
    , encUseCrLf :: Bool
encUseCrLf       = Bool
True
    , encIncludeHeader :: Bool
encIncludeHeader = Bool
True
    , encQuoting :: Quoting
encQuoting       = Quoting
QuoteMinimal
    }

-- | Like 'encode', but lets you customize how the CSV data is
-- encoded.
encodeWith :: ToRecord a => EncodeOptions -> [a] -> L.ByteString
encodeWith :: EncodeOptions -> [a] -> ByteString
encodeWith EncodeOptions
opts
    | Word8 -> Bool
validDelim (EncodeOptions -> Word8
encDelimiter EncodeOptions
opts) =
        Builder -> ByteString
toLazyByteString
        (Builder -> ByteString) -> ([a] -> Builder) -> [a] -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Builder -> [Builder] -> Builder
unlines (Bool -> Builder
recordSep (EncodeOptions -> Bool
encUseCrLf EncodeOptions
opts))
        ([Builder] -> Builder) -> ([a] -> [Builder]) -> [a] -> Builder
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (a -> Builder) -> [a] -> [Builder]
forall a b. (a -> b) -> [a] -> [b]
map (Quoting -> Word8 -> Header -> Builder
encodeRecord (EncodeOptions -> Quoting
encQuoting EncodeOptions
opts) (EncodeOptions -> Word8
encDelimiter EncodeOptions
opts)
              (Header -> Builder) -> (a -> Header) -> a -> Builder
forall b c a. (b -> c) -> (a -> b) -> a -> c
. a -> Header
forall a. ToRecord a => a -> Header
toRecord)
    | Bool
otherwise = [a] -> ByteString
forall a. a
encodeOptionsError
{-# INLINE encodeWith #-}

-- | Check if the delimiter is valid.
validDelim :: Word8 -> Bool
validDelim :: Word8 -> Bool
validDelim Word8
delim = Word8
delim Word8 -> [Word8] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` [Word8
cr, Word8
nl, Word8
dquote]
  where
    nl :: Word8
nl = Word8
10
    cr :: Word8
cr = Word8
13
    dquote :: Word8
dquote = Word8
34

-- | Raises an exception indicating that the provided delimiter isn't
-- valid. See 'validDelim'.
--
-- Keep this message consistent with the documentation of
-- 'EncodeOptions'.
encodeOptionsError :: a
encodeOptionsError :: a
encodeOptionsError = String -> a
forall a. HasCallStack => String -> a
error (String -> a) -> String -> a
forall a b. (a -> b) -> a -> b
$ String
"Data.Csv: " String -> ShowS
forall a. [a] -> [a] -> [a]
++
        String
"The 'encDelimiter' must /not/ be the quote character (i.e. " String -> ShowS
forall a. [a] -> [a] -> [a]
++
        String
"\") or one of the record separator characters (i.e. \\n or " String -> ShowS
forall a. [a] -> [a] -> [a]
++
        String
"\\r)"

-- | Encode a single record, without the trailing record separator
-- (i.e. newline).
encodeRecord :: Quoting -> Word8 -> Record -> Builder
encodeRecord :: Quoting -> Word8 -> Header -> Builder
encodeRecord Quoting
qtng Word8
delim = [Builder] -> Builder
forall a. Monoid a => [a] -> a
mconcat ([Builder] -> Builder)
-> (Header -> [Builder]) -> Header -> Builder
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Builder -> [Builder] -> [Builder]
intersperse (Word8 -> Builder
word8 Word8
delim)
                     ([Builder] -> [Builder])
-> (Header -> [Builder]) -> Header -> [Builder]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (ByteString -> Builder) -> [ByteString] -> [Builder]
forall a b. (a -> b) -> [a] -> [b]
map ByteString -> Builder
byteString ([ByteString] -> [Builder])
-> (Header -> [ByteString]) -> Header -> [Builder]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (ByteString -> ByteString) -> [ByteString] -> [ByteString]
forall a b. (a -> b) -> [a] -> [b]
map (Quoting -> Word8 -> ByteString -> ByteString
escape Quoting
qtng Word8
delim) ([ByteString] -> [ByteString])
-> (Header -> [ByteString]) -> Header -> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Header -> [ByteString]
forall a. Vector a -> [a]
V.toList
{-# INLINE encodeRecord #-}

-- | Encode a single named record, without the trailing record
-- separator (i.e. newline), using the given field order.
encodeNamedRecord :: Header -> Quoting -> Word8 -> NamedRecord -> Builder
encodeNamedRecord :: Header -> Quoting -> Word8 -> NamedRecord -> Builder
encodeNamedRecord Header
hdr Quoting
qtng Word8
delim =
    Quoting -> Word8 -> Header -> Builder
encodeRecord Quoting
qtng Word8
delim (Header -> Builder)
-> (NamedRecord -> Header) -> NamedRecord -> Builder
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Header -> NamedRecord -> Header
namedRecordToRecord Header
hdr

-- TODO: Optimize
escape :: Quoting -> Word8 -> B.ByteString -> B.ByteString
escape :: Quoting -> Word8 -> ByteString -> ByteString
escape !Quoting
qtng !Word8
delim !ByteString
s
    | (Quoting
qtng Quoting -> Quoting -> Bool
forall a. Eq a => a -> a -> Bool
== Quoting
QuoteMinimal Bool -> Bool -> Bool
&&
        (Word8 -> Bool) -> ByteString -> Bool
B.any (\ Word8
b -> Word8
b Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
dquote Bool -> Bool -> Bool
|| Word8
b Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
delim Bool -> Bool -> Bool
|| Word8
b Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
nl Bool -> Bool -> Bool
|| Word8
b Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
cr) ByteString
s
      ) Bool -> Bool -> Bool
|| Quoting
qtng Quoting -> Quoting -> Bool
forall a. Eq a => a -> a -> Bool
== Quoting
QuoteAll
         = ByteString -> ByteString
toStrict (ByteString -> ByteString)
-> (Builder -> ByteString) -> Builder -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Builder -> ByteString
toLazyByteString (Builder -> ByteString) -> Builder -> ByteString
forall a b. (a -> b) -> a -> b
$
            Word8 -> Builder
word8 Word8
dquote
            Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> (Builder -> Word8 -> Builder) -> Builder -> ByteString -> Builder
forall a. (a -> Word8 -> a) -> a -> ByteString -> a
B.foldl
                (\ Builder
acc Word8
b -> Builder
acc Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> if Word8
b Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
dquote
                    then ByteString -> Builder
byteString ByteString
"\"\""
                    else Word8 -> Builder
word8 Word8
b)
                Builder
forall a. Monoid a => a
mempty
                ByteString
s
            Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Word8 -> Builder
word8 Word8
dquote
    | Bool
otherwise = ByteString
s
  where
    dquote :: Word8
dquote = Word8
34
    nl :: Word8
nl     = Word8
10
    cr :: Word8
cr     = Word8
13

-- | Like 'encodeByName', but lets you customize how the CSV data is
-- encoded.
encodeByNameWith :: ToNamedRecord a => EncodeOptions -> Header -> [a]
                 -> L.ByteString
encodeByNameWith :: EncodeOptions -> Header -> [a] -> ByteString
encodeByNameWith EncodeOptions
opts Header
hdr [a]
v
    | Word8 -> Bool
validDelim (EncodeOptions -> Word8
encDelimiter EncodeOptions
opts) =
        Builder -> ByteString
toLazyByteString (Bool -> Builder
rows (EncodeOptions -> Bool
encIncludeHeader EncodeOptions
opts))
    | Bool
otherwise = ByteString
forall a. a
encodeOptionsError
  where
    rows :: Bool -> Builder
rows Bool
False = Builder
records
    rows Bool
True  = Quoting -> Word8 -> Header -> Builder
encodeRecord (EncodeOptions -> Quoting
encQuoting EncodeOptions
opts) (EncodeOptions -> Word8
encDelimiter EncodeOptions
opts) Header
hdr Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<>
                 Bool -> Builder
recordSep (EncodeOptions -> Bool
encUseCrLf EncodeOptions
opts) Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Builder
records
    records :: Builder
records = Builder -> [Builder] -> Builder
unlines (Bool -> Builder
recordSep (EncodeOptions -> Bool
encUseCrLf EncodeOptions
opts))
              ([Builder] -> Builder) -> ([a] -> [Builder]) -> [a] -> Builder
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (a -> Builder) -> [a] -> [Builder]
forall a b. (a -> b) -> [a] -> [b]
map (Header -> Quoting -> Word8 -> NamedRecord -> Builder
encodeNamedRecord Header
hdr (EncodeOptions -> Quoting
encQuoting EncodeOptions
opts) (EncodeOptions -> Word8
encDelimiter EncodeOptions
opts)
                     (NamedRecord -> Builder) -> (a -> NamedRecord) -> a -> Builder
forall b c a. (b -> c) -> (a -> b) -> a -> c
. a -> NamedRecord
forall a. ToNamedRecord a => a -> NamedRecord
toNamedRecord)
              ([a] -> Builder) -> [a] -> Builder
forall a b. (a -> b) -> a -> b
$ [a]
v
{-# INLINE encodeByNameWith #-}

-- | Like 'encodeDefaultOrderedByNameWith', but lets you customize how
-- the CSV data is encoded.
encodeDefaultOrderedByNameWith ::
    forall a. (Conversion.DefaultOrdered a, ToNamedRecord a) =>
    EncodeOptions -> [a] -> L.ByteString
encodeDefaultOrderedByNameWith :: EncodeOptions -> [a] -> ByteString
encodeDefaultOrderedByNameWith EncodeOptions
opts [a]
v
    | Word8 -> Bool
validDelim (EncodeOptions -> Word8
encDelimiter EncodeOptions
opts) =
        Builder -> ByteString
toLazyByteString (Bool -> Builder
rows (EncodeOptions -> Bool
encIncludeHeader EncodeOptions
opts))
    | Bool
otherwise = ByteString
forall a. a
encodeOptionsError
  where
    hdr :: Header
hdr = (a -> Header
forall a. DefaultOrdered a => a -> Header
Conversion.headerOrder (a
forall a. HasCallStack => a
undefined :: a))
    rows :: Bool -> Builder
rows Bool
False = Builder
records
    rows Bool
True  = Quoting -> Word8 -> Header -> Builder
encodeRecord (EncodeOptions -> Quoting
encQuoting EncodeOptions
opts) (EncodeOptions -> Word8
encDelimiter EncodeOptions
opts) Header
hdr Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<>
                 Bool -> Builder
recordSep (EncodeOptions -> Bool
encUseCrLf EncodeOptions
opts) Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Builder
records
    records :: Builder
records = Builder -> [Builder] -> Builder
unlines (Bool -> Builder
recordSep (EncodeOptions -> Bool
encUseCrLf EncodeOptions
opts))
              ([Builder] -> Builder) -> ([a] -> [Builder]) -> [a] -> Builder
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (a -> Builder) -> [a] -> [Builder]
forall a b. (a -> b) -> [a] -> [b]
map (Header -> Quoting -> Word8 -> NamedRecord -> Builder
encodeNamedRecord Header
hdr (EncodeOptions -> Quoting
encQuoting EncodeOptions
opts) (EncodeOptions -> Word8
encDelimiter EncodeOptions
opts)
                     (NamedRecord -> Builder) -> (a -> NamedRecord) -> a -> Builder
forall b c a. (b -> c) -> (a -> b) -> a -> c
. a -> NamedRecord
forall a. ToNamedRecord a => a -> NamedRecord
toNamedRecord)
              ([a] -> Builder) -> [a] -> Builder
forall a b. (a -> b) -> a -> b
$ [a]
v
{-# INLINE encodeDefaultOrderedByNameWith #-}

namedRecordToRecord :: Header -> NamedRecord -> Record
namedRecordToRecord :: Header -> NamedRecord -> Header
namedRecordToRecord Header
hdr NamedRecord
nr = (ByteString -> ByteString) -> Header -> Header
forall a b. (a -> b) -> Vector a -> Vector b
V.map ByteString -> ByteString
find Header
hdr
  where
    find :: ByteString -> ByteString
find ByteString
n = case ByteString -> NamedRecord -> Maybe ByteString
forall k v. (Eq k, Hashable k) => k -> HashMap k v -> Maybe v
HM.lookup ByteString
n NamedRecord
nr of
        Maybe ByteString
Nothing -> String -> String -> ByteString
forall a. String -> String -> a
moduleError String
"namedRecordToRecord" (String -> ByteString) -> String -> ByteString
forall a b. (a -> b) -> a -> b
$
                   String
"header contains name " String -> ShowS
forall a. [a] -> [a] -> [a]
++ ShowS
forall a. Show a => a -> String
show (ByteString -> String
B8.unpack ByteString
n) String -> ShowS
forall a. [a] -> [a] -> [a]
++
                   String
" which is not present in the named record"
        Just ByteString
v  -> ByteString
v

moduleError :: String -> String -> a
moduleError :: String -> String -> a
moduleError String
func String
msg = String -> a
forall a. HasCallStack => String -> a
error (String -> a) -> String -> a
forall a b. (a -> b) -> a -> b
$ String
"Data.Csv.Encoding." String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
func String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
": " String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
msg
{-# NOINLINE moduleError #-}

recordSep :: Bool -> Builder
recordSep :: Bool -> Builder
recordSep Bool
False = Word8 -> Builder
word8 Word8
10 -- new line (\n)
recordSep Bool
True  = String -> Builder
string8 String
"\r\n"

unlines :: Builder -> [Builder] -> Builder
unlines :: Builder -> [Builder] -> Builder
unlines Builder
_ [] = Builder
forall a. Monoid a => a
mempty
unlines Builder
sep (Builder
b:[Builder]
bs) = Builder
b Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Builder
sep Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Builder -> [Builder] -> Builder
unlines Builder
sep [Builder]
bs

intersperse :: Builder -> [Builder] -> [Builder]
intersperse :: Builder -> [Builder] -> [Builder]
intersperse Builder
_   []      = []
intersperse Builder
sep (Builder
x:[Builder]
xs)  = Builder
x Builder -> [Builder] -> [Builder]
forall a. a -> [a] -> [a]
: Builder -> [Builder] -> [Builder]
prependToAll Builder
sep [Builder]
xs

prependToAll :: Builder -> [Builder] -> [Builder]
prependToAll :: Builder -> [Builder] -> [Builder]
prependToAll Builder
_   []     = []
prependToAll Builder
sep (Builder
x:[Builder]
xs) = Builder
sep Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Builder
x Builder -> [Builder] -> [Builder]
forall a. a -> [a] -> [a]
: Builder -> [Builder] -> [Builder]
prependToAll Builder
sep [Builder]
xs

decodeWithP' :: AL.Parser a -> L.ByteString -> Either String a
decodeWithP' :: Parser a -> ByteString -> Either String a
decodeWithP' Parser a
p ByteString
s =
    case Parser a -> ByteString -> Result a
forall a. Parser a -> ByteString -> Result a
AL.parse Parser a
p ByteString
s of
      AL.Done ByteString
_ a
v     -> a -> Either String a
forall a b. b -> Either a b
Right a
v
      AL.Fail ByteString
left [String]
_ String
msg -> String -> Either String a
forall a b. a -> Either a b
Left String
errMsg
        where
          errMsg :: String
errMsg = String
"parse error (" String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
msg String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
") at " String -> ShowS
forall a. [a] -> [a] -> [a]
++
                   (if ByteString -> Int64
BL8.length ByteString
left Int64 -> Int64 -> Bool
forall a. Ord a => a -> a -> Bool
> Int64
100
                    then (Int -> ShowS
forall a. Int -> [a] -> [a]
take Int
100 ShowS -> ShowS
forall a b. (a -> b) -> a -> b
$ ByteString -> String
BL8.unpack ByteString
left) String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" (truncated)"
                    else ShowS
forall a. Show a => a -> String
show (ByteString -> String
BL8.unpack ByteString
left))
{-# INLINE decodeWithP' #-}

-- These alternative implementation of the 'csv' and 'csvWithHeader'
-- parsers from the 'Parser' module performs the
-- 'FromRecord'/'FromNamedRecord' conversions on-the-fly, thereby
-- avoiding the need to hold a big 'CSV' value in memory. The 'CSV'
-- type has a quite large memory overhead due to high constant
-- overheads of 'B.ByteString' and 'V.Vector'.

-- TODO: Check that the error messages don't duplicate prefixes, as in
-- "parse error: conversion error: ...".

-- | Parse a CSV file that does not include a header.
csv :: (Record -> Conversion.Parser a) -> DecodeOptions
    -> AL.Parser (V.Vector a)
csv :: (Header -> Parser a) -> DecodeOptions -> Parser (Vector a)
csv Header -> Parser a
_parseRecord !DecodeOptions
opts = do
    [a]
vals <- Parser ByteString [a]
records
    Vector a -> Parser (Vector a)
forall (m :: * -> *) a. Monad m => a -> m a
return (Vector a -> Parser (Vector a)) -> Vector a -> Parser (Vector a)
forall a b. (a -> b) -> a -> b
$! [a] -> Vector a
forall a. [a] -> Vector a
V.fromList [a]
vals
  where
    records :: Parser ByteString [a]
records = do
        !Header
r <- Word8 -> Parser Header
record (DecodeOptions -> Word8
decDelimiter DecodeOptions
opts)
        if Header -> Bool
blankLine Header
r
            then (Parser ByteString ()
forall t. Chunk t => Parser t ()
endOfInput Parser ByteString ()
-> Parser ByteString [a] -> Parser ByteString [a]
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> [a] -> Parser ByteString [a]
forall (f :: * -> *) a. Applicative f => a -> f a
pure []) Parser ByteString [a]
-> Parser ByteString [a] -> Parser ByteString [a]
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Parser ByteString ()
endOfLine Parser ByteString ()
-> Parser ByteString [a] -> Parser ByteString [a]
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser ByteString [a]
records)
            else case Parser a -> Either String a
forall a. Parser a -> Either String a
runParser (Header -> Parser a
_parseRecord Header
r) of
                Left String
msg  -> String -> Parser ByteString [a]
forall (m :: * -> *) a. MonadFail m => String -> m a
fail (String -> Parser ByteString [a])
-> String -> Parser ByteString [a]
forall a b. (a -> b) -> a -> b
$ String
"conversion error: " String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
msg
                Right a
val -> do
                    ![a]
vals <- (Parser ByteString ()
forall t. Chunk t => Parser t ()
endOfInput Parser ByteString ()
-> Parser ByteString [a] -> Parser ByteString [a]
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> [a] -> Parser ByteString [a]
forall (f :: * -> *) a. Applicative f => a -> f a
AP.pure []) Parser ByteString [a]
-> Parser ByteString [a] -> Parser ByteString [a]
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Parser ByteString ()
endOfLine Parser ByteString ()
-> Parser ByteString [a] -> Parser ByteString [a]
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser ByteString [a]
records)
                    [a] -> Parser ByteString [a]
forall (m :: * -> *) a. Monad m => a -> m a
return (a
val a -> [a] -> [a]
forall a. a -> [a] -> [a]
: [a]
vals)
{-# INLINE csv #-}

-- | Parse a CSV file that includes a header.
csvWithHeader :: (NamedRecord -> Conversion.Parser a) -> DecodeOptions
              -> AL.Parser (Header, V.Vector a)
csvWithHeader :: (NamedRecord -> Parser a)
-> DecodeOptions -> Parser (Header, Vector a)
csvWithHeader NamedRecord -> Parser a
_parseNamedRecord !DecodeOptions
opts = do
    !Header
hdr <- Word8 -> Parser Header
header (DecodeOptions -> Word8
decDelimiter DecodeOptions
opts)
    [a]
vals <- Header -> Parser ByteString [a]
records Header
hdr
    let !v :: Vector a
v = [a] -> Vector a
forall a. [a] -> Vector a
V.fromList [a]
vals
    (Header, Vector a) -> Parser (Header, Vector a)
forall (m :: * -> *) a. Monad m => a -> m a
return (Header
hdr, Vector a
v)
  where
    records :: Header -> Parser ByteString [a]
records Header
hdr = do
        !Header
r <- Word8 -> Parser Header
record (DecodeOptions -> Word8
decDelimiter DecodeOptions
opts)
        if Header -> Bool
blankLine Header
r
            then (Parser ByteString ()
forall t. Chunk t => Parser t ()
endOfInput Parser ByteString ()
-> Parser ByteString [a] -> Parser ByteString [a]
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> [a] -> Parser ByteString [a]
forall (f :: * -> *) a. Applicative f => a -> f a
pure []) Parser ByteString [a]
-> Parser ByteString [a] -> Parser ByteString [a]
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Parser ByteString ()
endOfLine Parser ByteString ()
-> Parser ByteString [a] -> Parser ByteString [a]
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Header -> Parser ByteString [a]
records Header
hdr)
            else case Parser a -> Either String a
forall a. Parser a -> Either String a
runParser (Header -> Header -> Parser a
convert Header
hdr Header
r) of
                Left String
msg  -> String -> Parser ByteString [a]
forall (m :: * -> *) a. MonadFail m => String -> m a
fail (String -> Parser ByteString [a])
-> String -> Parser ByteString [a]
forall a b. (a -> b) -> a -> b
$ String
"conversion error: " String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
msg
                Right a
val -> do
                    ![a]
vals <- (Parser ByteString ()
forall t. Chunk t => Parser t ()
endOfInput Parser ByteString ()
-> Parser ByteString [a] -> Parser ByteString [a]
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> [a] -> Parser ByteString [a]
forall (f :: * -> *) a. Applicative f => a -> f a
pure []) Parser ByteString [a]
-> Parser ByteString [a] -> Parser ByteString [a]
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Parser ByteString ()
endOfLine Parser ByteString ()
-> Parser ByteString [a] -> Parser ByteString [a]
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Header -> Parser ByteString [a]
records Header
hdr)
                    [a] -> Parser ByteString [a]
forall (m :: * -> *) a. Monad m => a -> m a
return (a
val a -> [a] -> [a]
forall a. a -> [a] -> [a]
: [a]
vals)

    convert :: Header -> Header -> Parser a
convert Header
hdr = NamedRecord -> Parser a
_parseNamedRecord (NamedRecord -> Parser a)
-> (Header -> NamedRecord) -> Header -> Parser a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Header -> Header -> NamedRecord
Types.toNamedRecord Header
hdr