{-# LANGUAGE BangPatterns, OverloadedStrings, ScopedTypeVariables #-}
module Data.Csv.Encoding
(
HasHeader(..)
, decode
, decodeByName
, Quoting(..)
, encode
, encodeByName
, encodeDefaultOrderedByName
, DecodeOptions(..)
, defaultDecodeOptions
, decodeWith
, decodeWithP
, decodeByNameWith
, decodeByNameWithP
, EncodeOptions(..)
, defaultEncodeOptions
, encodeWith
, encodeByNameWith
, encodeDefaultOrderedByNameWith
, encodeRecord
, encodeNamedRecord
, recordSep
) where
import Data.ByteString.Builder
import Control.Applicative as AP (Applicative(..), (<|>))
import Data.Attoparsec.ByteString.Char8 (endOfInput)
import qualified Data.Attoparsec.ByteString.Lazy as AL
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as B8
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString.Lazy.Char8 as BL8
import qualified Data.HashMap.Strict as HM
import Data.Vector (Vector)
import qualified Data.Vector as V
import Data.Word (Word8)
import Data.Monoid
import Prelude hiding (unlines)
import qualified Data.Csv.Conversion as Conversion
import Data.Csv.Conversion (FromNamedRecord, FromRecord, ToNamedRecord,
ToRecord, parseNamedRecord, parseRecord, runParser,
toNamedRecord, toRecord)
import Data.Csv.Parser hiding (csv, csvWithHeader)
import qualified Data.Csv.Parser as Parser
import Data.Csv.Types hiding (toNamedRecord)
import qualified Data.Csv.Types as Types
import Data.Csv.Util (blankLine, endOfLine, toStrict)
decode :: FromRecord a
=> HasHeader
-> L.ByteString
-> Either String (Vector a)
decode = decodeWith defaultDecodeOptions
{-# INLINE decode #-}
decodeByName :: FromNamedRecord a
=> L.ByteString
-> Either String (Header, Vector a)
decodeByName = decodeByNameWith defaultDecodeOptions
{-# INLINE decodeByName #-}
encode :: ToRecord a => [a] -> L.ByteString
encode = encodeWith defaultEncodeOptions
{-# INLINE encode #-}
encodeByName :: ToNamedRecord a => Header -> [a] -> L.ByteString
encodeByName = encodeByNameWith defaultEncodeOptions
{-# INLINE encodeByName #-}
encodeDefaultOrderedByName :: (Conversion.DefaultOrdered a, ToNamedRecord a) =>
[a] -> L.ByteString
encodeDefaultOrderedByName = encodeDefaultOrderedByNameWith defaultEncodeOptions
{-# INLINE encodeDefaultOrderedByName #-}
decodeWith :: FromRecord a
=> DecodeOptions
-> HasHeader
-> L.ByteString
-> Either String (Vector a)
decodeWith = decodeWithC (csv parseRecord)
{-# INLINE [1] decodeWith #-}
{-# RULES
"idDecodeWith" decodeWith = idDecodeWith
#-}
idDecodeWith :: DecodeOptions -> HasHeader -> L.ByteString
-> Either String (Vector (Vector B.ByteString))
idDecodeWith = decodeWithC Parser.csv
decodeWithP :: (Record -> Conversion.Parser a)
-> DecodeOptions
-> HasHeader
-> L.ByteString
-> Either String (Vector a)
decodeWithP _parseRecord = decodeWithC (csv _parseRecord)
{-# INLINE [1] decodeWithP #-}
decodeWithC :: (DecodeOptions -> AL.Parser a) -> DecodeOptions -> HasHeader
-> BL8.ByteString -> Either String a
decodeWithC p !opts hasHeader = decodeWithP' parser
where parser = case hasHeader of
HasHeader -> header (decDelimiter opts) *> p opts
NoHeader -> p opts
{-# INLINE decodeWithC #-}
decodeByNameWith :: FromNamedRecord a
=> DecodeOptions
-> L.ByteString
-> Either String (Header, Vector a)
decodeByNameWith !opts = decodeWithP' (csvWithHeader parseNamedRecord opts)
decodeByNameWithP :: (NamedRecord -> Conversion.Parser a)
-> DecodeOptions
-> L.ByteString
-> Either String (Header, Vector a)
decodeByNameWithP _parseNamedRecord !opts =
decodeWithP' (csvWithHeader _parseNamedRecord opts)
data Quoting
= QuoteNone
| QuoteMinimal
| QuoteAll
deriving (Eq, Show)
data EncodeOptions = EncodeOptions
{
encDelimiter :: {-# UNPACK #-} !Word8
, encUseCrLf :: !Bool
, encIncludeHeader :: !Bool
, encQuoting :: !Quoting
} deriving (Eq, Show)
defaultEncodeOptions :: EncodeOptions
defaultEncodeOptions = EncodeOptions
{ encDelimiter = 44
, encUseCrLf = True
, encIncludeHeader = True
, encQuoting = QuoteMinimal
}
encodeWith :: ToRecord a => EncodeOptions -> [a] -> L.ByteString
encodeWith opts
| validDelim (encDelimiter opts) =
toLazyByteString
. unlines (recordSep (encUseCrLf opts))
. map (encodeRecord (encQuoting opts) (encDelimiter opts)
. toRecord)
| otherwise = encodeOptionsError
{-# INLINE encodeWith #-}
validDelim :: Word8 -> Bool
validDelim delim = delim `notElem` [cr, nl, dquote]
where
nl = 10
cr = 13
dquote = 34
encodeOptionsError :: a
encodeOptionsError = error $ "Data.Csv: " ++
"The 'encDelimiter' must /not/ be the quote character (i.e. " ++
"\") or one of the record separator characters (i.e. \\n or " ++
"\\r)"
encodeRecord :: Quoting -> Word8 -> Record -> Builder
encodeRecord qtng delim = mconcat . intersperse (word8 delim)
. map byteString . map (escape qtng delim) . V.toList
{-# INLINE encodeRecord #-}
encodeNamedRecord :: Header -> Quoting -> Word8 -> NamedRecord -> Builder
encodeNamedRecord hdr qtng delim =
encodeRecord qtng delim . namedRecordToRecord hdr
escape :: Quoting -> Word8 -> B.ByteString -> B.ByteString
escape !qtng !delim !s
| (qtng == QuoteMinimal &&
B.any (\ b -> b == dquote || b == delim || b == nl || b == cr) s
) || qtng == QuoteAll
= toStrict . toLazyByteString $
word8 dquote
<> B.foldl
(\ acc b -> acc <> if b == dquote
then byteString "\"\""
else word8 b)
mempty
s
<> word8 dquote
| otherwise = s
where
dquote = 34
nl = 10
cr = 13
encodeByNameWith :: ToNamedRecord a => EncodeOptions -> Header -> [a]
-> L.ByteString
encodeByNameWith opts hdr v
| validDelim (encDelimiter opts) =
toLazyByteString (rows (encIncludeHeader opts))
| otherwise = encodeOptionsError
where
rows False = records
rows True = encodeRecord (encQuoting opts) (encDelimiter opts) hdr <>
recordSep (encUseCrLf opts) <> records
records = unlines (recordSep (encUseCrLf opts))
. map (encodeNamedRecord hdr (encQuoting opts) (encDelimiter opts)
. toNamedRecord)
$ v
{-# INLINE encodeByNameWith #-}
encodeDefaultOrderedByNameWith ::
forall a. (Conversion.DefaultOrdered a, ToNamedRecord a) =>
EncodeOptions -> [a] -> L.ByteString
encodeDefaultOrderedByNameWith opts v
| validDelim (encDelimiter opts) =
toLazyByteString (rows (encIncludeHeader opts))
| otherwise = encodeOptionsError
where
hdr = (Conversion.headerOrder (undefined :: a))
rows False = records
rows True = encodeRecord (encQuoting opts) (encDelimiter opts) hdr <>
recordSep (encUseCrLf opts) <> records
records = unlines (recordSep (encUseCrLf opts))
. map (encodeNamedRecord hdr (encQuoting opts) (encDelimiter opts)
. toNamedRecord)
$ v
{-# INLINE encodeDefaultOrderedByNameWith #-}
namedRecordToRecord :: Header -> NamedRecord -> Record
namedRecordToRecord hdr nr = V.map find hdr
where
find n = case HM.lookup n nr of
Nothing -> moduleError "namedRecordToRecord" $
"header contains name " ++ show (B8.unpack n) ++
" which is not present in the named record"
Just v -> v
moduleError :: String -> String -> a
moduleError func msg = error $ "Data.Csv.Encoding." ++ func ++ ": " ++ msg
{-# NOINLINE moduleError #-}
recordSep :: Bool -> Builder
recordSep False = word8 10
recordSep True = string8 "\r\n"
unlines :: Builder -> [Builder] -> Builder
unlines _ [] = mempty
unlines sep (b:bs) = b <> sep <> unlines sep bs
intersperse :: Builder -> [Builder] -> [Builder]
intersperse _ [] = []
intersperse sep (x:xs) = x : prependToAll sep xs
prependToAll :: Builder -> [Builder] -> [Builder]
prependToAll _ [] = []
prependToAll sep (x:xs) = sep <> x : prependToAll sep xs
decodeWithP' :: AL.Parser a -> L.ByteString -> Either String a
decodeWithP' p s =
case AL.parse p s of
AL.Done _ v -> Right v
AL.Fail left _ msg -> Left errMsg
where
errMsg = "parse error (" ++ msg ++ ") at " ++
(if BL8.length left > 100
then (take 100 $ BL8.unpack left) ++ " (truncated)"
else show (BL8.unpack left))
{-# INLINE decodeWithP' #-}
csv :: (Record -> Conversion.Parser a) -> DecodeOptions
-> AL.Parser (V.Vector a)
csv _parseRecord !opts = do
vals <- records
return $! V.fromList vals
where
records = do
!r <- record (decDelimiter opts)
if blankLine r
then (endOfInput *> pure []) <|> (endOfLine *> records)
else case runParser (_parseRecord r) of
Left msg -> fail $ "conversion error: " ++ msg
Right val -> do
!vals <- (endOfInput *> AP.pure []) <|> (endOfLine *> records)
return (val : vals)
{-# INLINE csv #-}
csvWithHeader :: (NamedRecord -> Conversion.Parser a) -> DecodeOptions
-> AL.Parser (Header, V.Vector a)
csvWithHeader _parseNamedRecord !opts = do
!hdr <- header (decDelimiter opts)
vals <- records hdr
let !v = V.fromList vals
return (hdr, v)
where
records hdr = do
!r <- record (decDelimiter opts)
if blankLine r
then (endOfInput *> pure []) <|> (endOfLine *> records hdr)
else case runParser (convert hdr r) of
Left msg -> fail $ "conversion error: " ++ msg
Right val -> do
!vals <- (endOfInput *> pure []) <|> (endOfLine *> records hdr)
return (val : vals)
convert hdr = _parseNamedRecord . Types.toNamedRecord hdr