{-# LANGUAGE BangPatterns, CPP, DeriveFunctor, ScopedTypeVariables #-}
module Data.Csv.Incremental
(
HeaderParser(..)
, decodeHeader
, decodeHeaderWith
, Parser(..)
, HasHeader(..)
, decode
, decodeWith
, decodeWithP
, decodeByName
, decodeByNameWith
, decodeByNameWithP
, encode
, encodeWith
, encodeRecord
, Builder
, encodeByName
, encodeDefaultOrderedByName
, encodeByNameWith
, encodeDefaultOrderedByNameWith
, encodeNamedRecord
, NamedBuilder
) where
import Control.Applicative ((<|>))
import qualified Data.Attoparsec.ByteString as A
import Data.Attoparsec.ByteString.Char8 (endOfInput)
import qualified Data.ByteString as B
import qualified Data.ByteString.Builder as Builder
import qualified Data.ByteString.Lazy as L
import Data.Semigroup as Semi (Semigroup, (<>))
import qualified Data.Vector as V
import Data.Word (Word8)
import Data.Csv.Conversion hiding (Parser, header, namedRecord, record,
toNamedRecord)
import qualified Data.Csv.Conversion as Conversion
import qualified Data.Csv.Encoding as Encoding
import Data.Csv.Encoding (EncodeOptions(..), Quoting(..), recordSep)
import Data.Csv.Parser
import Data.Csv.Types
import Data.Csv.Util (endOfLine)
#if !MIN_VERSION_base(4,8,0)
import Data.Monoid (Monoid(mappend, mempty))
import Control.Applicative ((<*))
#endif
data HeaderParser a =
FailH !B.ByteString String
| PartialH (B.ByteString -> HeaderParser a)
| DoneH !Header a
deriving Functor
instance Show a => Show (HeaderParser a) where
showsPrec d (FailH rest msg) = showParen (d > appPrec) showStr
where
showStr = showString "FailH " . showsPrec (appPrec+1) rest .
showString " " . showsPrec (appPrec+1) msg
showsPrec _ (PartialH _) = showString "PartialH <function>"
showsPrec d (DoneH hdr x) = showParen (d > appPrec) showStr
where
showStr = showString "DoneH " . showsPrec (appPrec+1) hdr .
showString " " . showsPrec (appPrec+1) x
appPrec :: Int
appPrec = 10
decodeHeader :: HeaderParser B.ByteString
decodeHeader = decodeHeaderWith defaultDecodeOptions
decodeHeaderWith :: DecodeOptions -> HeaderParser B.ByteString
decodeHeaderWith !opts = PartialH (go . parser)
where
parser = A.parse (header $ decDelimiter opts)
go (A.Fail rest _ msg) = FailH rest err
where err = "parse error (" ++ msg ++ ")"
go (A.Partial k) = PartialH $ \ s -> go (k s)
go (A.Done rest r) = DoneH r rest
data Parser a =
Fail !B.ByteString String
| Many [Either String a] (B.ByteString -> Parser a)
| Done [Either String a]
deriving Functor
instance Show a => Show (Parser a) where
showsPrec d (Fail rest msg) = showParen (d > appPrec) showStr
where
showStr = showString "Fail " . showsPrec (appPrec+1) rest .
showString " " . showsPrec (appPrec+1) msg
showsPrec d (Many rs _) = showParen (d > appPrec) showStr
where
showStr = showString "Many " . showsPrec (appPrec+1) rs .
showString " <function>"
showsPrec d (Done rs) = showParen (d > appPrec) showStr
where
showStr = showString "Done " . showsPrec (appPrec+1) rs
data More = Incomplete | Complete
deriving (Eq, Show)
decode :: FromRecord a
=> HasHeader
-> Parser a
decode = decodeWith defaultDecodeOptions
decodeWith :: FromRecord a
=> DecodeOptions
-> HasHeader
-> Parser a
decodeWith !opts hasHeader = decodeWithP parseRecord opts hasHeader
decodeWithP :: (Record -> Conversion.Parser a)
-> DecodeOptions
-> HasHeader
-> Parser a
decodeWithP p !opts hasHeader = case hasHeader of
HasHeader -> go (decodeHeaderWith opts)
NoHeader -> Many [] $ \ s -> decodeWithP' p opts s
where go (FailH rest msg) = Fail rest msg
go (PartialH k) = Many [] $ \ s' -> go (k s')
go (DoneH _ rest) = decodeWithP' p opts rest
decodeByName :: FromNamedRecord a
=> HeaderParser (Parser a)
decodeByName = decodeByNameWith defaultDecodeOptions
decodeByNameWith :: FromNamedRecord a
=> DecodeOptions
-> HeaderParser (Parser a)
decodeByNameWith !opts = decodeByNameWithP parseNamedRecord opts
decodeByNameWithP :: (NamedRecord -> Conversion.Parser a)
-> DecodeOptions
-> HeaderParser (Parser a)
decodeByNameWithP p !opts = go (decodeHeaderWith opts)
where
go (FailH rest msg) = FailH rest msg
go (PartialH k) = PartialH $ \ s -> go (k s)
go (DoneH hdr rest) =
DoneH hdr (decodeWithP' (p . toNamedRecord hdr) opts rest)
decodeWithP' :: (Record -> Conversion.Parser a) -> DecodeOptions -> B.ByteString
-> Parser a
decodeWithP' p !opts = go Incomplete [] . parser
where
go !_ !acc (A.Fail rest _ msg)
| null acc = Fail rest err
| otherwise = Many (reverse acc) (\ s -> Fail (rest `B.append` s) err)
where err = "parse error (" ++ msg ++ ")"
go Incomplete acc (A.Partial k) = Many (reverse acc) cont
where cont s = go m [] (k s)
where m | B.null s = Complete
| otherwise = Incomplete
go Complete _ (A.Partial _) = moduleError "decodeWithP'" msg
where msg = "attoparsec should never return Partial in this case"
go m acc (A.Done rest r)
| B.null rest = case m of
Complete -> Done (reverse acc')
Incomplete -> Many (reverse acc') (cont [])
| otherwise = go m acc' (parser rest)
where cont acc'' s
| B.null s = Done (reverse acc'')
| otherwise = go Incomplete acc'' (parser s)
acc' | blankLine r = acc
| otherwise = let !r' = convert r in r' : acc
parser = A.parse (record (decDelimiter opts) <* (endOfLine <|> endOfInput))
convert = runParser . p
{-# INLINE decodeWithP' #-}
blankLine :: V.Vector B.ByteString -> Bool
blankLine v = V.length v == 1 && (B.null (V.head v))
encode :: ToRecord a => Builder a -> L.ByteString
encode = encodeWith Encoding.defaultEncodeOptions
encodeWith :: ToRecord a => EncodeOptions -> Builder a
-> L.ByteString
encodeWith opts b =
Builder.toLazyByteString $
runBuilder b (encQuoting opts) (encDelimiter opts) (encUseCrLf opts)
encodeRecord :: ToRecord a => a -> Builder a
encodeRecord r = Builder $ \ qtng delim useCrLf ->
Encoding.encodeRecord qtng delim (toRecord r) <> recordSep useCrLf
newtype Builder a = Builder {
runBuilder :: Quoting -> Word8 -> Bool -> Builder.Builder
}
instance Semi.Semigroup (Builder a) where
Builder f <> Builder g =
Builder $ \ qtng delim useCrlf ->
f qtng delim useCrlf <> g qtng delim useCrlf
instance Monoid (Builder a) where
mempty = Builder (\ _ _ _ -> mempty)
mappend = (Semi.<>)
encodeByName :: ToNamedRecord a => Header -> NamedBuilder a -> L.ByteString
encodeByName = encodeByNameWith Encoding.defaultEncodeOptions
encodeDefaultOrderedByName :: (DefaultOrdered a, ToNamedRecord a) =>
NamedBuilder a -> L.ByteString
encodeDefaultOrderedByName =
encodeDefaultOrderedByNameWith Encoding.defaultEncodeOptions
encodeByNameWith :: ToNamedRecord a => EncodeOptions -> Header -> NamedBuilder a
-> L.ByteString
encodeByNameWith opts hdr b =
Builder.toLazyByteString $
encHdr <>
runNamedBuilder b hdr (encQuoting opts) (encDelimiter opts)
(encUseCrLf opts)
where
encHdr
| encIncludeHeader opts =
Encoding.encodeRecord (encQuoting opts) (encDelimiter opts) hdr
<> recordSep (encUseCrLf opts)
| otherwise = mempty
encodeDefaultOrderedByNameWith ::
forall a. (DefaultOrdered a, ToNamedRecord a) =>
EncodeOptions -> NamedBuilder a -> L.ByteString
encodeDefaultOrderedByNameWith opts b =
Builder.toLazyByteString $
encHdr <>
runNamedBuilder b hdr (encQuoting opts)
(encDelimiter opts) (encUseCrLf opts)
where
hdr = Conversion.headerOrder (undefined :: a)
encHdr
| encIncludeHeader opts =
Encoding.encodeRecord (encQuoting opts) (encDelimiter opts) hdr
<> recordSep (encUseCrLf opts)
| otherwise = mempty
encodeNamedRecord :: ToNamedRecord a => a -> NamedBuilder a
encodeNamedRecord nr = NamedBuilder $ \ hdr qtng delim useCrLf ->
Encoding.encodeNamedRecord hdr qtng delim
(Conversion.toNamedRecord nr) <> recordSep useCrLf
newtype NamedBuilder a = NamedBuilder {
runNamedBuilder :: Header -> Quoting -> Word8 -> Bool -> Builder.Builder
}
instance Semigroup (NamedBuilder a) where
NamedBuilder f <> NamedBuilder g =
NamedBuilder $ \ hdr qtng delim useCrlf ->
f hdr qtng delim useCrlf <> g hdr qtng delim useCrlf
instance Monoid (NamedBuilder a) where
mempty = NamedBuilder (\ _ _ _ _ -> mempty)
mappend = (Semi.<>)
moduleError :: String -> String -> a
moduleError func msg = error $ "Data.Csv.Incremental." ++ func ++ ": " ++ msg
{-# NOINLINE moduleError #-}