{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
module Data.Csv.Parser.Megaparsec.Internals
( ConversionError (..)
, Parser
, csv
, csvWithHeader
, decodeWithC
, toNamedRecord
, header
, name
, record
, field
, escapedField
, unescapedField)
where
import Control.Monad
import Data.ByteString (ByteString)
import Data.Csv hiding
( Parser
, record
, header
, toNamedRecord )
import Data.Data
import Data.Vector (Vector)
import Data.Word (Word8)
import Text.Megaparsec
import Text.Megaparsec.Byte
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as BL
import qualified Data.Csv as C
import qualified Data.HashMap.Strict as H
import qualified Data.Vector as V
newtype ConversionError = ConversionError String
deriving (ConversionError -> ConversionError -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: ConversionError -> ConversionError -> Bool
$c/= :: ConversionError -> ConversionError -> Bool
== :: ConversionError -> ConversionError -> Bool
$c== :: ConversionError -> ConversionError -> Bool
Eq, Typeable ConversionError
ConversionError -> DataType
ConversionError -> Constr
(forall b. Data b => b -> b) -> ConversionError -> ConversionError
forall a.
Typeable a
-> (forall (c :: * -> *).
(forall d b. Data d => c (d -> b) -> d -> c b)
-> (forall g. g -> c g) -> a -> c a)
-> (forall (c :: * -> *).
(forall b r. Data b => c (b -> r) -> c r)
-> (forall r. r -> c r) -> Constr -> c a)
-> (a -> Constr)
-> (a -> DataType)
-> (forall (t :: * -> *) (c :: * -> *).
Typeable t =>
(forall d. Data d => c (t d)) -> Maybe (c a))
-> (forall (t :: * -> * -> *) (c :: * -> *).
Typeable t =>
(forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c a))
-> ((forall b. Data b => b -> b) -> a -> a)
-> (forall r r'.
(r -> r' -> r) -> r -> (forall d. Data d => d -> r') -> a -> r)
-> (forall r r'.
(r' -> r -> r) -> r -> (forall d. Data d => d -> r') -> a -> r)
-> (forall u. (forall d. Data d => d -> u) -> a -> [u])
-> (forall u. Int -> (forall d. Data d => d -> u) -> a -> u)
-> (forall (m :: * -> *).
Monad m =>
(forall d. Data d => d -> m d) -> a -> m a)
-> (forall (m :: * -> *).
MonadPlus m =>
(forall d. Data d => d -> m d) -> a -> m a)
-> (forall (m :: * -> *).
MonadPlus m =>
(forall d. Data d => d -> m d) -> a -> m a)
-> Data a
forall u.
Int -> (forall d. Data d => d -> u) -> ConversionError -> u
forall u. (forall d. Data d => d -> u) -> ConversionError -> [u]
forall r r'.
(r -> r' -> r)
-> r -> (forall d. Data d => d -> r') -> ConversionError -> r
forall r r'.
(r' -> r -> r)
-> r -> (forall d. Data d => d -> r') -> ConversionError -> r
forall (m :: * -> *).
Monad m =>
(forall d. Data d => d -> m d)
-> ConversionError -> m ConversionError
forall (m :: * -> *).
MonadPlus m =>
(forall d. Data d => d -> m d)
-> ConversionError -> m ConversionError
forall (c :: * -> *).
(forall b r. Data b => c (b -> r) -> c r)
-> (forall r. r -> c r) -> Constr -> c ConversionError
forall (c :: * -> *).
(forall d b. Data d => c (d -> b) -> d -> c b)
-> (forall g. g -> c g) -> ConversionError -> c ConversionError
forall (t :: * -> *) (c :: * -> *).
Typeable t =>
(forall d. Data d => c (t d)) -> Maybe (c ConversionError)
forall (t :: * -> * -> *) (c :: * -> *).
Typeable t =>
(forall d e. (Data d, Data e) => c (t d e))
-> Maybe (c ConversionError)
gmapMo :: forall (m :: * -> *).
MonadPlus m =>
(forall d. Data d => d -> m d)
-> ConversionError -> m ConversionError
$cgmapMo :: forall (m :: * -> *).
MonadPlus m =>
(forall d. Data d => d -> m d)
-> ConversionError -> m ConversionError
gmapMp :: forall (m :: * -> *).
MonadPlus m =>
(forall d. Data d => d -> m d)
-> ConversionError -> m ConversionError
$cgmapMp :: forall (m :: * -> *).
MonadPlus m =>
(forall d. Data d => d -> m d)
-> ConversionError -> m ConversionError
gmapM :: forall (m :: * -> *).
Monad m =>
(forall d. Data d => d -> m d)
-> ConversionError -> m ConversionError
$cgmapM :: forall (m :: * -> *).
Monad m =>
(forall d. Data d => d -> m d)
-> ConversionError -> m ConversionError
gmapQi :: forall u.
Int -> (forall d. Data d => d -> u) -> ConversionError -> u
$cgmapQi :: forall u.
Int -> (forall d. Data d => d -> u) -> ConversionError -> u
gmapQ :: forall u. (forall d. Data d => d -> u) -> ConversionError -> [u]
$cgmapQ :: forall u. (forall d. Data d => d -> u) -> ConversionError -> [u]
gmapQr :: forall r r'.
(r' -> r -> r)
-> r -> (forall d. Data d => d -> r') -> ConversionError -> r
$cgmapQr :: forall r r'.
(r' -> r -> r)
-> r -> (forall d. Data d => d -> r') -> ConversionError -> r
gmapQl :: forall r r'.
(r -> r' -> r)
-> r -> (forall d. Data d => d -> r') -> ConversionError -> r
$cgmapQl :: forall r r'.
(r -> r' -> r)
-> r -> (forall d. Data d => d -> r') -> ConversionError -> r
gmapT :: (forall b. Data b => b -> b) -> ConversionError -> ConversionError
$cgmapT :: (forall b. Data b => b -> b) -> ConversionError -> ConversionError
dataCast2 :: forall (t :: * -> * -> *) (c :: * -> *).
Typeable t =>
(forall d e. (Data d, Data e) => c (t d e))
-> Maybe (c ConversionError)
$cdataCast2 :: forall (t :: * -> * -> *) (c :: * -> *).
Typeable t =>
(forall d e. (Data d, Data e) => c (t d e))
-> Maybe (c ConversionError)
dataCast1 :: forall (t :: * -> *) (c :: * -> *).
Typeable t =>
(forall d. Data d => c (t d)) -> Maybe (c ConversionError)
$cdataCast1 :: forall (t :: * -> *) (c :: * -> *).
Typeable t =>
(forall d. Data d => c (t d)) -> Maybe (c ConversionError)
dataTypeOf :: ConversionError -> DataType
$cdataTypeOf :: ConversionError -> DataType
toConstr :: ConversionError -> Constr
$ctoConstr :: ConversionError -> Constr
gunfold :: forall (c :: * -> *).
(forall b r. Data b => c (b -> r) -> c r)
-> (forall r. r -> c r) -> Constr -> c ConversionError
$cgunfold :: forall (c :: * -> *).
(forall b r. Data b => c (b -> r) -> c r)
-> (forall r. r -> c r) -> Constr -> c ConversionError
gfoldl :: forall (c :: * -> *).
(forall d b. Data d => c (d -> b) -> d -> c b)
-> (forall g. g -> c g) -> ConversionError -> c ConversionError
$cgfoldl :: forall (c :: * -> *).
(forall d b. Data d => c (d -> b) -> d -> c b)
-> (forall g. g -> c g) -> ConversionError -> c ConversionError
Data, Typeable, Eq ConversionError
ConversionError -> ConversionError -> Bool
ConversionError -> ConversionError -> Ordering
ConversionError -> ConversionError -> ConversionError
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: ConversionError -> ConversionError -> ConversionError
$cmin :: ConversionError -> ConversionError -> ConversionError
max :: ConversionError -> ConversionError -> ConversionError
$cmax :: ConversionError -> ConversionError -> ConversionError
>= :: ConversionError -> ConversionError -> Bool
$c>= :: ConversionError -> ConversionError -> Bool
> :: ConversionError -> ConversionError -> Bool
$c> :: ConversionError -> ConversionError -> Bool
<= :: ConversionError -> ConversionError -> Bool
$c<= :: ConversionError -> ConversionError -> Bool
< :: ConversionError -> ConversionError -> Bool
$c< :: ConversionError -> ConversionError -> Bool
compare :: ConversionError -> ConversionError -> Ordering
$ccompare :: ConversionError -> ConversionError -> Ordering
Ord, ReadPrec [ConversionError]
ReadPrec ConversionError
Int -> ReadS ConversionError
ReadS [ConversionError]
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [ConversionError]
$creadListPrec :: ReadPrec [ConversionError]
readPrec :: ReadPrec ConversionError
$creadPrec :: ReadPrec ConversionError
readList :: ReadS [ConversionError]
$creadList :: ReadS [ConversionError]
readsPrec :: Int -> ReadS ConversionError
$creadsPrec :: Int -> ReadS ConversionError
Read, Int -> ConversionError -> String -> String
[ConversionError] -> String -> String
ConversionError -> String
forall a.
(Int -> a -> String -> String)
-> (a -> String) -> ([a] -> String -> String) -> Show a
showList :: [ConversionError] -> String -> String
$cshowList :: [ConversionError] -> String -> String
show :: ConversionError -> String
$cshow :: ConversionError -> String
showsPrec :: Int -> ConversionError -> String -> String
$cshowsPrec :: Int -> ConversionError -> String -> String
Show)
instance ShowErrorComponent ConversionError where
showErrorComponent :: ConversionError -> String
showErrorComponent (ConversionError String
msg) =
String
"conversion error: " forall a. [a] -> [a] -> [a]
++ String
msg
type Parser = Parsec ConversionError BL.ByteString
csv :: FromRecord a
=> DecodeOptions
-> Parser (Vector a)
csv :: forall a. FromRecord a => DecodeOptions -> Parser (Vector a)
csv DecodeOptions {Word8
decDelimiter :: DecodeOptions -> Word8
decDelimiter :: Word8
..} = do
[a]
xs <- forall (m :: * -> *) a sep. MonadPlus m => m a -> m sep -> m [a]
sepEndBy1 (forall a. Word8 -> (Record -> Parser a) -> Parser a
record Word8
decDelimiter forall a. FromRecord a => Record -> Parser a
parseRecord) forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Word8) =>
m (Tokens s)
eol
forall e s (m :: * -> *). MonadParsec e s m => m ()
eof
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$! forall a. [a] -> Vector a
V.fromList [a]
xs
csvWithHeader :: FromNamedRecord a
=> DecodeOptions
-> Parser (Header, Vector a)
DecodeOptions {Word8
decDelimiter :: Word8
decDelimiter :: DecodeOptions -> Word8
..} = do
!Record
hdr <- Word8 -> Parser Record
header Word8
decDelimiter
let f :: Record -> Parser a
f = forall a. FromNamedRecord a => NamedRecord -> Parser a
parseNamedRecord forall b c a. (b -> c) -> (a -> b) -> a -> c
. Record -> Record -> NamedRecord
toNamedRecord Record
hdr
[a]
xs <- forall (m :: * -> *) a sep. MonadPlus m => m a -> m sep -> m [a]
sepEndBy1 (forall a. Word8 -> (Record -> Parser a) -> Parser a
record Word8
decDelimiter Record -> Parser a
f) forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Word8) =>
m (Tokens s)
eol
forall e s (m :: * -> *). MonadParsec e s m => m ()
eof
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ let !v :: Vector a
v = forall a. [a] -> Vector a
V.fromList [a]
xs in (Record
hdr, Vector a
v)
decodeWithC
:: (DecodeOptions -> Parser a)
-> DecodeOptions
-> HasHeader
-> FilePath
-> BL.ByteString
-> Either (ParseErrorBundle BL.ByteString ConversionError) a
decodeWithC :: forall a.
(DecodeOptions -> Parser a)
-> DecodeOptions
-> HasHeader
-> String
-> ByteString
-> Either (ParseErrorBundle ByteString ConversionError) a
decodeWithC DecodeOptions -> Parser a
p opts :: DecodeOptions
opts@DecodeOptions {Word8
decDelimiter :: Word8
decDelimiter :: DecodeOptions -> Word8
..} HasHeader
hasHeader = forall e s a.
Parsec e s a -> String -> s -> Either (ParseErrorBundle s e) a
parse Parser a
parser
where
parser :: Parser a
parser = case HasHeader
hasHeader of
HasHeader
HasHeader -> Word8 -> Parser Record
header Word8
decDelimiter forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> DecodeOptions -> Parser a
p DecodeOptions
opts
HasHeader
NoHeader -> DecodeOptions -> Parser a
p DecodeOptions
opts
{-# INLINE decodeWithC #-}
toNamedRecord :: Header -> Record -> NamedRecord
toNamedRecord :: Record -> Record -> NamedRecord
toNamedRecord Record
hdr Record
v = forall k v. (Eq k, Hashable k) => [(k, v)] -> HashMap k v
H.fromList forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Vector a -> [a]
V.toList forall a b. (a -> b) -> a -> b
$ forall a b. Vector a -> Vector b -> Vector (a, b)
V.zip Record
hdr Record
v
{-# INLINE toNamedRecord #-}
header :: Word8 -> Parser Header
Word8
del = forall a. [a] -> Vector a
V.fromList forall (m :: * -> *) a b. Monad m => (a -> b) -> m a -> m b
<$!> ParsecT ConversionError ByteString Identity [ByteString]
p forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Word8) =>
m (Tokens s)
eol
where
p :: ParsecT ConversionError ByteString Identity [ByteString]
p = forall (m :: * -> *) a sep. MonadPlus m => m a -> m sep -> m [a]
sepBy1 (Word8 -> Parser ByteString
name Word8
del) (forall (f :: * -> *) a. Functor f => f a -> f ()
void forall a b. (a -> b) -> a -> b
$ forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Word8) =>
Token s -> m (Token s)
char Word8
del) forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
<?> String
"file header"
{-# INLINE header #-}
name :: Word8 -> Parser Name
name :: Word8 -> Parser ByteString
name Word8
del = Word8 -> Parser ByteString
field Word8
del forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
<?> String
"name in header"
{-# INLINE name #-}
record
:: Word8
-> (Record -> C.Parser a)
-> Parser a
record :: forall a. Word8 -> (Record -> Parser a) -> Parser a
record Word8
del Record -> Parser a
f = do
forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m ()
notFollowedBy forall e s (m :: * -> *). MonadParsec e s m => m ()
eof
Record
r <- forall a. [a] -> Vector a
V.fromList forall (m :: * -> *) a b. Monad m => (a -> b) -> m a -> m b
<$!> (forall (m :: * -> *) a sep. MonadPlus m => m a -> m sep -> m [a]
sepBy1 (Word8 -> Parser ByteString
field Word8
del) (forall (f :: * -> *) a. Functor f => f a -> f ()
void forall a b. (a -> b) -> a -> b
$ forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Word8) =>
Token s -> m (Token s)
char Word8
del) forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
<?> String
"record")
case forall a. Parser a -> Either String a
C.runParser (Record -> Parser a
f Record
r) of
Left String
msg -> forall e s (m :: * -> *) a. MonadParsec e s m => e -> m a
customFailure (String -> ConversionError
ConversionError String
msg)
Right a
x -> forall (m :: * -> *) a. Monad m => a -> m a
return a
x
{-# INLINE record #-}
field :: Word8 -> Parser Field
field :: Word8 -> Parser ByteString
field Word8
del = forall e s (m :: * -> *) a.
MonadParsec e s m =>
String -> m a -> m a
label String
"field" (Parser ByteString
escapedField forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Word8 -> Parser ByteString
unescapedField Word8
del)
{-# INLINE field #-}
escapedField :: Parser ByteString
escapedField :: Parser ByteString
escapedField =
[Word8] -> ByteString
B.pack forall (m :: * -> *) a b. Monad m => (a -> b) -> m a -> m b
<$!> forall (m :: * -> *) open close a.
Applicative m =>
m open -> m close -> m a -> m a
between (forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Word8) =>
Token s -> m (Token s)
char Token ByteString
34) (forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Word8) =>
Token s -> m (Token s)
char Token ByteString
34) (forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many forall a b. (a -> b) -> a -> b
$ ParsecT ConversionError ByteString Identity (Token ByteString)
normalChar forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ParsecT ConversionError ByteString Identity Word8
escapedDq)
where
normalChar :: ParsecT ConversionError ByteString Identity (Token ByteString)
normalChar = forall e s (m :: * -> *).
MonadParsec e s m =>
Token s -> m (Token s)
anySingleBut Token ByteString
34 forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
<?> String
"unescaped character"
escapedDq :: ParsecT ConversionError ByteString Identity Word8
escapedDq = forall e s (m :: * -> *) a.
MonadParsec e s m =>
String -> m a -> m a
label String
"escaped double-quote" (Word8
34 forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string Tokens ByteString
"\"\"")
{-# INLINE escapedField #-}
unescapedField :: Word8 -> Parser ByteString
unescapedField :: Word8 -> Parser ByteString
unescapedField Word8
del = ByteString -> ByteString
BL.toStrict forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall e s (m :: * -> *).
MonadParsec e s m =>
Maybe String -> (Token s -> Bool) -> m (Tokens s)
takeWhileP (forall a. a -> Maybe a
Just String
"unescaped character") Word8 -> Bool
f
where
f :: Word8 -> Bool
f Word8
x = Word8
x forall a. Eq a => a -> a -> Bool
/= Word8
del Bool -> Bool -> Bool
&& Word8
x forall a. Eq a => a -> a -> Bool
/= Word8
34 Bool -> Bool -> Bool
&& Word8
x forall a. Eq a => a -> a -> Bool
/= Word8
10 Bool -> Bool -> Bool
&& Word8
x forall a. Eq a => a -> a -> Bool
/= Word8
13
{-# INLINE unescapedField #-}