-- |
-- Module      :  Data.Csv.Parser.Megaparsec.Internals
-- Copyright   :  © 2016–2021 Stack Builders
-- License     :  MIT
--

{-# LANGUAGE BangPatterns       #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE OverloadedStrings  #-}
{-# LANGUAGE RecordWildCards    #-}

module Data.Csv.Parser.Megaparsec.Internals 
    ( ConversionError (..)
    , Parser
    , csv
    , csvWithHeader
    , decodeWithC
    , toNamedRecord
    , header
    , name
    , record
    , field
    , escapedField
    , unescapedField)
where

import Control.Monad
import Data.ByteString (ByteString)
import Data.Csv hiding
  ( Parser
  , record
  , header
  , toNamedRecord )
import Data.Data
import Data.Vector (Vector)
import Data.Word (Word8)
import Text.Megaparsec
import Text.Megaparsec.Byte
import qualified Data.ByteString      as B
import qualified Data.ByteString.Lazy as BL
import qualified Data.Csv             as C
import qualified Data.HashMap.Strict  as H
import qualified Data.Vector          as V

----------------------------------------------------------------------------
-- Custom error component and other types

-- | Custom error component for CSV parsing. It allows typed reporting of
-- conversion errors.

newtype ConversionError = ConversionError String
  deriving (ConversionError -> ConversionError -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: ConversionError -> ConversionError -> Bool
$c/= :: ConversionError -> ConversionError -> Bool
== :: ConversionError -> ConversionError -> Bool
$c== :: ConversionError -> ConversionError -> Bool
Eq, Typeable ConversionError
ConversionError -> DataType
ConversionError -> Constr
(forall b. Data b => b -> b) -> ConversionError -> ConversionError
forall a.
Typeable a
-> (forall (c :: * -> *).
    (forall d b. Data d => c (d -> b) -> d -> c b)
    -> (forall g. g -> c g) -> a -> c a)
-> (forall (c :: * -> *).
    (forall b r. Data b => c (b -> r) -> c r)
    -> (forall r. r -> c r) -> Constr -> c a)
-> (a -> Constr)
-> (a -> DataType)
-> (forall (t :: * -> *) (c :: * -> *).
    Typeable t =>
    (forall d. Data d => c (t d)) -> Maybe (c a))
-> (forall (t :: * -> * -> *) (c :: * -> *).
    Typeable t =>
    (forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c a))
-> ((forall b. Data b => b -> b) -> a -> a)
-> (forall r r'.
    (r -> r' -> r) -> r -> (forall d. Data d => d -> r') -> a -> r)
-> (forall r r'.
    (r' -> r -> r) -> r -> (forall d. Data d => d -> r') -> a -> r)
-> (forall u. (forall d. Data d => d -> u) -> a -> [u])
-> (forall u. Int -> (forall d. Data d => d -> u) -> a -> u)
-> (forall (m :: * -> *).
    Monad m =>
    (forall d. Data d => d -> m d) -> a -> m a)
-> (forall (m :: * -> *).
    MonadPlus m =>
    (forall d. Data d => d -> m d) -> a -> m a)
-> (forall (m :: * -> *).
    MonadPlus m =>
    (forall d. Data d => d -> m d) -> a -> m a)
-> Data a
forall u.
Int -> (forall d. Data d => d -> u) -> ConversionError -> u
forall u. (forall d. Data d => d -> u) -> ConversionError -> [u]
forall r r'.
(r -> r' -> r)
-> r -> (forall d. Data d => d -> r') -> ConversionError -> r
forall r r'.
(r' -> r -> r)
-> r -> (forall d. Data d => d -> r') -> ConversionError -> r
forall (m :: * -> *).
Monad m =>
(forall d. Data d => d -> m d)
-> ConversionError -> m ConversionError
forall (m :: * -> *).
MonadPlus m =>
(forall d. Data d => d -> m d)
-> ConversionError -> m ConversionError
forall (c :: * -> *).
(forall b r. Data b => c (b -> r) -> c r)
-> (forall r. r -> c r) -> Constr -> c ConversionError
forall (c :: * -> *).
(forall d b. Data d => c (d -> b) -> d -> c b)
-> (forall g. g -> c g) -> ConversionError -> c ConversionError
forall (t :: * -> *) (c :: * -> *).
Typeable t =>
(forall d. Data d => c (t d)) -> Maybe (c ConversionError)
forall (t :: * -> * -> *) (c :: * -> *).
Typeable t =>
(forall d e. (Data d, Data e) => c (t d e))
-> Maybe (c ConversionError)
gmapMo :: forall (m :: * -> *).
MonadPlus m =>
(forall d. Data d => d -> m d)
-> ConversionError -> m ConversionError
$cgmapMo :: forall (m :: * -> *).
MonadPlus m =>
(forall d. Data d => d -> m d)
-> ConversionError -> m ConversionError
gmapMp :: forall (m :: * -> *).
MonadPlus m =>
(forall d. Data d => d -> m d)
-> ConversionError -> m ConversionError
$cgmapMp :: forall (m :: * -> *).
MonadPlus m =>
(forall d. Data d => d -> m d)
-> ConversionError -> m ConversionError
gmapM :: forall (m :: * -> *).
Monad m =>
(forall d. Data d => d -> m d)
-> ConversionError -> m ConversionError
$cgmapM :: forall (m :: * -> *).
Monad m =>
(forall d. Data d => d -> m d)
-> ConversionError -> m ConversionError
gmapQi :: forall u.
Int -> (forall d. Data d => d -> u) -> ConversionError -> u
$cgmapQi :: forall u.
Int -> (forall d. Data d => d -> u) -> ConversionError -> u
gmapQ :: forall u. (forall d. Data d => d -> u) -> ConversionError -> [u]
$cgmapQ :: forall u. (forall d. Data d => d -> u) -> ConversionError -> [u]
gmapQr :: forall r r'.
(r' -> r -> r)
-> r -> (forall d. Data d => d -> r') -> ConversionError -> r
$cgmapQr :: forall r r'.
(r' -> r -> r)
-> r -> (forall d. Data d => d -> r') -> ConversionError -> r
gmapQl :: forall r r'.
(r -> r' -> r)
-> r -> (forall d. Data d => d -> r') -> ConversionError -> r
$cgmapQl :: forall r r'.
(r -> r' -> r)
-> r -> (forall d. Data d => d -> r') -> ConversionError -> r
gmapT :: (forall b. Data b => b -> b) -> ConversionError -> ConversionError
$cgmapT :: (forall b. Data b => b -> b) -> ConversionError -> ConversionError
dataCast2 :: forall (t :: * -> * -> *) (c :: * -> *).
Typeable t =>
(forall d e. (Data d, Data e) => c (t d e))
-> Maybe (c ConversionError)
$cdataCast2 :: forall (t :: * -> * -> *) (c :: * -> *).
Typeable t =>
(forall d e. (Data d, Data e) => c (t d e))
-> Maybe (c ConversionError)
dataCast1 :: forall (t :: * -> *) (c :: * -> *).
Typeable t =>
(forall d. Data d => c (t d)) -> Maybe (c ConversionError)
$cdataCast1 :: forall (t :: * -> *) (c :: * -> *).
Typeable t =>
(forall d. Data d => c (t d)) -> Maybe (c ConversionError)
dataTypeOf :: ConversionError -> DataType
$cdataTypeOf :: ConversionError -> DataType
toConstr :: ConversionError -> Constr
$ctoConstr :: ConversionError -> Constr
gunfold :: forall (c :: * -> *).
(forall b r. Data b => c (b -> r) -> c r)
-> (forall r. r -> c r) -> Constr -> c ConversionError
$cgunfold :: forall (c :: * -> *).
(forall b r. Data b => c (b -> r) -> c r)
-> (forall r. r -> c r) -> Constr -> c ConversionError
gfoldl :: forall (c :: * -> *).
(forall d b. Data d => c (d -> b) -> d -> c b)
-> (forall g. g -> c g) -> ConversionError -> c ConversionError
$cgfoldl :: forall (c :: * -> *).
(forall d b. Data d => c (d -> b) -> d -> c b)
-> (forall g. g -> c g) -> ConversionError -> c ConversionError
Data, Typeable, Eq ConversionError
ConversionError -> ConversionError -> Bool
ConversionError -> ConversionError -> Ordering
ConversionError -> ConversionError -> ConversionError
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: ConversionError -> ConversionError -> ConversionError
$cmin :: ConversionError -> ConversionError -> ConversionError
max :: ConversionError -> ConversionError -> ConversionError
$cmax :: ConversionError -> ConversionError -> ConversionError
>= :: ConversionError -> ConversionError -> Bool
$c>= :: ConversionError -> ConversionError -> Bool
> :: ConversionError -> ConversionError -> Bool
$c> :: ConversionError -> ConversionError -> Bool
<= :: ConversionError -> ConversionError -> Bool
$c<= :: ConversionError -> ConversionError -> Bool
< :: ConversionError -> ConversionError -> Bool
$c< :: ConversionError -> ConversionError -> Bool
compare :: ConversionError -> ConversionError -> Ordering
$ccompare :: ConversionError -> ConversionError -> Ordering
Ord, ReadPrec [ConversionError]
ReadPrec ConversionError
Int -> ReadS ConversionError
ReadS [ConversionError]
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [ConversionError]
$creadListPrec :: ReadPrec [ConversionError]
readPrec :: ReadPrec ConversionError
$creadPrec :: ReadPrec ConversionError
readList :: ReadS [ConversionError]
$creadList :: ReadS [ConversionError]
readsPrec :: Int -> ReadS ConversionError
$creadsPrec :: Int -> ReadS ConversionError
Read, Int -> ConversionError -> String -> String
[ConversionError] -> String -> String
ConversionError -> String
forall a.
(Int -> a -> String -> String)
-> (a -> String) -> ([a] -> String -> String) -> Show a
showList :: [ConversionError] -> String -> String
$cshowList :: [ConversionError] -> String -> String
show :: ConversionError -> String
$cshow :: ConversionError -> String
showsPrec :: Int -> ConversionError -> String -> String
$cshowsPrec :: Int -> ConversionError -> String -> String
Show)

instance ShowErrorComponent ConversionError where
  showErrorComponent :: ConversionError -> String
showErrorComponent (ConversionError String
msg) =
    String
"conversion error: " forall a. [a] -> [a] -> [a]
++ String
msg

-- | Parser type that uses “custom error component” 'ConversionError'.

type Parser = Parsec ConversionError BL.ByteString

----------------------------------------------------------------------------
-- The parser

-- | Parse a CSV file that does not include a header.

csv :: FromRecord a
  => DecodeOptions     -- ^ Decoding options
  -> Parser (Vector a) -- ^ The parser that parses collection of records
csv :: forall a. FromRecord a => DecodeOptions -> Parser (Vector a)
csv DecodeOptions {Word8
decDelimiter :: DecodeOptions -> Word8
decDelimiter :: Word8
..} = do
  [a]
xs <- forall (m :: * -> *) a sep. MonadPlus m => m a -> m sep -> m [a]
sepEndBy1 (forall a. Word8 -> (Record -> Parser a) -> Parser a
record Word8
decDelimiter forall a. FromRecord a => Record -> Parser a
parseRecord) forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Word8) =>
m (Tokens s)
eol
  forall e s (m :: * -> *). MonadParsec e s m => m ()
eof
  forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$! forall a. [a] -> Vector a
V.fromList [a]
xs

-- | Parse a CSV file that includes a header.

csvWithHeader :: FromNamedRecord a
  => DecodeOptions     -- ^ Decoding options
  -> Parser (Header, Vector a)
     -- ^ The parser that parser collection of named records
csvWithHeader :: forall a.
FromNamedRecord a =>
DecodeOptions -> Parser (Record, Vector a)
csvWithHeader DecodeOptions {Word8
decDelimiter :: Word8
decDelimiter :: DecodeOptions -> Word8
..} = do
  !Record
hdr <- Word8 -> Parser Record
header Word8
decDelimiter
  let f :: Record -> Parser a
f = forall a. FromNamedRecord a => NamedRecord -> Parser a
parseNamedRecord forall b c a. (b -> c) -> (a -> b) -> a -> c
. Record -> Record -> NamedRecord
toNamedRecord Record
hdr
  [a]
xs   <- forall (m :: * -> *) a sep. MonadPlus m => m a -> m sep -> m [a]
sepEndBy1 (forall a. Word8 -> (Record -> Parser a) -> Parser a
record Word8
decDelimiter Record -> Parser a
f) forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Word8) =>
m (Tokens s)
eol
  forall e s (m :: * -> *). MonadParsec e s m => m ()
eof
  forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ let !v :: Vector a
v = forall a. [a] -> Vector a
V.fromList [a]
xs in (Record
hdr, Vector a
v)

-- | Decode CSV data using the provided parser, skipping a leading header if
-- necessary.

decodeWithC
  :: (DecodeOptions -> Parser a)
     -- ^ Parsing function parametrized by 'DecodeOptions'
  -> DecodeOptions
     -- ^ Decoding options
  -> HasHeader
     -- ^ Whether to expect a header in the input
  -> FilePath
     -- ^ File name (only for displaying in parse error messages, use empty
     -- string if you have none)
  -> BL.ByteString
     -- ^ CSV data
  -> Either (ParseErrorBundle BL.ByteString ConversionError) a
decodeWithC :: forall a.
(DecodeOptions -> Parser a)
-> DecodeOptions
-> HasHeader
-> String
-> ByteString
-> Either (ParseErrorBundle ByteString ConversionError) a
decodeWithC DecodeOptions -> Parser a
p opts :: DecodeOptions
opts@DecodeOptions {Word8
decDelimiter :: Word8
decDelimiter :: DecodeOptions -> Word8
..} HasHeader
hasHeader = forall e s a.
Parsec e s a -> String -> s -> Either (ParseErrorBundle s e) a
parse Parser a
parser
  where
    parser :: Parser a
parser = case HasHeader
hasHeader of
      HasHeader
HasHeader -> Word8 -> Parser Record
header Word8
decDelimiter forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> DecodeOptions -> Parser a
p DecodeOptions
opts
      HasHeader
NoHeader  -> DecodeOptions -> Parser a
p DecodeOptions
opts
{-# INLINE decodeWithC #-}

-- | Convert a 'Record' to a 'NamedRecord' by attaching column names. The
-- 'Header' and 'Record' must be of the same length.

toNamedRecord :: Header -> Record -> NamedRecord
toNamedRecord :: Record -> Record -> NamedRecord
toNamedRecord Record
hdr Record
v = forall k v. (Eq k, Hashable k) => [(k, v)] -> HashMap k v
H.fromList forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Vector a -> [a]
V.toList forall a b. (a -> b) -> a -> b
$ forall a b. Vector a -> Vector b -> Vector (a, b)
V.zip Record
hdr Record
v
{-# INLINE toNamedRecord #-}

-- | Parse a header, including the terminating line separator.

header :: Word8 -> Parser Header
header :: Word8 -> Parser Record
header Word8
del = forall a. [a] -> Vector a
V.fromList forall (m :: * -> *) a b. Monad m => (a -> b) -> m a -> m b
<$!> ParsecT ConversionError ByteString Identity [ByteString]
p forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Word8) =>
m (Tokens s)
eol
  where
    p :: ParsecT ConversionError ByteString Identity [ByteString]
p = forall (m :: * -> *) a sep. MonadPlus m => m a -> m sep -> m [a]
sepBy1 (Word8 -> Parser ByteString
name Word8
del) (forall (f :: * -> *) a. Functor f => f a -> f ()
void forall a b. (a -> b) -> a -> b
$ forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Word8) =>
Token s -> m (Token s)
char Word8
del) forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
<?> String
"file header"
{-# INLINE header #-}

-- | Parse a header name. Header names have the same format as regular
-- 'field's.

name :: Word8 -> Parser Name
name :: Word8 -> Parser ByteString
name Word8
del = Word8 -> Parser ByteString
field Word8
del forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
<?> String
"name in header"
{-# INLINE name #-}

-- | Parse a record, not including the terminating line separator. The
-- terminating line separate is not included as the last record in a CSV
-- file is allowed to not have a terminating line separator.

record
  :: Word8             -- ^ Field delimiter
  -> (Record -> C.Parser a)
     -- ^ How to “parse” record to get the data of interest
  -> Parser a
record :: forall a. Word8 -> (Record -> Parser a) -> Parser a
record Word8
del Record -> Parser a
f = do
  forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m ()
notFollowedBy forall e s (m :: * -> *). MonadParsec e s m => m ()
eof -- to prevent reading empty line at the end of file
  Record
r <- forall a. [a] -> Vector a
V.fromList forall (m :: * -> *) a b. Monad m => (a -> b) -> m a -> m b
<$!> (forall (m :: * -> *) a sep. MonadPlus m => m a -> m sep -> m [a]
sepBy1 (Word8 -> Parser ByteString
field Word8
del) (forall (f :: * -> *) a. Functor f => f a -> f ()
void forall a b. (a -> b) -> a -> b
$ forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Word8) =>
Token s -> m (Token s)
char Word8
del) forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
<?> String
"record")
  case forall a. Parser a -> Either String a
C.runParser (Record -> Parser a
f Record
r) of
    Left String
msg -> forall e s (m :: * -> *) a. MonadParsec e s m => e -> m a
customFailure (String -> ConversionError
ConversionError String
msg)
    Right a
x  -> forall (m :: * -> *) a. Monad m => a -> m a
return a
x
{-# INLINE record #-}

-- | Parse a field. The field may be in either the escaped or non-escaped
-- format. The returned value is unescaped.

field :: Word8 -> Parser Field
field :: Word8 -> Parser ByteString
field Word8
del = forall e s (m :: * -> *) a.
MonadParsec e s m =>
String -> m a -> m a
label String
"field" (Parser ByteString
escapedField forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Word8 -> Parser ByteString
unescapedField Word8
del)
{-# INLINE field #-}

-- | Parse an escaped field.

escapedField :: Parser ByteString
escapedField :: Parser ByteString
escapedField =
  [Word8] -> ByteString
B.pack forall (m :: * -> *) a b. Monad m => (a -> b) -> m a -> m b
<$!> forall (m :: * -> *) open close a.
Applicative m =>
m open -> m close -> m a -> m a
between (forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Word8) =>
Token s -> m (Token s)
char Token ByteString
34) (forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Word8) =>
Token s -> m (Token s)
char Token ByteString
34) (forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many forall a b. (a -> b) -> a -> b
$ ParsecT ConversionError ByteString Identity (Token ByteString)
normalChar forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ParsecT ConversionError ByteString Identity Word8
escapedDq)
  where
    normalChar :: ParsecT ConversionError ByteString Identity (Token ByteString)
normalChar = forall e s (m :: * -> *).
MonadParsec e s m =>
Token s -> m (Token s)
anySingleBut Token ByteString
34 forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
<?> String
"unescaped character"
    escapedDq :: ParsecT ConversionError ByteString Identity Word8
escapedDq  = forall e s (m :: * -> *) a.
MonadParsec e s m =>
String -> m a -> m a
label String
"escaped double-quote" (Word8
34 forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string Tokens ByteString
"\"\"")
{-# INLINE escapedField #-}

-- | Parse an unescaped field.

unescapedField :: Word8 -> Parser ByteString
unescapedField :: Word8 -> Parser ByteString
unescapedField Word8
del = ByteString -> ByteString
BL.toStrict forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall e s (m :: * -> *).
MonadParsec e s m =>
Maybe String -> (Token s -> Bool) -> m (Tokens s)
takeWhileP (forall a. a -> Maybe a
Just String
"unescaped character") Word8 -> Bool
f
  where
    f :: Word8 -> Bool
f Word8
x = Word8
x forall a. Eq a => a -> a -> Bool
/= Word8
del Bool -> Bool -> Bool
&& Word8
x forall a. Eq a => a -> a -> Bool
/= Word8
34 Bool -> Bool -> Bool
&& Word8
x forall a. Eq a => a -> a -> Bool
/= Word8
10 Bool -> Bool -> Bool
&& Word8
x forall a. Eq a => a -> a -> Bool
/= Word8
13
{-# INLINE unescapedField #-}