{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}

{- | Build CSVs using the abstractions provided in the @colonnade@ library, and
  parse CSVs using 'Siphon', which is the dual of 'Colonnade'.
  Read the documentation for @colonnade@ before reading the documentation
  for @siphon@. All of the examples on this page assume a common set of
  imports that are provided at the bottom of this page.
-}
module Siphon
  ( -- * Encode CSV
    encodeCsv
  , encodeCsvStream
  , encodeCsvUtf8
  , encodeCsvStreamUtf8

    -- * Decode CSV
  , decodeCsvUtf8
  , decodeHeadedCsvUtf8
  , decodeIndexedCsvUtf8

    -- * Build Siphon
  , headed
  , headless
  , indexed

    -- * Types
  , Escaped
  , Siphon
  , SiphonError (..)
  , Indexed (..)

    -- * For Testing
  , headedToIndexed

    -- * Utility
  , humanizeSiphonError
  , eqSiphonHeaders
  , showSiphonHeaders

    -- * Imports
    -- $setup
  ) where

import Control.Monad
import Control.Monad.ST
import Control.Monad.Trans.Class
import Data.Attoparsec.ByteString.Char8 (char)
import Data.ByteString (ByteString)
import Data.ByteString.Builder (byteString, toLazyByteString)
import Data.Char (chr)
import Data.Functor.Classes (Eq1, Show1, liftEq, showsPrec1)
import Data.Functor.Identity (Identity (..))
import Data.Text (Text)
import Data.Text.Encoding (decodeUtf8')
import Data.Vector (Vector)
import Data.Vector.Mutable (MVector)
import Data.Word (Word8)
import Siphon.Types
import Streaming (Of (..), Stream)

import qualified Colonnade.Encode as CE
import qualified Data.Attoparsec.ByteString as A
import qualified Data.Attoparsec.Lazy as AL
import qualified Data.Attoparsec.Types as ATYP
import qualified Data.Attoparsec.Zepto as Z
import qualified Data.ByteString as B
import qualified Data.ByteString as S
import qualified Data.ByteString.Builder as BB
import qualified Data.ByteString.Builder as Builder
import qualified Data.ByteString.Char8 as BC8
import qualified Data.ByteString.Lazy as LByteString
import qualified Data.ByteString.Unsafe as S
import qualified Data.List as L
import qualified Data.Semigroup as SG
import qualified Data.Text as T
import qualified Data.Text.Lazy as LT
import qualified Data.Text.Lazy.Builder as TB
import qualified Data.Vector as V
import qualified Data.Vector.Mutable as MV
import qualified Streaming as SM
import qualified Streaming.Prelude as SMP

newtype Escaped c = Escaped {getEscaped :: c}
data Ended = EndedYes | EndedNo
  deriving (Show)
data CellResult c = CellResultData !c | CellResultNewline !c !Ended
  deriving (Show)

-- | Backwards-compatibility alias for 'decodeHeadedCsvUtf8'.
decodeCsvUtf8 ::
  (Monad m) =>
  Siphon CE.Headed ByteString a ->
  -- | encoded csv
  Stream (Of ByteString) m () ->
  Stream (Of a) m (Maybe SiphonError)
decodeCsvUtf8 = decodeHeadedCsvUtf8

-- | Decode a CSV whose first row is contains headers identify each column.
decodeHeadedCsvUtf8 ::
  (Monad m) =>
  Siphon CE.Headed ByteString a ->
  -- | encoded csv
  Stream (Of ByteString) m () ->
  Stream (Of a) m (Maybe SiphonError)
decodeHeadedCsvUtf8 headedSiphon s1 = do
  e <- lift (consumeHeaderRowUtf8 s1)
  case e of
    Left err -> return (Just err)
    Right (v :> s2) -> case headedToIndexed utf8ToStr v headedSiphon of
      Left err -> return (Just err)
      Right ixedSiphon -> do
        let requiredLength = V.length v
        consumeBodyUtf8 1 requiredLength ixedSiphon s2

-- | Decode a CSV without a header.
decodeIndexedCsvUtf8 ::
  (Monad m) =>
  -- | How many columns are there? This number should be greater than any indices referenced by the scheme.
  Int ->
  Siphon Indexed ByteString a ->
  -- | encoded csv
  Stream (Of ByteString) m () ->
  Stream (Of a) m (Maybe SiphonError)
decodeIndexedCsvUtf8 !requiredLength ixedSiphon s1 = do
  consumeBodyUtf8 0 requiredLength ixedSiphon s1

encodeCsvStreamUtf8 ::
  (Monad m, CE.Headedness h) =>
  CE.Colonnade h a ByteString ->
  Stream (Of a) m r ->
  Stream (Of ByteString) m r
encodeCsvStreamUtf8 =
  encodeCsvInternal escapeChar8 (B.singleton comma) (B.singleton newline)

{- | Streaming variant of 'encodeCsv'. This is particularly useful
  when you need to produce millions of rows without having them
  all loaded into memory at the same time.
-}
encodeCsvStream ::
  (Monad m, CE.Headedness h) =>
  CE.Colonnade h a Text ->
  Stream (Of a) m r ->
  Stream (Of Text) m r
encodeCsvStream =
  encodeCsvInternal textEscapeChar8 (T.singleton ',') (T.singleton '\n')

{- | Encode a collection to a CSV as a text 'TB.Builder'. For example,
  we can take the following columnar encoding of a person:

>>> :{
let colPerson :: Colonnade Headed Person Text
    colPerson = mconcat
      [ C.headed "Name" name
      , C.headed "Age" (T.pack . show . age)
      , C.headed "Company" (fromMaybe "N/A" . company)
      ]
:}

And we have the following people whom we wish to encode
in this way:

>>> :{
let people :: [Person]
    people =
      [ Person "Chao" 26 (Just "Tectonic, Inc.")
      , Person "Elsie" 41 (Just "Globex Corporation")
      , Person "Arabella" 19 Nothing
      ]
:}

We pair the encoding with the rows to get a CSV:

>>> LTIO.putStr (TB.toLazyText (encodeCsv colPerson people))
Name,Age,Company
Chao,26,"Tectonic, Inc."
Elsie,41,Globex Corporation
Arabella,19,N/A
-}
encodeCsv ::
  (Foldable f, CE.Headedness h) =>
  -- | Tablular encoding
  CE.Colonnade h a Text ->
  -- | Value of each row
  f a ->
  TB.Builder
encodeCsv enc =
  textStreamToBuilder . encodeCsvStream enc . SMP.each

-- | Encode a collection to a CSV as a bytestring 'BB.Builder'.
encodeCsvUtf8 ::
  (Foldable f, CE.Headedness h) =>
  -- | Tablular encoding
  CE.Colonnade h a ByteString ->
  -- | Value of each row
  f a ->
  BB.Builder
encodeCsvUtf8 enc =
  streamToBuilder . encodeCsvStreamUtf8 enc . SMP.each

streamToBuilder :: Stream (Of ByteString) Identity () -> BB.Builder
streamToBuilder s =
  SM.destroy
    s
    (\(bs :> bb) -> BB.byteString bs <> bb)
    runIdentity
    (\() -> mempty)

textStreamToBuilder :: Stream (Of Text) Identity () -> TB.Builder
textStreamToBuilder s =
  SM.destroy
    s
    (\(bs :> bb) -> TB.fromText bs <> bb)
    runIdentity
    (\() -> mempty)

encodeCsvInternal ::
  (Monad m, CE.Headedness h) =>
  (c -> Escaped c) ->
  -- | separator
  c ->
  -- | newline
  c ->
  CE.Colonnade h a c ->
  Stream (Of a) m r ->
  Stream (Of c) m r
encodeCsvInternal escapeFunc separatorStr newlineStr colonnade s = do
  case CE.headednessExtract of
    Just toContent -> encodeHeader toContent escapeFunc separatorStr newlineStr colonnade
    Nothing -> return ()
  encodeRows escapeFunc separatorStr newlineStr colonnade s

encodeHeader ::
  (Monad m) =>
  (h c -> c) ->
  (c -> Escaped c) ->
  -- | separator
  c ->
  -- | newline
  c ->
  CE.Colonnade h a c ->
  Stream (Of c) m ()
encodeHeader toContent escapeFunc separatorStr newlineStr colonnade = do
  let (vs, ws) = V.splitAt 1 (CE.getColonnade colonnade)
  -- we only need to do this split because the first cell
  -- gets treated differently than the others. It does not
  -- get a separator added before it.
  V.forM_ vs $ \(CE.OneColonnade h _) -> do
    SMP.yield (getEscaped (escapeFunc (toContent h)))
  V.forM_ ws $ \(CE.OneColonnade h _) -> do
    SMP.yield separatorStr
    SMP.yield (getEscaped (escapeFunc (toContent h)))
  SMP.yield newlineStr

mapStreamM ::
  (Monad m) =>
  (a -> Stream (Of b) m x) ->
  Stream (Of a) m r ->
  Stream (Of b) m r
mapStreamM f = SM.concats . SM.mapsM (\(a :> s) -> return (f a >> return s))

encodeRows ::
  (Monad m) =>
  (c -> Escaped c) ->
  -- | separator
  c ->
  -- | newline
  c ->
  CE.Colonnade f a c ->
  Stream (Of a) m r ->
  Stream (Of c) m r
encodeRows escapeFunc separatorStr newlineStr colonnade = mapStreamM $ \a -> do
  let (vs, ws) = V.splitAt 1 (CE.getColonnade colonnade)
  -- we only need to do this split because the first cell
  -- gets treated differently than the others. It does not
  -- get a separator added before it.
  V.forM_ vs $ \(CE.OneColonnade _ encode) -> SMP.yield (getEscaped (escapeFunc (encode a)))
  V.forM_ ws $ \(CE.OneColonnade _ encode) -> do
    SMP.yield separatorStr
    SMP.yield (getEscaped (escapeFunc (encode a)))
  SMP.yield newlineStr

{- | Maps over a 'Decolonnade' that expects headers, converting these
  expected headers into the indices of the columns that they
  correspond to.
-}
headedToIndexed ::
  forall c a.
  (Eq c) =>
  (c -> T.Text) ->
  -- | Headers in the source document
  Vector c ->
  -- | Decolonnade that contains expected headers
  Siphon CE.Headed c a ->
  Either SiphonError (Siphon Indexed c a)
headedToIndexed toStr v =
  mapLeft (\(HeaderErrors a b c) -> SiphonError 0 (RowErrorHeaders a b c))
    . getEitherWrap
    . go
 where
  go ::
    forall b.
    Siphon CE.Headed c b ->
    EitherWrap HeaderErrors (Siphon Indexed c b)
  go (SiphonPure b) = EitherWrap (Right (SiphonPure b))
  go (SiphonAp (CE.Headed h) decode apNext) =
    let rnext = go apNext
        ixs = V.elemIndices h v
        ixsLen = V.length ixs
        rcurrent
          | ixsLen == 1 = Right (ixs V.! 0)
          | ixsLen == 0 = Left (HeaderErrors V.empty (V.singleton (toStr h)) V.empty)
          | otherwise =
              let dups = V.singleton (V.map (\ix -> CellError ix (toStr (v V.! ix {- (V.unsafeIndex v ix) -}))) ixs)
               in Left (HeaderErrors dups V.empty V.empty)
     in (\ix nextSiphon -> SiphonAp (Indexed ix) decode nextSiphon)
          <$> EitherWrap rcurrent
          <*> rnext

data HeaderErrors = HeaderErrors !(Vector (Vector CellError)) !(Vector T.Text) !(Vector Int)

instance Semigroup HeaderErrors where
  HeaderErrors a1 b1 c1 <> HeaderErrors a2 b2 c2 =
    HeaderErrors
      (mappend a1 a2)
      (mappend b1 b2)
      (mappend c1 c2)

instance Monoid HeaderErrors where
  mempty = HeaderErrors mempty mempty mempty
  mappend = (SG.<>)

-- byteStringChar8 :: Siphon ByteString
-- byteStringChar8 = Siphon
--   escape
--   encodeRow
--   (A.parse (row comma))
--   B.null

escapeChar8 :: ByteString -> Escaped ByteString
escapeChar8 t = case B.find (\c -> c == newline || c == cr || c == comma || c == doubleQuote) t of
  Nothing -> Escaped t
  Just _ -> escapeAlways t

textEscapeChar8 :: Text -> Escaped Text
textEscapeChar8 t = case T.find (\c -> c == '\n' || c == '\r' || c == ',' || c == '"') t of
  Nothing -> Escaped t
  Just _ -> textEscapeAlways t

-- This implementation is definitely suboptimal.
-- A better option (which would waste a little space
-- but would be much faster) would be to build the
-- new bytestring by writing to a buffer directly.
escapeAlways :: ByteString -> Escaped ByteString
escapeAlways t =
  Escaped $
    LByteString.toStrict $
      Builder.toLazyByteString $
        Builder.word8 doubleQuote
          <> B.foldl
            ( \acc b ->
                acc
                  <> if b == doubleQuote
                    then
                      Builder.byteString
                        (B.pack [doubleQuote, doubleQuote])
                    else Builder.word8 b
            )
            mempty
            t
          <> Builder.word8 doubleQuote

-- Suboptimal for similar reason as escapeAlways.
textEscapeAlways :: Text -> Escaped Text
textEscapeAlways t =
  Escaped $
    LT.toStrict $
      TB.toLazyText $
        TB.singleton '"'
          <> T.foldl
            ( \acc b ->
                acc
                  <> if b == '"'
                    then TB.fromString "\"\""
                    else TB.singleton b
            )
            mempty
            t
          <> TB.singleton '"'

-- Parse a record, not including the terminating line separator. The
-- terminating line separate is not included as the last record in a
-- CSV file is allowed to not have a terminating line separator. You
-- most likely want to use the 'endOfLine' parser in combination with
-- this parser.
--
-- row :: Word8  -- ^ Field delimiter
--     -> AL.Parser (Vector ByteString)
-- row !delim = rowNoNewline delim <* endOfLine
-- {-# INLINE row #-}
--
-- rowNoNewline :: Word8  -- ^ Field delimiter
--              -> AL.Parser (Vector ByteString)
-- rowNoNewline !delim = V.fromList <$!> field delim `sepByDelim1'` delim
-- {-# INLINE rowNoNewline #-}
--
-- removeBlankLines :: [Vector ByteString] -> [Vector ByteString]
-- removeBlankLines = filter (not . blankLine)

{- | Parse a field. The field may be in either the escaped or
  non-escaped format. The return value is unescaped. This
  parser will consume the comma that comes after a field
  but not a newline that follows a field. If we are positioned
  at a newline when it starts, that newline will be consumed
  and we return CellResultNewline.
-}
field :: Word8 -> AL.Parser (CellResult ByteString)
field !delim = do
  mb <- A.peekWord8
  -- We purposely don't use <|> as we want to commit to the first
  -- choice if we see a double quote.
  case mb of
    Just b
      | b == doubleQuote -> do
          (bs, tc) <- escapedField
          case tc of
            TrailCharComma -> return (CellResultData bs)
            TrailCharNewline -> return (CellResultNewline bs EndedNo)
            TrailCharEnd -> return (CellResultNewline bs EndedYes)
      | b == 10 || b == 13 -> do
          _ <- eatNewlines
          isEnd <- A.atEnd
          if isEnd
            then return (CellResultNewline B.empty EndedYes)
            else return (CellResultNewline B.empty EndedNo)
      | otherwise -> do
          (bs, tc) <- unescapedField delim
          case tc of
            TrailCharComma -> return (CellResultData bs)
            TrailCharNewline -> return (CellResultNewline bs EndedNo)
            TrailCharEnd -> return (CellResultNewline bs EndedYes)
    Nothing -> return (CellResultNewline B.empty EndedYes)
{-# INLINE field #-}

eatNewlines :: AL.Parser S.ByteString
eatNewlines = A.takeWhile (\x -> x == 10 || x == 13)

escapedField :: AL.Parser (S.ByteString, TrailChar)
escapedField = do
  _ <- dquote
  -- The scan state is 'True' if the previous character was a double
  -- quote.  We need to drop a trailing double quote left by scan.
  s <-
    S.init
      <$> ( A.scan False $ \s c ->
              if c == doubleQuote
                then Just (not s)
                else
                  if s
                    then Nothing
                    else Just False
          )
  mb <- A.peekWord8
  trailChar <- case mb of
    Just b
      | b == comma -> A.anyWord8 >> return TrailCharComma
      | b == newline -> A.anyWord8 >> return TrailCharNewline
      | b == cr -> do
          _ <- A.anyWord8
          _ <- A.word8 newline
          return TrailCharNewline
      | otherwise -> fail "encountered double quote after escaped field"
    Nothing -> return TrailCharEnd
  if doubleQuote `S.elem` s
    then case Z.parse unescape s of
      Right r -> return (r, trailChar)
      Left err -> fail err
    else return (s, trailChar)

data TrailChar = TrailCharNewline | TrailCharComma | TrailCharEnd

{- | Consume an unescaped field. If it ends with a newline,
  leave that in tact. If it ends with a comma, consume the comma.
-}
unescapedField :: Word8 -> AL.Parser (S.ByteString, TrailChar)
unescapedField !delim = do
  bs <- A.takeWhile $ \c ->
    c /= doubleQuote
      && c /= newline
      && c /= delim
      && c /= cr
  mb <- A.peekWord8
  case mb of
    Just b
      | b == comma -> A.anyWord8 >> return (bs, TrailCharComma)
      | b == newline -> A.anyWord8 >> return (bs, TrailCharNewline)
      | b == cr -> do
          _ <- A.anyWord8
          _ <- A.word8 newline
          return (bs, TrailCharNewline)
      | otherwise -> fail "encountered double quote in unescaped field"
    Nothing -> return (bs, TrailCharEnd)

dquote :: AL.Parser Char
dquote = char '"'

{- | This could be improved. We could avoid the builder and just
write to a buffer directly.
-}
unescape :: Z.Parser S.ByteString
unescape = (LByteString.toStrict . toLazyByteString) <$!> go mempty
 where
  go acc = do
    h <- Z.takeWhile (/= doubleQuote)
    let rest = do
          start <- Z.take 2
          if ( S.unsafeHead start == doubleQuote
                && S.unsafeIndex start 1 == doubleQuote
             )
            then go (acc `mappend` byteString h `mappend` byteString (BC8.singleton '"'))
            else fail "invalid CSV escape sequence"
    done <- Z.atEnd
    if done
      then return (acc `mappend` byteString h)
      else rest

doubleQuote, newline, cr, comma :: Word8
doubleQuote = 34
newline = 10
cr = 13
comma = 44

{- | This adds one to the index because text editors consider
  line number to be one-based, not zero-based.
-}
humanizeSiphonError :: SiphonError -> String
humanizeSiphonError (SiphonError ix e) =
  unlines $
    ("Decolonnade error on line " ++ show (ix + 1) ++ " of file.")
      : ("Error Category: " ++ descr)
      : map ("  " ++) errDescrs
 where
  (descr, errDescrs) = prettyRowError e

prettyRowError :: RowError -> (String, [String])
prettyRowError x = case x of
  RowErrorParse ->
    (,)
      "CSV Parsing"
      [ "The cells were malformed."
      ]
  RowErrorSize reqLen actualLen ->
    (,)
      "Row Length"
      [ "Expected the row to have exactly " ++ show reqLen ++ " cells."
      , "The row only has " ++ show actualLen ++ " cells."
      ]
  RowErrorHeaderSize reqLen actualLen ->
    (,)
      "Minimum Header Length"
      [ "Expected the row to have at least " ++ show reqLen ++ " cells."
      , "The row only has " ++ show actualLen ++ " cells."
      ]
  RowErrorMalformed column ->
    (,)
      "Text Decolonnade"
      [ "Tried to decode input input in column " ++ columnNumToLetters column ++ " text"
      , "There is a mistake in the encoding of the text."
      ]
  RowErrorHeaders dupErrs namedErrs unnamedErrs ->
    (,) "Missing Headers" $
      concat
        [ if V.length namedErrs > 0 then prettyNamedMissingHeaders namedErrs else []
        , if V.length unnamedErrs > 0 then ["Missing unnamed headers"] else []
        , if V.length dupErrs > 0 then prettyHeadingErrors dupErrs else []
        ]
  RowErrorDecode errs -> (,) "Cell Decolonnade" (prettyCellErrors errs)

prettyCellErrors :: Vector CellError -> [String]
prettyCellErrors errs = drop 1 $
  flip concatMap errs $ \(CellError ix content) ->
    let str = T.unpack content
     in [ "-----------"
        , "Column " ++ columnNumToLetters ix
        , "Cell Content Length: " ++ show (Prelude.length str)
        , "Cell Content: "
            ++ if null str
              then "[empty cell]"
              else str
        ]

prettyNamedMissingHeaders :: Vector T.Text -> [String]
prettyNamedMissingHeaders missing =
  concat
    [ concatMap (\h -> ["The header " ++ T.unpack h ++ " was missing."]) missing
    ]

prettyHeadingErrors :: Vector (Vector CellError) -> [String]
prettyHeadingErrors missing = join (V.toList (fmap f missing))
 where
  f :: Vector CellError -> [String]
  f v
    | not (V.null w) && V.all (== V.head w) (V.tail w) =
        [ "The header ["
        , T.unpack (V.head w)
        , "] appears in columns "
        , L.intercalate ", " (V.toList (V.map (\(CellError ix _) -> columnNumToLetters ix) v))
        ]
    | otherwise =
        multiMsg
          : V.toList
            (V.map (\(CellError ix content) -> "  Column " ++ columnNumToLetters ix ++ ": " ++ T.unpack content) v)
   where
    w :: Vector T.Text
    w = V.map cellErrorContent v
    multiMsg :: String
    multiMsg = "Multiple headers matched the same predicate:"

columnNumToLetters :: Int -> String
columnNumToLetters i
  | i >= 0 && i < 25 = [chr (i + 65)]
  | otherwise = "Beyond Z. Fix this."

newtype EitherWrap a b = EitherWrap
  { getEitherWrap :: Either a b
  }
  deriving (Functor)

instance (Monoid a) => Applicative (EitherWrap a) where
  pure = EitherWrap . Right
  EitherWrap (Left a1) <*> EitherWrap (Left a2) = EitherWrap (Left (mappend a1 a2))
  EitherWrap (Left a1) <*> EitherWrap (Right _) = EitherWrap (Left a1)
  EitherWrap (Right _) <*> EitherWrap (Left a2) = EitherWrap (Left a2)
  EitherWrap (Right f) <*> EitherWrap (Right b) = EitherWrap (Right (f b))

mapLeft :: (a -> b) -> Either a c -> Either b c
mapLeft _ (Right a) = Right a
mapLeft f (Left a) = Left (f a)

consumeHeaderRowUtf8 ::
  (Monad m) =>
  Stream (Of ByteString) m () ->
  m (Either SiphonError (Of (Vector ByteString) (Stream (Of ByteString) m ())))
consumeHeaderRowUtf8 = consumeHeaderRow (A.parse (field comma)) B.null B.empty (\() -> True)

consumeBodyUtf8 ::
  forall m a.
  (Monad m) =>
  -- | index of first row, usually zero or one
  Int ->
  -- | Required row length
  Int ->
  Siphon Indexed ByteString a ->
  Stream (Of ByteString) m () ->
  Stream (Of a) m (Maybe SiphonError)
consumeBodyUtf8 =
  consumeBody
    utf8ToStr
    (A.parse (field comma))
    B.null
    B.empty
    (\() -> True)

utf8ToStr :: ByteString -> T.Text
utf8ToStr = either (\_ -> T.empty) id . decodeUtf8'

consumeHeaderRow ::
  forall m r c.
  (Monad m) =>
  (c -> ATYP.IResult c (CellResult c)) ->
  -- | true if null string
  (c -> Bool) ->
  c ->
  -- | true if termination is acceptable
  (r -> Bool) ->
  Stream (Of c) m r ->
  m (Either SiphonError (Of (Vector c) (Stream (Of c) m r)))
consumeHeaderRow parseCell isNull emptyStr isGood s0 = go 0 StrictListNil s0
 where
  go ::
    Int ->
    StrictList c ->
    Stream (Of c) m r ->
    m (Either SiphonError (Of (Vector c) (Stream (Of c) m r)))
  go !cellsLen !cells !s1 = do
    e <- skipWhile isNull s1
    case e of
      Left r ->
        return $
          if isGood r
            then Right (reverseVectorStrictList cellsLen cells :> return r)
            else Left (SiphonError 0 RowErrorParse)
      Right (c :> s2) -> handleResult cellsLen cells (parseCell c) s2
  handleResult ::
    Int ->
    StrictList c ->
    ATYP.IResult c (CellResult c) ->
    Stream (Of c) m r ->
    m (Either SiphonError (Of (Vector c) (Stream (Of c) m r)))
  handleResult !cellsLen !cells !result s1 = case result of
    ATYP.Fail _ _ _ -> return $ Left $ SiphonError 0 RowErrorParse
    ATYP.Done !c1 !res -> case res of
      -- it might be wrong to ignore whether or not the stream has ended
      CellResultNewline cd _ -> do
        let v = reverseVectorStrictList (cellsLen + 1) (StrictListCons cd cells)
        return (Right (v :> (SMP.yield c1 >> s1)))
      CellResultData !cd ->
        if isNull c1
          then go (cellsLen + 1) (StrictListCons cd cells) s1
          else handleResult (cellsLen + 1) (StrictListCons cd cells) (parseCell c1) s1
    ATYP.Partial k -> do
      e <- skipWhile isNull s1
      case e of
        Left r -> handleResult cellsLen cells (k emptyStr) (return r)
        Right (c1 :> s2) -> handleResult cellsLen cells (k c1) s2

consumeBody ::
  forall m r c a.
  (Monad m) =>
  (c -> T.Text) ->
  (c -> ATYP.IResult c (CellResult c)) ->
  (c -> Bool) ->
  c ->
  -- | True if termination is acceptable. False if it is because of a decoding error.
  (r -> Bool) ->
  -- | index of first row, usually zero or one
  Int ->
  -- | Required row length
  Int ->
  Siphon Indexed c a ->
  Stream (Of c) m r ->
  Stream (Of a) m (Maybe SiphonError)
consumeBody toStr parseCell isNull emptyStr isGood row0 reqLen siphon s0 =
  go row0 0 StrictListNil s0
 where
  go :: Int -> Int -> StrictList c -> Stream (Of c) m r -> Stream (Of a) m (Maybe SiphonError)
  go !row !cellsLen !cells !s1 = do
    e <- lift (skipWhile isNull s1)
    case e of
      Left r ->
        return $
          if isGood r
            then Nothing
            else Just (SiphonError row RowErrorParse)
      Right (c :> s2) -> handleResult row cellsLen cells (parseCell c) s2
  handleResult ::
    Int ->
    Int ->
    StrictList c ->
    ATYP.IResult c (CellResult c) ->
    Stream (Of c) m r ->
    Stream (Of a) m (Maybe SiphonError)
  handleResult !row !cellsLen !cells !result s1 = case result of
    ATYP.Fail _ _ _ -> return $ Just $ SiphonError row RowErrorParse
    ATYP.Done !c1 !res -> case res of
      CellResultNewline !cd !ended -> do
        case decodeRow row (reverseVectorStrictList (cellsLen + 1) (StrictListCons cd cells)) of
          Left err -> return (Just err)
          Right a -> do
            SMP.yield a
            case ended of
              EndedYes -> do
                e <- lift (SM.inspect s1)
                case e of
                  Left r ->
                    return $
                      if isGood r
                        then Nothing
                        else Just (SiphonError row RowErrorParse)
                  Right _ -> error "siphon: logical error, stream should be exhausted"
              EndedNo ->
                if isNull c1
                  then go (row + 1) 0 StrictListNil s1
                  else handleResult (row + 1) 0 StrictListNil (parseCell c1) s1
      CellResultData !cd ->
        if isNull c1
          then go row (cellsLen + 1) (StrictListCons cd cells) s1
          else handleResult row (cellsLen + 1) (StrictListCons cd cells) (parseCell c1) s1
    ATYP.Partial k -> do
      e <- lift (skipWhile isNull s1)
      case e of
        Left r -> handleResult row cellsLen cells (k emptyStr) (return r)
        Right (c1 :> s2) -> handleResult row cellsLen cells (k c1) s2
  decodeRow :: Int -> Vector c -> Either SiphonError a
  decodeRow rowIx v =
    let vlen = V.length v
     in if vlen /= reqLen
          then Left $ SiphonError rowIx $ RowErrorSize reqLen vlen
          else uncheckedRunWithRow toStr rowIx siphon v

{- | You must pass the length of the list and as the first argument.
  Passing the wrong length will lead to an error.
-}
reverseVectorStrictList :: forall c. Int -> StrictList c -> Vector c
reverseVectorStrictList len sl0 = V.create $ do
  mv <- MV.new len
  go1 mv
  return mv
 where
  go1 :: forall s. MVector s c -> ST s ()
  go1 !mv = go2 (len - 1) sl0
   where
    go2 :: Int -> StrictList c -> ST s ()
    go2 _ StrictListNil = return ()
    go2 !ix (StrictListCons c slNext) = do
      MV.write mv ix c
      go2 (ix - 1) slNext

skipWhile ::
  forall m a r.
  (Monad m) =>
  (a -> Bool) ->
  Stream (Of a) m r ->
  m (Either r (Of a (Stream (Of a) m r)))
skipWhile f = go
 where
  go ::
    Stream (Of a) m r ->
    m (Either r (Of a (Stream (Of a) m r)))
  go s1 = do
    e <- SM.inspect s1
    case e of
      Left _ -> return e
      Right (a :> s2) ->
        if f a
          then go s2
          else return e

{- | Strict in the spine and in the values
This is built in reverse and then reversed by reverseVectorStrictList
when converting to a vector.
-}
data StrictList a = StrictListNil | StrictListCons !a !(StrictList a)

{- | This function uses 'unsafeIndex' to access
  elements of the 'Vector'.
-}
uncheckedRunWithRow ::
  (c -> T.Text) ->
  Int ->
  Siphon Indexed c a ->
  Vector c ->
  Either SiphonError a
uncheckedRunWithRow toStr i d v =
  mapLeft (SiphonError i . RowErrorDecode) (uncheckedRun toStr d v)

{- | This function does not check to make sure that the indicies in
  the 'Decolonnade' are in the 'Vector'. Only use this if you have
  already verified that none of the indices in the siphon are
  out of the bounds.
-}
uncheckedRun ::
  forall c a.
  (c -> T.Text) ->
  Siphon Indexed c a ->
  Vector c ->
  Either (Vector CellError) a
uncheckedRun toStr dc v = getEitherWrap (go dc)
 where
  go ::
    forall b.
    Siphon Indexed c b ->
    EitherWrap (Vector CellError) b
  go (SiphonPure b) = EitherWrap (Right b)
  go (SiphonAp (Indexed ix) decode apNext) =
    let rnext = go apNext
        content = v V.! ix -- V.unsafeIndex v ix
        rcurrent =
          maybe
            (Left (V.singleton (CellError ix (toStr content))))
            Right
            (decode content)
     in rnext <*> (EitherWrap rcurrent)

-- | Uses the argument to parse a CSV column.
headless :: (c -> Maybe a) -> Siphon CE.Headless c a
headless f = SiphonAp CE.Headless f (SiphonPure id)

{- | Uses the second argument to parse a CSV column whose
  header content matches the first column exactly.
-}
headed :: c -> (c -> Maybe a) -> Siphon CE.Headed c a
headed h f = SiphonAp (CE.Headed h) f (SiphonPure id)

{- | Uses the second argument to parse a CSV column that
  is positioned at the index given by the first argument.
-}
indexed :: Int -> (c -> Maybe a) -> Siphon Indexed c a
indexed ix f = SiphonAp (Indexed ix) f (SiphonPure id)

eqSiphonHeaders :: (Eq1 f, Eq c) => Siphon f c a -> Siphon f c b -> Bool
eqSiphonHeaders (SiphonPure _) (SiphonPure _) = True
eqSiphonHeaders (SiphonAp h0 _ s0) (SiphonAp h1 _ s1) =
  liftEq (==) h0 h1 && eqSiphonHeaders s0 s1
eqSiphonHeaders _ _ = False

showSiphonHeaders :: (Show1 f, Show c) => Siphon f c a -> String
showSiphonHeaders (SiphonPure _) = ""
showSiphonHeaders (SiphonAp h0 _ s0) = showsPrec1 10 h0 (" :> " ++ showSiphonHeaders s0)

{- $setup

This code is copied from the head section. It has to be
run before every set of tests.

>>> :set -XOverloadedStrings
>>> import Siphon (Siphon)
>>> import Colonnade (Colonnade,Headed)
>>> import qualified Siphon as S
>>> import qualified Colonnade as C
>>> import qualified Data.Text as T
>>> import Data.Text (Text)
>>> import qualified Data.Text.Lazy.IO as LTIO
>>> import qualified Data.Text.Lazy.Builder as LB
>>> import Data.Maybe (fromMaybe)
>>> data Person = Person { name :: Text, age :: Int, company :: Maybe Text}
-}