Text/CSV/ByteString.hs

--------------------------------------------------------------------
-- |
-- Module    : Text.CSV.ByteString
-- Copyright : (c) Don Stewart 2008
-- License   : BSD3
--
-- Maintainer: Don Stewart <dons@galois.com>
-- Stability : provisional
-- Portability:
--
--------------------------------------------------------------------
--
-- Parsing comma separated values format (CSV) efficiently using ByteStrings.
--

module Text.CSV.ByteString where

import qualified Data.ByteString        as S
import qualified Data.ByteString.Unsafe as S
import qualified Data.ByteString.Char8  as S

import Text.CSV.ByteString.Lex

------------------------------------------------------------------------

-- | A CSV file is a series of records. According to the RFC, the
-- records all have to have the same length. As an extension, this
-- type allows variable length records.
type CSV    = [Record]

-- | A record is a series of fields.
--
-- Each record is located on a separate line, delimited by a line break (CRLF).
type Record = [Field]

-- | A field is a strict ByteString.
--
-- Within the header and each record, there may be one or more
-- fields, separated by commas.  Each line should contain the same number
-- of fields throughout the file.  Spaces are considered part of a field
-- and should not be ignored.  The last field in the record must not be
-- followed by a comma.
type Field = S.ByteString

-- | Parse a ByteString into a CSV form.
--
-- Returns 'Nothing' for empty input. Otherwise the input is tokenised with
-- 'lexCSV' and the token stream is split into records at every 'Newline'
-- token: the payloads of the 'Item' tokens in each segment become that
-- record's fields, and any other tokens in the segment are dropped.
--
-- A final record that is not followed by a 'Newline' token is accepted;
-- it simply becomes the last record of the result.
parseCSV :: S.ByteString -> Maybe CSV
parseCSV s
   | S.null s  = Nothing
   | otherwise = Just $! parseRecords (lexCSV s) []
  where
   -- Records are accumulated in reverse order and flipped once at the end.
   parseRecords :: [CSVToken] -> CSV -> CSV
   parseRecords [] acc = reverse acc
   parseRecords xs acc = parseRecords (drop 1 rest) (fields : acc)
      where
        (line, rest) = break (== Newline) xs
        fields       = [ f | Item f <- line ]
        -- 'drop 1' skips the 'Newline' separator when there is one. Unlike
        -- 'tail' it is total, so a token stream that ends without a
        -- trailing 'Newline' no longer raises an exception.
        -- todo, handle nesting.