--------------------------------------------------------------------
-- |
-- Module    : Text.CSV.ByteString
-- Copyright : (c) Don Stewart 2008
-- License   : BSD3
--
-- Maintainer: Don Stewart <dons@galois.com>
-- Stability : provisional
-- Portability:
--
--------------------------------------------------------------------
--
-- Parsing comma separated values format (CSV) efficiently using ByteStrings.
--

module Text.CSV.ByteString where

import qualified Data.ByteString        as S
import qualified Data.ByteString.Unsafe as S
import qualified Data.ByteString.Char8  as S

import Text.CSV.ByteString.Lex

------------------------------------------------------------------------

-- | A CSV file is a series of records. According to the RFC, the
-- records all have to have the same length. As an extension, I allow
-- variable length records.
type CSV = [Record]

-- | A record is a series of fields.
-- Each record is located on a separate line, delimited by a line break (CRLF).
type Record = [Field]

-- | A field is a strict ByteString.
-- Within the header and each record, there may be one or more
-- fields, separated by commas. Each line should contain the same number
-- of fields throughout the file. Spaces are considered part of a field
-- and should not be ignored. The last field in the record must not be
-- followed by a comma.
type Field = S.ByteString

-- | Parse a ByteString into a CSV form.
--
-- Returns 'Nothing' only when the input is empty. Any non-empty input is
-- tokenised with 'lexCSV' and the resulting tokens are grouped into
-- records by 'parseRecords'; the result is forced to weak head normal
-- form before being wrapped in 'Just'.
parseCSV :: S.ByteString -> Maybe CSV
parseCSV s
    | S.null s  = Nothing
    | otherwise = Just $! parseRecords toks []
  where
    toks = lexCSV s

-- | Group a token stream into records, one record per 'Newline'-terminated
-- run of tokens.
--
-- The second argument is an accumulator holding the records parsed so far
-- in reverse order; callers normally pass @[]@. Only 'Item' tokens
-- contribute fields: any other token found inside a line is skipped. Two
-- consecutive 'Newline' tokens therefore yield an empty record.
--
-- A final line that is not terminated by a 'Newline' token is still
-- returned as a record.
parseRecords :: [CSVToken] -> CSV -> CSV
parseRecords []   acc = reverse acc
parseRecords toks acc = parseRecords (drop 1 rest) (fields : acc)
  where
    -- 'rest' is either empty (no terminating Newline) or starts with the
    -- Newline that ended this line. 'drop 1' discards that separator and,
    -- unlike 'tail', is total when 'rest' is empty.
    (line, rest) = break (== Newline) toks
    fields       = [ s | Item s <- line ]

-- todo, handle nesting.