{-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE OverloadedLabels #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE NoImplicitPrelude #-} -- | -- -- csv data type module Data.Csv ( CsvConfig(..), defaultCsvConfig, file, Header(..), fileLines, rowEmitter, runCsv, sep, field, skipField, texts, scis, doubles, ints, ) where import Box import Control.Lens import qualified Data.Attoparsec.Text as A import Data.Generics.Labels () import qualified Data.Text as Text import qualified Data.Text.IO as Text import NumHask.Prelude import Data.Scientific data CsvConfig = CsvConfig { -- | data set name name :: Text, -- | file suffix suffix :: Text, -- | local directory dir :: Text, -- | separator csep :: Char, -- | first row is a header row header :: Header } deriving (Show, Generic, Eq) -- | CsvConfig "Hill_Valley_with_noise" ".csv" "./other" '\t' HasHeader defaultCsvConfig :: CsvConfig defaultCsvConfig = CsvConfig "Hill_Valley_with_noise" ".csv" "./other" '\t' HasHeader -- | filepath for teh config. file :: CsvConfig -> FilePath file cfg = cfg ^. #dir <> "/" <> cfg ^. #name <> cfg ^. #suffix & Text.unpack -- | does the csv have a header row? data Header = HasHeader | NoHeader deriving (Show, Eq) -- | Emits a line of Text from a file. fileLines :: Handle -> Emitter IO Text fileLines h = Emitter $ do l :: (Either IOException Text) <- try (Text.hGetLine h) pure $ case l of Left _ -> Nothing Right a -> bool (Just a) Nothing (a == "") -- | emits parsed csv rows rowEmitter :: CsvConfig -> (Char -> A.Parser a) -> Cont IO (Emitter IO (Either Text a)) rowEmitter cfg p = Cont $ \eio -> withFile (file cfg) ReadMode (\h -> eio (eParse (p (cfg ^. #csep)) $ fileLines h)) -- | Run a parser across all lines of a file. -- -- >>> xss <- rights <$> runCsv defaultCsvConfig doubles -- -- >>> length xss -- 1212 -- -- >>> all ((==101) . length) xss -- True -- -- >>> take 2 xss -- [[39.02,36.49,38.2,38.85,39.38,39.74,37.02,39.53,38.81,38.79,37.65,39.34,38.55,39.03,37.21,36.32,37.81,38.95,36.7,39.72,37.06,37.29,36.43,36.53,36.19,38.17,37.3,36.15,36.68,36.7,36.68,36.99,38.92,37.25,37.47,36.32,35.75,35.68,34.66,34.26,35.62,36.6,34.78,34.67,34.3,33.4,31.4,31.75,31.75,32.84,33.76,35.74,34.01,33.91,36.88,34.41,35.52,36.94,36.95,35.57,38.02,37.32,39.05,37.97,37.01,38.98,38.83,38.87,38.03,38.4,38.25,38.61,36.23,37.81,37.98,38.58,38.96,38.97,39.08,38.79,38.79,36.31,36.59,38.19,37.95,39.63,39.27,37.19,37.13,37.47,37.57,36.62,36.92,38.8,38.52,38.07,36.73,39.46,37.5,39.1,0.0],[1.83,1.71,1.77,1.77,1.68,1.78,1.8,1.7,1.75,1.78,1.86,1.76,1.81,1.86,1.74,1.78,1.81,2.02,2.0,2.01,2.0,2.06,2.0,1.93,1.88,1.85,1.89,1.83,1.76,1.83,1.81,1.81,1.78,1.85,1.86,1.73,1.79,1.81,1.85,1.71,1.71,1.71,1.84,1.76,1.73,1.83,1.68,1.73,1.76,1.77,1.72,1.75,1.66,1.76,1.77,1.78,1.63,1.72,1.66,1.67,1.74,1.65,1.74,1.79,1.69,1.76,1.74,1.82,1.78,1.65,1.65,1.82,1.71,1.83,1.72,1.63,1.77,1.69,1.81,1.74,1.7,1.72,1.74,1.72,1.74,1.71,1.7,1.83,1.79,1.78,1.71,1.8,1.79,1.77,1.74,1.74,1.8,1.78,1.75,1.69,1.0]] runCsv :: CsvConfig -> (Char -> A.Parser a) -> IO [Either Text a] runCsv cfg p = with (rowEmitter cfg p) toList' -- Convert an emitter to a list. toList' :: Emitter IO a -> IO [a] toList' e = go [] where go xs = do r <- emit e case r of Nothing -> pure (reverse xs) Just x -> go (x : xs) -- * low-level generic csv parser helpers -- $setup -- >>> :set -XOverloadedStrings -- | Most parsing and building routines implicity assume comma separated, and newlines separating rows. -- -- >>> A.parse (sep ',') ",ok" -- Done "ok" () sep :: Char -> A.Parser () sep c = void (A.char c) -- | an unquoted field -- Does not consume the separator token -- >>> A.parse (field ',') "field,ok" -- Done ",ok" "field" field :: Char -> A.Parser Text field c = A.takeWhile (`notElem` [c]) -- | skipping a field -- >>> A.parse (skipField ',') "field,ok" -- Done ",ok" () skipField :: Char -> A.Parser () skipField c = A.skipWhile (`notElem` [c]) -- * Block list parsers -- | parser for a csv row of [Text] -- >>> A.parseOnly (texts ',') "field1,field2" -- Right ["field1","field2"] texts :: Char -> A.Parser [Text] texts c = field c `A.sepBy1` sep c -- | parser for a csv row of [Scientific] -- >>> A.parseOnly (scis ',') "1,2.2,3.3" -- Right [1.0,2.2,3.3] scis :: Char -> A.Parser [Scientific] scis c = A.scientific `A.sepBy1` sep c -- | parser for a csv row of [Double] -- >>> A.parseOnly (doubles ',') "1,2,3" -- Right [1.0,2.0,3.0] doubles :: Char -> A.Parser [Double] doubles c = A.double `A.sepBy1` sep c -- | parser for a csv row of [Int] -- >>> A.parseOnly (ints ',') "1,2,3" -- Right [1,2,3] ints :: Char -> A.Parser [Int] ints c = (A.signed A.decimal) `A.sepBy1` sep c