{-# language OverloadedStrings #-}
{-# options_ghc -Wno-unused-imports #-}
module Algebra.Graph.IO.SV (
  parseTSV, 
  tsvSink
  ) where

import Control.Monad (when)
import Control.Monad.IO.Class (MonadIO(..))
import Data.Void (Void)

-- algebraic-graphs
import qualified Algebra.Graph as G (Graph, edge, empty, overlay)
-- bytestring
import Data.ByteString (ByteString)
-- conduit
import Conduit (MonadUnliftIO(..), MonadResource, runResourceT)
import Data.Conduit (runConduit, ConduitT, (.|), yield, await)
import qualified Data.Conduit.Combinators as C (print, sourceFile, sinkFile, map, mapM, foldM, mapWhile)
-- conduit-extra
import Data.Conduit.Zlib (ungzip)
-- csv-conduit
import Data.CSV.Conduit (CSV(..), CSVSettings(..), Row)
-- exceptions
import Control.Monad.Catch (MonadThrow(..))

-- http-conduit
import Network.HTTP.Simple (httpSource, getResponseBody, Response, Request, parseRequest, setRequestMethod)
-- megaparsec
import Text.Megaparsec (parse)
import Text.Megaparsec.Char.Lexer (decimal)
-- parser.combinators
import Control.Monad.Combinators (count)
-- primitive
import Control.Monad.Primitive (PrimMonad(..))
-- tar-conduit
import Data.Conduit.Tar (Header(..), untarChunks, TarChunk, withEntries, headerFileType, FileType(..), headerFilePath)
-- text
import Data.Text (Text)
import Data.Text.Encoding (decodeUtf8)

import Algebra.Graph.IO.Internal.Megaparsec (Parser, ParseE)
import Algebra.Graph.IO.Internal.Conduit (unTarGz, fetch)




-- | Process chunks of a (uncompressed) TSV file and output the resulting graph
--
-- NB The TSV is assumed to have three columns, where the first two contain the node IDs of the edges
tsvSink :: (MonadThrow m) => ConduitT ByteString o m (G.Graph Int)
tsvSink :: forall (m :: * -> *) o.
MonadThrow m =>
ConduitT ByteString o m (Graph Int)
tsvSink = forall (m :: * -> *).
MonadThrow m =>
ConduitT ByteString (Row Text) m ()
parseTSV forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| forall (m :: * -> *) a b. Monad m => (a -> b) -> ConduitT a b m ()
C.map Row Text -> Maybe (Edge Int)
edgeP forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| forall (m :: * -> *) a o.
Monad m =>
ConduitT (Maybe (Edge a)) o m (Graph a)
accGraph

parseTSV :: MonadThrow m => ConduitT ByteString (Row Text) m ()
parseTSV :: forall (m :: * -> *).
MonadThrow m =>
ConduitT ByteString (Row Text) m ()
parseTSV = forall s r (m :: * -> *).
(CSV s r, MonadThrow m) =>
CSVSettings -> ConduitM s r m ()
intoCSV CSVSettings
tsvSettings

edgeP :: [Text] -> Maybe (Edge Int)
edgeP :: Row Text -> Maybe (Edge Int)
edgeP Row Text
t =
  case forall (t :: * -> *) (f :: * -> *) a b.
(Traversable t, Applicative f) =>
(a -> f b) -> t a -> f (t b)
traverse (forall e s a.
Parsec e s a -> String -> s -> Either (ParseErrorBundle s e) a
parse forall e s (m :: * -> *) a.
(MonadParsec e s m, Token s ~ Char, Num a) =>
m a
decimal String
"" :: Text -> Either ParseE Int) Row Text
t of
    Left ParseE
_ -> forall a. Maybe a
Nothing
    Right (Int
a:Int
b:Int
c:[Int]
_) -> forall a. a -> Maybe a
Just forall a b. (a -> b) -> a -> b
$ forall a. a -> a -> a -> Edge a
Edge Int
a Int
b Int
c
    Right [Int]
_ -> forall a. Maybe a
Nothing

data Edge a = Edge a a a deriving (Edge a -> Edge a -> Bool
forall a. Eq a => Edge a -> Edge a -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Edge a -> Edge a -> Bool
$c/= :: forall a. Eq a => Edge a -> Edge a -> Bool
== :: Edge a -> Edge a -> Bool
$c== :: forall a. Eq a => Edge a -> Edge a -> Bool
Eq, Int -> Edge a -> ShowS
forall a. Show a => Int -> Edge a -> ShowS
forall a. Show a => [Edge a] -> ShowS
forall a. Show a => Edge a -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Edge a] -> ShowS
$cshowList :: forall a. Show a => [Edge a] -> ShowS
show :: Edge a -> String
$cshow :: forall a. Show a => Edge a -> String
showsPrec :: Int -> Edge a -> ShowS
$cshowsPrec :: forall a. Show a => Int -> Edge a -> ShowS
Show)

accGraph :: (Monad m) => ConduitT (Maybe (Edge a)) o m (G.Graph a)
accGraph :: forall (m :: * -> *) a o.
Monad m =>
ConduitT (Maybe (Edge a)) o m (Graph a)
accGraph = forall a b c. (a -> b -> c) -> b -> a -> c
flip forall (m :: * -> *) a b o.
Monad m =>
(a -> b -> m a) -> a -> ConduitT b o m a
C.foldM forall a. Graph a
G.empty forall a b. (a -> b) -> a -> b
$ \Graph a
acc Maybe (Edge a)
m -> 
  case Maybe (Edge a)
m of
    Just (Edge a
a a
b a
_) -> forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a b. (a -> b) -> a -> b
$ (a
a forall a. a -> a -> Graph a
`G.edge` a
b) forall a. Graph a -> Graph a -> Graph a
`G.overlay` Graph a
acc
    Maybe (Edge a)
Nothing -> forall (f :: * -> *) a. Applicative f => a -> f a
pure Graph a
acc

-- | tab-separated values
tsvSettings :: CSVSettings
tsvSettings :: CSVSettings
tsvSettings = Char -> Maybe Char -> CSVSettings
CSVSettings Char
'\t' forall a. Maybe a
Nothing



-- playground



-- test0 :: IO () -- (G.Graph Int)
-- test0 = do
--   rq <- parseRequest "https://graphchallenge.s3.amazonaws.com/synthetic/partitionchallenge/static/simulated_blockmodel_graph_50_nodes.tar.gz"
--   runResourceT $ runConduit $
--     fetch rq .|
--     unTarGz .|
--     parseTarEntry fname .|
--     C.print
--     where
--       fname :: FilePath
--       fname = "simulated_blockmodel_graph_50_nodes.tsv"


-- -- | Parse a single file from a .tar archive
-- parseTarEntry :: (MonadThrow m) =>
--                  FilePath -- ^ file in .tar archive
--               -> ConduitT TarChunk (G.Graph Int) m ()
-- parseTarEntry fname =
--   withEntries (\h -> when (headerFileType h == FTNormal &&
--                             headerFilePath h == fname) tsvC)


-- tsvC :: (MonadThrow m) => ConduitT ByteString (G.Graph Int) m ()
-- tsvC = do
--   g <- tsvSink
--   yield g