{-|
Module      : Data.RDF.Parser.NQuads
Description : Representation and Incremental Processing of RDF Data
Copyright   : Travis Whitaker 2016
License     : MIT
Maintainer  : pi.boy.travis@gmail.com
Stability   : Provisional
Portability : Portable

A parser for <https://www.w3.org/TR/2014/REC-n-quads-20140225/ RDF 1.1 N-Quads>.
-}

{-# LANGUAGE OverloadedStrings #-}

module Data.RDF.Parser.NQuads (
    Result
  , parseNQuads
  , parseTriple
  , parseQuad
  , parseQuadLine
  , foldGraphs
  , foldResults
  ) where

import qualified Data.Attoparsec.Text      as A
import qualified Data.Attoparsec.Text.Lazy as AL

import Data.RDF.Types
import Data.RDF.Parser.Common

import qualified Data.Text.Lazy as TL

-- | Either an 'RDFGraph' or a parse error.
type Result = Either String RDFGraph

-- | A parser for
--   <https://www.w3.org/TR/2014/REC-n-quads-20140225/ RDF 1.1 N-Quads>. This
--   parser works incrementally by first lazily splitting the input into lines,
--   then parsing each line of the N-Quads document individually. This allows
--   for incremental processing in constant space, as well as extracting any
--   valid data from an N-Quads document that contains some invalid quads.
--   'TL.Text' is used because the RDF 1.1 specification stipulates that RDF
--   should always be encoded with Unicode.
--
--   Due to its incremental nature, this parser will accept some N-Quads
--   documents that are not legal according to the RDF 1.1 specification.
--   Specifically, this parser will provide duplicate 'Triple's if they exist in
--   the input N-Quads document; a proper graph consists of true sets of nodes
--   and edges, i.e. no duplicate nodes or edges. Any downstream program
--   incrementally consuming this parser's output should take care to ignore any
--   supernumerary triples.
--
--   Likewise, if a graph's constituent triples are not contiguous in the input
--   N-Quads document, then they will not be folded into contiguous 'RDFGraph's
--   in this parser's output. Any downstream program incrementally consuming
--   this parser's output and performing graph processing that discriminates
--   based on graph labels will not necessarily be presented each contiguous
--   labeled graph as a single 'RDFGraph' record. For example, something like
--   this could be used to lazily find all 'RDFGraph' records containing a named
--   graph's 'Triple's. Downstream processing must then be able to handle a
--   single named graph spanning multiple 'RDFGraph' records.
--
-- > filterGraph :: (Maybe IRI) -> [RDFGraph] -> [RDFGraph]
-- > filterGraph gl = filter (\g -> (graphLabel g) == gl)
parseNQuads :: TL.Text -> [Result]
parseNQuads :: Text -> [Result]
parseNQuads = [Either String Quad] -> [Result]
foldResults
            forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a -> b) -> [a] -> [b]
map (forall r. Result r -> Either String r
AL.eitherResult forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Parser a -> Text -> Result a
AL.parse Parser Quad
parseQuad)
            forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> [Text]
TL.lines

-- | Fold a list of 'Quad's into a list of 'RDFGraph's, where adjacent 'Quad's
--   in the input are included in the same 'RDFGraph'.
foldGraphs :: [Quad] -> [RDFGraph]
foldGraphs :: [Quad] -> [RDFGraph]
foldGraphs [] = []
foldGraphs (Quad
quad:[Quad]
quads) = RDFGraph -> [Quad] -> [RDFGraph]
go (Maybe IRI -> [Triple] -> RDFGraph
RDFGraph (Quad -> Maybe IRI
quadGraph Quad
quad) [Quad -> Triple
quadTriple Quad
quad]) [Quad]
quads
    where go :: RDFGraph -> [Quad] -> [RDFGraph]
go RDFGraph
g [] = [RDFGraph
g]
          go g :: RDFGraph
g@(RDFGraph Maybe IRI
gl [Triple]
ts) (Quad
q:[Quad]
qs)
                | Maybe IRI
gl forall a. Eq a => a -> a -> Bool
== Quad -> Maybe IRI
quadGraph Quad
q = RDFGraph -> [Quad] -> [RDFGraph]
go (Maybe IRI -> [Triple] -> RDFGraph
RDFGraph Maybe IRI
gl (Quad -> Triple
quadTriple Quad
qforall a. a -> [a] -> [a]
:[Triple]
ts)) [Quad]
qs
                | Bool
otherwise         = RDFGraph
g forall a. a -> [a] -> [a]
: RDFGraph -> [Quad] -> [RDFGraph]
go (Maybe IRI -> [Triple] -> RDFGraph
RDFGraph (Quad -> Maybe IRI
quadGraph Quad
q)
                                                       [Quad -> Triple
quadTriple Quad
q]) [Quad]
qs

-- | Fold a list of parsed 'Quad's into a list of parsed 'RDFGraph's, where
--   adjacent 'Quad's in the input are included in the same 'RDFGraph'.
foldResults :: [Either String Quad] -> [Result]
foldResults :: [Either String Quad] -> [Result]
foldResults [] = []
foldResults (Left String
e:[Either String Quad]
quads)     = forall a b. a -> Either a b
Left String
e forall a. a -> [a] -> [a]
: [Either String Quad] -> [Result]
foldResults [Either String Quad]
quads
foldResults (Right Quad
quad:[Either String Quad]
quads) = RDFGraph -> [Either String Quad] -> [Result]
go (Maybe IRI -> [Triple] -> RDFGraph
RDFGraph (Quad -> Maybe IRI
quadGraph Quad
quad)
                                               [Quad -> Triple
quadTriple Quad
quad])
                                     [Either String Quad]
quads
    where go :: RDFGraph -> [Either String Quad] -> [Result]
go RDFGraph
g []            = [forall a b. b -> Either a b
Right RDFGraph
g]
          go RDFGraph
g (Left String
e:[Either String Quad]
qs) = forall a b. b -> Either a b
Right RDFGraph
g forall a. a -> [a] -> [a]
: forall a b. a -> Either a b
Left String
e forall a. a -> [a] -> [a]
: [Either String Quad] -> [Result]
foldResults [Either String Quad]
qs
          go g :: RDFGraph
g@(RDFGraph Maybe IRI
gl [Triple]
ts) (Right Quad
q:[Either String Quad]
qs)
                | Maybe IRI
gl forall a. Eq a => a -> a -> Bool
== Quad -> Maybe IRI
quadGraph Quad
q = RDFGraph -> [Either String Quad] -> [Result]
go (Maybe IRI -> [Triple] -> RDFGraph
RDFGraph Maybe IRI
gl (Quad -> Triple
quadTriple Quad
qforall a. a -> [a] -> [a]
:[Triple]
ts)) [Either String Quad]
qs
                | Bool
otherwise         = forall a b. b -> Either a b
Right RDFGraph
g forall a. a -> [a] -> [a]
: RDFGraph -> [Either String Quad] -> [Result]
go (Maybe IRI -> [Triple] -> RDFGraph
RDFGraph (Quad -> Maybe IRI
quadGraph Quad
q)
                                                   [Quad -> Triple
quadTriple Quad
q]) [Either String Quad]
qs

-- | Parse a single N-Quads 'Triple'.
parseTriple :: A.Parser Triple
parseTriple :: Parser Triple
parseTriple = Subject -> Predicate -> Object -> Triple
Triple forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Parser Text Subject
parseSubject forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
A.skipSpace)
                     forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (Parser Text Predicate
parsePredicate forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
A.skipSpace)
                     forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser Object
parseObject

-- | Parse a single N-Quads 'Quad'.
parseQuad :: A.Parser Quad
parseQuad :: Parser Quad
parseQuad = Triple -> Maybe IRI -> Quad
Quad forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Triple
parseTriple
                 forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> ((Parser ()
A.skipSpace forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser Text (Maybe IRI)
parseGraphLabel) forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<*
                     (Parser ()
A.skipSpace forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Char -> Parser Text Char
A.char Char
'.'))

-- | Parse a single N-Quads 'Quad' on its own line. This parser is suitable for
--   using Attoparsec's incremental input mechanism 'parse'/'feed' instead of a
--   lazy 'T.Text'.
parseQuadLine :: A.Parser Quad
parseQuadLine :: Parser Quad
parseQuadLine = Parser Quad
parseQuad forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> Parser Text Char
A.char Char
'\n'