-- |The Core module provides the fundamental types, -- type classes, and functions of the library. -- -- TODO: update writeT to writeTriple, etc. module Data.RDF ( -- * Parsing RDF RdfParser(parseString, parseFile, parseURL), -- * Serializing RDF RdfSerializer(hWriteRdf, writeRdf, hWriteH, writeH, hWriteTs, writeTs, hWriteT, writeT, hWriteN, writeN), -- * RDF type RDF(empty, mkRdf, triplesOf, select, query, baseUrl, prefixMappings, addPrefixMappings), -- * RDF triples, nodes, and literals Triple(Triple), triple, Triples, sortTriples, Node(UNode, BNode, BNodeGen, LNode), LValue(PlainL, PlainLL, TypedL), -- * Supporting types and functions BaseUrl(BaseUrl), PrefixMappings(PrefixMappings), toPMList, PrefixMapping(PrefixMapping), NodeSelector, isUNode, isBNode, isLNode, equalSubjects, equalPredicates, equalObjects, isIsomorphic, subjectOf, predicateOf, objectOf, isEmpty, rdfContainsNode,tripleContainsNode, listSubjectsWithPredicate,listObjectsOfPredicate, Subject, Predicate, Object, ParseFailure(ParseFailure), {- FastString(uniq,value),mkFastString, -} s2t,t2s,unode,bnode,lnode,plainL,plainLL,typedL, View, view, fromEither, removeDupes ) where import Data.RDF.Namespace import Data.RDF.Utils ( s2t, t2s, canonicalize ) import qualified Data.Text as T import Data.List import System.IO import Text.Printf -- |A type class for ADTs that expose views to clients. class View a b where view :: a -> b -- |An alias for 'Node', defined for convenience and readability purposes. type Subject = Node -- |An alias for 'Node', defined for convenience and readability purposes. type Predicate = Node -- |An alias for 'Node', defined for convenience and readability purposes. type Object = Node -- |An RDF value is a set of (unique) RDF triples, together with the -- operations defined upon them. -- -- For information about the efficiency of the functions, see the -- documentation for the particular RDF instance. -- -- For more information about the concept of an RDF graph, see -- the following: . class RDF rdf where -- |Return the base URL of this RDF, if any. baseUrl :: rdf -> Maybe BaseUrl -- |Return the prefix mappings defined for this RDF, if any. prefixMappings :: rdf -> PrefixMappings -- |Return an RDF with the specified prefix mappings merged with -- the existing mappings. If the Bool arg is True, then a new mapping -- for an existing prefix will replace the old mapping; otherwise, -- the new mapping is ignored. addPrefixMappings :: rdf -> PrefixMappings -> Bool -> rdf -- |Return an empty RDF. empty :: rdf -- |Return a RDF containing all the given triples. Handling of duplicates -- in the input depend on the particular RDF implementation. mkRdf :: Triples -> Maybe BaseUrl -> PrefixMappings -> rdf -- |Return all triples in the RDF, as a list. triplesOf :: rdf -> Triples -- |Select the triples in the RDF that match the given selectors. -- -- The three NodeSelector parameters are optional functions that match -- the respective subject, predicate, and object of a triple. The triples -- returned are those in the given graph for which the first selector -- returns true when called on the subject, the second selector returns -- true when called on the predicate, and the third selector returns true -- when called on the ojbect. A 'Nothing' parameter is equivalent to a -- function that always returns true for the appropriate node; but -- implementations may be able to much more efficiently answer a select -- that involves a 'Nothing' parameter rather than an @(id True)@ parameter. -- -- The following call illustrates the use of select, and would result in -- the selection of all and only the triples that have a blank node -- as subject and a literal node as object: -- -- > select gr (Just isBNode) Nothing (Just isLNode) -- -- Note: this function may be very slow; see the documentation for the -- particular RDF implementation for more information. select :: rdf -> NodeSelector -> NodeSelector -> NodeSelector -> Triples -- |Return the triples in the RDF that match the given pattern, where -- the pattern (3 Maybe Node parameters) is interpreted as a triple pattern. -- -- The @Maybe Node@ params are interpreted as the subject, predicate, and -- object of a triple, respectively. @Just n@ is true iff the triple has -- a node equal to @n@ in the appropriate location; @Nothing@ is always -- true, regardless of the node in the appropriate location. -- -- For example, @ query rdf (Just n1) Nothing (Just n2) @ would return all -- and only the triples that have @n1@ as subject and @n2@ as object, -- regardless of the predicate of the triple. query :: rdf -> Maybe Node -> Maybe Node -> Maybe Node -> Triples -- |An RdfParser is a parser that knows how to parse 1 format of RDF and -- can parse an RDF document of that type from a string, a file, or a URL. -- Required configuration options will vary from instance to instance. class RdfParser p where -- |Parse RDF from the given bytestring, yielding a failure with error message or -- the resultant RDF. parseString :: forall rdf. (RDF rdf) => p -> T.Text -> Either ParseFailure rdf -- |Parse RDF from the local file with the given path, yielding a failure with error -- message or the resultant RDF in the IO monad. parseFile :: forall rdf. (RDF rdf) => p -> String -> IO (Either ParseFailure rdf) -- |Parse RDF from the remote file with the given HTTP URL (https is not supported), -- yielding a failure with error message or the resultant graph in the IO monad. parseURL :: forall rdf. (RDF rdf) => p -> String -> IO (Either ParseFailure rdf) -- |An RdfSerializer is a serializer of RDF to some particular output format, such as -- NTriples or Turtle. class RdfSerializer s where -- |Write the RDF to a file handle using whatever configuration is specified by -- the first argument. hWriteRdf :: forall rdf. (RDF rdf) => s -> Handle -> rdf -> IO () -- |Write the RDF to stdout; equivalent to @'hWriteRdf' stdout@. writeRdf :: forall rdf. (RDF rdf) => s -> rdf -> IO () -- |Write to the file handle whatever header information is required based on -- the output format. For example, if serializing to Turtle, this method would -- write the necessary \@prefix declarations and possibly a \@baseUrl declaration, -- whereas for NTriples, there is no header section at all, so this would be a no-op. hWriteH :: forall rdf. (RDF rdf) => s -> Handle -> rdf -> IO () -- |Write header information to stdout; equivalent to @'hWriteRdf' stdout@. writeH :: forall rdf. (RDF rdf) => s -> rdf -> IO () -- |Write some triples to a file handle using whatever configuration is specified -- by the first argument. -- -- WARNING: if the serialization format has header-level information -- that should be output (e.g., \@prefix declarations for Turtle), then you should -- use 'hWriteG' instead of this method unless you're sure this is safe to use, since -- otherwise the resultant document will be missing the header information and -- will not be valid. hWriteTs :: s -> Handle -> Triples -> IO () -- |Write some triples to stdout; equivalent to @'hWriteTs' stdout@. writeTs :: s -> Triples -> IO () -- |Write a single triple to the file handle using whatever configuration is -- specified by the first argument. The same WARNING applies as to 'hWriteTs'. hWriteT :: s -> Handle -> Triple -> IO () -- |Write a single triple to stdout; equivalent to @'hWriteT' stdout@. writeT :: s -> Triple -> IO () -- |Write a single node to the file handle using whatever configuration is -- specified by the first argument. The same WARNING applies as to 'hWriteTs'. hWriteN :: s -> Handle -> Node -> IO () -- |Write a single node to sdout; equivalent to @'hWriteN' stdout@. writeN :: s -> Node -> IO () -- |An RDF node, which may be either a URIRef node ('UNode'), a blank -- node ('BNode'), or a literal node ('LNode'). data Node = -- |An RDF URI reference. See -- for more -- information. UNode !T.Text -- |An RDF blank node. See -- for more -- information. | BNode !T.Text -- |An RDF blank node with an auto-generated identifier, as used in -- Turtle. | BNodeGen !Int -- |An RDF literal. See -- for more -- information. | LNode !LValue -- ============================== -- Constructor functions for Node -- |Return a URIRef node for the given bytetring URI. {-# INLINE unode #-} unode :: T.Text -> Node unode = UNode -- |Return a blank node using the given string identifier. {-# INLINE bnode #-} bnode :: T.Text -> Node bnode = BNode -- |Return a literal node using the given LValue. {-# INLINE lnode #-} lnode :: LValue -> Node lnode = LNode -- Constructor functions for Node -- ============================== -- |A list of triples. This is defined for convenience and readability. type Triples = [Triple] -- |An RDF triple is a statement consisting of a subject, predicate, -- and object, respectively. -- -- See for -- more information. data Triple = Triple !Node !Node !Node -- |A smart constructor function for 'Triple' that verifies the node arguments -- are of the correct type and creates the new 'Triple' if so or calls 'error'. -- /subj/ must be a 'UNode' or 'BNode', and /pred/ must be a 'UNode'. triple :: Subject -> Predicate -> Object -> Triple triple subj pred obj | isLNode subj = error $ "subject must be UNode or BNode: " ++ show subj | isLNode pred = error $ "predicate must be UNode, not LNode: " ++ show pred | isBNode pred = error $ "predicate must be UNode, not BNode: " ++ show pred | otherwise = Triple subj pred obj -- |The actual value of an RDF literal, represented as the 'LValue' -- parameter of an 'LNode'. data LValue = -- Constructors are not exported, because we need to have more -- control over the format of the literal bytestring that we store. -- |A plain (untyped) literal value in an unspecified language. PlainL !T.Text -- |A plain (untyped) literal value with a language specifier. | PlainLL !T.Text !T.Text -- |A typed literal value consisting of the literal value and -- the URI of the datatype of the value, respectively. | TypedL !T.Text !T.Text -- ================================ -- Constructor functions for LValue -- |Return a PlainL LValue for the given string value. {-# INLINE plainL #-} plainL :: T.Text -> LValue plainL = PlainL -- |Return a PlainLL LValue for the given string value and language, -- respectively. {-# INLINE plainLL #-} plainLL :: T.Text -> T.Text -> LValue plainLL = PlainLL -- |Return a TypedL LValue for the given string value and datatype URI, -- respectively. {-# INLINE typedL #-} typedL :: T.Text -> T.Text -> LValue typedL val dtype = TypedL (canonicalize dtype val) dtype -- Constructor functions for LValue -- ================================ -- |The base URL of an RDF. newtype BaseUrl = BaseUrl T.Text deriving (Eq, Ord, Show) -- |A 'NodeSelector' is either a function that returns 'True' -- or 'False' for a node, or Nothing, which indicates that all -- nodes would return 'True'. -- -- The selector is said to select, or match, the nodes for -- which it returns 'True'. -- -- When used in conjunction with the 'select' method of 'Graph', three -- node selectors are used to match a triple. type NodeSelector = Maybe (Node -> Bool) -- |Represents a failure in parsing an N-Triples document, including -- an error message with information about the cause for the failure. newtype ParseFailure = ParseFailure String deriving (Eq, Show) -- |A node is equal to another node if they are both the same type -- of node and if the field values are equal. instance Eq Node where (UNode bs1) == (UNode bs2) = bs1 == bs2 (BNode bs1) == (BNode bs2) = bs1 == bs2 (BNodeGen i1) == (BNodeGen i2) = i1 == i2 (LNode l1) == (LNode l2) = l1 == l2 _ == _ = False -- |Node ordering is defined first by type, with Unode < BNode < BNodeGen -- < LNode PlainL < LNode PlainLL < LNode TypedL, and secondly by -- the natural ordering of the node value. -- -- E.g., a '(UNode _)' is LT any other type of node, and a -- '(LNode (TypedL _ _))' is GT any other type of node, and the ordering -- of '(BNodeGen 44)' and '(BNodeGen 3)' is that of the values, or -- 'compare 44 3', GT. instance Ord Node where compare = compareNode compareNode :: Node -> Node -> Ordering compareNode (UNode bs1) (UNode bs2) = compare bs1 bs2 compareNode (UNode _) _ = LT compareNode (BNode bs1) (BNode bs2) = compare bs1 bs2 compareNode (BNode _) (UNode _) = GT compareNode (BNode _) _ = LT compareNode (BNodeGen i1) (BNodeGen i2) = compare i1 i2 compareNode (BNodeGen _) (LNode _) = LT compareNode (BNodeGen _) _ = GT compareNode (LNode (PlainL bs1)) (LNode (PlainL bs2)) = compare bs1 bs2 compareNode (LNode (PlainL _)) (LNode _) = LT compareNode (LNode (PlainLL bs1 bs1')) (LNode (PlainLL bs2 bs2')) = case compare bs1' bs2' of EQ -> compare bs1 bs2 LT -> LT GT -> GT compareNode (LNode (PlainLL _ _)) (LNode (PlainL _)) = GT compareNode (LNode (PlainLL _ _)) (LNode _) = LT compareNode (LNode (TypedL bsType1 bs1)) (LNode (TypedL bsType2 bs2)) = case compare bs1 bs2 of EQ -> compare bsType1 bsType2 LT -> LT GT -> GT compareNode (LNode (TypedL _ _)) (LNode _) = GT compareNode (LNode _) _ = GT -- |Two triples are equal iff their respective subjects, predicates, and objects -- are equal. instance Eq Triple where (Triple s1 p1 o1) == (Triple s2 p2 o2) = s1 == s2 && p1 == p2 && o1 == o2 -- |The ordering of triples is based on that of the subject, predicate, and object -- of the triple, in that order. instance Ord Triple where (Triple s1 p1 o1) `compare` (Triple s2 p2 o2) = case compareNode s1 s2 of EQ -> case compareNode p1 p2 of EQ -> compareNode o1 o2 LT -> LT GT -> GT GT -> GT LT -> LT -- |Two 'LValue' values are equal iff they are of the same type and all fields are -- equal. instance Eq LValue where (PlainL bs1) == (PlainL bs2) = bs1 == bs2 (PlainLL bs1 bs1') == (PlainLL bs2 bs2') = bs1' == bs2' && bs1 == bs2 (TypedL bsType1 bs1) == (TypedL bsType2 bs2) = bsType1 == bsType2 && bs1 == bs2 _ == _ = False -- |Ordering of 'LValue' values is as follows: (PlainL _) < (PlainLL _ _) -- < (TypedL _ _), and values of the same type are ordered by field values, -- with '(PlainLL literalValue language)' being ordered by language first and -- literal value second, and '(TypedL literalValue datatypeUri)' being ordered -- by datatype first and literal value second. instance Ord LValue where compare = compareLValue {-# INLINE compareLValue #-} compareLValue :: LValue -> LValue -> Ordering compareLValue (PlainL bs1) (PlainL bs2) = compare bs1 bs2 compareLValue (PlainL _) _ = LT compareLValue _ (PlainL _) = GT compareLValue (PlainLL bs1 bs1') (PlainLL bs2 bs2') = case compare bs1' bs2' of EQ -> compare bs1 bs2 GT -> GT LT -> LT compareLValue (PlainLL _ _) _ = LT compareLValue _ (PlainLL _ _) = GT compareLValue (TypedL l1 t1) (TypedL l2 t2) = case compare t1 t2 of EQ -> compare l1 l2 GT -> GT LT -> LT -- String representations of the various data types; generally NTriples-like. instance Show Triple where show (Triple s p o) = printf "Triple(%s,%s,%s)" (show s) (show p) (show o) instance Show Node where show (UNode uri) = "UNode(" ++ show uri ++ ")" show (BNode i) = "BNode(" ++ show i ++ ")" show (BNodeGen genId) = "BNodeGen(" ++ show genId ++ ")" show (LNode lvalue) = "LNode(" ++ show lvalue ++ ")" instance Show LValue where show (PlainL lit) = "PlainL(" ++ T.unpack lit ++ ")" show (PlainLL lit lang) = "PlainLL(" ++ T.unpack lit ++ ", " ++ T.unpack lang ++ ")" show (TypedL lit dtype) = "TypedL(" ++ T.unpack lit ++ "," ++ show dtype ++ ")" -- |Answer the given list of triples in sorted order. sortTriples :: Triples -> Triples sortTriples = sort -- |Answer the subject node of the triple. {-# INLINE subjectOf #-} subjectOf :: Triple -> Node subjectOf (Triple s _ _) = s -- |Answer the predicate node of the triple. {-# INLINE predicateOf #-} predicateOf :: Triple -> Node predicateOf (Triple _ p _) = p -- |Answer the object node of the triple. {-# INLINE objectOf #-} objectOf :: Triple -> Node objectOf (Triple _ _ o) = o -- |Answer if rdf contains node. rdfContainsNode :: forall rdf. (RDF rdf) => rdf -> Node -> Bool rdfContainsNode rdf node = let ts = triplesOf rdf xs = map (tripleContainsNode node) ts in elem True xs -- |Answer if triple contains node. tripleContainsNode :: Node -> Triple -> Bool {-# INLINE tripleContainsNode #-} tripleContainsNode node t = subjectOf t == node || predicateOf t == node || objectOf t == node -- |Answer if given node is a URI Ref node. {-# INLINE isUNode #-} isUNode :: Node -> Bool isUNode (UNode _) = True isUNode _ = False -- |Answer if given node is a blank node. {-# INLINE isBNode #-} isBNode :: Node -> Bool isBNode (BNode _) = True isBNode (BNodeGen _) = True isBNode _ = False -- |Answer if given node is a literal node. {-# INLINE isLNode #-} isLNode :: Node -> Bool isLNode (LNode _) = True isLNode _ = False -- |Determine whether two triples have equal subjects. equalSubjects :: Triple -> Triple -> Bool equalSubjects (Triple s1 _ _) (Triple s2 _ _) = s1 == s2 -- |Determine whether two triples have equal predicates. equalPredicates :: Triple -> Triple -> Bool equalPredicates (Triple _ p1 _) (Triple _ p2 _) = p1 == p2 -- |Determine whether two triples have equal objects. equalObjects :: Triple -> Triple -> Bool equalObjects (Triple _ _ o1) (Triple _ _ o2) = o1 == o2 -- |Determines whether the 'RDF' contains zero triples. isEmpty :: RDF rdf => rdf -> Bool isEmpty rdf = let ts = triplesOf rdf in null ts -- |Lists of all subjects of triples with the given predicate. listSubjectsWithPredicate :: RDF rdf => rdf -> Predicate -> [Subject] listSubjectsWithPredicate rdf pred = listNodesWithPredicate rdf pred subjectOf -- |Lists of all objects of triples with the given predicate. listObjectsOfPredicate :: RDF rdf => rdf -> Predicate -> [Object] listObjectsOfPredicate rdf pred = listNodesWithPredicate rdf pred objectOf listNodesWithPredicate :: RDF rdf => rdf -> Predicate -> (Triple -> Node) -> [Node] listNodesWithPredicate rdf pred f = let ts = triplesOf rdf xs = filter (\t -> predicateOf t == pred) ts in map f xs -- |Convert a parse result into an RDF if it was successful -- and error and terminate if not. fromEither :: RDF rdf => Either ParseFailure rdf -> rdf fromEither res = case res of (Left err) -> error (show err) (Right rdf) -> rdf -- |Remove duplicate triples, returning unique triples. This -- function may return the triples in a different order than -- given. removeDupes :: Triples -> Triples removeDupes = map head . group . sort -- |This determines if two RDF representations are equal regardless of blank -- nodc names, triple order and prefixes. In math terms, this is the \simeq -- latex operator, or ~= isIsomorphic :: forall rdf1 rdf2. (RDF rdf1, RDF rdf2) => rdf1 -> rdf2 -> Bool isIsomorphic g1 g2 = normalize g1 == normalize g2 where normalize :: forall rdf. (RDF rdf) => rdf -> Triples normalize = sort . nub . expandTriples -- |Expand the triples in a graph with the prefix map and base URL for that -- graph. expandTriples :: (RDF rdf) => rdf -> Triples expandTriples rdf = expandTriples' [] (baseUrl rdf) (prefixMappings rdf) (triplesOf rdf) expandTriples' :: Triples -> Maybe BaseUrl -> PrefixMappings -> Triples -> Triples expandTriples' acc _ _ [] = acc expandTriples' acc baseUrl prefixMappings (t:rest) = expandTriples' (normalize baseUrl prefixMappings t : acc) baseUrl prefixMappings rest where normalize baseUrl prefixMappings = expandPrefixes prefixMappings . expandBaseUrl baseUrl expandBaseUrl (Just _) triple = triple expandBaseUrl Nothing triple = triple expandPrefixes _ triple = triple