module Swish.RDF.TurtleFormatter
( NodeGenLookupMap
, formatGraphAsText
, formatGraphAsLazyText
, formatGraphAsBuilder
, formatGraphIndent
, formatGraphDiag
, quoteText
)
where
import Swish.RDF.RDFGraph (
RDFGraph, RDFLabel(..)
, NamespaceMap, RevNamespaceMap
, getArcs
, labels
, getNamespaces,
emptyRDFGraph
, quote
, quoteT
, resRdfFirst, resRdfRest, resRdfNil
)
import Swish.RDF.Vocabulary (
isLang
, langTag
, rdfType
, rdfNil
, xsdBoolean, xsdDecimal, xsdInteger, xsdDouble
)
import Swish.RDF.GraphClass (Arc(..))
import Swish.Utils.LookupMap
( LookupEntryClass(..)
, LookupMap, emptyLookupMap, reverseLookupMap
, listLookupMap
, mapFind, mapFindMaybe, mapAdd
, mapMerge
)
import Swish.Utils.Namespace (ScopedName, getScopeLocal, getScopeURI)
import Data.Char (isDigit)
import Data.List (foldl', delete, groupBy, partition, sort, intersperse)
import Data.Monoid (Monoid(..))
import Control.Monad (liftM, when)
import Control.Monad.State (State, modify, get, put, runState)
import qualified Data.Text as T
import qualified Data.Text.Lazy as L
import qualified Data.Text.Lazy.Builder as B
quoteB :: Bool -> String -> B.Builder
quoteB f v = B.fromString $ quote f v
type SubjTree lb = [(lb,PredTree lb)]
type PredTree lb = [(lb,[lb])]
data TurtleFormatterState = TFS
{ indent :: B.Builder
, lineBreak :: Bool
, graph :: RDFGraph
, subjs :: SubjTree RDFLabel
, props :: PredTree RDFLabel
, objs :: [RDFLabel]
, nodeGenSt :: NodeGenState
, bNodesCheck :: [RDFLabel]
, traceBuf :: [String]
}
type Formatter a = State TurtleFormatterState a
emptyTFS :: NodeGenState -> TurtleFormatterState
emptyTFS ngs = TFS
{ indent = "\n"
, lineBreak = False
, graph = emptyRDFGraph
, subjs = []
, props = []
, objs = []
, nodeGenSt = ngs
, bNodesCheck = []
, traceBuf = []
}
type NodeGenLookupMap = LookupMap (RDFLabel,Int)
data NodeGenState = Ngs
{ prefixes :: NamespaceMap
, nodeMap :: NodeGenLookupMap
, nodeGen :: Int
}
emptyNgs :: NodeGenState
emptyNgs = Ngs
{ prefixes = emptyLookupMap
, nodeMap = emptyLookupMap
, nodeGen = 0
}
data LabelContext = SubjContext | PredContext | ObjContext
deriving (Eq, Show)
getIndent :: Formatter B.Builder
getIndent = indent `liftM` get
setIndent :: B.Builder -> Formatter ()
setIndent ind = modify $ \st -> st { indent = ind }
getLineBreak :: Formatter Bool
getLineBreak = lineBreak `liftM` get
setLineBreak :: Bool -> Formatter ()
setLineBreak brk = modify $ \st -> st { lineBreak = brk }
getNgs :: Formatter NodeGenState
getNgs = nodeGenSt `liftM` get
setNgs :: NodeGenState -> Formatter ()
setNgs ngs = modify $ \st -> st { nodeGenSt = ngs }
getPrefixes :: Formatter NamespaceMap
getPrefixes = prefixes `liftM` getNgs
getSubjs :: Formatter (SubjTree RDFLabel)
getSubjs = subjs `liftM` get
setSubjs :: SubjTree RDFLabel -> Formatter ()
setSubjs sl = modify $ \st -> st { subjs = sl }
getProps :: Formatter (PredTree RDFLabel)
getProps = props `liftM` get
setProps :: PredTree RDFLabel -> Formatter ()
setProps ps = modify $ \st -> st { props = ps }
getBnodesCheck :: Formatter [RDFLabel]
getBnodesCheck = bNodesCheck `liftM` get
len1 :: [a] -> Bool
len1 (_:[]) = True
len1 _ = False
getCollection ::
SubjTree RDFLabel
-> RDFLabel
-> Maybe (SubjTree RDFLabel, [RDFLabel], [RDFLabel])
getCollection subjList lbl = go subjList lbl ([],[])
where
go sl l (cs,ss) | l == resRdfNil = Just (sl, reverse cs, ss)
| otherwise = do
(pList1, sl') <- removeItem sl l
(pFirst, pList2) <- removeItem pList1 resRdfFirst
(pNext, pList3) <- removeItem pList2 resRdfRest
if and [len1 pFirst, len1 pNext, null pList3]
then go sl' (head pNext) (head pFirst : cs, l : ss)
else Nothing
extractList :: LabelContext -> RDFLabel -> Formatter (Maybe [RDFLabel])
extractList lctxt ln = do
osubjs <- getSubjs
oprops <- getProps
let mlst = getCollection osubjs' ln
fprops = filter ((`elem` [resRdfFirst, resRdfRest]) . fst) oprops
osubjs' =
case lctxt of
SubjContext -> (ln, fprops) : osubjs
_ -> osubjs
case mlst of
Just (sl,ls,_) -> do
setSubjs sl
when (lctxt == SubjContext) $ setProps $ filter ((`notElem` [resRdfFirst, resRdfRest]) . fst) oprops
return (Just ls)
Nothing -> return Nothing
removeItem :: (Eq a) => [(a,b)] -> a -> Maybe (b, [(a,b)])
removeItem os x =
let (as, bs) = break (\a -> fst a == x) os
in case bs of
((_,b):bbs) -> Just (b, as ++ bbs)
[] -> Nothing
formatGraphAsText :: RDFGraph -> T.Text
formatGraphAsText = L.toStrict . formatGraphAsLazyText
formatGraphAsLazyText :: RDFGraph -> L.Text
formatGraphAsLazyText = B.toLazyText . formatGraphAsBuilder
formatGraphAsBuilder :: RDFGraph -> B.Builder
formatGraphAsBuilder = formatGraphIndent "\n" True
formatGraphIndent :: B.Builder -> Bool -> RDFGraph -> B.Builder
formatGraphIndent indnt flag gr =
let (res, _, _, _) = formatGraphDiag indnt flag gr
in res
formatGraphDiag ::
B.Builder
-> Bool
-> RDFGraph
-> (B.Builder, NodeGenLookupMap, Int, [String])
formatGraphDiag indnt flag gr =
let fg = formatGraph indnt " .\n" False flag gr
ngs = emptyNgs {
prefixes = emptyLookupMap,
nodeGen = findMaxBnode gr
}
(out, fgs) = runState fg (emptyTFS ngs)
ogs = nodeGenSt fgs
in (out, nodeMap ogs, nodeGen ogs, traceBuf fgs)
formatGraph ::
B.Builder
-> B.Builder
-> Bool
-> Bool
-> RDFGraph
-> Formatter B.Builder
formatGraph ind end dobreak dopref gr = do
setIndent ind
setLineBreak dobreak
setGraph gr
fp <- if dopref
then formatPrefixes (getNamespaces gr)
else return mempty
more <- moreSubjects
if more
then do
fr <- formatSubjects
return $ mconcat [fp, fr, end]
else return fp
formatPrefixes :: NamespaceMap -> Formatter B.Builder
formatPrefixes pmap = do
let mls = map (pref . keyVal) (listLookupMap pmap)
ls <- sequence mls
return $ mconcat ls
where
pref (Just p,u) = nextLine $ mconcat ["@prefix ", B.fromText p, ": <", quoteB True (show u), "> ."]
pref (_,u) = nextLine $ mconcat ["@prefix : <", quoteB True (show u), "> ."]
formatSubjects :: Formatter B.Builder
formatSubjects = do
sb <- nextSubject
sbstr <- formatLabel SubjContext sb
flagP <- moreProperties
if flagP
then do
prstr <- formatProperties sb sbstr
flagS <- moreSubjects
if flagS
then do
fr <- formatSubjects
return $ mconcat [prstr, " .", fr]
else return prstr
else do
txt <- nextLine sbstr
flagS <- moreSubjects
if flagS
then do
fr <- formatSubjects
return $ mconcat [txt, " .", fr]
else return txt
hackIndent :: B.Builder
hackIndent = " "
formatProperties :: RDFLabel -> B.Builder -> Formatter B.Builder
formatProperties sb sbstr = do
pr <- nextProperty sb
prstr <- formatLabel PredContext pr
obstr <- formatObjects sb pr $ mconcat [sbstr, " ", prstr]
more <- moreProperties
let sbindent = hackIndent
if more
then do
fr <- formatProperties sb sbindent
nl <- nextLine $ obstr `mappend` " ;"
return $ nl `mappend` fr
else nextLine obstr
formatObjects :: RDFLabel -> RDFLabel -> B.Builder -> Formatter B.Builder
formatObjects sb pr prstr = do
ob <- nextObject sb pr
obstr <- formatLabel ObjContext ob
more <- moreObjects
if more
then do
let prindent = hackIndent
fr <- formatObjects sb pr prindent
nl <- nextLine $ mconcat [prstr, " ", obstr, ","]
return $ nl `mappend` fr
else return $ mconcat [prstr, " ", obstr]
insertList :: [RDFLabel] -> Formatter B.Builder
insertList [] = return "()"
insertList xs = do
ls <- mapM (formatLabel ObjContext) xs
return $ mconcat ("( " : intersperse " " ls) `mappend` " )"
insertBnode :: LabelContext -> RDFLabel -> Formatter B.Builder
insertBnode SubjContext lbl = do
flag <- moreProperties
if flag
then do
txt <- (`mappend` "\n") `liftM` formatProperties lbl ""
return $ mconcat ["[] ", txt]
else error $ "Internal error: expected properties with label: " ++ show lbl
insertBnode _ lbl = do
ost <- get
let osubjs = subjs ost
oprops = props ost
oobjs = objs ost
(bsubj, rsubjs) = partition ((== lbl) . fst) osubjs
rprops = case bsubj of
[(_,rs)] -> rs
_ -> []
nst = ost { subjs = rsubjs,
props = rprops,
objs = []
}
put nst
flag <- moreProperties
txt <- if flag
then (`mappend` "\n") `liftM` formatProperties lbl ""
else return ""
nst' <- get
let slist = map fst $ subjs nst'
nsubjs = filter (\(l,_) -> l `elem` slist) osubjs
put $ nst' { subjs = nsubjs,
props = oprops,
objs = oobjs
}
return $ mconcat ["[", txt, "]"]
setGraph :: RDFGraph -> Formatter ()
setGraph gr = do
st <- get
let ngs0 = nodeGenSt st
pre' = mapMerge (prefixes ngs0) (getNamespaces gr)
ngs' = ngs0 { prefixes = pre' }
arcs = sortArcs $ getArcs gr
nst = st { graph = gr
, subjs = arcTree arcs
, props = []
, objs = []
, nodeGenSt = ngs'
, bNodesCheck = countBnodes arcs
}
put nst
hasMore :: (TurtleFormatterState -> [b]) -> Formatter Bool
hasMore lens = (not . null . lens) `liftM` get
moreSubjects :: Formatter Bool
moreSubjects = hasMore subjs
moreProperties :: Formatter Bool
moreProperties = hasMore props
moreObjects :: Formatter Bool
moreObjects = hasMore objs
nextSubject :: Formatter RDFLabel
nextSubject = do
st <- get
let sb:sbs = subjs st
nst = st { subjs = sbs
, props = snd sb
, objs = []
}
put nst
return $ fst sb
nextProperty :: RDFLabel -> Formatter RDFLabel
nextProperty _ = do
st <- get
let pr:prs = props st
nst = st { props = prs
, objs = snd pr
}
put nst
return $ fst pr
nextObject :: RDFLabel -> RDFLabel -> Formatter RDFLabel
nextObject _ _ = do
st <- get
let ob:obs = objs st
nst = st { objs = obs }
put nst
return ob
nextLine :: B.Builder -> Formatter B.Builder
nextLine str = do
ind <- getIndent
brk <- getLineBreak
if brk
then return $ ind `mappend` str
else do
setLineBreak True
return str
formatLabel :: LabelContext -> RDFLabel -> Formatter B.Builder
formatLabel lctxt lab@(Blank (_:_)) = do
mlst <- extractList lctxt lab
case mlst of
Just lst -> insertList lst
Nothing -> do
nb1 <- getBnodesCheck
if lctxt /= PredContext && lab `notElem` nb1
then insertBnode lctxt lab
else formatNodeId lab
formatLabel ctxt (Res sn)
| ctxt == PredContext && sn == rdfType = return "a"
| ctxt == ObjContext && sn == rdfNil = return "()"
| otherwise = do
pr <- getPrefixes
let nsuri = getScopeURI sn
local = getScopeLocal sn
premap = reverseLookupMap pr :: RevNamespaceMap
prefix = mapFindMaybe nsuri premap
name = case prefix of
Just (Just p) -> B.fromText $ quoteT True $ mconcat [p, ":", local]
_ -> mconcat ["<", quoteB True (show nsuri ++ T.unpack local), ">"]
return name
formatLabel _ (Lit lit (Just dtype))
| dtype == xsdDouble = return $ B.fromText $ T.toLower lit
| dtype `elem` [xsdBoolean, xsdDecimal, xsdInteger] = return $ B.fromText lit
| otherwise = return $ quoteText lit `mappend` formatAnnotation dtype
formatLabel _ (Lit lit Nothing) = return $ quoteText lit
formatLabel _ lab = return $ B.fromString $ show lab
formatAnnotation :: ScopedName -> B.Builder
formatAnnotation a | isLang a = "@" `mappend` B.fromText (langTag a)
| otherwise = "^^" `mappend` showScopedName a
quoteText :: T.Text -> B.Builder
quoteText txt =
let st = T.unpack txt
qst = quoteB (n==1) st
n = if '\n' `elem` st || '"' `elem` st then 3 else 1
qch = B.fromString (replicate n '"')
in mconcat [qch, qst, qch]
formatNodeId :: RDFLabel -> Formatter B.Builder
formatNodeId lab@(Blank (lnc:_)) =
if isDigit lnc then mapBlankNode lab else return $ B.fromString $ show lab
formatNodeId other = error $ "formatNodeId not expecting a " ++ show other
mapBlankNode :: RDFLabel -> Formatter B.Builder
mapBlankNode lab = do
ngs <- getNgs
let cmap = nodeMap ngs
cval = nodeGen ngs
nv <- case mapFind 0 lab cmap of
0 -> do
let nval = succ cval
nmap = mapAdd cmap (lab, nval)
setNgs $ ngs { nodeGen = nval, nodeMap = nmap }
return nval
n -> return n
return $ "_:swish" `mappend` B.fromString (show nv)
showScopedName :: ScopedName -> B.Builder
showScopedName = quoteB True . show
newtype SortedArcs lb = SA [Arc lb]
sortArcs :: (Ord lb) => [Arc lb] -> SortedArcs lb
sortArcs = SA . sort
arcTree :: (Eq lb) => SortedArcs lb -> SubjTree lb
arcTree (SA as) = commonFstEq (commonFstEq id) $ map spopair as
where
spopair (Arc s p o) = (s,(p,o))
commonFstEq :: (Eq a) => ( [b] -> c ) -> [(a,b)] -> [(a,c)]
commonFstEq f ps =
[ (fst $ head sps,f $ map snd sps) | sps <- groupBy fstEq ps ]
where
fstEq (f1,_) (f2,_) = f1 == f2
findMaxBnode :: RDFGraph -> Int
findMaxBnode = maximum . map getAutoBnodeIndex . labels
getAutoBnodeIndex :: RDFLabel -> Int
getAutoBnodeIndex (Blank ('_':lns)) = res where
res = case [x | (x,t) <- reads lns, ("","") <- lex t] of
[x] -> x
_ -> 0
getAutoBnodeIndex _ = 0
countBnodes :: SortedArcs RDFLabel -> [RDFLabel]
countBnodes (SA as) = snd (foldl' ctr ([],[]) as)
where
inc b@(b1s,bms) l@(Blank _) | l `elem` bms = b
| l `elem` b1s = (delete l b1s, l:bms)
| otherwise = (l:b1s, bms)
inc b _ = b
incP b@(b1s,bms) l@(Blank _) | l `elem` bms = b
| l `elem` b1s = (delete l b1s, l:bms)
| otherwise = (b1s, l:bms)
incP b _ = b
ctr orig (Arc _ p o) = inc (incP orig p) o