-- |
--  Module      :  N3Parser
--  Copyright   :  (c) 2003, Graham Klyne, 2009 Vasili I Galchin, 2011 Douglas Burke
--  License     :  GPL V2
--  Maintainer  :  Douglas Burke
--  Stability   :  experimental
--  Portability :  H98
--  This Module implements a Notation 3 parser (see [1], [2], [3]), returning a
--  new 'RDFGraph' consisting of triples and namespace information parsed from
--  the supplied N3 input string, or an error indication.
--  Uses the Parsec monadic parser library.
-- 1 <http://www.w3.org/TeamSubmission/2008/SUBM-n3-20080114/>
--     Notation3 (N3): A readable RDF syntax,
--     W3C Team Submission 14 January 2008
-- 2 <http://www.w3.org/DesignIssues/Notation3.html>
--     Tim Berners-Lee's design issues series notes and description
-- 3 <http://www.w3.org/2000/10/swap/Primer.html>
--     Notation 3 Primer by Sean Palmer
--  UTF-8 handling is not really tested.
--  Several items seem to be allowed (from looking at N3 test suites and files
--  'in the wild') that are not supported by the N3 grammar [1]. We try
--  to support these, including
--    - \"@:@\" and \"@base:@\" as valid QNames (ie a blank local component)
--    - @true@ and @false@ as well as @\@true@ and @\@false@
--    - use of lower-case characters for @\\u@ and @\\U@ escape codes
--  No performance testing has been applied.
--  Not all N3 grammar elements are supported, including:
--    - @\@forSome@ (we read it in but ignore the arguments)
--    - @\@forAll@  (this causes a parse error)
--    - formulae are lightly tested
--    - string support is incomplete (e.g. unrecognized escape characters
--      such as @\\q@ are probably handled incorrectly)

module Swish.RDF.N3Parser
    ( ParseResult
    , parseN3      
    , parseN3fromString
    , parseAnyfromString
    , parseTextFromString, parseAltFromString
    , parseNameFromString, parsePrefixFromString
    , parseAbsURIrefFromString, parseLexURIrefFromString, parseURIref2FromString
    -- * Exports for parsers that embed Notation3 in a bigger syntax
    , N3Parser, N3State(..), SpecialMap
    , whiteSpace, symbol, lexeme, eof, identStart, identLetter
    , getPrefix -- a combination of the old defaultPrefix and namedPrefix productions
    , n3symbol -- replacement for uriRef2 -- TODO: check this is semantically correct      
    , quickVariable -- was varid      
    , lexUriRef       
    , document, subgraph                                                   
    , newBlankNode
    -- The export list must be closed before the module body starts.
    ) where

import Swish.RDF.RDFGraph
    ( RDFGraph, RDFLabel(..)
    , NamespaceMap
    , LookupFormula(..) 
    , addArc 
    , setFormula
    , setNamespaces
    , emptyRDFGraph
    )

import Swish.RDF.GraphClass
    ( arc )

import Swish.Utils.LookupMap
    ( LookupMap(..)
    , mapFind, mapFindMaybe, mapReplaceOrAdd )

import Swish.Utils.Namespace
    ( Namespace(..)
    , ScopedName(..)
    , getScopePrefix 
    , getScopedNameURI
    , makeScopedName, makeUriScopedName
    , makeQNameScopedName
    , nullScopedName
    )

import Swish.Utils.QName (QName, getQNameURI)

import Swish.RDF.Vocabulary
    ( langName
    , rdf_type
    , rdf_first, rdf_rest, rdf_nil
    , owl_sameAs, log_implies
    , xsd_boolean, xsd_integer, xsd_decimal, xsd_double
    )

import Swish.RDF.RDFParser
    ( SpecialMap
    , mapPrefix
    , prefixTable, specialTable
    , ParseResult, RDFParser
    , n3Style, n3Lexer, ignore
    , annotateParsecError
    , mkTypedLit
    )

import Control.Applicative
import Control.Monad (forM_, foldM)

import Network.URI (URI, 
                    relativeTo,
                    parseURI, parseURIReference, uriToString)

import Data.Maybe (fromMaybe, fromJust)

import Text.ParserCombinators.Parsec hiding (many, optional, (<|>))
import qualified Text.ParserCombinators.Parsec as PC
import qualified Text.ParserCombinators.Parsec.Token as P

import Data.Char (isSpace, chr) 

--  Set up token parsers

-- | Token parser record shared by the lexical helpers below.
lexer :: P.TokenParser N3State
lexer = n3Lexer

-- | Skip white space as defined by 'lexer'.
whiteSpace :: N3Parser ()
whiteSpace = P.whiteSpace lexer

-- | Parse the given literal text as a lexeme of 'lexer'.
symbol :: String -> N3Parser String
symbol txt = P.symbol lexer txt

-- | Run a parser as a lexeme of 'lexer'.
lexeme :: N3Parser a -> N3Parser a
lexeme p = P.lexeme lexer p

-- | First character of an identifier, as configured in 'n3Style'.
identStart :: CharParser st Char
identStart = P.identStart n3Style

-- | Subsequent character of an identifier, as configured in 'n3Style'.
identLetter :: CharParser st Char
identLetter = P.identLetter n3Style

-- Define parser state and helper functions

-- | N3 parser state
data N3State = N3State
        { graphState :: RDFGraph            -- ^ Graph under construction
        , thisNode   :: RDFLabel            -- ^ current context node (aka 'this')
        , prefixUris :: NamespaceMap        -- ^ namespace prefix mapping table
        , syntaxUris :: SpecialMap          -- ^ special name mapping table
        , nodeGen    :: Int                 -- ^ blank node id generator
        , keywordsList :: [String]          -- ^ contents of the @keywords statement
        , allowLocalNames :: Bool           -- ^ True if @keywords used so that bare names are QNames in default namespace
        }

-- | Functions to update N3State vector (use with Parsec updateState)
--
-- Record the namespace @pre -> uri@ in the prefix table of the state,
-- using 'mapReplaceOrAdd'.
setPrefix :: String -> String -> N3State -> N3State
setPrefix pre uri st =  st { prefixUris=p' }
    where
        p'    = mapReplaceOrAdd (Namespace pre uri) (prefixUris st)

-- | Set name for special syntax element
--
-- The @(nam, snam)@ pair is stored in the special-name table of the
-- state using 'mapReplaceOrAdd'.
setSName :: String -> ScopedName -> N3State -> N3State
setSName nam snam st =  st { syntaxUris=s' }
    where
        s' = mapReplaceOrAdd (nam,snam) (syntaxUris st)
-- | Set the URI for a special syntax element: the URI is wrapped in a
-- 'ScopedName' with empty prefix and empty local part and handed to
-- 'setSName'.
setSUri :: String -> String -> N3State -> N3State
setSUri nam suri = setSName nam uriName
  where
    uriName = makeScopedName "" suri ""

-- | Replace the list of tokens that may be written without the leading
-- \@ symbol. Setting the list also switches on 'allowLocalNames'.
setKeywordsList :: [String] -> N3State -> N3State
setKeywordsList ks st =
  st { allowLocalNames = True
     , keywordsList    = ks
     }

--  Functions to access state:

-- | Look up the name of a special syntax element in the state,
-- falling back to 'nullScopedName' when it is not present.
getSName :: N3State -> String -> ScopedName
getSName st nam = mapFind nullScopedName nam specials
  where
    specials = syntaxUris st

-- | URI of a special syntax element (see 'getSName').
getSUri :: N3State -> String -> String
getSUri st = getScopedNameURI . getSName st

-- | Map a prefix to a 'Namespace', taking the URI from the prefix table
-- of the state via 'mapPrefix'.
getPrefixNs :: N3State -> String -> Namespace
getPrefixNs st pre = Namespace pre uri
  where
    uri = mapPrefix (prefixUris st) pre

-- | Map ScopedName using prefix table
-- (the URI in the supplied ScopedName is ignored): the prefix of the
-- supplied name is resolved again with 'getPrefixNs' and the local part
-- is kept.
getPrefixScopedName :: N3State -> ScopedName -> ScopedName
getPrefixScopedName st snam = ScopedName (getPrefixNs st pre) loc
    where
        pre = getScopePrefix snam
        loc = snLocal snam

-- | Keywords that may currently be used without a leading \@.
getKeywordsList :: N3State -> [String]
getKeywordsList st = keywordsList st

-- | Whether bare names are currently read as QNames in the default namespace.
getAllowLocalNames :: N3State -> Bool
getAllowLocalNames st = allowLocalNames st

-- | Lift a graph transformation to a transformation of the parser state
-- (use the result with Parsec updateState). Only 'graphState' is changed.
updateGraph :: ( RDFGraph -> RDFGraph ) -> ( N3State -> N3State )
updateGraph transform st =
  let newGraph = transform (graphState st)
  in st { graphState = newGraph }

--  Define top-level parser function:
--  accepts a string and returns a graph or error

-- | Parser type used throughout this module: an 'RDFParser' whose user
-- state is 'N3State'.
type N3Parser a = RDFParser N3State a

-- | Parse a string as N3 (with no real base URI).
-- See 'parseN3' if you need to provide a base URI.
parseN3fromString ::
  String -- ^ input in N3 format.
  -> ParseResult
parseN3fromString input = parseAnyfromString document Nothing input

-- | Parse a string with an optional base URI.
-- See also 'parseN3fromString'.
parseN3 ::
  String -- ^ input in N3 format.
  -> Maybe QName -- ^ optional base URI
  -> ParseResult
parseN3 input mbase = parseAnyfromString document mbase input

-- useful for testing: parse a whole document with no base URI and
-- call 'error' with the parse message on failure.
test :: String -> RDFGraph
test input =
  case parseAnyfromString document Nothing input of
    Left emsg -> error emsg
    Right gr  -> gr

-- | Function to supply initial context and parse supplied term.
-- We augment the Parsec error with the context.
--
-- The initial state has an empty graph, the standard prefix and special
-- name tables, and a default (empty) prefix bound to the base URI with
-- @#@ appended (or just @#@ when no base is given). A base that cannot
-- be combined with @#@ yields @Left@ with an \"Invalid base\" message.
parseAnyfromString :: N3Parser a      -- ^ parser to apply
                      -> Maybe QName  -- ^ base URI of the input, or @Nothing@ to use default base value
                      -> String       -- ^ input to be parsed
                      -> Either String a
parseAnyfromString parser mbase input =
  let pmap   = LookupMap prefixTable
      muri   = fmap makeQNameScopedName mbase
      smap   = LookupMap $ specialTable muri
      pstate = N3State
              { graphState = emptyRDFGraph
              , thisNode   = NoNode
              , prefixUris = pmap
              , syntaxUris = smap
              , nodeGen    = 0
              , keywordsList = ["a", "is", "of", "true", "false"] -- not 100% sure about true/false here
              , allowLocalNames = False
              }
      -- URI bound to the empty prefix
      puri = case mbase of
        Just base -> fmap showURI $ appendUris (getQNameURI base) "#"
        _ -> Right "#"

      -- this is getting a bit ugly
  in case puri of
    Left emsg -> Left $ "Invalid base: " ++ emsg
    Right p -> case runParser parser (setPrefix "" p pstate) "" input of
      Right res -> Right res
      Left  err -> Left $ annotateParsecError 1 (lines input) err

-- | Create a fresh blank node. The 'nodeGen' counter in the state is
-- incremented and the new value is used as the node identifier.
newBlankNode :: N3Parser RDFLabel
newBlankNode = do
  st <- getState
  let next = succ (nodeGen st)
  setState (st { nodeGen = next })
  return (Blank (show next))
--  Test functions for selected element parsing

-- | Test helper: check that the input starts with the given literal text.
parseTextFromString :: String -> String -> Either String String
parseTextFromString expected input =
    parseAnyfromString (string expected) Nothing input

-- | Test helper: match either of two literal texts (first one is tried first).
parseAltFromString :: String -> String -> String -> Either String String
parseAltFromString s1 s2 input =
    parseAnyfromString alternatives Nothing input
  where
    alternatives = string s1 <|> string s2

-- | Test helper: run the 'n3Name' production on the input.
parseNameFromString :: String -> Either String String
parseNameFromString input = parseAnyfromString n3Name Nothing input

-- | Test helper: read a name and map it, as a prefix, to a 'Namespace'
-- using the prefix table of the initial parser state.
parsePrefixFromString :: String -> Either String Namespace
parsePrefixFromString =
    parseAnyfromString p Nothing
    where
        p = do
          pref <- n3Name
          st   <- getState
          return (getPrefixNs st pref)   -- map prefix to namespace

-- | Test helper: parse an explicit URI (@\<...\>@) and show the result.
parseAbsURIrefFromString :: String -> Either String String
parseAbsURIrefFromString input =
    parseAnyfromString (showURI <$> explicitURI) Nothing input
    -- parseAnyfromString absUriRef Nothing

-- | Test helper: run the 'lexUriRef' production on the input.
parseLexURIrefFromString :: String -> Either String String
parseLexURIrefFromString input = parseAnyfromString lexUriRef Nothing input

-- | Test helper: run the 'n3symbol' production on the input.
parseURIref2FromString :: String -> Either String ScopedName
parseURIref2FromString input = parseAnyfromString n3symbol Nothing input
    -- parseAnyfromString uriRef2 Nothing

--  Syntax productions

-- TODO:
--    - this parser is a *lot* slower than the original one

-- helper routines

-- | Punctuation tokens; the matched text is discarded.
comma, semiColon, fullStop :: N3Parser ()
comma     = ignore (symbol ",")
semiColon = ignore (symbol ";")
fullStop  = ignore (symbol ".")

-- | A specialization of bracket/between: run a parser between the
-- given opening and closing symbols.
br :: String -> String -> N3Parser a -> N3Parser a
br lsym rsym body = between open close body
  where
    open  = symbol lsym
    close = symbol rsym

-- | Parse the \@ that introduces a keyword. The \@ character is optional
-- if the keyword is in the current keyword list, and required otherwise.
atSign :: String -> N3Parser ()
atSign s = do
  keywords <- getKeywordsList <$> getState
  let at = ignore (char '@')
  if s `notElem` keywords
    then at
    else PC.optional at
-- | Parse a keyword (with the \@ handled by 'atSign') as a lexeme and
-- return the keyword. The keyword must not be followed by a colon.
atWord :: String -> N3Parser String
atWord s =
  -- TODO: does it really make sense to add the not-followed-by-a-colon rule here?
  -- apply to both cases even though should only really be necessary
  -- when the at sign is not given
  atSign s
    *> lexeme (string s *> notFollowedBy (char ':'))
    *> return s

-- | Render a URI as a string; the user-info part is passed through
-- unchanged (@id@).
showURI :: URI -> String
showURI = flip (uriToString id) ""

-- TODO: look at using URIs throughout
-- | String form of a URI; currently the same as 'showURI'.
getScopedNameURI' :: URI -> String
getScopedNameURI' uri = showURI uri
-- getScopedNameURI' = getScopedNameURI . makeUriScopedName . showURI

-- | Resource label for an operator name, with its prefix resolved
-- against the prefix table of the current parser state.
--
-- (The function used to have two equations with the same pattern; the
-- second could never be reached, so only one definition is kept.)
operatorLabel :: ScopedName -> N3Parser RDFLabel
operatorLabel snam = (Res . flip getPrefixScopedName snam) <$> getState

-- Add statement to graph in N3 parser state

-- | An action that takes the final label of a statement and adds the
-- completed statement to the graph in the parser state.
type AddStatement = RDFLabel -> N3Parser ()

-- | Add the arc @s p o@ to the graph under construction.
addStatement :: RDFLabel -> RDFLabel -> AddStatement
addStatement s p o = updateState (updateGraph insertArc)
  where
    insertArc = addArc (arc s p o)

-- | As 'addStatement', but with the first and last arguments swapped:
-- the object is given first and the subject last.
addStatementRev :: RDFLabel -> RDFLabel -> AddStatement
addStatementRev obj prop subj = addStatement subj prop obj

{-
A number of productions require a name, which starts with one of the
characters in initChar

and then has zero or more of the characters in bodyChar

we encode this as the n3Name production
-}

-- | Characters that may start a name ('initChar') and characters that
-- may follow the first one ('bodyChar').
initChar, bodyChar :: String
initChar =
  concat [ ['A'..'Z'], "_", ['a'..'z'], map chr extended ]
  where
    extended = concatMap (uncurry enumFromTo)
      [ (0x00c0, 0x00d6), (0x00d8, 0x00f6), (0x00f8, 0x02ff)
      , (0x0370, 0x037d), (0x037f, 0x1fff), (0x200c, 0x200d)
      , (0x2070, 0x218f), (0x2c00, 0x2fef), (0x3001, 0xd7ff)
      , (0xf900, 0xfdcf), (0xfdf0, 0xfffd), (0x00010000, 0x000effff)
      ]
bodyChar =
  concat [ "-", ['0'..'9'], ['A'..'Z'], "_", ['a'..'z'], map chr extended ]
  where
    extended = 0x00b7 : concatMap (uncurry enumFromTo)
      [ (0x00c0, 0x00d6), (0x00d8, 0x00f6), (0x00f8, 0x037d)
      , (0x037f, 0x1fff), (0x200c, 0x200d), (0x203f, 0x2040)
      , (0x2070, 0x218f), (0x2c00, 0x2fef), (0x3001, 0xd7ff)
      , (0xf900, 0xfdcf), (0xfdf0, 0xfffd), (0x00010000, 0x000effff)
      ]

-- | A name: one character from 'initChar' followed by zero or more
-- characters from 'bodyChar'.
n3Name :: N3Parser String
n3Name = (:) <$> n3Init <*> n3Body
  where
    n3Init = oneOf initChar <?> "Initial character of a name"
    n3Body = many (oneOf bodyChar) <?> "Body of the name"

{-
quickvariable ::= \?[A-Z_a-z#x00c0-#x00d6#x00d8-#x00f6#x00f8-#x02ff#x0370-#x037d#x037f-#x1fff#x200c-#x200d#x2070-#x218f#x2c00-#x2fef#x3001-#xd7ff#xf900-#xfdcf#xfdf0-#xfffd#x00010000-#x000effff][\-0-9A-Z_a-z#x00b7#x00c0-#x00d6#x00d8-#x00f6#x00f8-#x037d#x037f-#x1fff#x200c-#x200d#x203f-#x2040#x2070-#x218f#x2c00-#x2fef#x3001-#xd7ff#xf900-#xfdcf#xfdf0-#xfffd#x00010000-#x000effff]*
-}

-- TODO: is mapping to Var correct?
-- | A quick variable: @?@ followed by a name, returned as a 'Var' label.
quickVariable :: N3Parser RDFLabel
quickVariable = (Var <$> (char '?' *> n3Name)) <?> "quickvariable"

{-
string ::= ("""[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*""")|("[^"\\]*(?:\\.[^"\\]*)*")

or

string ::= tripleQuoted | singleQuoted
-}


-- | A string literal; the triple-quoted form is tried before the
-- single-quoted form.
n3string :: N3Parser String
n3string = (tripleQuoted <|> singleQuoted) <?> "string"

{-
singleQuoted ::=  "[^"\\]*(?:\\.[^"\\]*)*"
-}

-- | The characters with codes 0x20 to 0x7e inclusive.
asciiChars :: String
asciiChars = [chr 0x20 .. chr 0x7e]

-- | 'asciiChars' without the backslash and double-quote characters.
asciiCharsN3 :: String
asciiCharsN3 = [c | c <- asciiChars, c /= '\\', c /= '"']


-- | A hexadecimal digit. The grammar has only upper-case A-F but some
-- lower-case values are seen in the wild, so both are supported.
ntHexDigit :: N3Parser Char
ntHexDigit = oneOf hexDigits
  where
    hexDigits = concat [['0'..'9'], ['A'..'F'], ['a'..'f']]

-- | Exactly four hexadecimal digits, returned as the character with
-- that code.
hex4 :: N3Parser Char
hex4 = toChar <$> count 4 ntHexDigit
  where
    -- the digits have already been validated, so 'read' cannot fail
    toChar digs = chr (read ("0x" ++ digs) :: Int)
-- | Exactly eight hexadecimal digits, returned as the character with
-- that code. Values above 0x10FFFF are rejected with a parse error.
hex8 :: N3Parser Char
hex8 = count 8 ntHexDigit >>= toChar . readCode
  where
    readCode :: String -> Int
    readCode digs = read ("0x" ++ digs)

    toChar code
      | code <= 0x10FFFF = return (chr code)
      | otherwise =
          unexpected "\\UHHHHHHHH format is limited to a maximum of \\U0010FFFF"

{-
This is very similar to NTriples except that we also allow the escaping of '
even though it is not required.

The Python rules allow \N{name}, where name is the Unicode name. It's
not clear whether we need to support this too, so for now we do not.
-}

-- | The part of an escape sequence that follows the backslash:
-- @t@, @n@, @r@, a double or single quote, a backslash, @u@ plus four
-- hex digits, or @U@ plus eight hex digits.
protectedChar :: N3Parser Char
protectedChar =
  ('\t' <$ char 't')
  <|> ('\n' <$ char 'n')
  <|> ('\r' <$ char 'r')
  <|> char '"'
  <|> char '\''
  <|> char '\\'
  <|> (char 'u' *> hex4)
  <|> (char 'U' *> hex8)

-- | Accept an escape sequence or any character as long as it isn't
-- a new-line or quote. Unrecognized escape sequences should therefore
-- be left alone by this: the backslash itself is returned and the
-- following character is read as an ordinary character next time.
--
-- Two further alternatives (@oneOf asciiCharsN3@ and @oneOf bodyChar@)
-- used to follow @noneOf@. They could never succeed, because every
-- character they accept is already accepted by @noneOf@, and the second
-- one searched the whole of 'bodyChar' each time a string ended. They
-- are removed; only the \"expected\" labels in error messages change.
n3Character :: N3Parser Char
n3Character = 
  (char '\\' *> (protectedChar <|> return '\\'))
  <|> noneOf "\"\n"

-- | A double-quote character.
sQuot :: N3Parser Char
sQuot = char '"'

-- | A single-line string: zero or more 'n3Character's between double quotes.
singleQuoted :: N3Parser String
singleQuoted = between sQuot sQuot contents
  where
    contents = many n3Character
{-
tripleQuoted ::= """[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""
-}
-- | A triple-quoted string. Between the delimiters any 'n3Character',
-- a lone double quote or a new-line is accepted, up to the next run of
-- three double quotes.
tripleQuoted :: N3Parser String
tripleQuoted = tQuot *> manyTill (n3Character <|> sQuot <|> char '\n') tQuot
  where
    -- 'try' so that one or two quotes are not consumed by a failed match
    tQuot = try (count 3 sQuot)

-- | The namespace currently bound to the empty prefix.
getDefaultPrefix :: N3Parser Namespace
getDefaultPrefix = flip getPrefixNs "" <$> getState

-- | Record the URI as the special name @base@ in the parser state.
addBase :: URI -> N3Parser ()
addBase uri = updateState (setSUri "base" (getScopedNameURI' uri))

-- | Bind a prefix to the URI in the parser state; @Nothing@ stands for
-- the empty prefix.
addPrefix :: Maybe String -> URI -> N3Parser ()
addPrefix p uri = updateState (setPrefix name (getScopedNameURI' uri))
  where
    name = fromMaybe "" p

{-
Update the set of keywords that can be given without
an \@ sign.
-}
-- | Store a new keyword list in the parser state (see 'setKeywordsList').
updateKeywordsList :: [String] -> N3Parser ()
updateKeywordsList ks = updateState (setKeywordsList ks)

{-
document ::=  | statements_optional EOF
-}

-- | Parse a complete document: leading white space, the statements and
-- the end of input. The result is the graph from the final state with
-- the namespaces set from the final prefix table.
document :: N3Parser RDFGraph
document = mkGr <$> (whiteSpace *> statementsOptional *> eof *> getState)
  where
    mkGr s = setNamespaces (prefixUris s) (graphState s)

{-
statements_optional ::=  | statement  "."  statements_optional
                         | void
-}


-- | Zero or more statements, each one followed by a full stop.
statementsOptional :: N3Parser ()
statementsOptional = ignore (lexeme statement `endBy` fullStop)
{-
statement ::=  | declaration
               | existential
               | simpleStatement
               | universal
-}


-- | A single statement: a declaration, an existential or universal
-- quantification, or a simple statement (tried in that order).
--
-- The 'declaration' alternative was missing, which left the expression
-- starting with @\<|\>@ and 'declaration' unused.
statement :: N3Parser ()
statement =
  declaration
  <|> existential
  <|> universal
  <|> simpleStatement
  -- having an error here leads to less informative errors in general, it seems
  -- <?> "statement (existential or universal quantification or a simple statement)"
{-
declaration ::=  |  "@base"  explicituri
                 |  "@keywords"  barename_csl
                 |  "@prefix"  prefix explicituri
-}

-- TODO: do we need the try statements here? atWord would need to have a try on '@'
-- (if applicable) which should mean being able to get rid of try
--
-- | A @base@, @keywords@ or @prefix@ declaration. Each one updates the
-- parser state ('addBase', 'updateKeywordsList' or 'getPrefix').
--
-- The three forms are alternatives, so they are joined with @\<|\>@;
-- without it the parsers were applied to one another.
declaration :: N3Parser ()
declaration = 
  (try (atWord "base") >> explicitURI >>= addBase)
  <|>
  (try (atWord "keywords") >> bareNameCsl >>= updateKeywordsList)
  <|>
  (try (atWord "prefix") *> getPrefix)
  <?> "declaration"
-- | Read a prefix followed by an explicit URI and record the binding
-- in the parser state.
getPrefix :: N3Parser ()
getPrefix =
  lexeme prefix >>= \p ->
  explicitURI   >>= \u ->
  addPrefix p u

{-
explicituri ::= <[^>]*>

Note: white space is to be ignored within <>
-}

-- | A URI written between @\<@ and @\>@. White space inside the brackets
-- is dropped and the text is resolved against the current @base@ with
-- 'appendUris'; a resolution error becomes a parse failure.
explicitURI :: N3Parser URI
explicitURI = do
  -- TODO: do the whitespace definitions match?
  raw <- between (char '<') (char '>' <?> "end of URI '>'")
                 (many (satisfy (/= '>')))
  st <- getState
  let cleaned = filter (not . isSpace) raw
  either fail return (appendUris (getSUri st "base") cleaned)
-- | Resolve a URI reference against a base URI.
--
-- An absolute @uri@ is returned as it is. A relative reference is
-- combined with @base@ using 'relativeTo'. The result is @Left@ with a
-- message when @uri@ is not a valid URI reference, when @base@ is not a
-- valid absolute URI (this used to raise an exception via 'fromJust'),
-- or when the two cannot be combined.
appendUris :: String -> String -> Either String URI
appendUris base uri =
  case parseURI uri of
    Just absuri -> Right absuri
    _ -> case parseURIReference uri of
      Just reluri -> 
        case parseURI base of
          Just baseuri ->
            case relativeTo reluri baseuri of
              Just resuri -> Right resuri
              _ -> Left $ "Unable to append <" ++ uri ++ "> to base=<" ++ base ++ ">"
          -- report a bad base instead of crashing
          _ -> Left $ "Invalid base URI: <" ++ base ++ ">"
      _ -> Left $ "Invalid URI: <" ++ uri ++ ">"
-- production from the old parser
-- | An explicit URI, parsed as a lexeme and returned as a string.
lexUriRef :: N3Parser String
lexUriRef = showURI <$> lexeme explicitURI

{-
barename ::= [A-Z_a-z#x00c0-#x00d6#x00d8-#x00f6#x00f8-#x02ff#x0370-#x037d#x037f-#x1fff#x200c-#x200d#x2070-#x218f#x2c00-#x2fef#x3001-#xd7ff#xf900-#xfdcf#xfdf0-#xfffd#x00010000-#x000effff][\-0-9A-Z_a-z#x00b7#x00c0-#x00d6#x00d8-#x00f6#x00f8-#x037d#x037f-#x1fff#x200c-#x200d#x203f-#x2040#x2070-#x218f#x2c00-#x2fef#x3001-#xd7ff#xf900-#xfdcf#xfdf0-#xfffd#x00010000-#x000effff]*
barename_csl ::=  | barename barename_csl_tail
                  | void
barename_csl_tail ::=  |  ","  barename barename_csl_tail
                       | void
-}

-- | A comma-separated list (possibly empty) of bare names.
bareNameCsl :: N3Parser [String]
bareNameCsl = lexeme bareName `sepBy` comma

-- | A bare name: the 'n3Name' production, reported as \"barename\" in
-- error messages.
bareName :: N3Parser String
bareName = n3Name <?> "barename"

{-
prefix ::= ([A-Z_a-z#x00c0-#x00d6#x00d8-#x00f6#x00f8-#x02ff#x0370-#x037d#x037f-#x1fff#x200c-#x200d#x2070-#x218f#x2c00-#x2fef#x3001-#xd7ff#xf900-#xfdcf#xfdf0-#xfffd#x00010000-#x000effff][\-0-9A-Z_a-z#x00b7#x00c0-#x00d6#x00d8-#x00f6#x00f8-#x037d#x037f-#x1fff#x200c-#x200d#x203f-#x2040#x2070-#x218f#x2c00-#x2fef#x3001-#xd7ff#xf900-#xfdcf#xfdf0-#xfffd#x00010000-#x000effff]*)?:
-}

-- | A prefix: an optional name followed by a colon. The result is
-- @Nothing@ when only the colon is present.
prefix :: N3Parser (Maybe String)
prefix = (name <* char ':') <?> "prefix name"
  where
    name = optional (lexeme n3Name)

{-
symbol ::=  | explicituri
            | qname
symbol_csl ::=  | symbol symbol_csl_tail
                | void
symbol_csl_tail ::=  |  ","  symbol symbol_csl_tail
                     | void
-}


-- | A symbol: an explicit URI (converted with 'makeUriScopedName') or
-- a QName.
n3symbol :: N3Parser ScopedName
n3symbol = (fromURI <|> qname) <?> "symbol"
  where
    fromURI = makeUriScopedName . showURI <$> explicitURI

-- | A comma-separated list (possibly empty) of symbols.
symbolCsl :: N3Parser [ScopedName]
symbolCsl = lexeme n3symbol `sepBy` comma

{-
qname ::= (([A-Z_a-z#x00c0-#x00d6#x00d8-#x00f6#x00f8-#x02ff#x0370-#x037d#x037f-#x1fff#x200c-#x200d#x2070-#x218f#x2c00-#x2fef#x3001-#xd7ff#xf900-#xfdcf#xfdf0-#xfffd#x00010000-#x000effff][\-0-9A-Z_a-z#x00b7#x00c0-#x00d6#x00d8-#x00f6#x00f8-#x037d#x037f-#x1fff#x200c-#x200d#x203f-#x2040#x2070-#x218f#x2c00-#x2fef#x3001-#xd7ff#xf900-#xfdcf#xfdf0-#xfffd#x00010000-#x000effff]*)?:)?[A-Z_a-z#x00c0-#x00d6#x00d8-#x00f6#x00f8-#x02ff#x0370-#x037d#x037f-#x1fff#x200c-#x200d#x2070-#x218f#x2c00-#x2fef#x3001-#xd7ff#xf900-#xfdcf#xfdf0-#xfffd#x00010000-#x000effff][\-0-9A-Z_a-z#x00b7#x00c0-#x00d6#x00d8-#x00f6#x00f8-#x037d#x037f-#x1fff#x200c-#x200d#x203f-#x2040#x2070-#x218f#x2c00-#x2fef#x3001-#xd7ff#xf900-#xfdcf#xfdf0-#xfffd#x00010000-#x000effff]*

  Note that, for now, we explicitly handle blank nodes
  (of the form _:name) directly in pathItem.
  This is not a good idea since qname is used elsewhere
  and so shouldn't we do the same thing there too?
-}

-- | A QName. Either a colon followed by an optional local name (in the
-- default namespace), or a name that is then completed by
-- 'fullOrLocalQName'. The local part may be empty.
qname :: N3Parser ScopedName
qname =
  (char ':' *> toSN getDefaultPrefix)
  <|> (n3Name >>= fullOrLocalQName)
  <?> "QName"
    where
      toSN p = ScopedName <$> p <*> (n3Name <|> return "")
-- | Complete a QName whose first name has been read: if a colon
-- follows, the name is a prefix ('fullQName'); otherwise it is a local
-- name ('localQName').
fullOrLocalQName :: String -> N3Parser ScopedName
fullOrLocalQName name = qualified <|> unqualified
  where
    qualified   = char ':' *> fullQName name
    unqualified = localQName name
-- | Build a QName from an already-read prefix (the colon has been
-- consumed) and an optional local name.
fullQName :: String -> N3Parser ScopedName
fullQName name = ScopedName <$> findPrefix name <*> localPart
  where
    localPart = n3Name <|> return ""
-- | Look up a prefix in the prefix table of the state. An unbound
-- prefix is a parse error.
findPrefix :: String -> N3Parser Namespace
findPrefix pre =
  getState >>= maybe unbound bound . mapFindMaybe pre . prefixUris
  where
    bound uri = return (Namespace pre uri)
    unbound   = unexpected $ "Prefix '" ++ pre ++ ":' not bound."
-- | Treat a bare word as a local name in the default namespace. This is
-- only permitted when 'allowLocalNames' is set in the state; otherwise
-- the parse fails.
localQName :: String -> N3Parser ScopedName
localQName name = do
  allowed <- getAllowLocalNames <$> getState
  if allowed
    then flip ScopedName name <$> getDefaultPrefix
    else fail "Invalid 'bare' word" -- TODO: not ideal error message; can we handle this case differently?

{-
existential ::=  |  "@forSome"  symbol_csl

For now we just read in the symbols and ignore them,
since we do not mark blank nodes as existentially quantified
(we assume this is the case).

TODO: fix this?
-}

-- | The @forSome@ keyword followed by a list of symbols. The symbols
-- are read and then discarded.
existential :: N3Parser ()
existential = () <$ (try (atWord "forSome") *> symbolCsl)

{-
simpleStatement ::=  | subject propertylist
-}

-- | A subject followed by its property list; the statements are added
-- to the graph by 'propertyListWith'.
simpleStatement :: N3Parser ()
simpleStatement = do
  subj <- subject
  propertyListWith subj
{-
subject ::=  | expression
-}

-- | The subject of a statement: an expression parsed as a lexeme.
subject :: N3Parser RDFLabel
subject = lexeme expression

{-
expression ::=  | pathitem pathtail
pathtail ::=  |  "!"  expression
              |  "^"  expression
              | void
-}


-- | A path item with an optional path tail.
--
-- With no tail the path item is returned. With a tail @op expr@ a new
-- blank node is created, one statement is added and the blank node is
-- returned:
--
--   * for @!@ the statement is @item expr blank@
--
--   * for @^@ the statement is @blank expr item@
expression :: N3Parser RDFLabel
expression = do
  start <- pathItem
  step  <- optional ((,) <$> lexeme pathOp <*> lexeme expression)
  case step of
    Just (link, pt) -> do
      bNode <- newBlankNode
      link bNode pt start
      return bNode
    Nothing -> return start
  where
    -- each operator selects how (bNode, pt, start) becomes a statement
    pathOp = (char '^' *> return addStatement)
             <|> (char '!' *> return addStatementRev)
{-
pathitem ::=  |  "("  pathlist  ")"
              |  "["  propertylist  "]"
              |  "{"  formulacontent  "}"
              | boolean
              | literal
              | numericliteral
              | quickvariable
              | symbol

pathlist ::=  | expression pathlist
              | void

Need to think about how to handle formulae, since we need to know the context
of the call to know where to add them.

TODO: may include direct support for blank nodes here,
namely convert _:stringval -> Blank stringval since although
this should be done by symbol the types don't seem to easily match
up (at first blush anyway)
-}

-- | A path item: a list, a bracketed property list, a formula, a
-- boolean, a literal, a number, a quick variable, a blank node written
-- as @_:name@, or a symbol.
pathItem :: N3Parser RDFLabel
pathItem = 
  br "(" ")" pathList
  <|> br "[" "]" propertyListBNode
  <|> br "{" "}" formulaContent
  <|> try boolean
  <|> literal
  <|> numericLiteral
  <|> quickVariable
  -- 'try' is needed: without it a name that starts with '_' but is not
  -- followed by ':' (e.g. the QName _x:y) loses its '_' to the failed
  -- match and the symbol alternative below is never attempted.
  <|> Blank <$> (try (string "_:") *> n3Name) -- TODO a hack that needs fixing
  <|> Res <$> n3symbol
  <?> "pathitem"
{-
We create a blank node for the list and return it, whilst
adding the list contents to the graph.
-}
-- | The contents of a list. An empty list gives the @rdf_nil@ label.
-- Otherwise one blank node is created per element, the nodes are
-- chained with @rdf_first@ / @rdf_rest@ statements, the last node is
-- linked to @rdf_nil@, and the first node is returned.
pathList :: N3Parser RDFLabel
pathList = do
  cts <- many (lexeme expression) <?> "pathlist"
  eNode <- operatorLabel rdf_nil
  case cts of
    [] -> return eNode
    (c:cs) -> do
      sNode <- newBlankNode
      first <- operatorLabel rdf_first
      addStatement sNode first c
      lNode <- foldM addElem sNode cs
      rest <- operatorLabel rdf_rest
      addStatement lNode rest eNode
      return sNode

    where
      -- link a new node after prevNode and store curElem in it
      addElem prevNode curElem = do
        bNode <- newBlankNode
        first <- operatorLabel rdf_first
        rest <- operatorLabel rdf_rest
        addStatement prevNode rest bNode
        addStatement bNode first curElem
        return bNode
{-
formulacontent ::=  | statementlist

statementlist ::=  | statement statementtail
                   | void
statementtail ::=  |  "."  statementlist
                   | void

We create a subgraph and assign it to a blank node, returning the
blank node. At present it is a combination of the subgraph and formula
productions from the original parser.

TODO: is it correct?
-}
-- | The contents of a formula (the part between the braces).
--
-- A new blank node is created and the statements are parsed into an
-- empty graph with that node as 'thisNode'. The outer state is then
-- restored, keeping only the advanced 'nodeGen', and the parsed graph is
-- registered as the formula of the blank node, which is returned.
--
-- The call to 'statementList' was missing, so the formula graph was
-- always empty and the enclosed statements were never consumed.
formulaContent :: N3Parser RDFLabel
formulaContent = do
  bNode <- newBlankNode
  pstate <- getState
  let fstate = pstate { graphState = emptyRDFGraph, thisNode = bNode }
  setState fstate       -- switch new state into parser
  statementList         -- parse statements of formula
  fstate' <- getState
  let nstate = pstate { nodeGen = nodeGen fstate' }
  setState nstate       -- swap back state, with updated nodeGen
  updateState $ updateGraph $ setFormula (Formula bNode (graphState fstate'))
  return bNode
-- | Parse statements into a separate, initially empty graph with the
-- given node as 'thisNode', and return that graph. Afterwards the outer
-- state is restored, except that 'nodeGen' keeps its advanced value.
subgraph :: RDFLabel -> N3Parser RDFGraph
subgraph this = do
  outer <- getState
  -- switch new state into parser
  setState (outer { graphState = emptyRDFGraph, thisNode = this })
  -- parse statements of formula
  statementsOptional
  inner <- getState
  -- swap back state, with updated nodeGen
  setState (outer { nodeGen = nodeGen inner })
  return (graphState inner)

-- | Zero or more statements separated, and optionally ended, by full stops.
statementList :: N3Parser ()
statementList = ignore (lexeme statement `sepEndBy` fullStop)

{-
boolean ::=  |  "@false"
             |  "@true"
-}

-- | The keyword @false@ or @true@, returned as a literal typed with
-- @xsd_boolean@.
boolean :: N3Parser RDFLabel
boolean = mkTypedLit xsd_boolean <$> truthWord
  where
    truthWord = try (atWord "false") <|> atWord "true"
{-
dtlang ::=  |  "@"  langcode
            |  "^^"  symbol
            | void
literal ::=  | string dtlang

langcode ::= [a-z]+(-[a-z0-9]+)*
-}


-- | A string with an optional language tag or datatype.
literal :: N3Parser RDFLabel
literal = do
  txt  <- n3string
  mtag <- optionMaybe dtlang
  return (Lit txt mtag)
-- | The suffix of a literal: @\@@ followed by a language code, or @^^@
-- followed by a symbol naming the datatype.
dtlang :: N3Parser ScopedName
dtlang = (langTag <|> dataType) <?> "dtlang"
  where
    langTag  = (char '@' *> langcode) <?> "langcode"
    dataType = try (string "^^") *> n3symbol

-- | A language code, @[a-z]+(-[a-z0-9]+)*@, converted with 'langName'.
--
-- Any number of hyphenated sub-tags is accepted, as the grammar
-- requires; previously only one was read, so a code such as
-- @zh-hans-cn@ stopped after @zh-hans@ and the rest caused a parse error.
langcode :: N3Parser ScopedName
langcode = do
  h <- many1 (oneOf ['a'..'z']) <?> "start of langcode (a to z)"
  ts <- many ( (:) <$> char '-' <*> many1 (oneOf (['a'..'z'] ++ ['0'..'9']))) <?> "a to z or 0 to 9 (langcode after the hyphen)"
  return $ langName $ h ++ concat ts
{-
decimal ::= [-+]?[0-9]+(\.[0-9]+)?
double ::= [-+]?[0-9]+(\.[0-9]+)?([eE][-+]?[0-9]+)
integer ::= [-+]?[0-9]+
numericliteral ::=  | decimal
                    | double
                    | integer
-}

-- | A numeric literal, typed as @xsd_double@, @xsd_decimal@ or
-- @xsd_integer@. The forms are tried in that order, backtracking from
-- the first two, so that the longest form is preferred.
numericLiteral :: N3Parser RDFLabel
numericLiteral =
  ( try (typed xsd_double n3double)
    <|> try (typed xsd_decimal n3decimal)
    <|> typed xsd_integer n3integer
  ) <?> "numericliteral"
  where
    typed dtype p = mkTypedLit dtype <$> p

-- | The sign of a number: @+@ or @-@.
n3sign :: N3Parser Char
n3sign = char '+' <|> char '-'

-- | An optional sign followed by one or more digits, returned as
-- written (including the sign, if present).
n3integer :: N3Parser String
n3integer = do
  sign   <- optionMaybe n3sign
  digits <- many1 digit
  return (maybe digits (: digits) sign)

-- | An integer, a decimal point and one or more digits, returned as written.
n3decimal :: N3Parser String
n3decimal = do
  whole <- n3integer
  dot   <- char '.'
  frac  <- many1 digit
  return (whole ++ dot : frac)
-- | A decimal followed by @e@ or @E@ and an integer exponent, returned
-- as written.
n3double :: N3Parser String
n3double = do
  mantissa <- n3decimal
  e        <- oneOf "eE"
  ex       <- n3integer
  return (mantissa ++ e : ex)

{-
propertylist ::=  | verb object objecttail propertylisttail
                  | void
propertylisttail ::=  |  ";"  propertylist
                      | void
-}


-- | A property list attached to a new blank node, which is returned.
--
-- it's probably important that bNode is created *after*
-- processing the plist (mainly for the assumptions made by
-- formatting the output as N3; e.g. list/sequence ordering)
propertyListBNode :: N3Parser RDFLabel
propertyListBNode = do
  pairs <- sepEndBy ((,) <$> lexeme verb <*> objectList) semiColon
  bNode <- newBlankNode
  forM_ pairs $ \((addFunc, vrb), items) ->
    mapM_ (addFunc bNode vrb) items
  return bNode

-- | A property list for the given subject. Each @verb objects@ term is
-- added to the graph as it is parsed; terms are separated, and
-- optionally ended, by semicolons.
propertyListWith :: RDFLabel -> N3Parser ()
propertyListWith subj = ignore (sepEndBy term semiColon)
  where
    term = do
      (addFunc, vrb) <- lexeme verb
      objectListWith (addFunc subj vrb)
{-
object ::=  | expression
objecttail ::=  |  ","  object objecttail
                | void

We change the production rule from objecttail to objectlist for lists of
objects (may change back).

There may be an optimisation needed in the case of

 :s :p :o1 , .. , :o<large number>.

Is parsec creating the list of actions, using sepBy1
in objectListWith, and then evaluating them all once the list
has been created?
-}


-- | The object of a statement: an expression parsed as a lexeme.
object :: N3Parser RDFLabel
object = lexeme expression

-- | One or more objects separated by commas.
objectList :: N3Parser [RDFLabel]
objectList = sepBy1 object comma

-- | Parse one object and pass it to the given action.
objectWith :: AddStatement -> N3Parser ()
objectWith addFunc = do
  obj <- object
  addFunc obj

-- | Parse one or more comma-separated objects, passing each one to the
-- given action as it is read.
objectListWith :: AddStatement -> N3Parser ()
objectListWith addFunc = ignore (objectWith addFunc `sepBy1` comma)

-- | One or more objects separated by commas (same parser as 'objectList').
objectList1 :: N3Parser [RDFLabel]
objectList1 = object `sepBy1` comma

{-
verb ::=  |  "<="
          |  "="
          |  "=>"
          |  "@a"
          |  "@has"  expression
          |  "@is"  expression  "@of"
          | expression
-}

-- | A verb, returned with the function to use for adding the statement:
-- 'addStatementRev' for reverse verbs and 'addStatement' for forward ones.
verb :: N3Parser (RDFLabel -> RDFLabel -> AddStatement, RDFLabel)
verb = (reversed <|> forwards) <?> "verb"
  where
    -- we check reverse first so that <= is tried before looking for a URI via expression rule
    reversed = (,) addStatementRev <$> verbReverse
    forwards = (,) addStatement <$> verbForward

-- | Those verbs for which subject is on the right and object on the
-- left: @\<=@ (giving @log_implies@) and @is expression of@.
verbReverse :: N3Parser RDFLabel
verbReverse = impliedBy <|> isOf
  where
    impliedBy = try (string "<=") *> operatorLabel log_implies
    isOf      = between (try (atWord "is")) (atWord "of") (lexeme expression)

-- | Those verbs with subject on the left and object on the right:
-- @=>@ (@log_implies@), @=@ (@owl_sameAs@), @a@ (@rdf_type@),
-- @has expression@, or a plain expression.
verbForward :: N3Parser RDFLabel
verbForward =  
  (try (string "=>") *> operatorLabel log_implies)
  <|> (string "=" *> operatorLabel owl_sameAs)
  <|> (try (atWord "a") *> operatorLabel rdf_type)
  -- 'try' is needed here: when "has" may be written without its @ sign,
  -- a verb that merely starts with 'h' would otherwise lose input to
  -- the failed keyword match and the expression alternative would not
  -- be attempted.
  <|> (try (atWord "has") *> lexeme expression)
  <|> lexeme expression

{-
universal ::=  |  "@forAll"  symbol_csl

TODO: what needs to be done to support universal quantification?
-}
-- | The @forAll@ keyword. Universal quantification is not supported, so
-- recognising the keyword always results in a parse error.
universal :: N3Parser ()
universal = do
  _ <- try (atWord "forAll")
  unexpected "universal (@forAll) currently unsupported." 
  -- will be something like: *> symbolCsl


