{-# LANGUAGE CPP #-}
{-# LANGUAGE OverloadedStrings #-}

--------------------------------------------------------------------------------
--  See end of this file for licence information.
--------------------------------------------------------------------------------
-- |
--  Module      :  Utils
--  Copyright   :  (c) 2003, Graham Klyne, 2009 Vasili I Galchin, 2011, 2012, 2013, 2014, 2018, 2022 Douglas Burke
--  License     :  GPL V2
--
--  Maintainer  :  Douglas Burke
--  Stability   :  experimental
--  Portability :  CPP, OverloadedStrings
--
--  Support for the RDF Parsing modules.
--
--------------------------------------------------------------------------------

module Swish.RDF.Parser.Utils
    ( SpecialMap
    -- , mapPrefix
              
    -- tables
    , prefixTable, specialTable

    -- parser
    , runParserWithError
    , ParseResult
    , ignore
    , char
    , ichar
    , string
    , stringT
    , symbol
    , isymbol
    , lexeme
    , notFollowedBy
    , whiteSpace
    , skipMany
    , skipMany1
    , endBy
    , sepEndBy
    , sepEndBy1
    , manyTill
    , noneOf
    , eoln
    , fullStop
    , hex4
    , hex8
    , appendURIs
    )
    where

import Swish.Namespace (Namespace, makeNamespace, ScopedName)

import Swish.RDF.Graph (RDFGraph)
import Swish.RDF.Vocabulary
    ( namespaceRDF
    , namespaceRDFS
    , namespaceRDFD
    , namespaceOWL
    , namespaceLOG
    , rdfType
    , rdfFirst, rdfRest, rdfNil
    , owlSameAs, logImplies
    , defaultBase
    )

import Data.Char (isSpace, isHexDigit, chr)

#if MIN_VERSION_base(4, 7, 0)
import Data.Functor (($>))
#endif

import Data.Maybe (fromMaybe, fromJust)

import Network.URI (URI(..), relativeTo, parseURIReference)

import Text.ParserCombinators.Poly.StateText

import qualified Data.Map       as M
import qualified Data.Text      as T
import qualified Data.Text.Lazy as L
import qualified Data.Text.Read as R

#if !MIN_VERSION_base(4, 7, 0)
($>) :: Functor f => f a -> b -> f b
($>) = flip (<$)
#endif

-- Code

-- | Append the two URIs. Given the change in signature of
--   `Network.URI.relativeTo` in version @2.4.0.0@ of @network@,
--   it is not clear that this function is necessary. At the
--   very least, it should be changed to just return a `URI`.
--
appendURIs ::
  URI     -- ^ The base URI
  -> URI  -- ^ The URI to append (it can be an absolute URI).
  -> Either String URI
appendURIs :: URI -> URI -> Either String URI
appendURIs URI
base URI
uri =
  case URI -> String
uriScheme URI
uri of
    String
"" -> forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ URI
uri URI -> URI -> URI
`relativeTo` URI
base
    String
_  -> forall a b. b -> Either a b
Right URI
uri

-- | Type for special name lookup table
type SpecialMap = M.Map String ScopedName

-- | Define default table of namespaces
prefixTable :: [Namespace]
prefixTable :: [Namespace]
prefixTable =   [ Namespace
namespaceRDF
                , Namespace
namespaceRDFS
                , Namespace
namespaceRDFD     -- datatypes
                , Namespace
namespaceOWL
                , Namespace
namespaceLOG
                , Maybe Text -> URI -> Namespace
makeNamespace forall a. Maybe a
Nothing forall a b. (a -> b) -> a -> b
$ forall a. HasCallStack => Maybe a -> a
fromJust (String -> Maybe URI
parseURIReference String
"#") -- is this correct?
                ]

-- | Define default special-URI table.
specialTable ::
    Maybe ScopedName  -- ^ initial base URI, otherwise uses 'defaultBase'
    -> [(String,ScopedName)]
specialTable :: Maybe ScopedName -> [(String, ScopedName)]
specialTable Maybe ScopedName
mbase =
  [ (String
"a",         ScopedName
rdfType    ),
    (String
"equals",    ScopedName
owlSameAs  ),
    (String
"implies",   ScopedName
logImplies ),
    (String
"listfirst", ScopedName
rdfFirst   ),
    (String
"listrest",  ScopedName
rdfRest    ),
    (String
"listnull",  ScopedName
rdfNil     ),
    (String
"base",      forall a. a -> Maybe a -> a
fromMaybe ScopedName
defaultBase Maybe ScopedName
mbase ) 
  ]

-- Parser routines, heavily based on Parsec combinators

-- | Run the parser and return the successful parse or an error
-- message which consists of the standard Polyparse error plus
-- a fragment of the unparsed input to provide context.
--
runParserWithError :: 
  Parser a b -- ^ parser (carrying state) to apply
  -> a       -- ^ starting state for the parser
  -> L.Text       -- ^ input to be parsed
  -> Either String b
runParserWithError :: forall a b. Parser a b -> a -> Text -> Either String b
runParserWithError Parser a b
parser a
state0 Text
input = 
  let (Either String b
result, a
_, Text
unparsed) = forall s a. Parser s a -> s -> Text -> (Either String a, s, Text)
runParser Parser a b
parser a
state0 Text
input
     
      -- TODO: work out how best to report error context; for now just take the
      -- next 40 characters and assume there is enough context.
      econtext :: String
econtext = if Text -> Bool
L.null Text
unparsed
                 then String
"\n(at end of the text)\n"
                 else String
"\nRemaining input:\n" forall a. [a] -> [a] -> [a]
++ 
                      case Text -> Int64 -> Ordering
L.compareLength Text
unparsed Int64
40 of
                        Ordering
GT -> Text -> String
L.unpack (Int64 -> Text -> Text
L.take Int64
40 Text
unparsed) forall a. [a] -> [a] -> [a]
++ String
"..."
                        Ordering
_ -> Text -> String
L.unpack Text
unparsed

  in case Either String b
result of
    Left String
emsg -> forall a b. a -> Either a b
Left forall a b. (a -> b) -> a -> b
$ String
emsg forall a. [a] -> [a] -> [a]
++ String
econtext
    Either String b
_ -> Either String b
result

-- | The result of a parse, which is either an error message or a graph.
type ParseResult = Either String RDFGraph


-- | Run the parser and ignore the result.
ignore :: (Applicative f) => f a -> f ()
ignore :: forall (f :: * -> *) a. Applicative f => f a -> f ()
ignore f a
f = f a
f forall (f :: * -> *) a b. Functor f => f a -> b -> f b
$> ()

-- | Match the character.
char :: Char -> Parser s Char
char :: forall s. Char -> Parser s Char
char Char
c = forall s. (Char -> Bool) -> Parser s Char
satisfy (forall a. Eq a => a -> a -> Bool
== Char
c)

-- | Match the character, ignoring the result.
ichar :: Char -> Parser s ()
ichar :: forall s. Char -> Parser s ()
ichar = forall (f :: * -> *) a. Applicative f => f a -> f ()
ignore forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall s. Char -> Parser s Char
char

-- TODO: is there a better way to do this?
-- | Match the text.
string :: String -> Parser s String
string :: forall s. String -> Parser s String
string = forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM forall s. Char -> Parser s Char
char
  
-- | Match the text.
stringT :: T.Text -> Parser s T.Text
stringT :: forall s. Text -> Parser s Text
stringT Text
s = forall s. String -> Parser s String
string (Text -> String
T.unpack Text
s) forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return Text
s

-- | Run the parser 'many' times and ignore the result.
skipMany :: Parser s a -> Parser s ()
skipMany :: forall s a. Parser s a -> Parser s ()
skipMany = forall (f :: * -> *) a. Applicative f => f a -> f ()
ignore forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (f :: * -> *) a. Alternative f => f a -> f [a]
many
  
-- | Run the parser 'many1' times and ignore the result.
skipMany1 :: Parser s a -> Parser s ()
skipMany1 :: forall s a. Parser s a -> Parser s ()
skipMany1 = forall (f :: * -> *) a. Applicative f => f a -> f ()
ignore forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (p :: * -> *) a. PolyParse p => p a -> p [a]
many1

-- | Match zero or more occurences of
-- parser followed by separator.
endBy :: 
    Parser s a    -- ^ parser
    -> Parser s b -- ^ separator
    -> Parser s [a]
endBy :: forall s a b. Parser s a -> Parser s b -> Parser s [a]
endBy Parser s a
p Parser s b
sep = forall (f :: * -> *) a. Alternative f => f a -> f [a]
many (Parser s a
p forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser s b
sep)

-- | Match zero or more occurences of the parser followed
-- by the separator.
sepEndBy :: 
    Parser s a    -- ^ parser
    -> Parser s b -- ^ separator
    -> Parser s [a]
sepEndBy :: forall s a b. Parser s a -> Parser s b -> Parser s [a]
sepEndBy Parser s a
p Parser s b
sep = forall s a b. Parser s a -> Parser s b -> Parser s [a]
sepEndBy1 Parser s a
p Parser s b
sep forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall (f :: * -> *) a. Applicative f => a -> f a
pure []

-- | Accept one or more occurences of the parser
-- separated by the separator. Unlike 'endBy' the
-- last separator is optional.
sepEndBy1 :: 
    Parser s a    -- ^ parser
    -> Parser s b -- ^ separator
    -> Parser s [a]
sepEndBy1 :: forall s a b. Parser s a -> Parser s b -> Parser s [a]
sepEndBy1 Parser s a
p Parser s b
sep = do
  a
x <- Parser s a
p
  (Parser s b
sep forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> ((a
xforall a. a -> [a] -> [a]
:) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall s a b. Parser s a -> Parser s b -> Parser s [a]
sepEndBy Parser s a
p Parser s b
sep)) forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall (m :: * -> *) a. Monad m => a -> m a
return [a
x]
  
-- | Accept zero or more runs of the parser
-- ending with the delimiter.
manyTill :: 
    Parser s a    -- ^ parser
    -> Parser s b -- ^ delimiter
    -> Parser s [a]
manyTill :: forall s a b. Parser s a -> Parser s b -> Parser s [a]
manyTill Parser s a
p Parser s b
end = Parser s [a]
go
  where
    go :: Parser s [a]
go = (Parser s b
end forall (f :: * -> *) a b. Functor f => f a -> b -> f b
$> [])
         forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|>
         ((:) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser s a
p forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser s [a]
go)

-- | Accept any character that is not a member of the given string.
noneOf :: String -> Parser s Char           
noneOf :: forall s. String -> Parser s Char
noneOf String
istr = forall s. (Char -> Bool) -> Parser s Char
satisfy (forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` String
istr)

-- | Matches '.'.           
fullStop :: Parser s ()
fullStop :: forall s. Parser s ()
fullStop = forall s. Char -> Parser s ()
ichar Char
'.'

-- | Match the end-of-line sequence (@"\\n"@, @"\\r"@, or @"\\r\\n"@). 
eoln :: Parser s ()
-- eoln = ignore (newline <|> (lineFeed *> optional newline))
-- eoln = ignore (try (string "\r\n") <|> string "\r" <|> string "\n")
eoln :: forall s. Parser s ()
eoln = forall (f :: * -> *) a. Applicative f => f a -> f ()
ignore (forall (p :: * -> *) a. PolyParse p => [p a] -> p a
oneOf [forall s. String -> Parser s String
string String
"\r\n", forall s. String -> Parser s String
string String
"\r", forall s. String -> Parser s String
string String
"\n"])

-- | Succeed if the next character does not match the given function.
notFollowedBy :: (Char -> Bool) -> Parser s ()
notFollowedBy :: forall s. (Char -> Bool) -> Parser s ()
notFollowedBy Char -> Bool
p = do
  Char
c <- forall s. Parser s Char
next
  if Char -> Bool
p Char
c 
    then forall (m :: * -> *) a. MonadFail m => String -> m a
fail forall a b. (a -> b) -> a -> b
$ String
"Unexpected character: " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show [Char
c]
    else forall s. Text -> Parser s ()
reparse forall a b. (a -> b) -> a -> b
$ Char -> Text
L.singleton Char
c

-- | Match the given string and any trailing 'whiteSpace'.
symbol :: String -> Parser s String
symbol :: forall s. String -> Parser s String
symbol = forall s a. Parser s a -> Parser s a
lexeme forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall s. String -> Parser s String
string

-- | As 'symbol' but ignoring the result.
isymbol :: String -> Parser s ()
isymbol :: forall s. String -> Parser s ()
isymbol = forall (f :: * -> *) a. Applicative f => f a -> f ()
ignore forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall s. String -> Parser s String
symbol

-- | Convert a parser into one that also matches, and ignores,
-- trailing 'whiteSpace'.
lexeme :: Parser s a -> Parser s a
lexeme :: forall s a. Parser s a -> Parser s a
lexeme Parser s a
p = Parser s a
p forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall s. Parser s ()
whiteSpace

-- | Match white space: a space or a comment (@#@ character and anything following it
-- up to to a new line).
whiteSpace :: Parser s ()
whiteSpace :: forall s. Parser s ()
whiteSpace = forall s a. Parser s a -> Parser s ()
skipMany (forall s. Parser s ()
simpleSpace forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall s. Parser s ()
oneLineComment)

simpleSpace :: Parser s ()
simpleSpace :: forall s. Parser s ()
simpleSpace = forall (f :: * -> *) a. Applicative f => f a -> f ()
ignore forall a b. (a -> b) -> a -> b
$ forall s. (Char -> Bool) -> Parser s Text
many1Satisfy Char -> Bool
isSpace

-- TODO: this should use eoln rather than a check on \n
oneLineComment :: Parser s ()
oneLineComment :: forall s. Parser s ()
oneLineComment = (forall s. Char -> Parser s ()
ichar Char
'#' forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall s. (Char -> Bool) -> Parser s Text
manySatisfy (forall a. Eq a => a -> a -> Bool
/= Char
'\n')) forall (f :: * -> *) a b. Functor f => f a -> b -> f b
$> ()

{-

Not sure we can get this with polyparse

-- | Annotate a Parsec error with the local context - i.e. the actual text
-- that caused the error and preceeding/succeeding lines (if available)
--
annotateParsecError :: 
    Int -- ^ the number of extra lines to include in the context (<=0 is ignored)
    -> [String] -- ^ text being parsed
    -> ParseError -- ^ the parse error
    -> String -- ^ Parsec error with additional context
annotateParsecError extraLines ls err = 
    -- the following is based on the show instance of ParseError
    let ePos = errorPos err
        lNum = sourceLine ePos
        cNum = sourceColumn ePos
        -- it is possible to be at the end of the input so need
        -- to check; should produce better output than this in this
        -- case
        nLines = length ls
        ln1 = lNum - 1
        eln = max 0 extraLines
        lNums = [max 0 (ln1 - eln) .. min (nLines-1) (ln1 + eln)]
        
        beforeLines = map (ls !!) $ filter (< ln1) lNums
        afterLines  = map (ls !!) $ filter (> ln1) lNums
        
        -- in testing was able to get a line number after the text so catch this
        -- case; is it still necessary?
        errorLine = if ln1 >= nLines then "" else ls !! ln1
        arrowLine = replicate (cNum-1) ' ' ++ "^"
        finalLine = "(line " ++ show lNum ++ ", column " ++ show cNum ++ " indicated by the '^' sign above):"
        
        eHdr = "" : beforeLines ++ errorLine : arrowLine : afterLines ++ [finalLine]
        eMsg = showErrorMessages "or" "unknown parse error" "expecting" "unexpected" "end of input"
               (errorMessages err)

    in unlines eHdr ++ eMsg

-}

{-
Handle hex encoding; the spec for N3 and NTriples suggest that
only upper-case A..F are valid but you can find lower-case values
out there so support these too.
-}

hexDigit :: Parser a Char
-- hexDigit = satisfy (`elem` ['0'..'9'] ++ ['A'..'F'])
hexDigit :: forall s. Parser s Char
hexDigit = forall s. (Char -> Bool) -> Parser s Char
satisfy Char -> Bool
isHexDigit

-- | A four-digit hex value (e.g. @1a34@ or @03F1@).
hex4 :: Parser a Char
hex4 :: forall s. Parser s Char
hex4 = do
  String
digs <- forall (p :: * -> *) a. PolyParse p => Int -> p a -> p [a]
exactly Int
4 forall s. Parser s Char
hexDigit
  let mhex :: Either String (Int, Text)
mhex = forall a. Integral a => Reader a
R.hexadecimal (String -> Text
T.pack String
digs)
  case Either String (Int, Text)
mhex of
    Left String
emsg     -> forall (p :: * -> *) a. PolyParse p => String -> p a
failBad forall a b. (a -> b) -> a -> b
$ String
"Internal error: unable to parse hex4: " forall a. [a] -> [a] -> [a]
++ String
emsg
    Right (Int
v, Text
"") -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Int -> Char
chr Int
v
    Right (Int
_, Text
vs) -> forall (p :: * -> *) a. PolyParse p => String -> p a
failBad forall a b. (a -> b) -> a -> b
$ String
"Internal error: hex4 remainder = " forall a. [a] -> [a] -> [a]
++ Text -> String
T.unpack Text
vs

-- | An eight-digit hex value that has a maximum of @0010FFFF@.
hex8 :: Parser a Char
hex8 :: forall s. Parser s Char
hex8 = do
  String
digs <- forall (p :: * -> *) a. PolyParse p => Int -> p a -> p [a]
exactly Int
8 forall s. Parser s Char
hexDigit
  let mhex :: Either String (Int, Text)
mhex = forall a. Integral a => Reader a
R.hexadecimal (String -> Text
T.pack String
digs)
  case Either String (Int, Text)
mhex of
    Left String
emsg     -> forall (p :: * -> *) a. PolyParse p => String -> p a
failBad forall a b. (a -> b) -> a -> b
$ String
"Internal error: unable to parse hex8: " forall a. [a] -> [a] -> [a]
++ String
emsg
    Right (Int
v, Text
"") -> if Int
v forall a. Ord a => a -> a -> Bool
<= Int
0x10FFFF
                     then forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Int -> Char
chr Int
v
                     else forall (p :: * -> *) a. PolyParse p => String -> p a
failBad String
"\\UHHHHHHHH format is limited to a maximum of \\U0010FFFF"
    Right (Int
_, Text
vs) -> forall (p :: * -> *) a. PolyParse p => String -> p a
failBad forall a b. (a -> b) -> a -> b
$ String
"Internal error: hex8 remainder = " forall a. [a] -> [a] -> [a]
++ Text -> String
T.unpack Text
vs
        
--------------------------------------------------------------------------------
--
--  Copyright (c) 2003, Graham Klyne, 2009 Vasili I Galchin,
--    2011, 2012, 2014, 2018, 2022 Douglas Burke
--  All rights reserved.
--
--  This file is part of Swish.
--
--  Swish is free software; you can redistribute it and/or modify
--  it under the terms of the GNU General Public License as published by
--  the Free Software Foundation; either version 2 of the License, or
--  (at your option) any later version.
--
--  Swish is distributed in the hope that it will be useful,
--  but WITHOUT ANY WARRANTY; without even the implied warranty of
--  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
--  GNU General Public License for more details.
--
--  You should have received a copy of the GNU General Public License
--  along with Swish; if not, write to:
--    The Free Software Foundation, Inc.,
--    59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
--
--------------------------------------------------------------------------------