{-# LANGUAGE ViewPatterns #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE CPP #-}
{- |
   Module      : Text.Pandoc.URI
   Copyright   : Copyright (C) 2006-2023 John MacFarlane
   License     : GNU GPL, version 2 or above

   Maintainer  : John MacFarlane <jgm@berkeley.edu>
   Stability   : alpha
   Portability : portable
-}
module Text.Pandoc.URI ( urlEncode
                       , escapeURI
                       , isURI
                       , schemes
                       , uriPathToPath
                       ) where
import qualified Network.HTTP.Types as HTTP
import qualified Text.Pandoc.UTF8 as UTF8
import qualified Data.Text as T
import qualified Data.Set as Set
import Data.Char (isSpace, isAscii)
import Network.URI (URI (uriScheme), parseURI, escapeURIString)

urlEncode :: T.Text -> T.Text
urlEncode :: Text -> Text
urlEncode = ByteString -> Text
UTF8.toText (ByteString -> Text) -> (Text -> ByteString) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Bool -> ByteString -> ByteString
HTTP.urlEncode Bool
True (ByteString -> ByteString)
-> (Text -> ByteString) -> Text -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> ByteString
UTF8.fromText

-- | Escape whitespace and some punctuation characters in URI.
escapeURI :: T.Text -> T.Text
escapeURI :: Text -> Text
escapeURI = String -> Text
T.pack (String -> Text) -> (Text -> String) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> String -> String
escapeURIString (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
needsEscaping) (String -> String) -> (Text -> String) -> Text -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> String
T.unpack
  where needsEscaping :: Char -> Bool
needsEscaping Char
c = Char -> Bool
isSpace Char
c Bool -> Bool -> Bool
|| (Char -> Bool) -> Text -> Bool
T.any (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
c) Text
"<>|\"{}[]^`"

--
-- IANA URIs
--

-- | Schemes from http://www.iana.org/assignments/uri-schemes.html plus
-- the unofficial schemes doi, javascript, isbn, pmid.
schemes :: Set.Set T.Text
schemes :: Set Text
schemes = [Text] -> Set Text
forall a. Ord a => [a] -> Set a
Set.fromList
  -- Official IANA schemes
  [ Text
"aaa", Text
"aaas", Text
"about", Text
"acap", Text
"acct", Text
"acr", Text
"adiumxtra", Text
"afp", Text
"afs"
  , Text
"aim", Text
"appdata", Text
"apt", Text
"attachment", Text
"aw", Text
"barion", Text
"beshare", Text
"bitcoin"
  , Text
"blob", Text
"bolo", Text
"browserext", Text
"callto", Text
"cap", Text
"chrome", Text
"chrome-extension"
  , Text
"cid", Text
"coap", Text
"coaps", Text
"com-eventbrite-attendee", Text
"content", Text
"crid", Text
"cvs"
  , Text
"data", Text
"dav", Text
"dict", Text
"dis", Text
"dlna-playcontainer", Text
"dlna-playsingle"
  , Text
"dns", Text
"dntp", Text
"dtn", Text
"dvb", Text
"ed2k", Text
"example", Text
"facetime", Text
"fax", Text
"feed"
  , Text
"feedready", Text
"file", Text
"filesystem", Text
"finger", Text
"fish", Text
"ftp", Text
"geo", Text
"gg"
  , Text
"git", Text
"gizmoproject", Text
"go", Text
"gopher", Text
"graph", Text
"gtalk", Text
"h323", Text
"ham"
  , Text
"hcp", Text
"http", Text
"https", Text
"hxxp", Text
"hxxps", Text
"hydrazone", Text
"iax", Text
"icap", Text
"icon"
  , Text
"im", Text
"imap", Text
"info", Text
"iotdisco", Text
"ipn", Text
"ipp", Text
"ipps", Text
"irc", Text
"irc6"
  , Text
"ircs", Text
"iris", Text
"iris.beep", Text
"iris.lwz", Text
"iris.xpc", Text
"iris.xpcs"
  , Text
"isostore", Text
"itms", Text
"jabber", Text
"jar", Text
"jms", Text
"keyparc", Text
"lastfm", Text
"ldap"
  , Text
"ldaps", Text
"lvlt", Text
"magnet", Text
"mailserver", Text
"mailto", Text
"maps", Text
"market"
  , Text
"message", Text
"mid", Text
"mms", Text
"modem", Text
"mongodb", Text
"moz", Text
"ms-access"
  , Text
"ms-browser-extension", Text
"ms-drive-to", Text
"ms-enrollment", Text
"ms-excel"
  , Text
"ms-gamebarservices", Text
"ms-getoffice", Text
"ms-help", Text
"ms-infopath"
  , Text
"ms-media-stream-id", Text
"ms-officeapp", Text
"ms-project", Text
"ms-powerpoint"
  , Text
"ms-publisher", Text
"ms-search-repair", Text
"ms-secondary-screen-controller"
  , Text
"ms-secondary-screen-setup", Text
"ms-settings", Text
"ms-settings-airplanemode"
  , Text
"ms-settings-bluetooth", Text
"ms-settings-camera", Text
"ms-settings-cellular"
  , Text
"ms-settings-cloudstorage", Text
"ms-settings-connectabledevices"
  , Text
"ms-settings-displays-topology", Text
"ms-settings-emailandaccounts"
  , Text
"ms-settings-language", Text
"ms-settings-location", Text
"ms-settings-lock"
  , Text
"ms-settings-nfctransactions", Text
"ms-settings-notifications"
  , Text
"ms-settings-power", Text
"ms-settings-privacy", Text
"ms-settings-proximity"
  , Text
"ms-settings-screenrotation", Text
"ms-settings-wifi", Text
"ms-settings-workplace"
  , Text
"ms-spd", Text
"ms-sttoverlay", Text
"ms-transit-to", Text
"ms-virtualtouchpad"
  , Text
"ms-visio", Text
"ms-walk-to", Text
"ms-whiteboard", Text
"ms-whiteboard-cmd", Text
"ms-word"
  , Text
"msnim", Text
"msrp", Text
"msrps", Text
"mtqp", Text
"mumble", Text
"mupdate", Text
"mvn", Text
"news", Text
"nfs"
  , Text
"ni", Text
"nih", Text
"nntp", Text
"notes", Text
"ocf", Text
"oid", Text
"onenote", Text
"onenote-cmd"
  , Text
"opaquelocktoken", Text
"pack", Text
"palm", Text
"paparazzi", Text
"pkcs11", Text
"platform", Text
"pop"
  , Text
"pres", Text
"prospero", Text
"proxy", Text
"pwid", Text
"psyc", Text
"qb", Text
"query", Text
"redis"
  , Text
"rediss", Text
"reload", Text
"res", Text
"resource", Text
"rmi", Text
"rsync", Text
"rtmfp", Text
"rtmp"
  , Text
"rtsp", Text
"rtsps", Text
"rtspu", Text
"secondlife", Text
"service", Text
"session", Text
"sftp", Text
"sgn"
  , Text
"shttp", Text
"sieve", Text
"sip", Text
"sips", Text
"skype", Text
"smb", Text
"sms", Text
"smtp", Text
"snews"
  , Text
"snmp", Text
"soap.beep", Text
"soap.beeps", Text
"soldat", Text
"spotify", Text
"ssh", Text
"steam"
  , Text
"stun", Text
"stuns", Text
"submit", Text
"svn", Text
"tag", Text
"teamspeak", Text
"tel", Text
"teliaeid"
  , Text
"telnet", Text
"tftp", Text
"things", Text
"thismessage", Text
"tip", Text
"tn3270", Text
"tool", Text
"turn"
  , Text
"turns", Text
"tv", Text
"udp", Text
"unreal", Text
"urn", Text
"ut2004", Text
"v-event", Text
"vemmi"
  , Text
"ventrilo", Text
"videotex", Text
"vnc", Text
"view-source", Text
"wais", Text
"webcal", Text
"wpid"
  , Text
"ws", Text
"wss", Text
"wtai", Text
"wyciwyg", Text
"xcon", Text
"xcon-userid", Text
"xfire"
  , Text
"xmlrpc.beep", Text
"xmlrpc.beeps", Text
"xmpp", Text
"xri", Text
"ymsgr", Text
"z39.50", Text
"z39.50r"
  , Text
"z39.50s"
  -- Unofficial schemes
  , Text
"doi", Text
"isbn", Text
"javascript", Text
"pmid"
  ]

-- | Check if the string is a valid URL with a IANA or frequently used but
-- unofficial scheme (see @schemes@).
isURI :: T.Text -> Bool
isURI :: Text -> Bool
isURI =
  -- we URI-escape non-ASCII characters because otherwise parseURI will choke:
  Bool -> (URI -> Bool) -> Maybe URI -> Bool
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Bool
False URI -> Bool
hasKnownScheme (Maybe URI -> Bool) -> (Text -> Maybe URI) -> Text -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Maybe URI
parseURI (String -> Maybe URI) -> (Text -> String) -> Text -> Maybe URI
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> String -> String
escapeURIString Char -> Bool
isAscii (String -> String) -> (Text -> String) -> Text -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> String
T.unpack
  where
    hasKnownScheme :: URI -> Bool
hasKnownScheme = (Text -> Set Text -> Bool
forall a. Ord a => a -> Set a -> Bool
`Set.member` Set Text
schemes) (Text -> Bool) -> (URI -> Text) -> URI -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
T.toLower (Text -> Text) -> (URI -> Text) -> URI -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
                     (Char -> Bool) -> Text -> Text
T.filter (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
':') (Text -> Text) -> (URI -> Text) -> URI -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Text
T.pack (String -> Text) -> (URI -> String) -> URI -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. URI -> String
uriScheme

-- | Converts the path part of a file: URI to a regular path.
-- On windows, @/c:/foo@ should be @c:/foo@.
-- On linux, @/foo@ should be @/foo@.
uriPathToPath :: T.Text -> FilePath
uriPathToPath :: Text -> String
uriPathToPath (Text -> String
T.unpack -> String
path) =
#ifdef _WINDOWS
  case path of
    '/':ps -> ps
    ps     -> ps
#else
  String
path
#endif