{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE RecordWildCards #-}
{-# OPTIONS_GHC -Wno-orphans -Wno-duplicate-exports #-}

-- | 'Validity' instances for 'URI' and 'URIAuth'
--
-- The main API of this module is in the orphan instances @Validity URI@ and @Validity URIAuth@.
--
-- [RFC 3986, section 3](https://datatracker.ietf.org/doc/html/rfc3986#section-3)
module Data.Validity.URI
  ( -- ** Helper functions
    dangerousURIToString,
    dangerousDetailedURIToString,

    -- * Export everything for testing, **You probably do not want to use any of the functions below**.
    module Data.Validity.URI,
  )
where

import Data.Char as Char
import Data.List (intercalate)
import Data.Maybe (listToMaybe)
import Data.Validity
import Network.URI

instance Validity URIAuth where
  validate ua@URIAuth {..} =
    mconcat
      [ genericValidate ua,
        validateUserInfo uriUserInfo,
        validateHost uriRegName,
        validatePort uriPort
      ]

instance Validity URI where
  validate u@URI {..} =
    mconcat
      [ genericValidate u,
        let rendered = dangerousURIToString u
            parsed = parseURIReference rendered
            explanation =
              unlines
                [ "Roundtrips through a parse",
                  "rendered:",
                  rendered,
                  "parsed:",
                  show $ dangerousURIToString <$> parsed
                ]
         in declare explanation $
              case parsed of
                Nothing -> False
                Just u' -> u' == u,
        validateScheme uriScheme,
        validatePath uriPath,
        validateQuery uriQuery,
        validateFragment uriFragment
      ]

-- | Validate the 'uriScheme' part of an URI
--
-- NOTE: Watch out with using this validation separately, it may not reject enough.
--
-- [RFC 3986, section 3.1](https://datatracker.ietf.org/doc/html/rfc3986#section-3.1)
-- @
-- scheme      = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
-- @
validateScheme :: String -> Validation
validateScheme uriScheme =
  mconcat
    [ declare (unwords ["The scheme", show uriScheme, "is empty or ends in ':'"]) $
        -- Laziness prevents the partial 'last' from blowing up.
        null uriScheme || last uriScheme == ':',
      case uriScheme of
        [] -> valid
        [c] -> declare "The first character is ALPHA" (charIsALPHA c)
        (c : rest) ->
          mconcat
            [ declare "The first character is ALPHA" (charIsALPHA c),
              decorateString
                -- 'init' is safe because of case match above
                (init rest)
                validateSchemeChar
            ]
    ]

-- [RFC 3986, section 3.1](https://datatracker.ietf.org/doc/html/rfc3986#section-3.1)
-- @
-- scheme      = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
-- @
validateSchemeChar :: Char -> Validation
validateSchemeChar c =
  declare "The character is alphanumeric, '+', '-' or '.'." $
    case c of
      '+' -> True
      '-' -> True
      '.' -> True
      _ -> charIsALPHA c || charIsDIGIT c

-- | Validate the 'uriUserInfo' part of an URI
--
-- NOTE: Watch out with using this validation separately, it may not reject enough.
--
-- [RFC 3986, section 3.2.1](https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.1)
--
-- @
-- userinfo    = *( unreserved / pct-encoded / sub-delims / ":" )
-- @
validateUserInfo :: String -> Validation
validateUserInfo uriUserInfo =
  mconcat
    [ declare "The user info is empty or ends in @" $
        -- Laziness prevents the partial 'last' from blowing up.
        null uriUserInfo || last uriUserInfo == '@',
      case uriUserInfo of
        [] -> valid
        _ ->
          decorateString
            -- init is safe because of the case above
            (init uriUserInfo)
            validateUserInfoChar
    ]

-- [RFC 3986, section 3.2.1](https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.1)
--
-- @
-- userinfo    = *( unreserved / pct-encoded / sub-delims / ":" )
-- @
validateUserInfoChar :: Char -> Validation
validateUserInfoChar c =
  declare "The character is unreserved, part of a percent-encoding, a sub-delimiter, or ':'" $
    case c of
      ':' -> True
      _ ->
        charIsUnreserved c
          ||
          -- NOTE:
          -- Technically this is not good enough, because incorrectly-percent-encoded values should be disallowed.
          -- However, this is good enough because we do the extra parsing elsewhere
          charIsPossiblyPartOfPercentEncoding c
          || charIsSubDelim c

-- | Validate the 'uriRegName' part of an URI
--
-- NOTE: Watch out with using this validation separately, it may not reject enough.
--
-- [RFC 3986, section 3.2.2](https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.2)
-- @
-- host        = IP-literal / IPv4address / reg-name
-- @
--
-- @
-- IP-literal = "[" ( IPv6address / IPvFuture  ) "]"
--
-- IPvFuture  = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
-- @
--
-- @
-- reg-name    = *( unreserved / pct-encoded / sub-delims )
-- @
validateHost :: String -> Validation
validateHost s =
  declare "The host looks like an IP literal, an IPv4 Address, or a reg-name" $
    stringIsIPLiteral s
      || isIPv4address s
      || stringIsRegName s

stringIsIPLiteral :: String -> Bool
stringIsIPLiteral =
  -- NOTE this is technically not good enough but it is made up for in other parts of the validation.
  const True

stringIsRegName :: String -> Bool
stringIsRegName = all isRegNameChar

isRegNameChar :: Char -> Bool
isRegNameChar c =
  charIsUnreserved c
    ||
    -- NOTE:
    -- Technically this is not good enough, because incorrectly-percent-encoded values should be disallowed.
    -- However, this is good enough because we do the extra parsing elsewhere
    charIsPossiblyPartOfPercentEncoding c
    || charIsSubDelim c

-- | Validate the 'uriPort' part of an URI
--
-- NOTE: Watch out with using this validation separately, it may not reject enough.
--
-- [RFC 3986, section 3.2.3](https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.3)
-- @
-- port        = *DIGIT
-- @
--
-- NOTE: The spec does not specify a maximum length of ports so we do not validate this either.
validatePort :: String -> Validation
validatePort uriPort =
  mconcat
    [ case uriPort of
        [] -> valid
        [':'] -> invalid "The port must not just be a ':'"
        (':' : rest) -> decorateString rest validatePortChar
        _ -> invalid "The port is empty or starts with ':'"
    ]

-- [RFC 3986, section 3.2.3](https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.3)
-- @
-- port        = *DIGIT
-- @
validatePortChar :: Char -> Validation
validatePortChar c = declare "The character is a digit" $ charIsDIGIT c

-- [RFC 3986, section 2.2](https://datatracker.ietf.org/doc/html/rfc3986#section-2.2)
-- @
-- unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
-- @
charIsUnreserved :: Char -> Bool
charIsUnreserved = \case
  '+' -> True
  '-' -> True
  '.' -> True
  '~' -> True
  c -> charIsALPHA c || charIsDIGIT c

-- [RFC 3986, section 2.2](https://datatracker.ietf.org/doc/html/rfc3986#section-2.2)
-- @
-- reserved      = gen-delims / sub-delims
-- @
charIsReserved :: Char -> Bool
charIsReserved c = charIsGenDelim c || charIsSubDelim c

-- [RFC 3986, section 2.2](https://datatracker.ietf.org/doc/html/rfc3986#section-2.2)
-- @
-- gen-delims  = ":" / "/" / "?" / "#" / "[" / "]" / "@"
-- @
charIsGenDelim :: Char -> Bool
charIsGenDelim = \case
  ':' -> True
  '/' -> True
  '?' -> True
  '#' -> True
  '[' -> True
  ']' -> True
  '@' -> True
  _ -> False

-- [RFC 3986, section 2.2](https://datatracker.ietf.org/doc/html/rfc3986#section-2.2)
-- @
-- sub-delims  = "!" / "$" / "&" / "'" / "(" / ")"
--             / "*" / "+" / "," / ";" / "="
-- @
charIsSubDelim :: Char -> Bool
charIsSubDelim = \case
  '!' -> True
  '$' -> True
  '&' -> True
  '\'' -> True
  '(' -> True
  ')' -> True
  '*' -> True
  '+' -> True
  ',' -> True
  ';' -> True
  '=' -> True
  _ -> False

-- [RFC 3986, section 2.1](https://datatracker.ietf.org/doc/html/rfc3986#section-2.1)
-- @
-- pct-encoded = "%" HEXDIG HEXDIG
-- @
charIsPossiblyPartOfPercentEncoding :: Char -> Bool
charIsPossiblyPartOfPercentEncoding = \case
  '%' -> True
  c -> charIsHEXDIG c

-- [RFC 3986, section 2.3](https://datatracker.ietf.org/doc/html/rfc3986#section-2.3)
-- @
-- ALPHA (%41-%5A and %61-%7A)
-- @
charIsALPHA :: Char -> Bool
charIsALPHA c =
  let o = Char.ord c
   in (0x41 <= o && o <= 0x5A)
        || (0x61 <= o && o <= 0x7A)

-- [RFC 3986, section 2.3](https://datatracker.ietf.org/doc/html/rfc3986#section-2.3)
-- @
-- The uppercase hexadecimal digits 'A' through 'F' are equivalent to
-- the lowercase digits 'a' through 'f', respectively.  If two URIs
-- differ only in the case of hexadecimal digits used in percent-encoded
-- octets, they are equivalent.
-- @
charIsHEXDIG :: Char -> Bool
charIsHEXDIG c =
  charIsDIGIT c
    || ('A' <= c && c <= 'F')
    || ('a' <= c && c <= 'f')

--  [RFC 3986, section 2.3](https://datatracker.ietf.org/doc/html/rfc3986#section-2.3)
-- @
-- DIGIT (%30-%39)
-- @
charIsDIGIT :: Char -> Bool
charIsDIGIT c =
  let o = Char.ord c
   in (0x30 <= o && o <= 0x39)

-- | Validate the 'uriPath' part of an URI
--
-- NOTE: Watch out with using this validation separately, it may not reject enough.
validatePath :: String -> Validation
validatePath _uriPath =
  mconcat
    [ -- We could check the following, but it actually does not hold, see also:
      -- https://github.com/haskell/network-uri/issues/76
      --
      -- declare (unwords ["The path", show uriPath, "is empty or starts with '/'"]) $
      --   -- Laziness prevents the partial 'head' from blowing up.
      --   null uriPath || head uriPath == '/',

      valid
    ]

-- | Validate the 'uriQuery' part of an URI
--
-- NOTE: Watch out with using this validation separately, it may not reject enough.
validateQuery :: String -> Validation
validateQuery uriQuery =
  declare (unwords ["The query", show uriQuery, "is empty or starts with '?'"]) $
    case listToMaybe uriQuery of
      Nothing -> True
      Just q -> q == '?'

validateFragment :: String -> Validation
validateFragment uriFragment =
  declare (unwords ["The fragment", show uriFragment, "is empty or starts with '#'"]) $
    case listToMaybe uriFragment of
      Nothing -> True
      Just f -> f == '#'

-- | Render a URI to a 'String', for use in testing
--
-- This uses @uriToString id@ as the docs specify.
-- It potentially exposes passwords, so only use it if you know what you're
-- doing.
dangerousURIToString :: URI -> String
dangerousURIToString u = uriToString id u ""

-- | Render a URI to a 'String', for use in debugging.
--
-- This uses custom string juggling.
-- It's what happened when you don't derive show but instead write your own
-- 'show' function. :(
-- It potentially exposes passwords, so only use it if you know what you're
-- doing.
dangerousDetailedURIToString :: URI -> String
dangerousDetailedURIToString URI {..} =
  unlines
    [ "URI {",
      concat ["  uriScheme = ", show uriScheme, ","],
      case uriAuthority of
        Nothing -> "  uriAuthority = Nothing,"
        Just URIAuth {..} ->
          intercalate
            "\n"
            [ "  uriAuthority = Just URIAuth {",
              concat ["    uriUserInfo = ", show uriUserInfo],
              concat ["    uriRegName = ", show uriRegName],
              concat ["    uriPort = ", show uriPort],
              "  }"
            ],
      concat ["  uriPath = ", show uriPath, ","],
      concat ["  uriQuery = ", show uriQuery, ","],
      concat ["  uriFragment = ", show uriFragment],
      "}"
    ]