{- | Implementation of Double-X-Encoding encoder and decoder in Haskell

Main functions:

* `doubleXEncode`
* `doubleXEncodeGql`
* `doubleXDecode`
-}

{- ORMOLU_DISABLE -}

{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE MultiWayIf #-}
{-# LANGUAGE OverloadedRecordDot #-}


module DoubleXEncoding where

import Data.Function ((&))
import qualified Data.Text as T
import Data.Text (replace, Text)
import Data.Functor ((<&>))
import Distribution.Utils.Generic (isAsciiAlphaNum)
import Numeric (showHex, readHex)
import Data.Char (ord, isDigit, chr)
import Control.Monad (join)


-- | Encoder mapping for ASCII characters.
charEncode :: Char -> Char
charEncode :: Char -> Char
charEncode = \case
  Char
' ' -> Char
'0'
  Char
'!' -> Char
'1'
  Char
'"' -> Char
'2'
  Char
'#' -> Char
'3'
  Char
'$' -> Char
'4'
  Char
'%' -> Char
'5'
  Char
'&' -> Char
'6'
  Char
'\'' -> Char
'7'
  Char
'(' -> Char
'8'
  Char
')' -> Char
'9'
  Char
'*' -> Char
'A'
  Char
'+' -> Char
'B'
  Char
',' -> Char
'C'
  Char
'-' -> Char
'D'
  Char
'.' -> Char
'E'
  Char
'/' -> Char
'F'

  Char
':' -> Char
'G'
  Char
';' -> Char
'H'
  Char
'<' -> Char
'I'
  Char
'=' -> Char
'J'
  Char
'>' -> Char
'K'
  Char
'?' -> Char
'L'
  Char
'@' -> Char
'M'

  Char
'[' -> Char
'N'
  Char
'\\' -> Char
'O'
  Char
']' -> Char
'P'
  Char
'^' -> Char
'Q'
  Char
'_' -> Char
'R'
  Char
'`' -> Char
'S'

  Char
'{' -> Char
'T'
  Char
'|' -> Char
'U'
  Char
'}' -> Char
'V'
  Char
'~' -> Char
'W'

  -- TODO: Remove this parsing workaround. Should be "X": "X"
  Char
'X' -> Char
'x'

  -- '': 'Z',  -- Reserved for encoding digits

  Char
_ -> Char
'\0'


-- | Decoder mapping for ASCII characters.
charDecode :: Char -> Char
charDecode :: Char -> Char
charDecode = \case
  Char
'0' -> Char
' '
  Char
'1' -> Char
'!'
  Char
'2' -> Char
'"'
  Char
'3' -> Char
'#'
  Char
'4' -> Char
'$'
  Char
'5' -> Char
'%'
  Char
'6' -> Char
'&'
  Char
'7' -> Char
'\''
  Char
'8' -> Char
'('
  Char
'9' -> Char
')'
  Char
'A' -> Char
'*'
  Char
'B' -> Char
'+'
  Char
'C' -> Char
','
  Char
'D' -> Char
'-'
  Char
'E' -> Char
'.'
  Char
'F' -> Char
'/'

  Char
'G' -> Char
':'
  Char
'H' -> Char
';'
  Char
'I' -> Char
'<'
  Char
'J' -> Char
'='
  Char
'K' -> Char
'>'
  Char
'L' -> Char
'?'
  Char
'M' -> Char
'@'

  Char
'N' -> Char
'['
  Char
'O' -> Char
'\\'
  Char
'P' -> Char
']'
  Char
'Q' -> Char
'^'
  Char
'R' -> Char
'_'
  Char
'S' -> Char
'`'

  Char
'T' -> Char
'{'
  Char
'U' -> Char
'|'
  Char
'V' -> Char
'}'
  Char
'W' -> Char
'~'

  -- TODO: Remove this parsing workaround. Should be "X": "X"
  Char
'x' -> Char
'X'

  Char
_ -> Char
'\0'


-- | Map hex characters to an alternative hex alphabet
-- ranging from a to p instead of 0 to f.
hexShiftEncode :: Char -> Char
hexShiftEncode :: Char -> Char
hexShiftEncode = \case
  Char
'0' -> Char
'a'
  Char
'1' -> Char
'b'
  Char
'2' -> Char
'c'
  Char
'3' -> Char
'd'
  Char
'4' -> Char
'e'
  Char
'5' -> Char
'f'
  Char
'6' -> Char
'g'
  Char
'7' -> Char
'h'
  Char
'8' -> Char
'i'
  Char
'9' -> Char
'j'
  Char
'a' -> Char
'k'
  Char
'b' -> Char
'l'
  Char
'c' -> Char
'm'
  Char
'd' -> Char
'n'
  Char
'e' -> Char
'o'
  Char
'f' -> Char
'p'

  Char
_ -> Char
'\0'


-- | Map alternative hex alphabet back to the original hex alphabet.
hexShiftDecode :: Char -> Char
hexShiftDecode :: Char -> Char
hexShiftDecode = \case
  Char
'a' -> Char
'0'
  Char
'b' -> Char
'1'
  Char
'c' -> Char
'2'
  Char
'd' -> Char
'3'
  Char
'e' -> Char
'4'
  Char
'f' -> Char
'5'
  Char
'g' -> Char
'6'
  Char
'h' -> Char
'7'
  Char
'i' -> Char
'8'
  Char
'j' -> Char
'9'
  Char
'k' -> Char
'a'
  Char
'l' -> Char
'b'
  Char
'm' -> Char
'c'
  Char
'n' -> Char
'd'
  Char
'o' -> Char
'e'
  Char
'p' -> Char
'f'

  Char
_ -> Char
'\0'


{-|
  Options for encoding:

  [`encodeLeadingDigit`]: Encode the leading digit of the input string

  [`encodeDoubleUnderscore`]: Encode double underscores `__` as `XXRXXR`

  Especially relevant for GraphQL, as
  [the spec](https://spec.graphql.org/October2021/#sec-Names)
  does not allow leading digits or double underscores for field names.
-}
data EncodeOptions = EncodeOptions
  { EncodeOptions -> Bool
encodeLeadingDigit :: Bool
  , EncodeOptions -> Bool
encodeDoubleUnderscore :: Bool
  }


-- | Encode a text using the Double-X-Encoding algorithm with provided options.
doubleXEncodeWithOptions :: EncodeOptions -> Text -> Text
doubleXEncodeWithOptions :: EncodeOptions -> Text -> Text
doubleXEncodeWithOptions EncodeOptions
encodeOptions Text
text = do
  let
    encodeStandard :: Text -> Text
encodeStandard Text
text = Text
text
      Text -> (Text -> Text) -> Text
forall a b. a -> (a -> b) -> b
& HasCallStack => Text -> Text -> Text -> Text
Text -> Text -> Text -> Text
replace Text
"XX" Text
"XXXXXX"
      Text -> (Text -> Text) -> Text
forall a b. a -> (a -> b) -> b
& (\Text
txt ->
          if EncodeOptions
encodeOptions.encodeDoubleUnderscore
          then Text
txt Text -> (Text -> Text) -> Text
forall a b. a -> (a -> b) -> b
& HasCallStack => Text -> Text -> Text -> Text
Text -> Text -> Text -> Text
replace Text
"__" Text
"XXRXXR"
          else Text
txt
        )
      Text -> (Text -> Text) -> Text
forall a b. a -> (a -> b) -> b
& (Char -> Text) -> Text -> Text
T.concatMap (\Char
char ->
          if Char -> Bool
isAsciiAlphaNum Char
char Bool -> Bool -> Bool
|| Char
char Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'_'
          then Char -> Text
T.singleton Char
char
          else
            if Char -> Char
charEncode Char
char Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'\0'
            then String -> Text
T.pack [Char
'X', Char
'X', Char -> Char
charEncode Char
char]
            else do
              let
                charHex :: Text
charHex = String -> Text
T.pack (String -> Text) -> String -> Text
forall a b. (a -> b) -> a -> b
$ Int -> ShowS
forall a. Integral a => a -> ShowS
showHex (Char -> Int
ord Char
char) String
""
                charHexEncoded :: Text
charHexEncoded = Text
charHex Text -> (Text -> Text) -> Text
forall a b. a -> (a -> b) -> b
& (Char -> Char) -> Text -> Text
T.map Char -> Char
hexShiftEncode
                padStart :: Int -> Text -> Text
padStart Int
n Text
txt =
                  (Text
"a" Text -> (Text -> Text) -> Text
forall a b. a -> (a -> b) -> b
& Int -> Text -> Text
T.replicate (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
- Text -> Int
T.length Text
txt)) Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
txt
                charHexLength :: Int
charHexLength = Text -> Int
T.length Text
charHex

              if
                | Int
charHexLength Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
5 ->
                    Text
"XX" Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Int -> Text -> Text
padStart Int
5 Text
charHexEncoded
                | Int
charHexLength Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
6 ->
                    Text
"XXY" Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Int -> Text -> Text
padStart Int
6 Text
charHexEncoded
                | Bool
otherwise ->
                    String -> Text
forall a. HasCallStack => String -> a
error String
"ERROR: Hex encoding is too long"
        )

    encodeDigit :: Char -> Text
encodeDigit Char
digit =
      String -> Text
T.pack [Char
'X', Char
'X', Char
'Z', Char
digit]

  if EncodeOptions
encodeOptions.encodeLeadingDigit
  then
    case Text -> Maybe (Char, Text)
T.uncons Text
text of
      Maybe (Char, Text)
Nothing -> Text
""

      Just (Char
leadingChar, Text
rest) ->
        if Bool -> Bool
not (Bool -> Bool) -> Bool -> Bool
forall a b. (a -> b) -> a -> b
$ Char -> Bool
isDigit Char
leadingChar
        then Text -> Text
encodeStandard Text
text
        else
          if Text -> Bool
T.null Text
rest
          then Char -> Text
encodeDigit Char
leadingChar
          else
            Char -> Text
encodeDigit Char
leadingChar
            Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> EncodeOptions -> Text -> Text
doubleXEncodeWithOptions
                  EncodeOptions
encodeOptions { encodeLeadingDigit = False }
                  Text
rest
  else
    Text -> Text
encodeStandard Text
text


-- | Default options with no leading digit encoding
-- and no double underscore encoding.
defaultOptions :: EncodeOptions
defaultOptions :: EncodeOptions
defaultOptions = EncodeOptions
    { encodeLeadingDigit :: Bool
encodeLeadingDigit = Bool
False
    , encodeDoubleUnderscore :: Bool
encodeDoubleUnderscore = Bool
False
    }


-- | Encode a text using the Double-X-Encoding algorithm.
--
-- >>> doubleXEncode "id-with.special$chars!"
-- "idXXDwithXXEspecialXX4charsXX1"
doubleXEncode :: Text -> Text
doubleXEncode :: Text -> Text
doubleXEncode =
    EncodeOptions -> Text -> Text
doubleXEncodeWithOptions EncodeOptions
defaultOptions


-- | Default options for GraphQL encoding.
-- Leading digits or double underscores are not allowed for field names.
gqlOptions :: EncodeOptions
gqlOptions :: EncodeOptions
gqlOptions = EncodeOptions
    { encodeLeadingDigit :: Bool
encodeLeadingDigit = Bool
True
    , encodeDoubleUnderscore :: Bool
encodeDoubleUnderscore = Bool
True
    }


-- | Encode a text using the Double-X-Encoding algorithm with GraphQL options.
--
-- >>> doubleXEncodeGql "1FileFormat__"
-- "XXZ1FileFormatXXRXXR"
doubleXEncodeGql :: Text -> Text
doubleXEncodeGql :: Text -> Text
doubleXEncodeGql =
    EncodeOptions -> Text -> Text
doubleXEncodeWithOptions EncodeOptions
gqlOptions


-- | Decode a Double-X-Encoding encoded text.
--
-- >>> doubleXDecode "idXXDwithXXEspecialXX4charsXX1"
-- "id-with.special$chars!"
doubleXDecode :: Text -> Text
doubleXDecode :: Text -> Text
doubleXDecode Text
text = do
  let
    parseHex :: Text -> Int
    parseHex :: Text -> Int
parseHex Text
text =
      case ReadS Int
forall a. (Eq a, Num a) => ReadS a
readHex (Text -> String
T.unpack Text
text) of
        [(Int
int, String
"")] -> Int
int
        [(Int, String)]
_ -> Int
0

    decodeWord :: Text -> (Text, Text)
    decodeWord :: Text -> (Text, Text)
decodeWord Text
word = do
      let
        noXX :: Text
noXX = Int -> Text -> Text
T.drop Int
2 Text
word
        first :: Text
first = Int -> Text -> Text
T.take Int
1 Text
noXX

      if  | Text
first Text -> Text -> Bool
forall a. Ord a => a -> a -> Bool
>= Text
"a" Bool -> Bool -> Bool
&& Text
first Text -> Text -> Bool
forall a. Ord a => a -> a -> Bool
<= Text
"p" ->
              (Text
noXX
                    Text -> (Text -> Text) -> Text
forall a b. a -> (a -> b) -> b
& Int -> Text -> Text
T.take Int
5
                    Text -> (Text -> Text) -> Text
forall a b. a -> (a -> b) -> b
& (Char -> Char) -> Text -> Text
T.map Char -> Char
hexShiftDecode
                    Text -> (Text -> Int) -> Int
forall a b. a -> (a -> b) -> b
& Text -> Int
parseHex
                    Int -> (Int -> Char) -> Char
forall a b. a -> (a -> b) -> b
& Int -> Char
chr
                    Char -> (Char -> Text) -> Text
forall a b. a -> (a -> b) -> b
& Char -> Text
T.singleton

              , Int -> Text -> Text
T.drop Int
5 Text
noXX
              )

          | Text
first Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"X" ->
              if Text
"XXXX" Text -> Text -> Bool
`T.isPrefixOf` Text
noXX
              then (Text
"XX", Int -> Text -> Text
T.drop Int
4 Text
noXX)
              else (Text
"X", Int -> Text -> Text
T.drop Int
1 Text
word)

          | Text
first Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"Y" ->
              (  Text
noXX
                    Text -> (Text -> Text) -> Text
forall a b. a -> (a -> b) -> b
& Int -> Text -> Text
T.drop Int
1  -- Remove the "Y"
                    Text -> (Text -> Text) -> Text
forall a b. a -> (a -> b) -> b
& Int -> Text -> Text
T.take Int
6
                    Text -> (Text -> Text) -> Text
forall a b. a -> (a -> b) -> b
& (Char -> Char) -> Text -> Text
T.map Char -> Char
hexShiftDecode
                    Text -> (Text -> Int) -> Int
forall a b. a -> (a -> b) -> b
& Text -> Int
parseHex
                    Int -> (Int -> Char) -> Char
forall a b. a -> (a -> b) -> b
& Int -> Char
chr
                    Char -> (Char -> Text) -> Text
forall a b. a -> (a -> b) -> b
& Char -> Text
T.singleton
              , Int -> Text -> Text
T.drop Int
7 Text
noXX
              )

          | Text
first Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"Z" ->
              (Text
noXX
                  Text -> (Text -> Text) -> Text
forall a b. a -> (a -> b) -> b
& Int -> Text -> Text
T.drop Int
1
                  Text -> (Text -> Text) -> Text
forall a b. a -> (a -> b) -> b
& Int -> Text -> Text
T.take Int
1
              , Int -> Text -> Text
T.drop Int
2 Text
noXX
              )

          | Bool
otherwise ->
            (Text
noXX
              Text -> (Text -> Text) -> Text
forall a b. a -> (a -> b) -> b
& Int -> Text -> Text
T.take Int
1
              Text -> (Text -> Text) -> Text
forall a b. a -> (a -> b) -> b
& (Char -> Char) -> Text -> Text
T.map Char -> Char
charDecode
            , Int -> Text -> Text
T.drop Int
1 Text
noXX
            )

  Text
text
    Text -> (Text -> (Text, Text)) -> (Text, Text)
forall a b. a -> (a -> b) -> b
& HasCallStack => Text -> Text -> (Text, Text)
Text -> Text -> (Text, Text)
T.breakOn Text
"XX"
    (Text, Text) -> ((Text, Text) -> Text) -> Text
forall a b. a -> (a -> b) -> b
& \case
        (Text
"", Text
end) -> case Text -> (Text, Text)
decodeWord Text
end of
          (Text
decoded, Text
"") -> Text
decoded
          (Text
decoded, Text
rest) -> Text
decoded Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text -> Text
doubleXDecode Text
rest
        (Text
start, Text
"") -> Text
start
        (Text
start, Text
end) -> Text
start Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text -> Text
doubleXDecode Text
end