{-# LANGUAGE UnboxedTuples, BangPatterns #-}
-- | JSON Canonicalization Scheme https://datatracker.ietf.org/doc/html/rfc8785
module Data.Aeson.RFC8785 (
    encodeCanonical,
) where

import Data.List (sortBy)
import Data.Ord (comparing)
import GHC.Integer (quotRemInteger)
import Math.NumberTheory.Logarithms (integerLog10)

import Data.Aeson
import Data.Aeson.Encoding
import Data.Aeson.Encoding.Internal
import Data.Aeson.Internal.Prelude
import Data.Aeson.Internal.Word8

import qualified Data.Aeson.Key as Key
import qualified Data.Aeson.KeyMap as KM
import qualified Data.ByteString as BS
import qualified Data.ByteString.Builder as B
import qualified Data.ByteString.Builder.Prim as BP
import qualified Data.ByteString.Lazy as LBS
import qualified Data.Scientific as Sci
import qualified Data.Text.Encoding as TE
import qualified Data.Vector as V

-- $setup
-- >>> import Data.Aeson

-- | Encode to JSON according to RFC 8785 canonicalization scheme.
-- https://datatracker.ietf.org/doc/html/rfc8785
--
-- 'encodeCanonical' uses 'toJSON' to produce intermediate 'Value',
-- as 'toEncoding' may (and most likely) produces non-canonical JSON.
--
-- Note: @decode (encodeCanonical v) === Just v@ for all @v :: Value@,
-- i.e. 'encodeCanonical' doesn't lose any information.
--
-- However, the example in RFC8785 /loses/ information as the intermediate
-- number representation is 'Double', also current @toJSON :: Double -> Value@
-- sometimes produces too precise values. For example
--
-- >>> toJSON (1e23 :: Double)
-- Number 9.999999999999999e22
--
-- 'show' also behaves the same:
--
-- >>> 1e23 :: Double
-- 9.999999999999999e22
--
-- Note: RFC8785 is __not the same scheme__ as used in
-- [canonical-json](https://hackage.haskell.org/package/canonical-json) package
-- (https://wiki.laptop.org/go/Canonical_JSON).
-- That scheme produces /invalid/ JSON (e.g. control characters encoded as is, not escaped)
-- and cannot encode non-integral numbers.
--
-- @since 2.2.1.0
--
encodeCanonical :: ToJSON a => a -> LBS.ByteString
encodeCanonical :: forall a. ToJSON a => a -> ByteString
encodeCanonical = forall a. Encoding' a -> ByteString
encodingToLazyByteString forall b c a. (b -> c) -> (a -> b) -> a -> c
. Value -> Encoding' Value
toCanonical forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. ToJSON a => a -> Value
toJSON

toCanonical :: Value -> Encoding
toCanonical :: Value -> Encoding' Value
toCanonical Value
Null       = Encoding' Value
null_
toCanonical (Bool Bool
b)   = Bool -> Encoding' Value
bool Bool
b
toCanonical (Number Scientific
n) = Scientific -> Encoding' Value
canonicalNumber Scientific
n
toCanonical (String Text
s) = forall a. Text -> Encoding' a
canonicalString Text
s
toCanonical (Array Array
v)  = forall a. (a -> Encoding' Value) -> [a] -> Encoding' Value
list Value -> Encoding' Value
toCanonical (forall a. Vector a -> [a]
V.toList Array
v)
toCanonical (Object Object
m) = forall k v m.
(k -> Encoding' Key)
-> (v -> Encoding' Value)
-> (forall a. (k -> v -> a -> a) -> a -> m -> a)
-> m
-> Encoding' Value
dict (forall a. Text -> Encoding' a
canonicalString forall b c a. (b -> c) -> (a -> b) -> a -> c
. Key -> Text
Key.toText) Value -> Encoding' Value
toCanonical forall k v a. (k -> v -> a -> a) -> a -> [(k, v)] -> a
ifr forall a b. (a -> b) -> a -> b
$
    forall a. (a -> a -> Ordering) -> [a] -> [a]
sortBy (\(Key
k1, Value
_) (Key
k2, Value
_) -> Key -> Key -> Ordering
propertyCmp Key
k1 Key
k2) (forall v. KeyMap v -> [(Key, v)]
KM.toList Object
m)

ifr :: (k -> v -> a -> a) -> a -> [(k, v)] -> a
ifr :: forall k v a. (k -> v -> a -> a) -> a -> [(k, v)] -> a
ifr k -> v -> a -> a
f a
z = forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr (\(k
k, v
v) -> k -> v -> a -> a
f k
k v
v) a
z
{-# INLINE ifr #-}

-- Property name strings to be sorted are formatted as arrays of UTF-16 code units.
propertyCmp :: Key -> Key -> Ordering
propertyCmp :: Key -> Key -> Ordering
propertyCmp = forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing Key -> ByteString
f where
    -- this is slow implementation, but it's obviously not wrong.
    f :: Key -> BS.ByteString
    f :: Key -> ByteString
f = Text -> ByteString
TE.encodeUtf16BE forall b c a. (b -> c) -> (a -> b) -> a -> c
. Key -> Text
Key.toText

-- strings are already serialized canonically.
canonicalString :: Text -> Encoding' a
canonicalString :: forall a. Text -> Encoding' a
canonicalString = forall a. Text -> Encoding' a
text

-- RFC 8785 is outsourcing number format to ECMA-262.
-- 10th edition, 7.1.12.1 NumberToString
-- https://262.ecma-international.org/10.0/#sec-tostring-applied-to-the-number-type
--
-- Note: this specification is not lossy
-- Given 'Scientific' we can choose n,k,s uniquely: 'nks'.
--
-- RFC8785 Appendix D says "don't use bignums".
canonicalNumber :: Scientific -> Encoding
canonicalNumber :: Scientific -> Encoding' Value
canonicalNumber Scientific
m = case forall a. Ord a => a -> a -> Ordering
compare Scientific
m Scientific
0 of
    Ordering
EQ -> forall tag. Builder -> Encoding' tag
Encoding (Word8 -> Builder
B.word8 Word8
W8_0)
    Ordering
LT -> forall tag. Builder -> Encoding' tag
Encoding (Word8 -> Builder
B.word8 Word8
W8_MINUS forall a. Semigroup a => a -> a -> a
<> forall tag. Encoding' tag -> Builder
fromEncoding (Scientific -> Encoding' Value
canonicalNumber' (forall a. Num a => a -> a
negate Scientific
m)))
    Ordering
GT -> Scientific -> Encoding' Value
canonicalNumber' Scientific
m

-- input: Positive number
canonicalNumber' :: Scientific -> Encoding
canonicalNumber' :: Scientific -> Encoding' Value
canonicalNumber' Scientific
m
    | Int
k forall a. Ord a => a -> a -> Bool
<= Int
n, Int
n forall a. Ord a => a -> a -> Bool
<= Int
21
    = forall tag. Builder -> Encoding' tag
Encoding forall a b. (a -> b) -> a -> b
$
        forall a. FixedPrim a -> [a] -> Builder
BP.primMapListFixed FixedPrim Word8
BP.word8 [Word8]
ds forall a. Semigroup a => a -> a -> a
<>
        forall a. FixedPrim a -> [a] -> Builder
BP.primMapListFixed FixedPrim Word8
BP.word8 (forall a. Int -> a -> [a]
replicate (Int
n forall a. Num a => a -> a -> a
- Int
k) Word8
W8_0)

    | Int
0 forall a. Ord a => a -> a -> Bool
< Int
n, Int
n forall a. Ord a => a -> a -> Bool
<= Int
21
    , let ([Word8]
pfx, [Word8]
sfx) = forall a. Int -> [a] -> ([a], [a])
splitAt Int
n [Word8]
ds
    = forall tag. Builder -> Encoding' tag
Encoding forall a b. (a -> b) -> a -> b
$
        forall a. FixedPrim a -> [a] -> Builder
BP.primMapListFixed FixedPrim Word8
BP.word8 [Word8]
pfx forall a. Semigroup a => a -> a -> a
<>
        Word8 -> Builder
B.word8 Word8
W8_DOT forall a. Semigroup a => a -> a -> a
<>
        forall a. FixedPrim a -> [a] -> Builder
BP.primMapListFixed FixedPrim Word8
BP.word8 [Word8]
sfx

    | -Int
6 forall a. Ord a => a -> a -> Bool
< Int
n, Int
n forall a. Ord a => a -> a -> Bool
<= Int
0
    = forall tag. Builder -> Encoding' tag
Encoding forall a b. (a -> b) -> a -> b
$
        Word8 -> Builder
B.word8 Word8
W8_0 forall a. Semigroup a => a -> a -> a
<>
        Word8 -> Builder
B.word8 Word8
W8_DOT forall a. Semigroup a => a -> a -> a
<>
        forall a. FixedPrim a -> [a] -> Builder
BP.primMapListFixed FixedPrim Word8
BP.word8 (forall a. Int -> a -> [a]
replicate (forall a. Num a => a -> a
negate Int
n) Word8
W8_0) forall a. Semigroup a => a -> a -> a
<>
        forall a. FixedPrim a -> [a] -> Builder
BP.primMapListFixed FixedPrim Word8
BP.word8 [Word8]
ds

    | Int
k forall a. Eq a => a -> a -> Bool
== Int
1, [Word8
d] <- [Word8]
ds
    = forall tag. Builder -> Encoding' tag
Encoding forall a b. (a -> b) -> a -> b
$
        Word8 -> Builder
B.word8 Word8
d forall a. Semigroup a => a -> a -> a
<>
        Word8 -> Builder
B.word8 Word8
W8_e forall a. Semigroup a => a -> a -> a
<>
        Word8 -> Builder
B.word8 (if (Int
n forall a. Num a => a -> a -> a
- Int
1) forall a. Ord a => a -> a -> Bool
>= Int
0 then Word8
W8_PLUS else Word8
W8_MINUS) forall a. Semigroup a => a -> a -> a
<>
        forall a. FixedPrim a -> [a] -> Builder
BP.primMapListFixed FixedPrim Word8
BP.word8 (Integer -> [Word8]
integerToDecimalDigits (forall a. Num a => a -> a
abs (forall a. Integral a => a -> Integer
toInteger Int
n forall a. Num a => a -> a -> a
- Integer
1)))

    | (Word8
d:[Word8]
ds') <- [Word8]
ds
    = forall tag. Builder -> Encoding' tag
Encoding forall a b. (a -> b) -> a -> b
$
        Word8 -> Builder
B.word8 Word8
d forall a. Semigroup a => a -> a -> a
<>
        Word8 -> Builder
B.word8 Word8
W8_DOT forall a. Semigroup a => a -> a -> a
<>
        forall a. FixedPrim a -> [a] -> Builder
BP.primMapListFixed FixedPrim Word8
BP.word8 [Word8]
ds' forall a. Semigroup a => a -> a -> a
<>
        Word8 -> Builder
B.word8 Word8
W8_e forall a. Semigroup a => a -> a -> a
<>
        Word8 -> Builder
B.word8 (if (Int
n forall a. Num a => a -> a -> a
- Int
1) forall a. Ord a => a -> a -> Bool
>= Int
0 then Word8
W8_PLUS else Word8
W8_MINUS) forall a. Semigroup a => a -> a -> a
<>
        forall a. FixedPrim a -> [a] -> Builder
BP.primMapListFixed FixedPrim Word8
BP.word8 (Integer -> [Word8]
integerToDecimalDigits (forall a. Num a => a -> a
abs (forall a. Integral a => a -> Integer
toInteger Int
n forall a. Num a => a -> a -> a
- Integer
1)))

    | Bool
otherwise
    = forall a. String -> Encoding' a
string String
"0" -- shouldn't happen, but we need a default case.

  where
    -- 5. Otherwise, let n, k, and s be integers such that
    -- k ≥ 1, 10k - 1 ≤ s < 10k, the Number value for s × 10n - k is m,
    -- and k is as small as possible.
    -- Note that k is the number of digits in the decimal representation of s,
    -- that s is not divisible by 10, and that the least significant digit of s
    -- is not necessarily uniquely determined by these criteria.
    (Int
n, Int
k, Integer
s) = Scientific -> (Int, Int, Integer)
nks Scientific
m
    ds :: [Word8]
ds = Integer -> [Word8]
integerToDecimalDigits Integer
s

-- 5. Otherwise, let n, k, and s be integers such that k ≥ 1, 10^(k - 1) ≤ s < 10^k,
-- the Number value for s × 10^(n - k) is m, and k is as small as possible.
-- Note that k is the number of digits in the decimal representation of s,
-- that s is not divisible by 10, and that the least significant digit of s
-- is not necessarily uniquely determined by these criteria.
nks :: Scientific -> (Int, Int, Integer)
nks :: Scientific -> (Int, Int, Integer)
nks Scientific
m = (Int
e forall a. Num a => a -> a -> a
+ Int
k, Int
k, Integer
c)
  where
    m' :: Scientific
m' = Scientific -> Scientific
Sci.normalize Scientific
m
    c :: Integer
c = Scientific -> Integer
Sci.coefficient Scientific
m'
    e :: Int
e = Scientific -> Int
Sci.base10Exponent Scientific
m'
    k :: Int
k = Integer -> Int
integerLog10 Integer
c forall a. Num a => a -> a -> a
+ Int
1

integerToDecimalDigits :: Integer -> [Word8]
integerToDecimalDigits :: Integer -> [Word8]
integerToDecimalDigits = [Word8] -> Integer -> [Word8]
go [] where
    go :: [Word8] -> Integer -> [Word8]
go [Word8]
acc Integer
0 = [Word8]
acc
    go [Word8]
acc Integer
i = case Integer -> Integer -> (# Integer, Integer #)
quotRemInteger Integer
i Integer
10 of
        (# Integer
q, Integer
r #) -> [Word8] -> Integer -> [Word8]
go (Word8
dforall a. a -> [a] -> [a]
:[Word8]
acc) Integer
q where !d :: Word8
d = forall a b. (Integral a, Num b) => a -> b
fromIntegral Integer
r forall a. Num a => a -> a -> a
+ Word8
W8_0