{-# LANGUAGE UnboxedTuples, BangPatterns #-}
-- | JSON Canonicalization Scheme https://datatracker.ietf.org/doc/html/rfc8785
module Data.Aeson.RFC8785 (
    encodeCanonical,
) where

import Data.List (sortBy)
import Data.Ord (comparing)
import GHC.Integer (quotRemInteger)
import Math.NumberTheory.Logarithms (integerLog10)

import Data.Aeson
import Data.Aeson.Encoding
import Data.Aeson.Encoding.Internal
import Data.Aeson.Internal.Prelude

import qualified Data.Aeson.Key as Key
import qualified Data.Aeson.KeyMap as KM
import qualified Data.ByteString as BS
import qualified Data.ByteString.Builder as B
import qualified Data.ByteString.Builder.Prim as BP
import qualified Data.ByteString.Lazy as LBS
import qualified Data.Scientific as Sci
import qualified Data.Text.Encoding as TE
import qualified Data.Vector as V
import qualified Data.Word8.Patterns as W8

-- $setup
-- >>> import Data.Aeson

-- | Encode to JSON according to RFC 8785 canonicalization scheme.
-- https://datatracker.ietf.org/doc/html/rfc8785
--
-- 'encodeCanonical' uses 'toJSON' to produce intermediate 'Value',
-- as 'toEncoding' may (and most likely) produces non-canonical JSON.
--
-- Note: @decode (encodeCanonical v) === Just v@ for all @v :: Value@,
-- i.e. 'encodeCanonical' doesn't lose any information.
--
-- However, the example in RFC8785 /loses/ information as the intermediate
-- number representation is 'Double', also current @toJSON :: Double -> Value@
-- sometimes produces too precise values. For example
--
-- >>> toJSON (1e23 :: Double)
-- Number 9.999999999999999e22
--
-- 'show' also behaves the same:
--
-- >>> 1e23 :: Double
-- 9.999999999999999e22
--
-- Note: RFC8785 is __not the same scheme__ as used in
-- [canonical-json](https://hackage.haskell.org/package/canonical-json) package
-- (https://wiki.laptop.org/go/Canonical_JSON).
-- That scheme produces /invalid/ JSON (e.g. control characters encoded as is, not escaped)
-- and cannot encode non-integral numbers.
--
-- @since 2.2.1.0
--
encodeCanonical :: ToJSON a => a -> LBS.ByteString
encodeCanonical :: forall a. ToJSON a => a -> ByteString
encodeCanonical = Encoding' Value -> ByteString
forall a. Encoding' a -> ByteString
encodingToLazyByteString (Encoding' Value -> ByteString)
-> (a -> Encoding' Value) -> a -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Value -> Encoding' Value
toCanonical (Value -> Encoding' Value) -> (a -> Value) -> a -> Encoding' Value
forall b c a. (b -> c) -> (a -> b) -> a -> c
. a -> Value
forall a. ToJSON a => a -> Value
toJSON

toCanonical :: Value -> Encoding
toCanonical :: Value -> Encoding' Value
toCanonical Value
Null       = Encoding' Value
null_
toCanonical (Bool Bool
b)   = Bool -> Encoding' Value
bool Bool
b
toCanonical (Number Scientific
n) = Scientific -> Encoding' Value
canonicalNumber Scientific
n
toCanonical (String Text
s) = Text -> Encoding' Value
forall a. Text -> Encoding' a
canonicalString Text
s
toCanonical (Array Array
v)  = (Value -> Encoding' Value) -> [Value] -> Encoding' Value
forall a. (a -> Encoding' Value) -> [a] -> Encoding' Value
list Value -> Encoding' Value
toCanonical (Array -> [Value]
forall a. Vector a -> [a]
V.toList Array
v)
toCanonical (Object Object
m) = (Key -> Encoding' Key)
-> (Value -> Encoding' Value)
-> (forall a. (Key -> Value -> a -> a) -> a -> [(Key, Value)] -> a)
-> [(Key, Value)]
-> Encoding' Value
forall k v m.
(k -> Encoding' Key)
-> (v -> Encoding' Value)
-> (forall a. (k -> v -> a -> a) -> a -> m -> a)
-> m
-> Encoding' Value
dict (Text -> Encoding' Key
forall a. Text -> Encoding' a
canonicalString (Text -> Encoding' Key) -> (Key -> Text) -> Key -> Encoding' Key
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Key -> Text
Key.toText) Value -> Encoding' Value
toCanonical (Key -> Value -> a -> a) -> a -> [(Key, Value)] -> a
forall a. (Key -> Value -> a -> a) -> a -> [(Key, Value)] -> a
forall k v a. (k -> v -> a -> a) -> a -> [(k, v)] -> a
ifr ([(Key, Value)] -> Encoding' Value)
-> [(Key, Value)] -> Encoding' Value
forall a b. (a -> b) -> a -> b
$
    ((Key, Value) -> (Key, Value) -> Ordering)
-> [(Key, Value)] -> [(Key, Value)]
forall a. (a -> a -> Ordering) -> [a] -> [a]
sortBy (\(Key
k1, Value
_) (Key
k2, Value
_) -> Key -> Key -> Ordering
propertyCmp Key
k1 Key
k2) (Object -> [(Key, Value)]
forall v. KeyMap v -> [(Key, v)]
KM.toList Object
m)

ifr :: (k -> v -> a -> a) -> a -> [(k, v)] -> a
ifr :: forall k v a. (k -> v -> a -> a) -> a -> [(k, v)] -> a
ifr k -> v -> a -> a
f a
z = ((k, v) -> a -> a) -> a -> [(k, v)] -> a
forall a b. (a -> b -> b) -> b -> [a] -> b
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr (\(k
k, v
v) -> k -> v -> a -> a
f k
k v
v) a
z
{-# INLINE ifr #-}

-- Property name strings to be sorted are formatted as arrays of UTF-16 code units.
propertyCmp :: Key -> Key -> Ordering
propertyCmp :: Key -> Key -> Ordering
propertyCmp = (Key -> ByteString) -> Key -> Key -> Ordering
forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing Key -> ByteString
f where
    -- this is slow implementation, but it's obviously not wrong.
    f :: Key -> BS.ByteString
    f :: Key -> ByteString
f = Text -> ByteString
TE.encodeUtf16BE (Text -> ByteString) -> (Key -> Text) -> Key -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Key -> Text
Key.toText

-- strings are already serialized canonically.
canonicalString :: Text -> Encoding' a
canonicalString :: forall a. Text -> Encoding' a
canonicalString = Text -> Encoding' a
forall a. Text -> Encoding' a
text

-- RFC 8785 is outsourcing number format to ECMA-262.
-- 10th edition, 7.1.12.1 NumberToString
-- https://262.ecma-international.org/10.0/#sec-tostring-applied-to-the-number-type
--
-- Note: this specification is not lossy
-- Given 'Scientific' we can choose n,k,s uniquely: 'nks'.
--
-- RFC8785 Appendix D says "don't use bignums".
canonicalNumber :: Scientific -> Encoding
canonicalNumber :: Scientific -> Encoding' Value
canonicalNumber Scientific
m = case Scientific -> Scientific -> Ordering
forall a. Ord a => a -> a -> Ordering
compare Scientific
m Scientific
0 of
    Ordering
EQ -> Builder -> Encoding' Value
forall tag. Builder -> Encoding' tag
Encoding (Word8 -> Builder
B.word8 Word8
W8.DIGIT_0)
    Ordering
LT -> Builder -> Encoding' Value
forall tag. Builder -> Encoding' tag
Encoding (Word8 -> Builder
B.word8 Word8
W8.HYPHEN Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Encoding' Value -> Builder
forall tag. Encoding' tag -> Builder
fromEncoding (Scientific -> Encoding' Value
canonicalNumber' (Scientific -> Scientific
forall a. Num a => a -> a
negate Scientific
m)))
    Ordering
GT -> Scientific -> Encoding' Value
canonicalNumber' Scientific
m

-- input: Positive number
canonicalNumber' :: Scientific -> Encoding
canonicalNumber' :: Scientific -> Encoding' Value
canonicalNumber' Scientific
m
    | Int
k Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
n, Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
21
    = Builder -> Encoding' Value
forall tag. Builder -> Encoding' tag
Encoding (Builder -> Encoding' Value) -> Builder -> Encoding' Value
forall a b. (a -> b) -> a -> b
$
        FixedPrim Word8 -> [Word8] -> Builder
forall a. FixedPrim a -> [a] -> Builder
BP.primMapListFixed FixedPrim Word8
BP.word8 [Word8]
ds Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<>
        FixedPrim Word8 -> [Word8] -> Builder
forall a. FixedPrim a -> [a] -> Builder
BP.primMapListFixed FixedPrim Word8
BP.word8 (Int -> Word8 -> [Word8]
forall a. Int -> a -> [a]
replicate (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
k) Word8
W8.DIGIT_0)

    | Int
0 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
n, Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
21
    , let ([Word8]
pfx, [Word8]
sfx) = Int -> [Word8] -> ([Word8], [Word8])
forall a. Int -> [a] -> ([a], [a])
splitAt Int
n [Word8]
ds
    = Builder -> Encoding' Value
forall tag. Builder -> Encoding' tag
Encoding (Builder -> Encoding' Value) -> Builder -> Encoding' Value
forall a b. (a -> b) -> a -> b
$
        FixedPrim Word8 -> [Word8] -> Builder
forall a. FixedPrim a -> [a] -> Builder
BP.primMapListFixed FixedPrim Word8
BP.word8 [Word8]
pfx Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<>
        Word8 -> Builder
B.word8 Word8
W8.PERIOD Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<>
        FixedPrim Word8 -> [Word8] -> Builder
forall a. FixedPrim a -> [a] -> Builder
BP.primMapListFixed FixedPrim Word8
BP.word8 [Word8]
sfx

    | -Int
6 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
n, Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0
    = Builder -> Encoding' Value
forall tag. Builder -> Encoding' tag
Encoding (Builder -> Encoding' Value) -> Builder -> Encoding' Value
forall a b. (a -> b) -> a -> b
$
        Word8 -> Builder
B.word8 Word8
W8.DIGIT_0 Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<>
        Word8 -> Builder
B.word8 Word8
W8.PERIOD Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<>
        FixedPrim Word8 -> [Word8] -> Builder
forall a. FixedPrim a -> [a] -> Builder
BP.primMapListFixed FixedPrim Word8
BP.word8 (Int -> Word8 -> [Word8]
forall a. Int -> a -> [a]
replicate (Int -> Int
forall a. Num a => a -> a
negate Int
n) Word8
W8.DIGIT_0) Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<>
        FixedPrim Word8 -> [Word8] -> Builder
forall a. FixedPrim a -> [a] -> Builder
BP.primMapListFixed FixedPrim Word8
BP.word8 [Word8]
ds

    | Int
k Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1, [Word8
d] <- [Word8]
ds
    = Builder -> Encoding' Value
forall tag. Builder -> Encoding' tag
Encoding (Builder -> Encoding' Value) -> Builder -> Encoding' Value
forall a b. (a -> b) -> a -> b
$
        Word8 -> Builder
B.word8 Word8
d Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<>
        Word8 -> Builder
B.word8 Word8
W8.LOWER_E Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<>
        Word8 -> Builder
B.word8 (if (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0 then Word8
W8.PLUS else Word8
W8.HYPHEN) Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<>
        FixedPrim Word8 -> [Word8] -> Builder
forall a. FixedPrim a -> [a] -> Builder
BP.primMapListFixed FixedPrim Word8
BP.word8 (Integer -> [Word8]
integerToDecimalDigits (Integer -> Integer
forall a. Num a => a -> a
abs (Int -> Integer
forall a. Integral a => a -> Integer
toInteger Int
n Integer -> Integer -> Integer
forall a. Num a => a -> a -> a
- Integer
1)))

    | (Word8
d:[Word8]
ds') <- [Word8]
ds
    = Builder -> Encoding' Value
forall tag. Builder -> Encoding' tag
Encoding (Builder -> Encoding' Value) -> Builder -> Encoding' Value
forall a b. (a -> b) -> a -> b
$
        Word8 -> Builder
B.word8 Word8
d Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<>
        Word8 -> Builder
B.word8 Word8
W8.PERIOD Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<>
        FixedPrim Word8 -> [Word8] -> Builder
forall a. FixedPrim a -> [a] -> Builder
BP.primMapListFixed FixedPrim Word8
BP.word8 [Word8]
ds' Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<>
        Word8 -> Builder
B.word8 Word8
W8.LOWER_E Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<>
        Word8 -> Builder
B.word8 (if (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0 then Word8
W8.PLUS else Word8
W8.HYPHEN) Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<>
        FixedPrim Word8 -> [Word8] -> Builder
forall a. FixedPrim a -> [a] -> Builder
BP.primMapListFixed FixedPrim Word8
BP.word8 (Integer -> [Word8]
integerToDecimalDigits (Integer -> Integer
forall a. Num a => a -> a
abs (Int -> Integer
forall a. Integral a => a -> Integer
toInteger Int
n Integer -> Integer -> Integer
forall a. Num a => a -> a -> a
- Integer
1)))

    | Bool
otherwise
    = String -> Encoding' Value
forall a. String -> Encoding' a
string String
"0" -- shouldn't happen, but we need a default case.

  where
    -- 5. Otherwise, let n, k, and s be integers such that
    -- k ≥ 1, 10k - 1 ≤ s < 10k, the Number value for s × 10n - k is m,
    -- and k is as small as possible.
    -- Note that k is the number of digits in the decimal representation of s,
    -- that s is not divisible by 10, and that the least significant digit of s
    -- is not necessarily uniquely determined by these criteria.
    (Int
n, Int
k, Integer
s) = Scientific -> (Int, Int, Integer)
nks Scientific
m
    ds :: [Word8]
ds = Integer -> [Word8]
integerToDecimalDigits Integer
s

-- 5. Otherwise, let n, k, and s be integers such that k ≥ 1, 10^(k - 1) ≤ s < 10^k,
-- the Number value for s × 10^(n - k) is m, and k is as small as possible.
-- Note that k is the number of digits in the decimal representation of s,
-- that s is not divisible by 10, and that the least significant digit of s
-- is not necessarily uniquely determined by these criteria.
nks :: Scientific -> (Int, Int, Integer)
nks :: Scientific -> (Int, Int, Integer)
nks Scientific
m = (Int
e Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
k, Int
k, Integer
c)
  where
    m' :: Scientific
m' = Scientific -> Scientific
Sci.normalize Scientific
m
    c :: Integer
c = Scientific -> Integer
Sci.coefficient Scientific
m'
    e :: Int
e = Scientific -> Int
Sci.base10Exponent Scientific
m'
    k :: Int
k = Integer -> Int
integerLog10 Integer
c Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1

integerToDecimalDigits :: Integer -> [Word8]
integerToDecimalDigits :: Integer -> [Word8]
integerToDecimalDigits = [Word8] -> Integer -> [Word8]
go [] where
    go :: [Word8] -> Integer -> [Word8]
go [Word8]
acc Integer
0 = [Word8]
acc
    go [Word8]
acc Integer
i = case Integer -> Integer -> (# Integer, Integer #)
quotRemInteger Integer
i Integer
10 of
        (# Integer
q, Integer
r #) -> [Word8] -> Integer -> [Word8]
go (Word8
dWord8 -> [Word8] -> [Word8]
forall a. a -> [a] -> [a]
:[Word8]
acc) Integer
q where !d :: Word8
d = Integer -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral Integer
r Word8 -> Word8 -> Word8
forall a. Num a => a -> a -> a
+ Word8
W8.DIGIT_0