{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE StrictData #-}

-- | Encodes and decodes 'Text's to 'Bottom's. For details, see the
-- [Bottom spec](https://github.com/bottom-software-foundation/spec).
module Data.Encoding.Bottom
  ( Bottom,
    unBottom,
    encode,
    decode,
    decode',
  )
where

import Control.DeepSeq (NFData)
import Control.Monad (void)
import Data.Bits (zeroBits)
import Data.ByteString (ByteString)
import qualified Data.ByteString as BS
import qualified Data.Foldable as F
import Data.List (intercalate, unfoldr)
import Data.List.NonEmpty (NonEmpty (..))
import qualified Data.List.NonEmpty as NE
import Data.Text (Text)
import qualified Data.Text as T
import Data.Text.Encoding (decodeUtf8, encodeUtf8)
import Data.Void (Void)
import Data.Word (Word8)
import Text.Megaparsec (Parsec, Token, chunk, eof, manyTill, runParser, someTill, (<|>))
import Text.Megaparsec.Error (ErrorItem (..), ParseError (..), ParseErrorBundle (..), errorBundlePretty)

-- | A 'Bottom' is a wrapper around well-formed, Bottom-encoded 'ByteString'.
-- Its instances are derived from those of 'ByteString'.
newtype Bottom = Bottom ByteString
  deriving (Int -> Bottom -> ShowS
[Bottom] -> ShowS
Bottom -> String
(Int -> Bottom -> ShowS)
-> (Bottom -> String) -> ([Bottom] -> ShowS) -> Show Bottom
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Bottom] -> ShowS
$cshowList :: [Bottom] -> ShowS
show :: Bottom -> String
$cshow :: Bottom -> String
showsPrec :: Int -> Bottom -> ShowS
$cshowsPrec :: Int -> Bottom -> ShowS
Show, Bottom -> Bottom -> Bool
(Bottom -> Bottom -> Bool)
-> (Bottom -> Bottom -> Bool) -> Eq Bottom
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Bottom -> Bottom -> Bool
$c/= :: Bottom -> Bottom -> Bool
== :: Bottom -> Bottom -> Bool
$c== :: Bottom -> Bottom -> Bool
Eq, Eq Bottom
Eq Bottom
-> (Bottom -> Bottom -> Ordering)
-> (Bottom -> Bottom -> Bool)
-> (Bottom -> Bottom -> Bool)
-> (Bottom -> Bottom -> Bool)
-> (Bottom -> Bottom -> Bool)
-> (Bottom -> Bottom -> Bottom)
-> (Bottom -> Bottom -> Bottom)
-> Ord Bottom
Bottom -> Bottom -> Bool
Bottom -> Bottom -> Ordering
Bottom -> Bottom -> Bottom
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Bottom -> Bottom -> Bottom
$cmin :: Bottom -> Bottom -> Bottom
max :: Bottom -> Bottom -> Bottom
$cmax :: Bottom -> Bottom -> Bottom
>= :: Bottom -> Bottom -> Bool
$c>= :: Bottom -> Bottom -> Bool
> :: Bottom -> Bottom -> Bool
$c> :: Bottom -> Bottom -> Bool
<= :: Bottom -> Bottom -> Bool
$c<= :: Bottom -> Bottom -> Bool
< :: Bottom -> Bottom -> Bool
$c< :: Bottom -> Bottom -> Bool
compare :: Bottom -> Bottom -> Ordering
$ccompare :: Bottom -> Bottom -> Ordering
$cp1Ord :: Eq Bottom
Ord, b -> Bottom -> Bottom
NonEmpty Bottom -> Bottom
Bottom -> Bottom -> Bottom
(Bottom -> Bottom -> Bottom)
-> (NonEmpty Bottom -> Bottom)
-> (forall b. Integral b => b -> Bottom -> Bottom)
-> Semigroup Bottom
forall b. Integral b => b -> Bottom -> Bottom
forall a.
(a -> a -> a)
-> (NonEmpty a -> a)
-> (forall b. Integral b => b -> a -> a)
-> Semigroup a
stimes :: b -> Bottom -> Bottom
$cstimes :: forall b. Integral b => b -> Bottom -> Bottom
sconcat :: NonEmpty Bottom -> Bottom
$csconcat :: NonEmpty Bottom -> Bottom
<> :: Bottom -> Bottom -> Bottom
$c<> :: Bottom -> Bottom -> Bottom
Semigroup, Semigroup Bottom
Bottom
Semigroup Bottom
-> Bottom
-> (Bottom -> Bottom -> Bottom)
-> ([Bottom] -> Bottom)
-> Monoid Bottom
[Bottom] -> Bottom
Bottom -> Bottom -> Bottom
forall a.
Semigroup a -> a -> (a -> a -> a) -> ([a] -> a) -> Monoid a
mconcat :: [Bottom] -> Bottom
$cmconcat :: [Bottom] -> Bottom
mappend :: Bottom -> Bottom -> Bottom
$cmappend :: Bottom -> Bottom -> Bottom
mempty :: Bottom
$cmempty :: Bottom
$cp1Monoid :: Semigroup Bottom
Monoid, Bottom -> ()
(Bottom -> ()) -> NFData Bottom
forall a. (a -> ()) -> NFData a
rnf :: Bottom -> ()
$crnf :: Bottom -> ()
NFData)

-- | 'unBottom' unwraps the underlying 'ByteString'.
unBottom :: Bottom -> ByteString
unBottom :: Bottom -> ByteString
unBottom (Bottom ByteString
bs) = ByteString
bs

-- Value characters for encoding.
singleton :: Char -> ByteString
singleton :: Char -> ByteString
singleton = Text -> ByteString
encodeUtf8 (Text -> ByteString) -> (Char -> Text) -> Char -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Text
T.singleton

twoHundred :: ByteString
twoHundred :: ByteString
twoHundred = Char -> ByteString
singleton Char
'\x1FAC2'

fifty :: ByteString
fifty :: ByteString
fifty = Char -> ByteString
singleton Char
'\x1F496'

ten :: ByteString
ten :: ByteString
ten = Char -> ByteString
singleton Char
'\x2728'

five :: ByteString
five :: ByteString
five = Char -> ByteString
singleton Char
'\x1F97A'

one :: ByteString
one :: ByteString
one = Char -> ByteString
singleton Char
'\x002C'

zero :: ByteString
zero :: ByteString
zero = Text -> ByteString
encodeUtf8 (Text -> ByteString) -> Text -> ByteString
forall a b. (a -> b) -> a -> b
$ String -> Text
T.pack [Char
'\x2764', Char
'\xFE0F']

separator :: ByteString
separator :: ByteString
separator = Text -> ByteString
encodeUtf8 (Text -> ByteString) -> Text -> ByteString
forall a b. (a -> b) -> a -> b
$ String -> Text
T.pack [Char
'\x1F449', Char
'\x1F448']

-- Decoding functions.

-- | 'decode' decodes a 'Bottom' into its corresponding Unicode 'Text'.
decode :: Bottom -> Text
decode :: Bottom -> Text
decode (Bottom ByteString
bs) = case ByteString -> Either String Text
decode' ByteString
bs of
  Right Text
r -> Text
r
  Left String
err -> String -> Text
forall a. HasCallStack => String -> a
error (String -> Text) -> String -> Text
forall a b. (a -> b) -> a -> b
$ String
"Data.Encoding.Bottom.decode: malformed Bottom: " String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
err

type Parser = Parsec Void ByteString

-- | 'decode'' decodes an arbitrary Bottom-encoded 'ByteString' into a 'Text',
-- or returns a parse error message if the 'ByteString' is malformed.
decode' :: ByteString -> Either String Text
decode' :: ByteString -> Either String Text
decode' ByteString
bs = case Parsec Void ByteString Text
-> String
-> ByteString
-> Either (ParseErrorBundle ByteString Void) Text
forall e s a.
Parsec e s a -> String -> s -> Either (ParseErrorBundle s e) a
runParser Parsec Void ByteString Text
bottomParser String
"" ByteString
bs of
  Left ParseErrorBundle ByteString Void
err -> String -> Either String Text
forall a b. a -> Either a b
Left (String -> Either String Text) -> String -> Either String Text
forall a b. (a -> b) -> a -> b
$ ParseErrorBundle ByteString Void -> String
renderError ParseErrorBundle ByteString Void
err
  Right Text
r -> Text -> Either String Text
forall a b. b -> Either a b
Right Text
r
  where
    bottomParser :: Parser Text
    bottomParser :: Parsec Void ByteString Text
bottomParser = ByteString -> Text
decodeUtf8 (ByteString -> Text) -> ([Word8] -> ByteString) -> [Word8] -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Word8] -> ByteString
BS.pack ([Word8] -> Text)
-> ParsecT Void ByteString Identity [Word8]
-> Parsec Void ByteString Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (([Int] -> [Word8])
-> ParsecT Void ByteString Identity [Int]
-> ParsecT Void ByteString Identity [Word8]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (([Int] -> [Word8])
 -> ParsecT Void ByteString Identity [Int]
 -> ParsecT Void ByteString Identity [Word8])
-> ((Int -> Word8) -> [Int] -> [Word8])
-> (Int -> Word8)
-> ParsecT Void ByteString Identity [Int]
-> ParsecT Void ByteString Identity [Word8]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Int -> Word8) -> [Int] -> [Word8]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap) Int -> Word8
forall a. Enum a => Int -> a
toEnum (Parser Int
groupParser Parser Int
-> ParsecT Void ByteString Identity ()
-> ParsecT Void ByteString Identity [Int]
forall (m :: * -> *) a end. MonadPlus m => m a -> m end -> m [a]
`manyTill` ParsecT Void ByteString Identity ()
forall e s (m :: * -> *). MonadParsec e s m => m ()
eof)

    groupParser :: Parser Int
    groupParser :: Parser Int
groupParser = Parser Int
parseNull Parser Int -> Parser Int -> Parser Int
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Int
parseValues
      where
        parseNull :: Parser Int
parseNull = Parser Int
zeroParser Parser Int
-> ParsecT Void ByteString Identity ()
-> ParsecT Void ByteString Identity ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ParsecT Void ByteString Identity ()
separatorParser ParsecT Void ByteString Identity () -> Parser Int -> Parser Int
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Int -> Parser Int
forall (m :: * -> *) a. Monad m => a -> m a
return Int
0
        parseValues :: Parser Int
parseValues = [Int] -> Int
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum ([Int] -> Int)
-> ParsecT Void ByteString Identity [Int] -> Parser Int
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Parser Int
twoHundredParser Parser Int -> Parser Int -> Parser Int
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Int
fiftyParser Parser Int -> Parser Int -> Parser Int
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Int
tenParser Parser Int -> Parser Int -> Parser Int
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Int
fiveParser Parser Int -> Parser Int -> Parser Int
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Int
oneParser) Parser Int
-> ParsecT Void ByteString Identity ()
-> ParsecT Void ByteString Identity [Int]
forall (m :: * -> *) a end. MonadPlus m => m a -> m end -> m [a]
`someTill` ParsecT Void ByteString Identity ()
separatorParser

    twoHundredParser :: Parser Int
    twoHundredParser :: Parser Int
twoHundredParser = Tokens ByteString
-> ParsecT Void ByteString Identity (Tokens ByteString)
forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
chunk ByteString
Tokens ByteString
twoHundred ParsecT Void ByteString Identity ByteString
-> Parser Int -> Parser Int
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Int -> Parser Int
forall (m :: * -> *) a. Monad m => a -> m a
return Int
200

    fiftyParser :: Parser Int
    fiftyParser :: Parser Int
fiftyParser = Tokens ByteString
-> ParsecT Void ByteString Identity (Tokens ByteString)
forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
chunk ByteString
Tokens ByteString
fifty ParsecT Void ByteString Identity ByteString
-> Parser Int -> Parser Int
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Int -> Parser Int
forall (m :: * -> *) a. Monad m => a -> m a
return Int
50

    tenParser :: Parser Int
    tenParser :: Parser Int
tenParser = Tokens ByteString
-> ParsecT Void ByteString Identity (Tokens ByteString)
forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
chunk ByteString
Tokens ByteString
ten ParsecT Void ByteString Identity ByteString
-> Parser Int -> Parser Int
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Int -> Parser Int
forall (m :: * -> *) a. Monad m => a -> m a
return Int
10

    fiveParser :: Parser Int
    fiveParser :: Parser Int
fiveParser = Tokens ByteString
-> ParsecT Void ByteString Identity (Tokens ByteString)
forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
chunk ByteString
Tokens ByteString
five ParsecT Void ByteString Identity ByteString
-> Parser Int -> Parser Int
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Int -> Parser Int
forall (m :: * -> *) a. Monad m => a -> m a
return Int
5

    oneParser :: Parser Int
    oneParser :: Parser Int
oneParser = Tokens ByteString
-> ParsecT Void ByteString Identity (Tokens ByteString)
forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
chunk ByteString
Tokens ByteString
one ParsecT Void ByteString Identity ByteString
-> Parser Int -> Parser Int
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Int -> Parser Int
forall (m :: * -> *) a. Monad m => a -> m a
return Int
1

    zeroParser :: Parser Int
    zeroParser :: Parser Int
zeroParser = Tokens ByteString
-> ParsecT Void ByteString Identity (Tokens ByteString)
forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
chunk ByteString
Tokens ByteString
zero ParsecT Void ByteString Identity ByteString
-> Parser Int -> Parser Int
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Int -> Parser Int
forall (m :: * -> *) a. Monad m => a -> m a
return Int
0

    separatorParser :: Parser ()
    separatorParser :: ParsecT Void ByteString Identity ()
separatorParser = ParsecT Void ByteString Identity ByteString
-> ParsecT Void ByteString Identity ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (ParsecT Void ByteString Identity ByteString
 -> ParsecT Void ByteString Identity ())
-> ParsecT Void ByteString Identity ByteString
-> ParsecT Void ByteString Identity ()
forall a b. (a -> b) -> a -> b
$ Tokens ByteString
-> ParsecT Void ByteString Identity (Tokens ByteString)
forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
chunk ByteString
Tokens ByteString
separator

    -- Custom error messages, because the default error messages print raw
    -- ByteStrings and don't render correctly.
    renderError :: ParseErrorBundle ByteString Void -> String
    renderError :: ParseErrorBundle ByteString Void -> String
renderError ParseErrorBundle {bundleErrors :: forall s e. ParseErrorBundle s e -> NonEmpty (ParseError s e)
bundleErrors = (TrivialError Int
offset Maybe (ErrorItem (Token ByteString))
unexpected Set (ErrorItem (Token ByteString))
expected) :| []} =
      String
unexpectedMessage String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
expectedMessage String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
" at offset " String -> ShowS
forall a. Semigroup a => a -> a -> a
<> Int -> String
forall a. Show a => a -> String
show Int
offset
      where
        renderErrorItem :: ErrorItem (Token ByteString) -> String
        renderErrorItem :: ErrorItem (Token ByteString) -> String
renderErrorItem (Tokens NonEmpty (Token ByteString)
tokens) = Text -> String
T.unpack (Text -> String) -> Text -> String
forall a b. (a -> b) -> a -> b
$ Text
"\"" Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> ByteString -> Text
decodeUtf8 ([Word8] -> ByteString
BS.pack ([Word8] -> ByteString) -> [Word8] -> ByteString
forall a b. (a -> b) -> a -> b
$ NonEmpty Word8 -> [Word8]
forall a. NonEmpty a -> [a]
NE.toList NonEmpty Word8
NonEmpty (Token ByteString)
tokens) Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
"\""
        renderErrorItem (Label NonEmpty Char
name) = NonEmpty Char -> String
forall a. NonEmpty a -> [a]
NE.toList NonEmpty Char
name
        renderErrorItem ErrorItem (Token ByteString)
EndOfInput = String
"end of input"

        unexpectedMessage :: String
unexpectedMessage = case Maybe (ErrorItem (Token ByteString))
unexpected of
          Just ErrorItem (Token ByteString)
unx -> String
"unexpected " String -> ShowS
forall a. Semigroup a => a -> a -> a
<> ErrorItem (Token ByteString) -> String
renderErrorItem ErrorItem (Token ByteString)
unx String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
": "
          Maybe (ErrorItem (Token ByteString))
Nothing -> String
""

        expectedMessage :: String
expectedMessage = String
"expected " String -> ShowS
forall a. Semigroup a => a -> a -> a
<> (if [String] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [String]
expecteds Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
2 then String
"one of " else String
"") String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String -> [String] -> String
forall a. [a] -> [[a]] -> [a]
intercalate String
", " [String]
expecteds
          where
            expecteds :: [String]
expecteds = ErrorItem Word8 -> String
ErrorItem (Token ByteString) -> String
renderErrorItem (ErrorItem Word8 -> String) -> [ErrorItem Word8] -> [String]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Set (ErrorItem Word8) -> [ErrorItem Word8]
forall (t :: * -> *) a. Foldable t => t a -> [a]
F.toList Set (ErrorItem Word8)
Set (ErrorItem (Token ByteString))
expected
    renderError ParseErrorBundle ByteString Void
err = ParseErrorBundle ByteString Void -> String
forall s e.
(VisualStream s, TraversableStream s, ShowErrorComponent e) =>
ParseErrorBundle s e -> String
errorBundlePretty ParseErrorBundle ByteString Void
err

-- Encoding functions.

-- | 'encode' takes a 'Text', and encodes it into a 'Bottom'. To get at the
-- underlying 'ByteString', unwrap the returned value with 'unBottom'.
encode :: Text -> Bottom
encode :: Text -> Bottom
encode = ByteString -> Bottom
Bottom (ByteString -> Bottom) -> (Text -> ByteString) -> Text -> Bottom
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Word8 -> ByteString) -> ByteString -> ByteString
BS.concatMap Word8 -> ByteString
encodeByte (ByteString -> ByteString)
-> (Text -> ByteString) -> Text -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> ByteString
encodeUtf8

encodeByte :: Word8 -> ByteString
encodeByte :: Word8 -> ByteString
encodeByte Word8
b
  | Word8
b Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
forall a. Bits a => a
zeroBits = ByteString
zero ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
separator
  | Bool
otherwise = [ByteString] -> ByteString
BS.concat ((Int -> Maybe (ByteString, Int)) -> Int -> [ByteString]
forall b a. (b -> Maybe (a, b)) -> b -> [a]
unfoldr Int -> Maybe (ByteString, Int)
encodeByte' (Word8 -> Int
forall a. Enum a => a -> Int
fromEnum Word8
b)) ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
separator
  where
    encodeByte' :: Int -> Maybe (ByteString, Int)
    encodeByte' :: Int -> Maybe (ByteString, Int)
encodeByte' Int
n
      | Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
200 = (ByteString, Int) -> Maybe (ByteString, Int)
forall a. a -> Maybe a
Just (ByteString
twoHundred, Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
200)
      | Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
50 = (ByteString, Int) -> Maybe (ByteString, Int)
forall a. a -> Maybe a
Just (ByteString
fifty, Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
50)
      | Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
10 = (ByteString, Int) -> Maybe (ByteString, Int)
forall a. a -> Maybe a
Just (ByteString
ten, Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
10)
      | Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
5 = (ByteString, Int) -> Maybe (ByteString, Int)
forall a. a -> Maybe a
Just (ByteString
five, Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
5)
      | Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
1 = (ByteString, Int) -> Maybe (ByteString, Int)
forall a. a -> Maybe a
Just (ByteString
one, Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
      | Int
n Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 = Maybe (ByteString, Int)
forall a. Maybe a
Nothing
      | Bool
otherwise = String -> Maybe (ByteString, Int)
forall a. HasCallStack => String -> a
error String
"Data.Encoding.Bottom.encodeByte': impossible: unsigned byte is negative"