{-# LANGUAGE DeriveFoldable #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE DeriveTraversable #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TypeFamilies #-}
module Waargonaut.Types.JChar
(
HexDigit4 (..)
, HasHexDigit4 (..)
, JChar (..)
, AsJChar (..)
, HasJChar (..)
, JCharEscaped (..)
, AsJCharEscaped (..)
, JCharUnescaped (..)
, AsJCharUnescaped (..)
, parseJChar
, parseJCharEscaped
, parseJCharUnescaped
, jCharBuilder
, jCharToChar
, utf8CharToJChar
, jCharToUtf8Char
) where
import Prelude (Char, Eq, Int, Ord, Show,
otherwise, quotRem, (&&),
(*), (+), (-), (/=), (<=), (==),
(>=))
import Control.Category (id, (.))
import Control.Lens (Lens', Prism', Rewrapped,
Wrapped (..), failing, has, iso,
prism, prism', to, ( # ), (^?),
_Just)
import Control.Applicative (pure, (*>), (<$>), (<*>), (<|>))
import Control.Monad ((=<<))
import Control.Error.Util (note)
import Data.Bits ((.&.))
import Data.Char (chr, ord)
import Data.Either (Either (..))
import Data.Foldable (Foldable, any, asum, foldMap,
foldl)
import Data.Function (const, ($))
import Data.Functor (Functor)
import Data.Maybe (Maybe (..), fromMaybe)
import Data.Semigroup ((<>))
import Data.Traversable (Traversable, traverse)
import qualified Data.Text.Internal as Text
import Data.Digit (HeXDigit, HeXaDeCiMaL)
import qualified Data.Digit as D
import Data.ByteString.Builder (Builder)
import qualified Data.ByteString.Builder as BB
import Waargonaut.Types.Whitespace (Whitespace (..),
escapedWhitespaceChar,
unescapedWhitespaceChar,
_WhitespaceChar)
import Text.Parser.Char (CharParsing, char, satisfy)
data HexDigit4 d =
HexDigit4 d d d d
deriving (Eq, Show, Ord, Functor, Foldable, Traversable)
class HasHexDigit4 c d | c -> d where
hexDigit4 :: Lens' c (HexDigit4 d)
instance HasHexDigit4 (HexDigit4 d) d where
hexDigit4 = id
newtype JCharUnescaped =
JCharUnescaped Char
deriving (Eq, Ord, Show)
instance JCharUnescaped ~ t => Rewrapped JCharUnescaped t
instance Wrapped JCharUnescaped where
type Unwrapped JCharUnescaped = Char
_Wrapped' = iso (\ (JCharUnescaped x) -> x) JCharUnescaped
class AsJCharUnescaped a where
_JCharUnescaped :: Prism' a JCharUnescaped
instance AsJCharUnescaped JCharUnescaped where
_JCharUnescaped = id
instance AsJCharUnescaped Char where
_JCharUnescaped = prism'
(\(JCharUnescaped c) -> c)
(\c -> if any ($ c) excluded then Nothing
else Just (JCharUnescaped c)
)
where
excluded =
[ (== '\NUL')
, (== '"')
, (== '\\')
, \x -> x >= '\x00' && x <= '\x1f'
]
data JCharEscaped digit
= QuotationMark
| ReverseSolidus
| Solidus
| Backspace
| WhiteSpace Whitespace
| Hex ( HexDigit4 digit )
deriving (Eq, Ord, Show, Functor, Foldable, Traversable)
class AsJCharEscaped r digit | r -> digit where
_JCharEscaped :: Prism' r (JCharEscaped digit)
_QuotationMark :: Prism' r ()
_ReverseSolidus :: Prism' r ()
_Solidus :: Prism' r ()
_Backspace :: Prism' r ()
_WhiteSpace :: Prism' r Whitespace
_Hex :: Prism' r (HexDigit4 digit)
_QuotationMark = _JCharEscaped . _QuotationMark
_ReverseSolidus = _JCharEscaped . _ReverseSolidus
_Solidus = _JCharEscaped . _Solidus
_Backspace = _JCharEscaped . _Backspace
_WhiteSpace = _JCharEscaped . _WhiteSpace
_Hex = _JCharEscaped . _Hex
instance AsJCharEscaped (JCharEscaped digit) digit where
_JCharEscaped = id
_QuotationMark = prism (const QuotationMark)
(\ x -> case x of
QuotationMark -> Right ()
_ -> Left x
)
_ReverseSolidus = prism (const ReverseSolidus)
(\ x -> case x of
ReverseSolidus -> Right ()
_ -> Left x
)
_Solidus = prism (const Solidus)
(\ x -> case x of
Solidus -> Right ()
_ -> Left x
)
_Backspace = prism (const Backspace)
(\ x -> case x of
Backspace -> Right ()
_ -> Left x
)
_WhiteSpace = prism WhiteSpace
(\ x -> case x of
WhiteSpace y1 -> Right y1
_ -> Left x
)
_Hex = prism Hex
(\ x -> case x of
Hex y1 -> Right y1
_ -> Left x
)
hexDigit4ToChar
:: HeXaDeCiMaL digit
=> HexDigit4 digit
-> Char
hexDigit4ToChar (HexDigit4 a b c d) =
chr (foldl (\acc x -> 16 * acc + (D.integralHexadecimal # x)) 0 [a,b,c,d])
sandblast :: Char -> Maybe (HexDigit4 HeXDigit)
sandblast x = if x >= '\x0' && x <= '\xffff'
then shuriken =<< traverse (^? D.integralHexadecimal) (lavawave 4 [] (bile (ord x)))
else Nothing
where
shuriken (a:b:c:d:_) = Just (HexDigit4 a b c d)
shuriken _ = Nothing
bile n = quotRem n 16
lavawave :: Int -> [Int] -> (Int,Int) -> [Int]
lavawave 0 acc _ = acc
lavawave n acc (0,0) = lavawave (n - 1) (0:acc) (0,0)
lavawave n acc (q,r) = lavawave (n - 1) (r:acc) (bile q)
{-# INLINE lavawave #-}
{-# INLINE sandblast #-}
instance AsJCharEscaped Char HeXDigit where
_JCharEscaped = prism
(\case
QuotationMark -> '"'
ReverseSolidus -> '\\'
Solidus -> '/'
Backspace -> '\b'
WhiteSpace wc -> escapedWhitespaceChar wc
Hex hd -> hexDigit4ToChar hd
)
(\c -> case c of
'"' -> Right QuotationMark
'\\' -> Right ReverseSolidus
'/' -> Right Solidus
'\b' -> Right Backspace
_ -> note c $ c ^? failing (_WhitespaceChar . to WhiteSpace) (to sandblast . _Just . to Hex)
)
data JChar digit
= EscapedJChar ( JCharEscaped digit )
| UnescapedJChar JCharUnescaped
deriving (Eq, Ord, Show, Functor, Foldable, Traversable)
class HasJChar c digit | c -> digit where
jChar :: Lens' c (JChar digit)
instance HasJChar (JChar digit) digit where
jChar = id
class AsJChar r digit | r -> digit where
_JChar :: Prism' r (JChar digit)
_EscapedJChar :: Prism' r (JCharEscaped digit)
_UnescapedJChar :: Prism' r JCharUnescaped
_EscapedJChar = _JChar . _EscapedJChar
_UnescapedJChar = _JChar . _UnescapedJChar
instance AsJChar (JChar digit) digit where
_JChar = id
_EscapedJChar = prism EscapedJChar
(\ x -> case x of
EscapedJChar y1 -> Right y1
_ -> Left x
)
_UnescapedJChar = prism UnescapedJChar
(\ x -> case x of
UnescapedJChar y1 -> Right y1
_ -> Left x
)
instance AsJCharEscaped (JChar digit) digit where
_JCharEscaped = _JChar . _JCharEscaped
instance AsJCharUnescaped (JChar digit) where
_JCharUnescaped = _JChar . _JCharUnescaped
instance AsJChar Char HeXDigit where
_JChar = prism
(\case
UnescapedJChar jcu -> _JCharUnescaped # jcu
EscapedJChar jce -> _JCharEscaped # jce
)
(\c -> note c $ c ^? failing
(_JCharUnescaped . to UnescapedJChar)
(_JCharEscaped . to EscapedJChar)
)
utf8SafeChar :: Char -> Maybe Char
utf8SafeChar c | ord c .&. 0x1ff800 /= 0xd800 = Just c
| otherwise = Nothing
utf8CharToJChar :: Char -> JChar HeXDigit
utf8CharToJChar c = fromMaybe scalarReplacement (Text.safe c ^? _JChar)
where scalarReplacement = EscapedJChar (Hex (HexDigit4 D.xf D.xf D.xf D.xd))
{-# INLINE utf8CharToJChar #-}
jCharToUtf8Char :: JChar HeXDigit -> Maybe Char
jCharToUtf8Char jc = utf8SafeChar (_JChar # jc)
{-# INLINE jCharToUtf8Char #-}
parseHexDigit4 ::
( CharParsing f, HeXaDeCiMaL digit ) =>
f ( HexDigit4 digit )
parseHexDigit4 = HexDigit4
<$> D.parseHeXaDeCiMaL
<*> D.parseHeXaDeCiMaL
<*> D.parseHeXaDeCiMaL
<*> D.parseHeXaDeCiMaL
parseJCharUnescaped ::
CharParsing f =>
f JCharUnescaped
parseJCharUnescaped =
JCharUnescaped <$> satisfy (has _JCharUnescaped)
parseJCharEscaped ::
(CharParsing f, HeXaDeCiMaL digit) =>
f ( JCharEscaped digit )
parseJCharEscaped =
let
z =
asum ((\(c, p) -> char c *> pure p) <$>
[
('"' , QuotationMark)
, ('\\', ReverseSolidus)
, ('/' , Solidus)
, ('b' , Backspace)
, (' ' , WhiteSpace Space)
, ('f' , WhiteSpace LineFeed)
, ('n' , WhiteSpace NewLine)
, ('r' , WhiteSpace CarriageReturn)
, ('t' , WhiteSpace HorizontalTab)
])
h =
Hex <$> (char 'u' *> parseHexDigit4)
in
char '\\' *> (z <|> h)
parseJChar ::
(CharParsing f, HeXaDeCiMaL digit) =>
f ( JChar digit )
parseJChar = asum
[ EscapedJChar <$> parseJCharEscaped
, UnescapedJChar <$> parseJCharUnescaped
]
jCharToChar
:: HeXaDeCiMaL digit
=> JChar digit
-> Char
jCharToChar (UnescapedJChar (JCharUnescaped c)) = c
jCharToChar (EscapedJChar jca) = case jca of
QuotationMark -> '"'
ReverseSolidus -> '\\'
Solidus -> '/'
Backspace -> '\b'
(WhiteSpace ws) -> _WhiteSpace # ws
Hex hexDig4 -> hexDigit4ToChar hexDig4
jCharBuilder
:: HeXaDeCiMaL digit
=> JChar digit
-> Builder
jCharBuilder (UnescapedJChar (JCharUnescaped c)) = BB.charUtf8 c
jCharBuilder (EscapedJChar jca) = BB.charUtf8 '\\' <> case jca of
QuotationMark -> BB.charUtf8 '"'
ReverseSolidus -> BB.charUtf8 '\\'
Solidus -> BB.charUtf8 '/'
Backspace -> BB.charUtf8 'b'
(WhiteSpace ws) -> BB.charUtf8 (unescapedWhitespaceChar ws)
Hex (HexDigit4 a b c d) -> BB.charUtf8 'u' <> foldMap hexChar [a,b,c,d]
where
hexChar =
BB.charUtf8 . (D.charHeXaDeCiMaL #)