module Web.Willow.Common.Encoding.ShiftJis
( decoder
, encoder
) where
import qualified Control.Applicative as A
import qualified Data.ByteString.Short as BS.SH
import qualified Data.Char as C
import qualified Data.Tuple as U
import qualified Data.Word as W
import Web.Willow.Common.Parser
import Web.Willow.Common.Parser.Switch
import Web.Willow.Common.Encoding.Common
import Web.Willow.Common.Encoding.EucJp
( decodeIndex0208, encodeIndex0208 )
decoder :: TextBuilder
decoder :: TextBuilder
decoder = StateT (Confidence, ()) (Parser ByteString) Word8
forall (m :: * -> *) stream token.
MonadParser m stream token =>
m token
next StateT (Confidence, ()) (Parser ByteString) Word8
-> (Word8 -> TextBuilder) -> TextBuilder
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= [SwitchCase
Word8
(StateT (Confidence, ()) (Parser ByteString))
(DecoderError String)]
-> Word8 -> TextBuilder
forall (m :: * -> *) test out.
Alternative m =>
[SwitchCase test m out] -> test -> m out
switch
[ (Word8 -> Bool)
-> (Word8 -> TextBuilder)
-> SwitchCase
Word8
(StateT (Confidence, ()) (Parser ByteString))
(DecoderError String)
forall test (m :: * -> *) out.
(test -> Bool) -> (test -> m out) -> SwitchCase test m out
If Word8 -> Bool
isAsciiByte Word8 -> TextBuilder
forall state. Word8 -> StateTextBuilder state
toUnicode1
, (Word8 -> Bool)
-> TextBuilder
-> SwitchCase
Word8
(StateT (Confidence, ()) (Parser ByteString))
(DecoderError String)
forall test (m :: * -> *) out.
(test -> Bool) -> m out -> SwitchCase test m out
If_ (Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
0x80) (TextBuilder
-> SwitchCase
Word8
(StateT (Confidence, ()) (Parser ByteString))
(DecoderError String))
-> TextBuilder
-> SwitchCase
Word8
(StateT (Confidence, ()) (Parser ByteString))
(DecoderError String)
forall a b. (a -> b) -> a -> b
$ [Word8] -> Char -> TextBuilder
forall state. [Word8] -> Char -> StateTextBuilder state
emit [Word8
0x80] Char
'\x80'
, (Word8 -> Bool)
-> (Word8 -> TextBuilder)
-> SwitchCase
Word8
(StateT (Confidence, ()) (Parser ByteString))
(DecoderError String)
forall test (m :: * -> *) out.
(test -> Bool) -> (test -> m out) -> SwitchCase test m out
If (Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
0x9F) Word8 -> TextBuilder
decoder'
, (Word8 -> Bool)
-> (Word8 -> TextBuilder)
-> SwitchCase
Word8
(StateT (Confidence, ()) (Parser ByteString))
(DecoderError String)
forall test (m :: * -> *) out.
(test -> Bool) -> (test -> m out) -> SwitchCase test m out
If (Word8 -> Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> a -> Bool
range Word8
0xA1 Word8
0xDF) ((Word8 -> TextBuilder)
-> SwitchCase
Word8
(StateT (Confidence, ()) (Parser ByteString))
(DecoderError String))
-> (Word8 -> TextBuilder)
-> SwitchCase
Word8
(StateT (Confidence, ()) (Parser ByteString))
(DecoderError String)
forall a b. (a -> b) -> a -> b
$ \Word8
b ->
[Word8] -> Char -> TextBuilder
forall state. [Word8] -> Char -> StateTextBuilder state
emit [Word8
b] (Char -> TextBuilder) -> (Int -> Char) -> Int -> TextBuilder
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Char
forall a. Enum a => Int -> a
toEnum (Int -> TextBuilder) -> Int -> TextBuilder
forall a b. (a -> b) -> a -> b
$ Int
0xFEC0 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
b
, (Word8 -> Bool)
-> (Word8 -> TextBuilder)
-> SwitchCase
Word8
(StateT (Confidence, ()) (Parser ByteString))
(DecoderError String)
forall test (m :: * -> *) out.
(test -> Bool) -> (test -> m out) -> SwitchCase test m out
If (Word8 -> Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> a -> Bool
range Word8
0xE0 Word8
0xFC) Word8 -> TextBuilder
decoder'
, (Word8 -> TextBuilder)
-> SwitchCase
Word8
(StateT (Confidence, ()) (Parser ByteString))
(DecoderError String)
forall test (m :: * -> *) out.
(test -> m out) -> SwitchCase test m out
Else Word8 -> TextBuilder
forall state. Word8 -> StateTextBuilder state
decoderFailure1
]
decoder' :: W.Word8 -> TextBuilder
decoder' :: Word8 -> TextBuilder
decoder' Word8
lead = StateT (Confidence, ()) (Parser ByteString) Word8
forall (m :: * -> *) stream token.
MonadParser m stream token =>
m token
next StateT (Confidence, ()) (Parser ByteString) Word8
-> (Word8 -> TextBuilder) -> TextBuilder
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= [SwitchCase
Word8
(StateT (Confidence, ()) (Parser ByteString))
(DecoderError String)]
-> Word8 -> TextBuilder
forall (m :: * -> *) test out.
Alternative m =>
[SwitchCase test m out] -> test -> m out
switch
[ (Word8 -> Bool)
-> (Word8 -> TextBuilder)
-> SwitchCase
Word8
(StateT (Confidence, ()) (Parser ByteString))
(DecoderError String)
forall test (m :: * -> *) out.
(test -> Bool) -> (test -> m out) -> SwitchCase test m out
If Word8 -> Bool
isAsciiByte ((Word8 -> TextBuilder)
-> SwitchCase
Word8
(StateT (Confidence, ()) (Parser ByteString))
(DecoderError String))
-> (Word8 -> TextBuilder)
-> SwitchCase
Word8
(StateT (Confidence, ()) (Parser ByteString))
(DecoderError String)
forall a b. (a -> b) -> a -> b
$ Word8 -> Word8 -> TextBuilder
decodeIndex Word8
lead
, (Word8 -> TextBuilder)
-> SwitchCase
Word8
(StateT (Confidence, ()) (Parser ByteString))
(DecoderError String)
forall test (m :: * -> *) out.
(test -> m out) -> SwitchCase test m out
Else ((Word8 -> TextBuilder)
-> SwitchCase
Word8
(StateT (Confidence, ()) (Parser ByteString))
(DecoderError String))
-> (Word8 -> TextBuilder)
-> SwitchCase
Word8
(StateT (Confidence, ()) (Parser ByteString))
(DecoderError String)
forall a b. (a -> b) -> a -> b
$ Word8 -> Word8 -> TextBuilder
decodeIndex Word8
lead
]
decodeIndex :: W.Word8 -> W.Word8 -> TextBuilder
decodeIndex :: Word8 -> Word8 -> TextBuilder
decodeIndex Word8
lead Word8
trail = case Word
lead' Word -> Word -> Word
forall a. Num a => a -> a -> a
* Word
188 Word -> Word -> Word
forall a. Num a => a -> a -> a
+ Word
trail' of
Word
educ | Word -> Word -> Word -> Bool
forall a. Ord a => a -> a -> a -> Bool
range Word
8836 Word
10715 Word
educ ->
[Word8] -> Char -> TextBuilder
forall state. [Word8] -> Char -> StateTextBuilder state
emit [Word8
lead, Word8
trail] (Char -> TextBuilder) -> (Int -> Char) -> Int -> TextBuilder
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Char
forall a. Enum a => Int -> a
toEnum (Int -> TextBuilder) -> Int -> TextBuilder
forall a b. (a -> b) -> a -> b
$ Int
0xBD7C Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Word -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word
educ
Word
pointer ->
TextBuilder -> (Char -> TextBuilder) -> Maybe Char -> TextBuilder
forall b a. b -> (a -> b) -> Maybe a -> b
maybe TextBuilder
forall state.
StateT
(Confidence, state) (Parser ByteString) (DecoderError String)
failure ([Word8] -> Char -> TextBuilder
forall state. [Word8] -> Char -> StateTextBuilder state
emit [Word8
lead, Word8
trail]) (Maybe Char -> TextBuilder) -> Maybe Char -> TextBuilder
forall a b. (a -> b) -> a -> b
$ Word -> Maybe Char
decodeIndex0208 Word
pointer
where lead' :: Word
lead'
| Word8
lead Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
0xA0 = Word8 -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
lead Word -> Word -> Word
forall a. Num a => a -> a -> a
- Word
0x81
| Bool
otherwise = Word8 -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
lead Word -> Word -> Word
forall a. Num a => a -> a -> a
- Word
0xC1
trail' :: Word
trail'
| Word8
trail Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
0x7F = Word8 -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
trail Word -> Word -> Word
forall a. Num a => a -> a -> a
- Word
0x40
| Bool
otherwise = Word8 -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
trail Word -> Word -> Word
forall a. Num a => a -> a -> a
- Word
0x41
failure :: StateT
(Confidence, state) (Parser ByteString) (DecoderError String)
failure
| Word8
trail Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
0x7F = StateT
(Confidence, state) (Parser ByteString) (DecoderError String)
forall (f :: * -> *) a. Alternative f => f a
A.empty
| Bool
otherwise = [Word8]
-> StateT
(Confidence, state) (Parser ByteString) (DecoderError String)
forall state. [Word8] -> StateTextBuilder state
decoderFailure [Word8
lead, Word8
trail]
encoder :: BinaryBuilder
encoder :: BinaryBuilder
encoder = StateT () (Parser Text) Char
forall (m :: * -> *) stream token.
MonadParser m stream token =>
m token
next StateT () (Parser Text) Char
-> (Char -> BinaryBuilder) -> BinaryBuilder
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= [SwitchCase
Char (StateT () (Parser Text)) (EncoderError ShortByteString)]
-> Char -> BinaryBuilder
forall (m :: * -> *) test out.
Alternative m =>
[SwitchCase test m out] -> test -> m out
switch
[ (Char -> Bool)
-> (Char -> BinaryBuilder)
-> SwitchCase
Char (StateT () (Parser Text)) (EncoderError ShortByteString)
forall test (m :: * -> *) out.
(test -> Bool) -> (test -> m out) -> SwitchCase test m out
If Char -> Bool
C.isAscii Char -> BinaryBuilder
forall state. Char -> StateBinaryBuilder state
fromAscii
, (Char -> Bool)
-> BinaryBuilder
-> SwitchCase
Char (StateT () (Parser Text)) (EncoderError ShortByteString)
forall test (m :: * -> *) out.
(test -> Bool) -> m out -> SwitchCase test m out
If_ (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\x80') (BinaryBuilder
-> SwitchCase
Char (StateT () (Parser Text)) (EncoderError ShortByteString))
-> BinaryBuilder
-> SwitchCase
Char (StateT () (Parser Text)) (EncoderError ShortByteString)
forall a b. (a -> b) -> a -> b
$ EncoderError ShortByteString -> BinaryBuilder
forall (m :: * -> *) a. Monad m => a -> m a
return (ShortByteString -> EncoderError ShortByteString
forall (f :: * -> *) a. Applicative f => a -> f a
pure (ShortByteString -> EncoderError ShortByteString)
-> ShortByteString -> EncoderError ShortByteString
forall a b. (a -> b) -> a -> b
$ [Word8] -> ShortByteString
BS.SH.pack [Word8
0x80])
, (Char -> Bool)
-> BinaryBuilder
-> SwitchCase
Char (StateT () (Parser Text)) (EncoderError ShortByteString)
forall test (m :: * -> *) out.
(test -> Bool) -> m out -> SwitchCase test m out
If_ (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\xA5') (BinaryBuilder
-> SwitchCase
Char (StateT () (Parser Text)) (EncoderError ShortByteString))
-> BinaryBuilder
-> SwitchCase
Char (StateT () (Parser Text)) (EncoderError ShortByteString)
forall a b. (a -> b) -> a -> b
$ EncoderError ShortByteString -> BinaryBuilder
forall (m :: * -> *) a. Monad m => a -> m a
return (ShortByteString -> EncoderError ShortByteString
forall (f :: * -> *) a. Applicative f => a -> f a
pure (ShortByteString -> EncoderError ShortByteString)
-> ShortByteString -> EncoderError ShortByteString
forall a b. (a -> b) -> a -> b
$ [Word8] -> ShortByteString
BS.SH.pack [Word8
0x5C])
, (Char -> Bool)
-> BinaryBuilder
-> SwitchCase
Char (StateT () (Parser Text)) (EncoderError ShortByteString)
forall test (m :: * -> *) out.
(test -> Bool) -> m out -> SwitchCase test m out
If_ (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\x203E') (BinaryBuilder
-> SwitchCase
Char (StateT () (Parser Text)) (EncoderError ShortByteString))
-> BinaryBuilder
-> SwitchCase
Char (StateT () (Parser Text)) (EncoderError ShortByteString)
forall a b. (a -> b) -> a -> b
$ EncoderError ShortByteString -> BinaryBuilder
forall (m :: * -> *) a. Monad m => a -> m a
return (ShortByteString -> EncoderError ShortByteString
forall (f :: * -> *) a. Applicative f => a -> f a
pure (ShortByteString -> EncoderError ShortByteString)
-> ShortByteString -> EncoderError ShortByteString
forall a b. (a -> b) -> a -> b
$ [Word8] -> ShortByteString
BS.SH.pack [Word8
0x7E])
, (Char -> Bool)
-> BinaryBuilder
-> SwitchCase
Char (StateT () (Parser Text)) (EncoderError ShortByteString)
forall test (m :: * -> *) out.
(test -> Bool) -> m out -> SwitchCase test m out
If_ (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\x2212') (BinaryBuilder
-> SwitchCase
Char (StateT () (Parser Text)) (EncoderError ShortByteString))
-> BinaryBuilder
-> SwitchCase
Char (StateT () (Parser Text)) (EncoderError ShortByteString)
forall a b. (a -> b) -> a -> b
$ Char -> BinaryBuilder
encodeIndex Char
'\xFF0D'
, (Char -> Bool)
-> (Char -> BinaryBuilder)
-> SwitchCase
Char (StateT () (Parser Text)) (EncoderError ShortByteString)
forall test (m :: * -> *) out.
(test -> Bool) -> (test -> m out) -> SwitchCase test m out
If (Char -> Char -> Char -> Bool
forall a. Ord a => a -> a -> a -> Bool
range Char
'\xFF61' Char
'\xFF9F') ((Char -> BinaryBuilder)
-> SwitchCase
Char (StateT () (Parser Text)) (EncoderError ShortByteString))
-> (Char -> BinaryBuilder)
-> SwitchCase
Char (StateT () (Parser Text)) (EncoderError ShortByteString)
forall a b. (a -> b) -> a -> b
$
EncoderError ShortByteString -> BinaryBuilder
forall (m :: * -> *) a. Monad m => a -> m a
return (EncoderError ShortByteString -> BinaryBuilder)
-> (Char -> EncoderError ShortByteString) -> Char -> BinaryBuilder
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShortByteString -> EncoderError ShortByteString
forall (f :: * -> *) a. Applicative f => a -> f a
pure (ShortByteString -> EncoderError ShortByteString)
-> (Char -> ShortByteString)
-> Char
-> EncoderError ShortByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Word8] -> ShortByteString
BS.SH.pack ([Word8] -> ShortByteString)
-> (Char -> [Word8]) -> Char -> ShortByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Word8 -> [Word8] -> [Word8]
forall a. a -> [a] -> [a]
: []) (Word8 -> [Word8]) -> (Char -> Word8) -> Char -> [Word8]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word8) -> (Char -> Int) -> Char -> Word8
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Int -> Int
forall a. Num a => a -> a -> a
subtract Int
0xFEC0 (Int -> Int) -> (Char -> Int) -> Char -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Int
forall a. Enum a => a -> Int
fromEnum
, (Char -> BinaryBuilder)
-> SwitchCase
Char (StateT () (Parser Text)) (EncoderError ShortByteString)
forall test (m :: * -> *) out.
(test -> m out) -> SwitchCase test m out
Else Char -> BinaryBuilder
encodeIndex
]
encodeIndex :: Char -> BinaryBuilder
encodeIndex :: Char -> BinaryBuilder
encodeIndex Char
char = case Char -> Maybe Word
encodeIndexShiftJis Char
char of
Just Word
i ->
let (Word
lead, Word
trail) = Word -> Word -> (Word, Word)
forall a. Integral a => a -> a -> (a, a)
divMod Word
i Word
188
lead' :: Word
lead' = Word
lead Word -> Word -> Word
forall a. Num a => a -> a -> a
+ if Word
lead Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
< Word
0x1F then Word
0x81 else Word
0xC1
trail' :: Word
trail' = Word
trail Word -> Word -> Word
forall a. Num a => a -> a -> a
+ if Word
trail Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
< Word
0x3F then Word
0x40 else Word
0x41
in EncoderError ShortByteString -> BinaryBuilder
forall (m :: * -> *) a. Monad m => a -> m a
return (EncoderError ShortByteString -> BinaryBuilder)
-> (ShortByteString -> EncoderError ShortByteString)
-> ShortByteString
-> BinaryBuilder
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShortByteString -> EncoderError ShortByteString
forall (f :: * -> *) a. Applicative f => a -> f a
pure (ShortByteString -> BinaryBuilder)
-> ShortByteString -> BinaryBuilder
forall a b. (a -> b) -> a -> b
$ [Word8] -> ShortByteString
BS.SH.pack [Word -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word
lead', Word -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word
trail']
Maybe Word
Nothing -> Char -> BinaryBuilder
forall state out. Char -> StateEncoder state (EncoderError out)
encoderFailure Char
char
encodeIndexShiftJis :: Char -> Maybe Word
encodeIndexShiftJis :: Char -> Maybe Word
encodeIndexShiftJis Char
char = Char -> Maybe Word
encodeIndex0208 Char
char Maybe Word -> (Word -> Maybe Word) -> Maybe Word
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Word -> Maybe Word
redirect
where redirect :: Word -> Maybe Word
redirect Word
i
| Word
i Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
>= Word
8272 Bool -> Bool -> Bool
&& Word
i Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
<= Word
8835 =
IO (MemoizationTable Char (Maybe Word))
-> Maybe (IO (MemoizationTable Word (Maybe Char)))
-> Char
-> (Char -> Maybe Word)
-> Maybe Word
forall k v.
(Eq k, Hashable k, Eq v, Hashable v) =>
IO (MemoizationTable k (Maybe v))
-> Maybe (IO (MemoizationTable v (Maybe k)))
-> k
-> (k -> Maybe v)
-> Maybe v
lookupMemoizedIndex IO (MemoizationTable Char (Maybe Word))
encodeIndexShiftJisM Maybe (IO (MemoizationTable Word (Maybe Char)))
forall a. Maybe a
Nothing Char
char Char -> Maybe Word
readEncodeIndexShiftJis
| Bool
otherwise = Word -> Maybe Word
forall a. a -> Maybe a
Just Word
i
encodeIndexShiftJisM :: EncoderMemoTable
encodeIndexShiftJisM :: IO (MemoizationTable Char (Maybe Word))
encodeIndexShiftJisM = IO (MemoizationTable Char (Maybe Word))
forall k v. IO (MemoizationTable k v)
newMemoizationTable
{-# NOINLINE encodeIndexShiftJisM #-}
readEncodeIndexShiftJis :: Char -> Maybe Word
readEncodeIndexShiftJis :: Char -> Maybe Word
readEncodeIndexShiftJis Char
char = Char -> [(Char, Word)] -> Maybe Word
forall k v. Ord k => k -> [(k, v)] -> Maybe v
search Char
char ([(Char, Word)] -> Maybe Word)
-> ([(Word, Char)] -> [(Char, Word)])
-> [(Word, Char)]
-> Maybe Word
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Word, Char) -> (Char, Word)) -> [(Word, Char)] -> [(Char, Word)]
forall a b. (a -> b) -> [a] -> [b]
map (Word, Char) -> (Char, Word)
forall a b. (a, b) -> (b, a)
U.swap ([(Word, Char)] -> Maybe Word) -> [(Word, Char)] -> Maybe Word
forall a b. (a -> b) -> a -> b
$ ((Word, Char) -> Bool) -> String -> [(Word, Char)]
loadIndex' (Word, Char) -> Bool
forall a b. (Ord a, Num a) => (a, b) -> Bool
filterIndex String
"jis0208"
where filterIndex :: (a, b) -> Bool
filterIndex (a
i, b
_) = a
i a -> a -> Bool
forall a. Ord a => a -> a -> Bool
< a
8272 Bool -> Bool -> Bool
|| a
i a -> a -> Bool
forall a. Ord a => a -> a -> Bool
> a
8835