module Web.Willow.Common.Encoding.EucKr
( decoder
, encoder
) where
import qualified Control.Applicative as A
import qualified Data.ByteString.Short as BS.SH
import qualified Data.Char as C
import qualified Data.Tuple as U
import qualified Data.Word as W
import Web.Willow.Common.Encoding.Common
import Web.Willow.Common.Parser
import Web.Willow.Common.Parser.Switch
decoder :: TextBuilder
decoder :: TextBuilder
decoder = StateT (Confidence, ()) (Parser ByteString) Word8
forall (m :: * -> *) stream token.
MonadParser m stream token =>
m token
next StateT (Confidence, ()) (Parser ByteString) Word8
-> (Word8 -> TextBuilder) -> TextBuilder
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= [SwitchCase
Word8
(StateT (Confidence, ()) (Parser ByteString))
(DecoderError String)]
-> Word8 -> TextBuilder
forall (m :: * -> *) test out.
Alternative m =>
[SwitchCase test m out] -> test -> m out
switch
[ (Word8 -> Bool)
-> (Word8 -> TextBuilder)
-> SwitchCase
Word8
(StateT (Confidence, ()) (Parser ByteString))
(DecoderError String)
forall test (m :: * -> *) out.
(test -> Bool) -> (test -> m out) -> SwitchCase test m out
If Word8 -> Bool
isAsciiByte Word8 -> TextBuilder
forall state. Word8 -> StateTextBuilder state
toUnicode1
, (Word8 -> Bool)
-> (Word8 -> TextBuilder)
-> SwitchCase
Word8
(StateT (Confidence, ()) (Parser ByteString))
(DecoderError String)
forall test (m :: * -> *) out.
(test -> Bool) -> (test -> m out) -> SwitchCase test m out
If (Word8 -> Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> a -> Bool
range Word8
0x81 Word8
0xFE) Word8 -> TextBuilder
decoder'
, (Word8 -> TextBuilder)
-> SwitchCase
Word8
(StateT (Confidence, ()) (Parser ByteString))
(DecoderError String)
forall test (m :: * -> *) out.
(test -> m out) -> SwitchCase test m out
Else Word8 -> TextBuilder
forall state. Word8 -> StateTextBuilder state
decoderFailure1
]
decoder' :: W.Word8 -> TextBuilder
decoder' :: Word8 -> TextBuilder
decoder' Word8
lead = StateT (Confidence, ()) (Parser ByteString) Word8
forall (m :: * -> *) stream token.
MonadParser m stream token =>
m token
next StateT (Confidence, ()) (Parser ByteString) Word8
-> (Word8 -> TextBuilder) -> TextBuilder
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= [SwitchCase
Word8
(StateT (Confidence, ()) (Parser ByteString))
(DecoderError String)]
-> Word8 -> TextBuilder
forall (m :: * -> *) test out.
Alternative m =>
[SwitchCase test m out] -> test -> m out
switch
[ (Word8 -> Bool)
-> (Word8 -> TextBuilder)
-> SwitchCase
Word8
(StateT (Confidence, ()) (Parser ByteString))
(DecoderError String)
forall test (m :: * -> *) out.
(test -> Bool) -> (test -> m out) -> SwitchCase test m out
If (Word8 -> Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> a -> Bool
range Word8
0x41 Word8
0xFE) ((Word8 -> TextBuilder)
-> SwitchCase
Word8
(StateT (Confidence, ()) (Parser ByteString))
(DecoderError String))
-> (Word8 -> TextBuilder)
-> SwitchCase
Word8
(StateT (Confidence, ()) (Parser ByteString))
(DecoderError String)
forall a b. (a -> b) -> a -> b
$ Word8 -> Word8 -> TextBuilder
lookupDecode Word8
lead
, (Word8 -> Bool)
-> (Word8 -> TextBuilder)
-> SwitchCase
Word8
(StateT (Confidence, ()) (Parser ByteString))
(DecoderError String)
forall test (m :: * -> *) out.
(test -> Bool) -> (test -> m out) -> SwitchCase test m out
If Word8 -> Bool
isAsciiByte ((Word8 -> TextBuilder)
-> SwitchCase
Word8
(StateT (Confidence, ()) (Parser ByteString))
(DecoderError String))
-> (Word8 -> TextBuilder)
-> SwitchCase
Word8
(StateT (Confidence, ()) (Parser ByteString))
(DecoderError String)
forall a b. (a -> b) -> a -> b
$ \Word8
b -> Word8 -> StateT (Confidence, ()) (Parser ByteString) ()
forall (m :: * -> *) stream token.
MonadParser m stream token =>
token -> m ()
push Word8
b StateT (Confidence, ()) (Parser ByteString) ()
-> TextBuilder -> TextBuilder
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Word8 -> TextBuilder
forall state. Word8 -> StateTextBuilder state
decoderFailure1 Word8
lead
, (Word8 -> TextBuilder)
-> SwitchCase
Word8
(StateT (Confidence, ()) (Parser ByteString))
(DecoderError String)
forall test (m :: * -> *) out.
(test -> m out) -> SwitchCase test m out
Else ((Word8 -> TextBuilder)
-> SwitchCase
Word8
(StateT (Confidence, ()) (Parser ByteString))
(DecoderError String))
-> (Word8 -> TextBuilder)
-> SwitchCase
Word8
(StateT (Confidence, ()) (Parser ByteString))
(DecoderError String)
forall a b. (a -> b) -> a -> b
$ Word8 -> Word8 -> TextBuilder
forall state. Word8 -> Word8 -> StateTextBuilder state
decoderFailure2 Word8
lead
]
lookupDecode :: W.Word8 -> W.Word8 -> TextBuilder
lookupDecode :: Word8 -> Word8 -> TextBuilder
lookupDecode Word8
lead Word8
trail = TextBuilder -> (Char -> TextBuilder) -> Maybe Char -> TextBuilder
forall b a. b -> (a -> b) -> Maybe a -> b
maybe TextBuilder
forall state.
StateT
(Confidence, state) (Parser ByteString) (DecoderError String)
failure ([Word8] -> Char -> TextBuilder
forall state. [Word8] -> Char -> StateTextBuilder state
emit [Word8]
bs) (Maybe Char -> TextBuilder)
-> (Word -> Maybe Char) -> Word -> TextBuilder
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word -> Maybe Char
decodeIndex (Word -> TextBuilder) -> Word -> TextBuilder
forall a b. (a -> b) -> a -> b
$
(Word8 -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
lead Word -> Word -> Word
forall a. Num a => a -> a -> a
- Word
0x81) Word -> Word -> Word
forall a. Num a => a -> a -> a
* Word
190 Word -> Word -> Word
forall a. Num a => a -> a -> a
+ Word8 -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
trail Word -> Word -> Word
forall a. Num a => a -> a -> a
- Word
0x41
where failure :: StateT
(Confidence, state) (Parser ByteString) (DecoderError String)
failure
| Word8 -> Bool
isAsciiByte Word8
trail = Word8 -> StateT (Confidence, state) (Parser ByteString) ()
forall (m :: * -> *) stream token.
MonadParser m stream token =>
token -> m ()
push Word8
trail StateT (Confidence, state) (Parser ByteString) ()
-> StateT
(Confidence, state) (Parser ByteString) (DecoderError String)
-> StateT
(Confidence, state) (Parser ByteString) (DecoderError String)
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Word8
-> StateT
(Confidence, state) (Parser ByteString) (DecoderError String)
forall state. Word8 -> StateTextBuilder state
decoderFailure1 Word8
lead
| Bool
otherwise = [Word8]
-> StateT
(Confidence, state) (Parser ByteString) (DecoderError String)
forall state. [Word8] -> StateTextBuilder state
decoderFailure [Word8]
bs
bs :: [Word8]
bs = [Word8
lead, Word8
trail]
encoder :: BinaryBuilder
encoder :: BinaryBuilder
encoder = StateT () (Parser Text) Char
forall (m :: * -> *) stream token.
MonadParser m stream token =>
m token
next StateT () (Parser Text) Char
-> (Char -> BinaryBuilder) -> BinaryBuilder
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= [SwitchCase
Char (StateT () (Parser Text)) (EncoderError ShortByteString)]
-> Char -> BinaryBuilder
forall (m :: * -> *) test out.
Alternative m =>
[SwitchCase test m out] -> test -> m out
switch
[ (Char -> Bool)
-> (Char -> BinaryBuilder)
-> SwitchCase
Char (StateT () (Parser Text)) (EncoderError ShortByteString)
forall test (m :: * -> *) out.
(test -> Bool) -> (test -> m out) -> SwitchCase test m out
If Char -> Bool
C.isAscii Char -> BinaryBuilder
forall state. Char -> StateBinaryBuilder state
fromAscii
, (Char -> BinaryBuilder)
-> SwitchCase
Char (StateT () (Parser Text)) (EncoderError ShortByteString)
forall test (m :: * -> *) out.
(test -> m out) -> SwitchCase test m out
Else Char -> BinaryBuilder
encodeIndex
]
decodeIndex :: Word -> Maybe Char
decodeIndex :: Word -> Maybe Char
decodeIndex Word
index = IO (MemoizationTable Word (Maybe Char))
-> Maybe (IO (MemoizationTable Char (Maybe Word)))
-> Word
-> (Word -> Maybe Char)
-> Maybe Char
forall k v.
(Eq k, Hashable k, Eq v, Hashable v) =>
IO (MemoizationTable k (Maybe v))
-> Maybe (IO (MemoizationTable v (Maybe k)))
-> k
-> (k -> Maybe v)
-> Maybe v
lookupMemoizedIndex IO (MemoizationTable Word (Maybe Char))
decodeIndexM (IO (MemoizationTable Char (Maybe Word))
-> Maybe (IO (MemoizationTable Char (Maybe Word)))
forall a. a -> Maybe a
Just IO (MemoizationTable Char (Maybe Word))
encodeIndexM) Word
index Word -> Maybe Char
readDecodeIndex
decodeIndexM :: DecoderMemoTable
decodeIndexM :: IO (MemoizationTable Word (Maybe Char))
decodeIndexM = IO (MemoizationTable Word (Maybe Char))
forall k v. IO (MemoizationTable k v)
newMemoizationTable
{-# NOINLINE decodeIndexM #-}
readDecodeIndex :: Word -> Maybe Char
readDecodeIndex :: Word -> Maybe Char
readDecodeIndex Word
index = Word -> [(Word, Char)] -> Maybe Char
forall k v. Ord k => k -> [(k, v)] -> Maybe v
search Word
index ([(Word, Char)] -> Maybe Char) -> [(Word, Char)] -> Maybe Char
forall a b. (a -> b) -> a -> b
$ String -> [(Word, Char)]
loadIndex String
"euc-kr"
encodeIndex :: Char -> BinaryBuilder
encodeIndex :: Char -> BinaryBuilder
encodeIndex Char
char = case IO (MemoizationTable Char (Maybe Word))
-> Maybe (IO (MemoizationTable Word (Maybe Char)))
-> Char
-> (Char -> Maybe Word)
-> Maybe Word
forall k v.
(Eq k, Hashable k, Eq v, Hashable v) =>
IO (MemoizationTable k (Maybe v))
-> Maybe (IO (MemoizationTable v (Maybe k)))
-> k
-> (k -> Maybe v)
-> Maybe v
lookupMemoizedIndex IO (MemoizationTable Char (Maybe Word))
encodeIndexM (IO (MemoizationTable Word (Maybe Char))
-> Maybe (IO (MemoizationTable Word (Maybe Char)))
forall a. a -> Maybe a
Just IO (MemoizationTable Word (Maybe Char))
decodeIndexM) Char
char Char -> Maybe Word
readEncodeIndex of
Just Word
i ->
let (Word
lead, Word
trail) = Word -> Word -> (Word, Word)
forall a. Integral a => a -> a -> (a, a)
divMod Word
i Word
188
lead' :: Word
lead' = Word
lead Word -> Word -> Word
forall a. Num a => a -> a -> a
+ if Word
lead Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
< Word
0x1F then Word
0x81 else Word
0xC1
trail' :: Word
trail' = Word
trail Word -> Word -> Word
forall a. Num a => a -> a -> a
+ if Word
trail Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
< Word
0x3F then Word
0x40 else Word
0x41
in EncoderError ShortByteString -> BinaryBuilder
forall (m :: * -> *) a. Monad m => a -> m a
return (EncoderError ShortByteString -> BinaryBuilder)
-> (ShortByteString -> EncoderError ShortByteString)
-> ShortByteString
-> BinaryBuilder
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShortByteString -> EncoderError ShortByteString
forall (f :: * -> *) a. Applicative f => a -> f a
pure (ShortByteString -> BinaryBuilder)
-> ShortByteString -> BinaryBuilder
forall a b. (a -> b) -> a -> b
$ [Word8] -> ShortByteString
BS.SH.pack [Word -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word
lead', Word -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word
trail']
Maybe Word
Nothing -> BinaryBuilder
forall (f :: * -> *) a. Alternative f => f a
A.empty
encodeIndexM :: EncoderMemoTable
encodeIndexM :: IO (MemoizationTable Char (Maybe Word))
encodeIndexM = IO (MemoizationTable Char (Maybe Word))
forall k v. IO (MemoizationTable k v)
newMemoizationTable
{-# NOINLINE encodeIndexM #-}
readEncodeIndex :: Char -> Maybe Word
readEncodeIndex :: Char -> Maybe Word
readEncodeIndex Char
char = Char -> [(Char, Word)] -> Maybe Word
forall k v. Ord k => k -> [(k, v)] -> Maybe v
search Char
char ([(Char, Word)] -> Maybe Word)
-> ([(Word, Char)] -> [(Char, Word)])
-> [(Word, Char)]
-> Maybe Word
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Word, Char) -> (Char, Word)) -> [(Word, Char)] -> [(Char, Word)]
forall a b. (a -> b) -> [a] -> [b]
map (Word, Char) -> (Char, Word)
forall a b. (a, b) -> (b, a)
U.swap ([(Word, Char)] -> Maybe Word) -> [(Word, Char)] -> Maybe Word
forall a b. (a -> b) -> a -> b
$ String -> [(Word, Char)]
loadIndex String
"euc-kr"