module Data.Text.Encoding.ANSEL (
decodeANSEL
) where
import Control.Monad.Loops (whileM)
import Control.Monad.State (State, evalState, get, put)
import Data.Array
import Data.Word
import qualified Data.ByteString as B
import qualified Data.Text.All as T
decodeANSEL ::
B.ByteString
-> T.Text
decodeANSEL bs = T.pack . concat .
evalState (whileM ((not.null) <$> get) decodeANSELChar)$ B.unpack bs
decodeANSELChar :: State [Word8] String
decodeANSELChar = do
(dias', rest) <- span isDiacritic <$> get
let dias = filter (/= '\xFFFD').fmap (combiningTable!)$ dias'
case rest of
[] -> put [] >> if null dias then return "" else return "\xFFFD"
r:rs -> put rs >> let c = encode r in
if c == '\xFFFD' then return "\xFFFD" else return$ c:dias
where encode r = if isAscii r then toEnum (fromIntegral r) else composedTable!r
isAscii :: Word8 -> Bool
isAscii x = x < 0x80
isDiacritic :: Word8 -> Bool
isDiacritic x = x >= 0xE0
composedTable :: Array Word8 Char
composedTable = accumArray (flip const) '\xFFFD' (0x00, 0xFF) [
(0xA1, 'Ł'),
(0xA2, 'Ø'),
(0xA3, 'Đ'),
(0xA4, 'Þ'),
(0xA5, 'Æ'),
(0xA6, 'Œ'),
(0xA7, 'ʹ'),
(0xA8, '·'),
(0xA9, '♭'),
(0xAA, '®'),
(0xAB, '±'),
(0xAC, 'Ơ'),
(0xAD, 'Ư'),
(0xAE, 'ʼ'),
(0xB0, 'ʻ'),
(0xB1, 'ł'),
(0xB2, 'ø'),
(0xB3, 'đ'),
(0xB4, 'þ'),
(0xB5, 'æ'),
(0xB6, 'œ'),
(0xB7, 'ʺ'),
(0xB8, 'ı'),
(0xB9, '£'),
(0xBA, 'ð'),
(0xBC, 'ơ'),
(0xBD, 'ư'),
(0xBE, '□'),
(0xBF, '■'),
(0xC0, '°'),
(0xC1, 'ℓ'),
(0xC2, '℗'),
(0xC3, '©'),
(0xC4, '♯'),
(0xC5, '¿'),
(0xC6, '¡'),
(0xCD, 'e'),
(0xCE, 'o'),
(0xCF, 'ß')]
combiningTable :: Array Word8 Char
combiningTable = accumArray (flip const) '\xFFFD' (0x00, 0xFF) [
(0xE0, '\x0309'),
(0xE1, '\x0300'),
(0xE2, '\x0301'),
(0xE3, '\x0302'),
(0xE4, '\x0303'),
(0xE5, '\x0304'),
(0xE6, '\x0306'),
(0xE7, '\x0307'),
(0xE8, '\x0308'),
(0xE9, '\x030C'),
(0xEA, '\x030A'),
(0xEB, '\xFE20'),
(0xEC, '\xFE21'),
(0xED, '\x0315'),
(0xEE, '\x030B'),
(0xEF, '\x0310'),
(0xF0, '\x0327'),
(0xF1, '\x0328'),
(0xF2, '\x0323'),
(0xF3, '\x0324'),
(0xF4, '\x0325'),
(0xF5, '\x0333'),
(0xF6, '\x0332'),
(0xF7, '\x0326'),
(0xF8, '\x031C'),
(0xF9, '\x032E'),
(0xFA, '\xFE22'),
(0xFB, '\xFE23'),
(0xFE, '\x0313'),
(0xFF, '\x0338')]