{-# LANGUAGE BlockArguments #-} module Codec.Text.UTF8 (Error, decode1) where import Control.Arrow import Data.Bits import qualified Data.List as List import Data.Word import Util decode1 :: [Word8] -> Maybe (Either Error Char, [Word8]) decode1 = List.uncons & fmap (uncurry (choose . fromEnum) >>> fmap toEnum *** id) where choose b bs | b < 0x80 = (Right $ toEnum b, drop 1 bs) | l < 2 || l > 6 = (Left InvalidSequence, drop 1 bs) | otherwise = go' l (fromEnum b .&. shiftR 0x7F l, bs) where l = countLeadingZeros (complement b) go (x, b:bs') | b .&. 0xC0 == 0x80 = (Right $ shiftL x 6 .|. fromEnum b .&. 0x3F, bs') | otherwise = (Left InvalidSequence, bs') go (_, bs) = (Left InvalidSequence, bs) go' 0 = Right *** id go' k = go & uncurry \ case Right x -> seq x $ go' (k-1) `curry` x z -> (,) z {-# INLINABLE decode1 #-} data Error = InvalidSequence deriving (Eq, Show)