-- | Tests for encoding and decoding

{-# LANGUAGE CPP, OverloadedStrings, ScopedTypeVariables #-}
{-# OPTIONS_GHC -fno-warn-missing-signatures #-}
module Tests.Properties.Transcoding
    ( testTranscoding
    ) where

import Prelude hiding (head, tail)
import Data.Bits ((.&.), shiftR)
import Data.Char (chr, ord)
import Data.Functor (void)
import Data.Maybe (isNothing)
#if !MIN_VERSION_base(4,11,0)
import Data.Semigroup ((<>))
#endif
import Data.Word (Word8)
import Test.QuickCheck hiding ((.&.))
import Test.Tasty (TestTree, testGroup)
import Test.Tasty.QuickCheck (testProperty)
import Test.Tasty.HUnit ((@?=), assertBool, assertFailure, testCase)
import Tests.QuickCheckUtils
import qualified Control.Exception as Exception
import qualified Data.Bits as Bits (shiftL, shiftR)
import qualified Data.ByteString as B
import qualified Data.ByteString.Builder as B
import qualified Data.ByteString.Builder.Extra as B
import qualified Data.ByteString.Builder.Prim as BP
import qualified Data.ByteString.Char8 as BC
import qualified Data.ByteString.Lazy as BL
import qualified Data.ByteString.Lazy.Char8 as BLC
import qualified Data.Text as T
import qualified Data.Text.Encoding as E
import qualified Data.Text.Encoding.Error as E
import qualified Data.Text.Internal.Encoding as E
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as EL

-- ASCII round trip: every character is first folded into the range 0..127,
-- then decodeASCII must invert encodeUtf8 on the folded text.
t_ascii t    = E.decodeASCII (E.encodeUtf8 a) === a
    where a  = T.map (\c -> chr (ord c `mod` 128)) t
tl_ascii t   = EL.decodeASCII (EL.encodeUtf8 a) === a
    where a  = TL.map (\c -> chr (ord c `mod` 128)) t

-- decodeLatin1 must agree with unpacking the bytes as Chars and packing them.
t_latin1     = E.decodeLatin1 `eq` (T.pack . BC.unpack)
tl_latin1    = EL.decodeLatin1 `eq` (TL.pack . BLC.unpack)

-- Fixed validateUtf8Chunk cases: the second argument is the length the
-- validator is expected to return. The `testValidateUtf8_` cases expect a
-- state to be returned (Just); the `testValidateUtf8Fail` cases expect none.
t_p_utf8_1   = testValidateUtf8_ [0x63] 1
t_p_utf8_2   = testValidateUtf8_ [0x63, 0x63, 0x63] 3
t_p_utf8_3   = testValidateUtf8_ [0x63, 0x63, 0xc2, 0x80, 0x63] 5
t_p_utf8_4   = testValidateUtf8_ [0x63, 0xe1, 0x80, 0x80, 0x63] 5
t_p_utf8_5   = testValidateUtf8_ [0xF0, 0x90, 0x80, 0x80, 0x63] 5
t_p_utf8_6   = testValidateUtf8_ [0x63, 0x63, 0xF0, 0x90, 0x80] 2
t_p_utf8_7   = testValidateUtf8_ [0x63, 0x63, 0x63, 0xF0, 0x90] 3
t_p_utf8_8   = testValidateUtf8Fail [0xF0, 0x90, 0x80, 0x63, 0x63] 0
t_p_utf8_9   = testValidateUtf8Fail [0x63, 0x63, 0x80, 0x63, 0x63] 2
t_p_utf8_0   = testValidateUtf8Fail [0x63, 0x63, 0xe1, 0x63, 0x63] 2

-- | Run @validate@ on the packed bytes, assert that the returned length
-- equals @expectedLen@, then hand the returned state to the continuation @k@.
testValidateUtf8With ::
  (B.ByteString -> (Int, Maybe E.Utf8State)) ->
  (Maybe E.Utf8State -> IO r) ->
  [Word8] -> Int -> IO r
testValidateUtf8With validate k xs expectedLen = case validate (B.pack xs) of
  (len, s) -> do
    len @?= expectedLen
    k s

-- | Extract the value from a 'Just', failing the test on 'Nothing'.
expectJust :: Maybe a -> IO a
expectJust Nothing = assertFailure "Unexpected Nothing"
expectJust (Just s) = pure s

-- | Succeed on 'Nothing', failing the test on 'Just'.
expectNothing :: Maybe a -> IO ()
expectNothing Nothing = pure ()
expectNothing (Just _) = assertFailure "Unexpected Just"

-- | Validate a first chunk, expecting a state, which is returned.
testValidateUtf8 :: [Word8] -> Int -> IO E.Utf8State
testValidateUtf8 = testValidateUtf8With E.validateUtf8Chunk expectJust

-- | Like 'testValidateUtf8' but discards the returned state.
testValidateUtf8_ :: [Word8] -> Int -> IO ()
testValidateUtf8_ = testValidateUtf8With E.validateUtf8Chunk (void . expectJust)

-- | Validate a first chunk, expecting no state to be returned.
testValidateUtf8Fail :: [Word8] -> Int -> IO ()
testValidateUtf8Fail = testValidateUtf8With E.validateUtf8Chunk expectNothing

-- | Continue validation from state @s@, expecting a new state.
testValidateUtf8More :: E.Utf8State -> [Word8] -> Int -> IO E.Utf8State
testValidateUtf8More s = testValidateUtf8With (E.validateUtf8More s) expectJust

-- | Continue validation from state @s@, expecting no state to be returned.
testValidateUtf8MoreFail :: E.Utf8State -> [Word8] -> Int -> IO ()
testValidateUtf8MoreFail s = testValidateUtf8With (E.validateUtf8More s) expectNothing

-- Multi-step validation cases: a code point is fed in several chunks and the
-- length reported at each step is checked (it is negative in the cases below
-- where the chunk only continues a pending code point).
t_pn_utf8_1 = do
  s <- testValidateUtf8 [0xF0, 0x90, 0x80] 0
  _ <- testValidateUtf8More s [0x80] 1
  testValidateUtf8MoreFail s [0x7f] (-3)
t_pn_utf8_2 = do
  s0 <- testValidateUtf8 [0xF0] 0
  testValidateUtf8MoreFail s0 [0x7f] (-1)
  s1 <- testValidateUtf8More s0 [0x90] (-1)
  testValidateUtf8MoreFail s1 [0x7f] (-2)
  s2 <- testValidateUtf8More s1 [0x80] (-2)
  testValidateUtf8MoreFail s2 [0x7f] (-3)
  _ <- testValidateUtf8More s2 [0x80] 1
  pure ()
t_pn_utf8_3 = do
  s1 <- testValidateUtf8 [0xc2] 0
  assertBool "PartialUtf8 must be partial" $ B.length (E.getPartialUtf8 s1) < E.getCompleteLen s1
  testValidateUtf8MoreFail s1 [0x80, 0x80] 1

-- Precondition: (i, ms1) = E.validateUtf8More s chunk
--
-- The index points to the end of the longest valid prefix
-- of prechunk `B.append` chunk
pre_validateUtf8More_validPrefix s chunk i =
  let prechunk = E.getPartialUtf8 s in
  -- Note: i <= 0 implies take i = id
  let (j, ms2) = E.validateUtf8Chunk (B.take (B.length prechunk + i) (prechunk `B.append` chunk)) in
  counterexample (show prechunk) $
    (B.length prechunk + i, ms2) === (j, Just E.startUtf8State)

-- Precondition: (i, Nothing) = E.validateUtf8More s chunk
--
-- Appending to an invalid chunk yields another invalid chunk.
pre_validateUtf8More_maximalPrefix s chunk i more =
  E.validateUtf8More s (chunk `B.append` more) === (i, Nothing)

-- Precondition: (i, Just s1) = E.validateUtf8More s chunk
pre_validateUtf8More_suffix s chunk i s1 =
  if 0 <= i
  then B.drop i chunk === p2b s1              -- The state s1 contains a suffix of the chunk.
  else p2b s `B.append` chunk === p2b s1      -- Or the chunk extends the incomplete code point in s1.

-- Precondition: (i, Just s1) = E.validateUtf8More s chunk1
--
-- Validating two chunks sequentially is equivalent to validating them at once.
pre_validateUtf8More_append s chunk1 s1 chunk2 =
  let (j, ms2) = E.validateUtf8More s1 chunk2 in
  (B.length chunk1 + j, ms2) === E.validateUtf8More s (chunk1 `B.append` chunk2)

-- These wrappers use custom generators to satisfy the above properties.

t_validateUtf8More_validPrefix = property $ do
  cex@(s, chunk, i, _ms1) <- randomMoreChunk
  pure $ counterexample (show cex) $
    pre_validateUtf8More_validPrefix s chunk i

t_validateUtf8More_maximalPrefix = property $ do
  -- We want chunks that fail validation: force their size to be big,..
  cex@(s, chunk, i, ms1) <- scale (* 3) arbitraryMoreChunk
  pure $ counterexample (show cex) $
    -- ... and just use rejection sampling
    isNothing ms1 ==>
    -- The remaining argument (`more`) is left for QuickCheck to generate.
    pre_validateUtf8More_maximalPrefix s chunk i

t_validateUtf8More_valid = property $ do
  cex@(s, chunk1, i, s1, chunk2) <- validMoreChunks
  pure $ counterexample (show cex) $
    pre_validateUtf8More_suffix s chunk1 i s1 .&&.
    pre_validateUtf8More_append s chunk1 s1 chunk2

-- Generators of (state, chunk, index, result state) where
-- (index, result state) = E.validateUtf8More state chunk.
randomMoreChunk, arbitraryMoreChunk, validMoreChunk :: Gen (E.Utf8State, B.ByteString, Int, Maybe E.Utf8State)
-- Mix unconstrained chunks with chunks known to be valid.
randomMoreChunk = oneof [arbitraryMoreChunk, validMoreChunk]
-- A random state paired with completely arbitrary bytes.
arbitraryMoreChunk = do
  s <- randomUtf8State
  chunk <- arbitrary
  let (i, ms1) = E.validateUtf8More s chunk
  pure (s, chunk, i, ms1)

-- | Generate a random state by parsing a prefix of a Char
randomUtf8State :: Gen E.Utf8State
randomUtf8State = do
  c <- arbitrary
  chunk <- elements (B.inits (E.encodeUtf8 (T.singleton c)))
  case E.validateUtf8Chunk chunk of
    (_, Just s) -> pure s
    -- A prefix of a valid encoding is expected to always validate.
    (_, Nothing) -> error "should not happen"

-- | Make a valid chunk, i.e., (s, chunk) such that
--
-- validateUtf8More s chunk = (i, Just s1)
--
-- Also returning i and s1 to not repeat work.
validMoreChunk = do
  (s, chunk, i, s1, _chunk2) <- validMoreChunks
  pure (s, chunk, i, Just s1)

-- | Make a valid chunk by slicing a valid UTF8 bs,
-- and also provide a second chunk which is a valid extension
-- with 0.5 probability.
validMoreChunks :: Gen (E.Utf8State, B.ByteString, Int, E.Utf8State, B.ByteString)
validMoreChunks = do
  bs <- E.encodeUtf8 <$> scale (* 3) arbitrary
  -- Take an intermediate state.
  -- No need to go too far since code points are at most 4 bytes long
  i <- choose (0, 3)
  let (bs0, bs1) = B.splitAt i bs
  case E.validateUtf8Chunk bs0 of
    (_, Just s) -> do
      j <- choose (0, B.length bs1)
      let (chunk1, chunk2') = B.splitAt j bs1
      case E.validateUtf8More s chunk1 of
        (n1, Just s1) -> do
          -- Either the genuine continuation of bs or arbitrary bytes.
          chunk2 <- oneof [pure chunk2', arbitrary]
          pure (s, chunk1, n1, s1, chunk2)
        (_, Nothing) -> error "should not happen"
    (_, Nothing) -> error "should not happen"

-- Round-trip properties: decoding the encoding of a text gives the text back.
t_utf8_c     = (E.strictBuilderToText . fst3 . E.decodeUtf8Chunk . E.encodeUtf8) `eq` id
t_utf8       = (E.decodeUtf8 . E.encodeUtf8) `eq` id
t_utf8'      = (E.decodeUtf8' . E.encodeUtf8) `eq` (id . Right)
tl_utf8      = (EL.decodeUtf8 . EL.encodeUtf8) `eq` id
tl_utf8'     = (EL.decodeUtf8' . EL.encodeUtf8) `eq` (id . Right)
t_utf16LE    = (E.decodeUtf16LE . E.encodeUtf16LE) `eq` id
tl_utf16LE   = (EL.decodeUtf16LE . EL.encodeUtf16LE) `eq` id
t_utf16BE    = (E.decodeUtf16BE . E.encodeUtf16BE) `eq` id
tl_utf16BE   = (EL.decodeUtf16BE . EL.encodeUtf16BE) `eq` id
t_utf32LE    = (E.decodeUtf32LE . E.encodeUtf32LE) `eq` id
tl_utf32LE   = (EL.decodeUtf32LE . EL.encodeUtf32LE) `eq` id
t_utf32BE    = (E.decodeUtf32BE . E.encodeUtf32BE) `eq` id
tl_utf32BE   = (EL.decodeUtf32BE . EL.encodeUtf32BE) `eq` id

-- | First component of a triple.
fst3 :: (a, b, c) -> a
fst3 (a, _, _) = a

-- | Run a bytestring 'B.Builder' to a strict 'B.ByteString'.
runBuilder :: B.Builder -> B.ByteString
runBuilder =
  -- Use smallish buffers to exercise bufferFull case as well
  BL.toStrict . B.toLazyByteStringWith (B.safeStrategy 5 5) ""

-- A builder-based encoder must produce the same bytes as E.encodeUtf8.
t_encodeUtf8Builder_ toBuilder = (runBuilder . toBuilder) `eq` E.encodeUtf8

-- Same comparison, applied after dropping a positive number of characters.
t_encodeUtf8Builder_nonZeroOffset_ toBuilder (Positive n) =
  (runBuilder . toBuilder . T.drop n) `eq` (E.encodeUtf8 . T.drop n)

t_encodeUtf8Builder = t_encodeUtf8Builder_ E.encodeUtf8Builder
t_encodeUtf8Builder_nonZeroOffset = t_encodeUtf8Builder_nonZeroOffset_ E.encodeUtf8Builder

t_encodeUtf8BuilderEscaped = t_encodeUtf8Builder_ (E.encodeUtf8BuilderEscaped (BP.liftFixedToBounded BP.word8))
t_encodeUtf8BuilderEscaped_nonZeroOffset = t_encodeUtf8Builder_nonZeroOffset_ (E.encodeUtf8BuilderEscaped (BP.liftFixedToBounded BP.word8))

-- The plain and the "escaped" builder must produce identical output.
t_encodeUtf8Builder_sanity t =
  (runBuilder . E.encodeUtf8Builder) t ===
    (runBuilder . E.encodeUtf8BuilderEscaped (BP.liftFixedToBounded BP.word8)) t

-- Incremental decoding in chunks of n bytes reassembles the original text.
t_utf8_incr (Positive n) =
  (T.concat . map fst . feedChunksOf n E.streamDecodeUtf8 . E.encodeUtf8) `eq` id

-- | Feed @bs@ to the streaming decoder @f@ in pieces of @n@ bytes, collecting
-- the decoded text and the leftover bytes reported after each piece.
feedChunksOf :: Int -> (B.ByteString -> E.Decoding) -> B.ByteString
             -> [(T.Text, B.ByteString)]
feedChunksOf n f bs
  | B.null bs  = []
  | otherwise  = let (x,y) = B.splitAt n bs
                     E.Some t b f' = f x
                 in (t,b) : feedChunksOf n f' y

-- Feeding one byte at a time, the leftover bytes after each step must be the
-- proper non-empty prefixes of each character's encoding, followed by empty.
t_utf8_undecoded t =
  let b = E.encodeUtf8 t
      ls = concatMap (leftover . E.encodeUtf8 . T.singleton) . T.unpack $ t
      leftover = (++ [B.empty]) . init . drop 1 . B.inits
  in (map snd . feedChunksOf 1 E.streamDecodeUtf8) b === ls

-- | Invalid bytes surrounded by (possibly empty) valid text on either side.
data InvalidUtf8 = InvalidUtf8
  { iu8Prefix  :: T.Text
  , iu8Invalid :: B.ByteString
  , iu8Suffix  :: T.Text
  } deriving (Eq)

instance Show InvalidUtf8 where
  show i = "InvalidUtf8 {prefix = "  ++ show (iu8Prefix i)
                   ++ ", invalid = " ++ show (iu8Invalid i)
                   ++ ", suffix = "  ++ show (iu8Suffix i)
                   ++ ", asBS = "    ++ show (toByteString i)
                   ++ ", length = "  ++ show (B.length (toByteString i))
                   ++ "}"

-- | Encoded prefix, then the invalid bytes, then the encoded suffix.
toByteString :: InvalidUtf8 -> B.ByteString
toByteString (InvalidUtf8 a b c) =
  E.encodeUtf8 a `B.append` b `B.append` E.encodeUtf8 c

instance Arbitrary InvalidUtf8 where
  arbitrary = oneof
    [ InvalidUtf8 <$> pure mempty <*> genInvalidUTF8 <*> pure mempty
    , InvalidUtf8 <$> pure mempty <*> genInvalidUTF8 <*> arbitrary
    , InvalidUtf8 <$> arbitrary <*> genInvalidUTF8 <*> pure mempty
    , InvalidUtf8 <$> arbitrary <*> genInvalidUTF8 <*> arbitrary
    ]
  -- Only the valid prefix and suffix are shrunk; the invalid bytes are kept.
  shrink (InvalidUtf8 a b c)
    =  map (\c' -> InvalidUtf8 a b c') (shrink c)
    ++ map (\a' -> InvalidUtf8 a' b c) (shrink a)

-- | Decoding invalid input either throws (any exception is accepted, its
-- message is merely forced) or succeeds, which is only acceptable when the
-- error-handling mode is not 'Strict'.
t_utf8_err :: InvalidUtf8 -> DecodeErr -> Property
t_utf8_err bad de = forAll (Blind <$> genDecodeErr de) $ \(Blind onErr) -> ioProperty $ do
  let decoded = E.decodeUtf8With onErr (toByteString bad)
      -- Reuse `decoded` rather than spelling out the decoding a second time.
      len = T.length decoded
  l <- Exception.try (Exception.evaluate len)
  pure $ case l of
    Left (err :: Exception.SomeException) -> counterexample (show err) $
      length (show err) >= 0
    Right _  -> counterexample (show (decoded, l)) $ de /= Strict

-- | decodeUtf8' must return normally on arbitrary bytes; both outcomes are
-- forced to make sure neither hides an exception.
t_utf8_err' :: B.ByteString -> Bool
t_utf8_err' bs = case E.decodeUtf8' bs of
  Left err -> length (show err) >= 0
  Right t  -> T.length t >= 0

-- | Generate a byte sequence that is not valid UTF-8.
genInvalidUTF8 :: Gen B.ByteString
genInvalidUTF8 = B.pack <$> oneof [
    -- invalid leading byte of a 2-byte sequence
    (:) <$> choose (0xC0, 0xC1) <*> upTo 1 contByte
    -- invalid leading byte of a 4-byte sequence
  , (:) <$> choose (0xF5, 0xFF) <*> upTo 3 contByte
    -- 4-byte sequence greater than U+10FFFF
  , do k <- choose (0x11, 0x13)
       let w0 = 0xF0 + (k `Bits.shiftR` 2)
           w1 = 0x80 + ((k .&. 3) `Bits.shiftL` 4)
       ([w0,w1]++) <$> vectorOf 2 contByte
    -- continuation bytes without a start byte
  , listOf1 contByte
    -- short 2-byte sequence
  , (:[]) <$> choose (0xC2, 0xDF)
    -- short 3-byte sequence
  , (:) <$> choose (0xE0, 0xEF) <*> upTo 1 contByte
    -- short 4-byte sequence
  , (:) <$> choose (0xF0, 0xF4) <*> upTo 2 contByte
    -- overlong encoding
  , do k <- choose (0 :: Int, 0xFFFF)
       case k of
         -- U+0000..U+007F fit in one byte: 2-, 3- and 4-byte forms are overlong
         _ | k < 0x80  -> elements [ord2_ k, ord3_ k, ord4_ k]
         -- U+0080..U+07FF fit in two bytes: 3- and 4-byte forms are overlong
           | k < 0x800 -> elements [ord3_ k, ord4_ k]
         -- U+0800..U+FFFF fit in three bytes: only the 4-byte form is overlong
           | otherwise -> return (ord4_ k)
  ]
  where
    contByte = (0x80 +) <$> choose (0, 0x3f)
    upTo n gen = do
      k <- choose (0,n)
      vectorOf k gen
    -- Data.Text.Internal.Encoding.Utf8.ord{2,3,4} without sanity checks
    ord2_ n = map fromIntegral [(n `shiftR` 6) + 0xC0, (n .&. 0x3F) + 0x80]
    ord3_ n = map fromIntegral [(n `shiftR` 12) + 0xE0, ((n `shiftR` 6) .&. 0x3F) + 0x80, (n .&. 0x3F) + 0x80]
    ord4_ n = map fromIntegral [(n `shiftR` 18) + 0xF0, ((n `shiftR` 12) .&. 0x3F) + 0x80, ((n `shiftR` 6) .&. 0x3F) + 0x80, (n .&. 0x3F) + 0x80]

-- | Lenient decoding of a lazy bytestring.
decodeLL :: BL.ByteString -> TL.Text
decodeLL = EL.decodeUtf8With E.lenientDecode

-- | Lenient decoding of a strict bytestring.
decodeL :: B.ByteString -> T.Text
decodeL = E.decodeUtf8With E.lenientDecode

-- The lenient decoding of lazy bytestrings should not depend on how they are chunked,
-- and it should behave the same as decoding of strict bytestrings.
t_decode_utf8_lenient :: Property
t_decode_utf8_lenient = forAllShrinkShow arbitrary shrink (show . BL.toChunks) $ \bs ->
    decodeLL bs === (TL.fromStrict . decodeL . B.concat . BL.toChunks) bs

-- See http://unicode.org/faq/utf_bom.html#gen8
-- A sequence such as <110xxxxx2 0xxxxxxx2> is illegal ...
-- When faced with this illegal byte sequence ... a UTF-8 conformant process
-- must treat the first byte 110xxxxx2 as an illegal termination error
-- (e.g. filter it out or replace by 0xFFFD) ...
-- ... and continue processing at the second byte 0xxxxxxx2
t_decode_with_error2 =
  E.decodeUtf8With (\_ _ -> Just 'x') (B.pack [0xC2, 97]) === "xa"
t_decode_with_error3 =
  E.decodeUtf8With (\_ _ -> Just 'x') (B.pack [0xE0, 97, 97]) === "xaa"
t_decode_with_error4 =
  E.decodeUtf8With (\_ _ -> Just 'x') (B.pack [0xF0, 97, 97, 97]) === "xaaa"

-- The same recovery behaviour, checked for the streaming decoder.
t_decode_with_error1' = do
  E.Some x1 bs1 f1 <- pure $ E.streamDecodeUtf8With (\_ _ -> Just 'x') (B.pack [0xc2])
  x1 @?= ""
  bs1 @?= B.pack [0xc2]
  E.Some x2 bs2 _ <- pure $ f1 $ B.pack [0x80, 0x80]
  x2 @?= "\x80x"
  bs2 @?= mempty
t_decode_with_error2' =
  case E.streamDecodeUtf8With (\_ _ -> Just 'x') (B.pack [0xC2, 97]) of
    E.Some x _ _ -> x @?= "xa"
t_decode_with_error3' =
  case E.streamDecodeUtf8With (\_ _ -> Just 'x') (B.pack [0xC2, 97, 97]) of
    E.Some x _ _ -> x @?= "xaa"
t_decode_with_error4' =
  case E.streamDecodeUtf8With (\_ _ -> Just 'x') (B.pack [0xC2, 97, 97, 97]) of
    E.Some x _ _ -> x @?= "xaaa"
-- The default streaming decoder must throw a UnicodeException on a lone
-- continuation byte.
t_decode_with_error5' = do
  ret <- Exception.try $ Exception.evaluate $ E.streamDecodeUtf8 (B.pack [0x81])
  case ret of
    Left (_ :: E.UnicodeException) -> pure ()
    Right{} -> assertFailure "Unexpected success"

-- | Decode @xs@ starting from state @s@, assert that the decoded text equals
-- @expected@ and that decoded text plus leftover bytes account for the input,
-- then hand the resulting state to the continuation @k@.
testDecodeUtf8With :: (Maybe E.Utf8State -> IO r) -> E.Utf8State -> [Word8] -> T.Text -> IO r
testDecodeUtf8With k s xs expected =
  let xs' = B.pack xs in
  case E.decodeUtf8More s xs' of
    (prefix, bs, s') -> do
      let txt = E.strictBuilderToText prefix
      txt @?= expected
      if T.null txt
        -- Nothing was decoded: the leftover is the input chunk itself.
        then bs @?= xs'
        -- Otherwise the output also covers the bytes pending in s.
        else E.encodeUtf8 txt `B.append` bs @?= E.getPartialUtf8 s `B.append` xs'
      k s'

-- | Decode a chunk that must be accepted; returns the new state.
testDecodeUtf8 :: E.Utf8State -> [Word8] -> T.Text -> IO E.Utf8State
testDecodeUtf8 =
  testDecodeUtf8With (maybe (assertFailure "Unexpected failure") pure)

-- | Decode a chunk that must be rejected (no state returned).
testDecodeUtf8Fail :: E.Utf8State -> [Word8] -> T.Text -> IO ()
testDecodeUtf8Fail =
  -- Getting a state back means the decoder accepted input it should reject,
  -- so the message reports a success, not a failure.
  testDecodeUtf8With (maybe (pure ()) (const (assertFailure "Unexpected success")))

t_decode_chunk1 = do
  s1 <- testDecodeUtf8 E.startUtf8State [0xc2] ""
  B.length (E.getPartialUtf8 s1) @?= 1
  testDecodeUtf8Fail s1 [0x80, 0x80] "\128"

t_decode_chunk2 = do
  s1 <- testDecodeUtf8 E.startUtf8State [0xf0] ""
  s2 <- testDecodeUtf8 s1 [0x90, 0x80] ""
  _ <- testDecodeUtf8 s2 [0x80, 0x41] "\65536A"
  pure ()

-- Valid text embedded between arbitrary bytes must survive decoding with a
-- replacing error handler.
t_infix_concat bs1 text bs2 =
  forAll (Blind <$> genDecodeErr Replace) $ \(Blind onErr) ->
  text `T.isInfixOf`
    E.decodeUtf8With onErr (B.concat [bs1, E.encodeUtf8 text, bs2])

t_textToStrictBuilder =
  (E.strictBuilderToText . E.textToStrictBuilder) `eq` id

-- decodeUtf8Chunk splits the input bytestring
t_decodeUtf8Chunk_split chunk =
  let (pre, suf, _ms) = E.decodeUtf8Chunk chunk
  in s2b pre `B.append` suf === chunk

-- decodeUtf8More mostly splits the input bytestring,
-- also inserting bytes from the partial code point in s.
--
-- This is wrapped by t_decodeUtf8More_split to have more
-- likely valid chunks.
t_decodeUtf8More_split' s chunk =
  let (pre, suf, _ms) = E.decodeUtf8More s chunk
  in if B.length chunk > B.length suf
  then s2b pre `B.append` suf === p2b s `B.append` chunk
  else suf === chunk

-- The output state of decodeUtf8More contains the suffix.
--
-- Precondition (valid chunk): ms = Just s'
pre_decodeUtf8More_suffix s chunk =
  let (_pre, suf, ms) = E.decodeUtf8More s chunk in
  case ms of
    Nothing -> discard
    Just s' ->
      if B.length chunk > B.length suf
      then p2b s' === suf
      else p2b s' === p2b s `B.append` suf

-- Decoding chunks separately is equivalent to decoding their concatenation.
-- (The case is discarded when the first chunk is rejected, i.e. ms1 = Nothing.)
pre_decodeUtf8More_append s chunk1 chunk2 =
  let (pre1, _, ms1) = E.decodeUtf8More s chunk1 in
  case ms1 of
    Nothing -> discard
    Just s1 ->
      let (pre2, _, ms2) = E.decodeUtf8More s1 chunk2 in
      let (pre3, _, ms3) = E.decodeUtf8More s (chunk1 `B.append` chunk2) in
      (s2b (pre1 <> pre2), ms2) === (s2b pre3, ms3)

-- Properties for any chunk
-- (but do try to generate valid chunks often enough)
t_decodeUtf8More1 = property $ do
  cex@(s, chunk, _, _) <- randomMoreChunk
  pure $ counterexample (show cex) $
    t_decodeUtf8More_split' s chunk

-- Properties that require valid chunks
t_decodeUtf8More2 = property $ do
  cex@(s, chunk, _, _, chunk2) <- validMoreChunks
  pure $ counterexample (show cex) $
    pre_decodeUtf8More_suffix s chunk .&&.
    pre_decodeUtf8More_append s chunk chunk2

-- Bytes of the text accumulated in a strict builder.
s2b = E.encodeUtf8 . E.strictBuilderToText
-- Bytes of the incomplete code point held in a Utf8State.
p2b = E.getPartialUtf8

-- | All transcoding tests, grouped by topic.
testTranscoding :: TestTree
testTranscoding =
  testGroup "transcoding" [
    testProperty "t_ascii" t_ascii,
    testProperty "tl_ascii" tl_ascii,
    testProperty "t_latin1" t_latin1,
    testProperty "tl_latin1" tl_latin1,
    testProperty "t_utf8" t_utf8,
    testProperty "t_utf8'" t_utf8',
    testProperty "t_utf8_undecoded" t_utf8_undecoded,
    testProperty "t_utf8_incr" t_utf8_incr,
    testProperty "tl_utf8" tl_utf8,
    testProperty "tl_utf8'" tl_utf8',
    testProperty "t_utf16LE" t_utf16LE,
    testProperty "tl_utf16LE" tl_utf16LE,
    testProperty "t_utf16BE" t_utf16BE,
    testProperty "tl_utf16BE" tl_utf16BE,
    testProperty "t_utf32LE" t_utf32LE,
    testProperty "tl_utf32LE" tl_utf32LE,
    testProperty "t_utf32BE" t_utf32BE,
    testProperty "tl_utf32BE" tl_utf32BE,
    testGroup "builder" [
      testProperty "t_encodeUtf8Builder" t_encodeUtf8Builder,
      testProperty "t_encodeUtf8Builder_nonZeroOffset" t_encodeUtf8Builder_nonZeroOffset,
      testProperty "t_encodeUtf8BuilderEscaped" t_encodeUtf8BuilderEscaped,
      testProperty "t_encodeUtf8BuilderEscaped_nonZeroOffset" t_encodeUtf8BuilderEscaped_nonZeroOffset,
      testProperty "t_encodeUtf8Builder_sanity" t_encodeUtf8Builder_sanity
    ],
    testGroup "errors" [
      testProperty "t_utf8_err" t_utf8_err,
      testProperty "t_utf8_err'" t_utf8_err'
    ],
    testGroup "error recovery" [
      testProperty "t_decode_utf8_lenient" t_decode_utf8_lenient,
      testProperty "t_decode_with_error2" t_decode_with_error2,
      testProperty "t_decode_with_error3" t_decode_with_error3,
      testProperty "t_decode_with_error4" t_decode_with_error4,
      testCase "t_decode_with_error1'" t_decode_with_error1',
      testCase "t_decode_with_error2'" t_decode_with_error2',
      testCase "t_decode_with_error3'" t_decode_with_error3',
      testCase "t_decode_with_error4'" t_decode_with_error4',
      testCase "t_decode_with_error5'" t_decode_with_error5',
      testProperty "t_infix_concat" t_infix_concat
    ],
    testGroup "validate" [
      testProperty "t_validateUtf8More_validPrefix" t_validateUtf8More_validPrefix,
      testProperty "t_validateUtf8More_maximalPrefix" t_validateUtf8More_maximalPrefix,
      testProperty "t_validateUtf8More_valid" t_validateUtf8More_valid
    ],
    testGroup "streaming" [
      testProperty "t_utf8_c" t_utf8_c,
      testCase "t_p_utf8_1" t_p_utf8_1,
      testCase "t_p_utf8_2" t_p_utf8_2,
      testCase "t_p_utf8_3" t_p_utf8_3,
      testCase "t_p_utf8_4" t_p_utf8_4,
      testCase "t_p_utf8_5" t_p_utf8_5,
      testCase "t_p_utf8_6" t_p_utf8_6,
      testCase "t_p_utf8_7" t_p_utf8_7,
      testCase "t_p_utf8_8" t_p_utf8_8,
      testCase "t_p_utf8_9" t_p_utf8_9,
      testCase "t_p_utf8_0" t_p_utf8_0,
      testCase "t_pn_utf8_1" t_pn_utf8_1,
      testCase "t_pn_utf8_2" t_pn_utf8_2,
      testCase "t_pn_utf8_3" t_pn_utf8_3,
      testCase "t_decode_chunk1" t_decode_chunk1,
      testCase "t_decode_chunk2" t_decode_chunk2,
      testProperty "t_decodeUtf8Chunk_split" t_decodeUtf8Chunk_split,
      testProperty "t_decodeUtf8More1" t_decodeUtf8More1,
      testProperty "t_decodeUtf8More2" t_decodeUtf8More2
    ],
    testGroup "strictBuilder" [
      testProperty "textToStrictBuilder" t_textToStrictBuilder
    ]
  ]