{-# LANGUAGE Haskell2010, DeriveDataTypeable #-}
module Data.Monoid.Instances.ByteString.UTF8 (
ByteStringUTF8(..), decode
)
where
import Control.Exception (assert)
import Data.Bits ((.&.), (.|.), shiftL, shiftR)
import Data.Char (chr, ord, isDigit, isPrint)
import Data.Data (Data, Typeable)
import qualified Data.Foldable as Foldable
import qualified Data.List as List
import Data.Maybe (fromMaybe, isJust, isNothing)
import Data.String (IsString(fromString))
import Data.Word (Word8)
import Data.ByteString (ByteString)
import qualified Data.ByteString as ByteString
import qualified Data.ByteString.Char8 as ByteString.Char8
import Data.ByteString.Internal (w2c)
import Data.ByteString.Unsafe (unsafeDrop, unsafeHead, unsafeTail, unsafeTake, unsafeIndex)
import Data.Text (pack, unpack)
import Data.Text.Encoding (decodeUtf8', encodeUtf8)
import Data.Semigroup (Semigroup(..))
import Data.Monoid (Monoid(..))
import Data.Semigroup.Cancellative (LeftReductive(..), LeftCancellative)
import Data.Semigroup.Factorial (Factorial(..))
import Data.Monoid.GCD (LeftGCDMonoid(..))
import Data.Monoid.Null (MonoidNull(..), PositiveMonoid)
import Data.Monoid.Factorial (FactorialMonoid(..))
import Data.Monoid.Textual (TextualMonoid(..))
import qualified Data.Monoid.Factorial as Factorial (FactorialMonoid(..))
import qualified Data.Monoid.Textual as Textual (TextualMonoid(..))
import Prelude hiding (any, drop, dropWhile, foldl, foldl1, foldr, foldr1, scanl, scanr, scanl1, scanr1,
map, concatMap, break, span)
newtype ByteStringUTF8 = ByteStringUTF8 ByteString deriving (Typeable ByteStringUTF8
ByteStringUTF8 -> DataType
ByteStringUTF8 -> Constr
(forall b. Data b => b -> b) -> ByteStringUTF8 -> ByteStringUTF8
forall a.
Typeable a
-> (forall (c :: * -> *).
(forall d b. Data d => c (d -> b) -> d -> c b)
-> (forall g. g -> c g) -> a -> c a)
-> (forall (c :: * -> *).
(forall b r. Data b => c (b -> r) -> c r)
-> (forall r. r -> c r) -> Constr -> c a)
-> (a -> Constr)
-> (a -> DataType)
-> (forall (t :: * -> *) (c :: * -> *).
Typeable t =>
(forall d. Data d => c (t d)) -> Maybe (c a))
-> (forall (t :: * -> * -> *) (c :: * -> *).
Typeable t =>
(forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c a))
-> ((forall b. Data b => b -> b) -> a -> a)
-> (forall r r'.
(r -> r' -> r) -> r -> (forall d. Data d => d -> r') -> a -> r)
-> (forall r r'.
(r' -> r -> r) -> r -> (forall d. Data d => d -> r') -> a -> r)
-> (forall u. (forall d. Data d => d -> u) -> a -> [u])
-> (forall u. Int -> (forall d. Data d => d -> u) -> a -> u)
-> (forall (m :: * -> *).
Monad m =>
(forall d. Data d => d -> m d) -> a -> m a)
-> (forall (m :: * -> *).
MonadPlus m =>
(forall d. Data d => d -> m d) -> a -> m a)
-> (forall (m :: * -> *).
MonadPlus m =>
(forall d. Data d => d -> m d) -> a -> m a)
-> Data a
forall u.
Int -> (forall d. Data d => d -> u) -> ByteStringUTF8 -> u
forall u. (forall d. Data d => d -> u) -> ByteStringUTF8 -> [u]
forall r r'.
(r -> r' -> r)
-> r -> (forall d. Data d => d -> r') -> ByteStringUTF8 -> r
forall r r'.
(r' -> r -> r)
-> r -> (forall d. Data d => d -> r') -> ByteStringUTF8 -> r
forall (m :: * -> *).
Monad m =>
(forall d. Data d => d -> m d)
-> ByteStringUTF8 -> m ByteStringUTF8
forall (m :: * -> *).
MonadPlus m =>
(forall d. Data d => d -> m d)
-> ByteStringUTF8 -> m ByteStringUTF8
forall (c :: * -> *).
(forall b r. Data b => c (b -> r) -> c r)
-> (forall r. r -> c r) -> Constr -> c ByteStringUTF8
forall (c :: * -> *).
(forall d b. Data d => c (d -> b) -> d -> c b)
-> (forall g. g -> c g) -> ByteStringUTF8 -> c ByteStringUTF8
forall (t :: * -> *) (c :: * -> *).
Typeable t =>
(forall d. Data d => c (t d)) -> Maybe (c ByteStringUTF8)
forall (t :: * -> * -> *) (c :: * -> *).
Typeable t =>
(forall d e. (Data d, Data e) => c (t d e))
-> Maybe (c ByteStringUTF8)
gmapMo :: forall (m :: * -> *).
MonadPlus m =>
(forall d. Data d => d -> m d)
-> ByteStringUTF8 -> m ByteStringUTF8
$cgmapMo :: forall (m :: * -> *).
MonadPlus m =>
(forall d. Data d => d -> m d)
-> ByteStringUTF8 -> m ByteStringUTF8
gmapMp :: forall (m :: * -> *).
MonadPlus m =>
(forall d. Data d => d -> m d)
-> ByteStringUTF8 -> m ByteStringUTF8
$cgmapMp :: forall (m :: * -> *).
MonadPlus m =>
(forall d. Data d => d -> m d)
-> ByteStringUTF8 -> m ByteStringUTF8
gmapM :: forall (m :: * -> *).
Monad m =>
(forall d. Data d => d -> m d)
-> ByteStringUTF8 -> m ByteStringUTF8
$cgmapM :: forall (m :: * -> *).
Monad m =>
(forall d. Data d => d -> m d)
-> ByteStringUTF8 -> m ByteStringUTF8
gmapQi :: forall u.
Int -> (forall d. Data d => d -> u) -> ByteStringUTF8 -> u
$cgmapQi :: forall u.
Int -> (forall d. Data d => d -> u) -> ByteStringUTF8 -> u
gmapQ :: forall u. (forall d. Data d => d -> u) -> ByteStringUTF8 -> [u]
$cgmapQ :: forall u. (forall d. Data d => d -> u) -> ByteStringUTF8 -> [u]
gmapQr :: forall r r'.
(r' -> r -> r)
-> r -> (forall d. Data d => d -> r') -> ByteStringUTF8 -> r
$cgmapQr :: forall r r'.
(r' -> r -> r)
-> r -> (forall d. Data d => d -> r') -> ByteStringUTF8 -> r
gmapQl :: forall r r'.
(r -> r' -> r)
-> r -> (forall d. Data d => d -> r') -> ByteStringUTF8 -> r
$cgmapQl :: forall r r'.
(r -> r' -> r)
-> r -> (forall d. Data d => d -> r') -> ByteStringUTF8 -> r
gmapT :: (forall b. Data b => b -> b) -> ByteStringUTF8 -> ByteStringUTF8
$cgmapT :: (forall b. Data b => b -> b) -> ByteStringUTF8 -> ByteStringUTF8
dataCast2 :: forall (t :: * -> * -> *) (c :: * -> *).
Typeable t =>
(forall d e. (Data d, Data e) => c (t d e))
-> Maybe (c ByteStringUTF8)
$cdataCast2 :: forall (t :: * -> * -> *) (c :: * -> *).
Typeable t =>
(forall d e. (Data d, Data e) => c (t d e))
-> Maybe (c ByteStringUTF8)
dataCast1 :: forall (t :: * -> *) (c :: * -> *).
Typeable t =>
(forall d. Data d => c (t d)) -> Maybe (c ByteStringUTF8)
$cdataCast1 :: forall (t :: * -> *) (c :: * -> *).
Typeable t =>
(forall d. Data d => c (t d)) -> Maybe (c ByteStringUTF8)
dataTypeOf :: ByteStringUTF8 -> DataType
$cdataTypeOf :: ByteStringUTF8 -> DataType
toConstr :: ByteStringUTF8 -> Constr
$ctoConstr :: ByteStringUTF8 -> Constr
gunfold :: forall (c :: * -> *).
(forall b r. Data b => c (b -> r) -> c r)
-> (forall r. r -> c r) -> Constr -> c ByteStringUTF8
$cgunfold :: forall (c :: * -> *).
(forall b r. Data b => c (b -> r) -> c r)
-> (forall r. r -> c r) -> Constr -> c ByteStringUTF8
gfoldl :: forall (c :: * -> *).
(forall d b. Data d => c (d -> b) -> d -> c b)
-> (forall g. g -> c g) -> ByteStringUTF8 -> c ByteStringUTF8
$cgfoldl :: forall (c :: * -> *).
(forall d b. Data d => c (d -> b) -> d -> c b)
-> (forall g. g -> c g) -> ByteStringUTF8 -> c ByteStringUTF8
Data, ByteStringUTF8 -> ByteStringUTF8 -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: ByteStringUTF8 -> ByteStringUTF8 -> Bool
$c/= :: ByteStringUTF8 -> ByteStringUTF8 -> Bool
== :: ByteStringUTF8 -> ByteStringUTF8 -> Bool
$c== :: ByteStringUTF8 -> ByteStringUTF8 -> Bool
Eq, Eq ByteStringUTF8
ByteStringUTF8 -> ByteStringUTF8 -> Bool
ByteStringUTF8 -> ByteStringUTF8 -> Ordering
ByteStringUTF8 -> ByteStringUTF8 -> ByteStringUTF8
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: ByteStringUTF8 -> ByteStringUTF8 -> ByteStringUTF8
$cmin :: ByteStringUTF8 -> ByteStringUTF8 -> ByteStringUTF8
max :: ByteStringUTF8 -> ByteStringUTF8 -> ByteStringUTF8
$cmax :: ByteStringUTF8 -> ByteStringUTF8 -> ByteStringUTF8
>= :: ByteStringUTF8 -> ByteStringUTF8 -> Bool
$c>= :: ByteStringUTF8 -> ByteStringUTF8 -> Bool
> :: ByteStringUTF8 -> ByteStringUTF8 -> Bool
$c> :: ByteStringUTF8 -> ByteStringUTF8 -> Bool
<= :: ByteStringUTF8 -> ByteStringUTF8 -> Bool
$c<= :: ByteStringUTF8 -> ByteStringUTF8 -> Bool
< :: ByteStringUTF8 -> ByteStringUTF8 -> Bool
$c< :: ByteStringUTF8 -> ByteStringUTF8 -> Bool
compare :: ByteStringUTF8 -> ByteStringUTF8 -> Ordering
$ccompare :: ByteStringUTF8 -> ByteStringUTF8 -> Ordering
Ord, Typeable)
decode :: ByteString -> (ByteStringUTF8, ByteString)
decode :: ByteString -> (ByteStringUTF8, ByteString)
decode ByteString
bs
| ByteString -> Bool
ByteString.null ByteString
bs Bool -> Bool -> Bool
|| Word8
l forall a. Ord a => a -> a -> Bool
< Word8
0x80 = (ByteString -> ByteStringUTF8
ByteStringUTF8 ByteString
bs, forall a. Monoid a => a
mempty)
| Word8
l forall a. Ord a => a -> a -> Bool
>= Word8
0xC0 = (ByteString -> ByteStringUTF8
ByteStringUTF8 (HasCallStack => ByteString -> ByteString
ByteString.init ByteString
bs), Word8 -> ByteString
ByteString.singleton Word8
l)
| ByteString -> Bool
ByteString.null ByteString
prefix = (forall a. Monoid a => a
mempty, ByteString
bs)
| Bool
otherwise =
case Word8 -> ByteString -> Maybe (Char, ByteStringUTF8)
toChar (HasCallStack => ByteString -> Word8
ByteString.last ByteString
prefix) ByteString
suffix
of Maybe (Char, ByteStringUTF8)
Nothing -> (ByteString -> ByteStringUTF8
ByteStringUTF8 (HasCallStack => ByteString -> ByteString
ByteString.init ByteString
prefix), forall m. FactorialMonoid m => Int -> m -> m
drop (ByteString -> Int
ByteString.length ByteString
prefix forall a. Num a => a -> a -> a
- Int
1) ByteString
bs)
Just{} -> (ByteString -> ByteStringUTF8
ByteStringUTF8 ByteString
bs, forall a. Monoid a => a
mempty)
where (ByteString
prefix, ByteString
suffix) = (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
ByteString.breakEnd Word8 -> Bool
byteStartsCharacter ByteString
bs
l :: Word8
l = HasCallStack => ByteString -> Word8
ByteString.last ByteString
bs
instance Semigroup ByteStringUTF8 where
ByteStringUTF8 ByteString
a <> :: ByteStringUTF8 -> ByteStringUTF8 -> ByteStringUTF8
<> ByteStringUTF8 ByteString
b = ByteString -> ByteStringUTF8
ByteStringUTF8 (ByteString
a forall a. Semigroup a => a -> a -> a
<> ByteString
b)
{-# INLINE (<>) #-}
instance Monoid ByteStringUTF8 where
mempty :: ByteStringUTF8
mempty = ByteString -> ByteStringUTF8
ByteStringUTF8 ByteString
ByteString.empty
{-# INLINE mempty #-}
mappend :: ByteStringUTF8 -> ByteStringUTF8 -> ByteStringUTF8
mappend = forall a. Semigroup a => a -> a -> a
(<>)
{-# INLINE mappend #-}
instance MonoidNull ByteStringUTF8 where
null :: ByteStringUTF8 -> Bool
null (ByteStringUTF8 ByteString
b) = ByteString -> Bool
ByteString.null ByteString
b
{-# INLINE null #-}
instance LeftReductive ByteStringUTF8 where
stripPrefix :: ByteStringUTF8 -> ByteStringUTF8 -> Maybe ByteStringUTF8
stripPrefix (ByteStringUTF8 ByteString
a) (ByteStringUTF8 ByteString
b) = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ByteString -> ByteStringUTF8
ByteStringUTF8 (forall m. LeftReductive m => m -> m -> Maybe m
stripPrefix ByteString
a ByteString
b)
{-# INLINE stripPrefix #-}
ByteStringUTF8 ByteString
a isPrefixOf :: ByteStringUTF8 -> ByteStringUTF8 -> Bool
`isPrefixOf` ByteStringUTF8 ByteString
b = ByteString
a forall m. LeftReductive m => m -> m -> Bool
`isPrefixOf` ByteString
b
{-# INLINE isPrefixOf #-}
instance LeftCancellative ByteStringUTF8
instance LeftGCDMonoid ByteStringUTF8 where
commonPrefix :: ByteStringUTF8 -> ByteStringUTF8 -> ByteStringUTF8
commonPrefix (ByteStringUTF8 ByteString
a) (ByteStringUTF8 ByteString
b) = ByteString -> ByteStringUTF8
ByteStringUTF8 (forall m. LeftGCDMonoid m => m -> m -> m
commonPrefix ByteString
a ByteString
b)
{-# INLINE commonPrefix #-}
stripCommonPrefix :: ByteStringUTF8
-> ByteStringUTF8
-> (ByteStringUTF8, ByteStringUTF8, ByteStringUTF8)
stripCommonPrefix (ByteStringUTF8 ByteString
a) (ByteStringUTF8 ByteString
b) = (ByteString, ByteString, ByteString)
-> (ByteStringUTF8, ByteStringUTF8, ByteStringUTF8)
wrapTriple (forall m. LeftGCDMonoid m => m -> m -> (m, m, m)
stripCommonPrefix ByteString
a ByteString
b)
{-# INLINE stripCommonPrefix #-}
instance Show ByteStringUTF8 where
showsPrec :: Int -> ByteStringUTF8 -> ShowS
showsPrec Int
_ ByteStringUTF8
bs String
s0 = Char
'"' forall a. a -> [a] -> [a]
: forall t a.
TextualMonoid t =>
(t -> a -> a) -> (Char -> a -> a) -> a -> t -> a
Textual.foldr ByteStringUTF8 -> ShowS
showsBytes Char -> ShowS
showsChar (Char
'"' forall a. a -> [a] -> [a]
: String
s0) ByteStringUTF8
bs
where showsBytes :: ByteStringUTF8 -> ShowS
showsBytes (ByteStringUTF8 ByteString
b) String
s = Char
'\\' forall a. a -> [a] -> [a]
: forall a. Show a => a -> ShowS
shows (ByteString -> [Word8]
ByteString.unpack ByteString
b) String
s
showsChar :: Char -> ShowS
showsChar Char
c String
s
| Char -> Bool
isPrint Char
c = Char
c forall a. a -> [a] -> [a]
: String
s
| Char
h:String
_ <- String
s, Char -> Bool
isDigit Char
h = String
"\\" forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show (Char -> Int
ord Char
c) forall a. [a] -> [a] -> [a]
++ String
"\\&" forall a. [a] -> [a] -> [a]
++ String
s
| Bool
otherwise = String
"\\" forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show (Char -> Int
ord Char
c) forall a. [a] -> [a] -> [a]
++ String
s
instance IsString ByteStringUTF8 where
fromString :: String -> ByteStringUTF8
fromString = ByteString -> ByteStringUTF8
ByteStringUTF8 forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (t :: * -> *) m a.
(Foldable t, Monoid m) =>
(a -> m) -> t a -> m
Foldable.foldMap Char -> ByteString
fromChar
{-# INLINE fromString #-}
instance PositiveMonoid ByteStringUTF8
instance Factorial ByteStringUTF8 where
primePrefix :: ByteStringUTF8 -> ByteStringUTF8
primePrefix utf8 :: ByteStringUTF8
utf8@(ByteStringUTF8 ByteString
bs)
| ByteString -> Bool
ByteString.null ByteString
bs = ByteStringUTF8
utf8
| ByteString -> Word8
unsafeHead ByteString
bs forall a. Ord a => a -> a -> Bool
< Word8
0x80 = ByteString -> ByteStringUTF8
ByteStringUTF8 (Int -> ByteString -> ByteString
ByteString.take Int
1 ByteString
bs)
| Bool
otherwise = case (Word8 -> Bool) -> ByteString -> Maybe Int
ByteString.findIndex Word8 -> Bool
byteStartsCharacter (ByteString -> ByteString
unsafeTail ByteString
bs)
of Just Int
i -> ByteString -> ByteStringUTF8
ByteStringUTF8 (Int -> ByteString -> ByteString
ByteString.take (forall a. Enum a => a -> a
succ Int
i) ByteString
bs)
Maybe Int
Nothing -> ByteStringUTF8
utf8
{-# INLINABLE primePrefix #-}
factors :: ByteStringUTF8 -> [ByteStringUTF8]
factors (ByteStringUTF8 ByteString
bs) = forall a b. (a -> b) -> [a] -> [b]
List.map ByteString -> ByteStringUTF8
ByteStringUTF8 forall a b. (a -> b) -> a -> b
$ (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
ByteString.groupBy forall {a} {a}. (Ord a, Ord a, Num a, Num a) => a -> a -> Bool
continued ByteString
bs
where continued :: a -> a -> Bool
continued a
a a
b = a
a forall a. Ord a => a -> a -> Bool
>= a
0x80 Bool -> Bool -> Bool
&& a
b forall a. Ord a => a -> a -> Bool
>= a
0x80 Bool -> Bool -> Bool
&& a
b forall a. Ord a => a -> a -> Bool
< a
0xC0
{-# INLINABLE factors #-}
length :: ByteStringUTF8 -> Int
length (ByteStringUTF8 ByteString
bs) = forall a b. (a, b) -> a
fst (forall a. (a -> Word8 -> a) -> a -> ByteString -> a
ByteString.foldl' forall {a} {a}.
(Ord a, Num a, Enum a) =>
(a, Bool) -> a -> (a, Bool)
count (Int
0, Bool
False) ByteString
bs)
where count :: (a, Bool) -> a -> (a, Bool)
count (a
n, Bool
high) a
byte | a
byte forall a. Ord a => a -> a -> Bool
< a
0x80 = (forall a. Enum a => a -> a
succ a
n, Bool
False)
| a
byte forall a. Ord a => a -> a -> Bool
< a
0xC0 = (if Bool
high then a
n else forall a. Enum a => a -> a
succ a
n, Bool
True)
| Bool
otherwise = (forall a. Enum a => a -> a
succ a
n, Bool
True)
{-# INLINABLE length #-}
foldl :: forall a. (a -> ByteStringUTF8 -> a) -> a -> ByteStringUTF8 -> a
foldl a -> ByteStringUTF8 -> a
f a
a0 (ByteStringUTF8 ByteString
bs) = forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
List.foldl a -> ByteString -> a
f' a
a0 (ByteString -> [ByteString]
groupASCII ByteString
bs)
where f' :: a -> ByteString -> a
f' a
a ByteString
b | ByteString -> Word8
unsafeHead ByteString
b forall a. Ord a => a -> a -> Bool
< Word8
0x80 = forall a. (a -> Word8 -> a) -> a -> ByteString -> a
ByteString.foldl a -> Word8 -> a
f'' a
a ByteString
b
| Bool
otherwise = a -> ByteStringUTF8 -> a
f a
a (ByteString -> ByteStringUTF8
ByteStringUTF8 ByteString
b)
f'' :: a -> Word8 -> a
f'' a
a Word8
w = a -> ByteStringUTF8 -> a
f a
a (ByteString -> ByteStringUTF8
ByteStringUTF8 forall a b. (a -> b) -> a -> b
$ Word8 -> ByteString
ByteString.singleton Word8
w)
{-# INLINABLE foldl #-}
foldl' :: forall a. (a -> ByteStringUTF8 -> a) -> a -> ByteStringUTF8 -> a
foldl' a -> ByteStringUTF8 -> a
f a
a0 (ByteStringUTF8 ByteString
bs) = forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
List.foldl' a -> ByteString -> a
f' a
a0 (ByteString -> [ByteString]
groupASCII ByteString
bs)
where f' :: a -> ByteString -> a
f' a
a ByteString
b | ByteString -> Word8
unsafeHead ByteString
b forall a. Ord a => a -> a -> Bool
< Word8
0x80 = forall a. (a -> Word8 -> a) -> a -> ByteString -> a
ByteString.foldl' a -> Word8 -> a
f'' a
a ByteString
b
| Bool
otherwise = a -> ByteStringUTF8 -> a
f a
a (ByteString -> ByteStringUTF8
ByteStringUTF8 ByteString
b)
f'' :: a -> Word8 -> a
f'' a
a Word8
w = a -> ByteStringUTF8 -> a
f a
a (ByteString -> ByteStringUTF8
ByteStringUTF8 forall a b. (a -> b) -> a -> b
$ Word8 -> ByteString
ByteString.singleton Word8
w)
{-# INLINABLE foldl' #-}
foldr :: forall a. (ByteStringUTF8 -> a -> a) -> a -> ByteStringUTF8 -> a
foldr ByteStringUTF8 -> a -> a
f a
a0 (ByteStringUTF8 ByteString
bs) = forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
List.foldr ByteString -> a -> a
f' a
a0 (ByteString -> [ByteString]
groupASCII ByteString
bs)
where f' :: ByteString -> a -> a
f' ByteString
b a
a | ByteString -> Word8
unsafeHead ByteString
b forall a. Ord a => a -> a -> Bool
< Word8
0x80 = forall a. (Word8 -> a -> a) -> a -> ByteString -> a
ByteString.foldr Word8 -> a -> a
f'' a
a ByteString
b
| Bool
otherwise = ByteStringUTF8 -> a -> a
f (ByteString -> ByteStringUTF8
ByteStringUTF8 ByteString
b) a
a
f'' :: Word8 -> a -> a
f'' Word8
w a
a = ByteStringUTF8 -> a -> a
f (ByteString -> ByteStringUTF8
ByteStringUTF8 forall a b. (a -> b) -> a -> b
$ Word8 -> ByteString
ByteString.singleton Word8
w) a
a
{-# INLINABLE foldr #-}
reverse :: ByteStringUTF8 -> ByteStringUTF8
reverse (ByteStringUTF8 ByteString
bs) =
ByteString -> ByteStringUTF8
ByteStringUTF8 ([ByteString] -> ByteString
ByteString.concat forall a b. (a -> b) -> a -> b
$ forall a. [a] -> [a]
List.reverse forall a b. (a -> b) -> a -> b
$ forall a b. (a -> b) -> [a] -> [b]
List.map ByteString -> ByteString
reverseASCII forall a b. (a -> b) -> a -> b
$ ByteString -> [ByteString]
groupASCII ByteString
bs)
where reverseASCII :: ByteString -> ByteString
reverseASCII ByteString
b | ByteString -> Word8
unsafeHead ByteString
b forall a. Ord a => a -> a -> Bool
< Word8
0x80 = ByteString -> ByteString
ByteString.reverse ByteString
b
| Bool
otherwise = ByteString
b
{-# INLINABLE reverse #-}
instance FactorialMonoid ByteStringUTF8 where
splitPrimePrefix :: ByteStringUTF8 -> Maybe (ByteStringUTF8, ByteStringUTF8)
splitPrimePrefix utf8 :: ByteStringUTF8
utf8@(ByteStringUTF8 ByteString
bs)
| ByteString -> Bool
ByteString.null ByteString
bs = forall a. Maybe a
Nothing
| ByteString -> Word8
unsafeHead ByteString
bs forall a. Ord a => a -> a -> Bool
< Word8
0x80 = forall a. a -> Maybe a
Just ((ByteString, ByteString) -> (ByteStringUTF8, ByteStringUTF8)
wrapPair forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> (ByteString, ByteString)
ByteString.splitAt Int
1 ByteString
bs)
| Bool
otherwise = case (Word8 -> Bool) -> ByteString -> Maybe Int
ByteString.findIndex Word8 -> Bool
byteStartsCharacter (ByteString -> ByteString
unsafeTail ByteString
bs)
of Just Int
i -> forall a. a -> Maybe a
Just ((ByteString, ByteString) -> (ByteStringUTF8, ByteStringUTF8)
wrapPair forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> (ByteString, ByteString)
ByteString.splitAt (forall a. Enum a => a -> a
succ Int
i) ByteString
bs)
Maybe Int
Nothing -> forall a. a -> Maybe a
Just (ByteStringUTF8
utf8, ByteString -> ByteStringUTF8
ByteStringUTF8 forall a b. (a -> b) -> a -> b
$ ByteString
ByteString.empty)
{-# INLINABLE splitPrimePrefix #-}
splitPrimeSuffix :: ByteStringUTF8 -> Maybe (ByteStringUTF8, ByteStringUTF8)
splitPrimeSuffix (ByteStringUTF8 ByteString
bs)
| ByteString -> Bool
ByteString.null ByteString
bs = forall a. Maybe a
Nothing
| ByteString -> Bool
ByteString.null ByteString
prefix = forall a. a -> Maybe a
Just ((ByteString, ByteString) -> (ByteStringUTF8, ByteStringUTF8)
wrapPair (ByteString, ByteString)
splitBS)
| Bool -> Bool
not (ByteString -> Bool
ByteString.null ByteString
suffix) Bool -> Bool -> Bool
&& HasCallStack => ByteString -> Word8
ByteString.last ByteString
prefix forall a. Ord a => a -> a -> Bool
< Word8
0x80 = forall a. a -> Maybe a
Just ((ByteString, ByteString) -> (ByteStringUTF8, ByteStringUTF8)
wrapPair (ByteString, ByteString)
splitBS)
| Bool
otherwise = forall a. a -> Maybe a
Just ((ByteString, ByteString) -> (ByteStringUTF8, ByteStringUTF8)
wrapPair forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> (ByteString, ByteString)
ByteString.splitAt (forall a. Enum a => a -> a
pred forall a b. (a -> b) -> a -> b
$ ByteString -> Int
ByteString.length ByteString
prefix) ByteString
bs)
where splitBS :: (ByteString, ByteString)
splitBS@(ByteString
prefix, ByteString
suffix) = (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
ByteString.breakEnd Word8 -> Bool
byteStartsCharacter ByteString
bs
{-# INLINABLE splitPrimeSuffix #-}
splitAt :: Int -> ByteStringUTF8 -> (ByteStringUTF8, ByteStringUTF8)
splitAt Int
n (ByteStringUTF8 ByteString
bs) = (ByteString, ByteString) -> (ByteStringUTF8, ByteStringUTF8)
wrapPair (Int -> ByteString -> (ByteString, ByteString)
ByteString.splitAt (Int -> ByteString -> Int
charStartIndex Int
n ByteString
bs) ByteString
bs)
{-# INLINE splitAt #-}
take :: Int -> ByteStringUTF8 -> ByteStringUTF8
take Int
n (ByteStringUTF8 ByteString
bs) = ByteString -> ByteStringUTF8
ByteStringUTF8 (Int -> ByteString -> ByteString
ByteString.take (Int -> ByteString -> Int
charStartIndex Int
n ByteString
bs) ByteString
bs)
{-# INLINE take #-}
drop :: Int -> ByteStringUTF8 -> ByteStringUTF8
drop Int
n (ByteStringUTF8 ByteString
bs) = ByteString -> ByteStringUTF8
ByteStringUTF8 (Int -> ByteString -> ByteString
ByteString.drop (Int -> ByteString -> Int
charStartIndex Int
n ByteString
bs) ByteString
bs)
{-# INLINE drop #-}
dropWhile :: (ByteStringUTF8 -> Bool) -> ByteStringUTF8 -> ByteStringUTF8
dropWhile ByteStringUTF8 -> Bool
p (ByteStringUTF8 ByteString
bs0) = ByteString -> ByteStringUTF8
dropASCII ByteString
bs0
where dropASCII :: ByteString -> ByteStringUTF8
dropASCII ByteString
bs =
let suffix :: ByteString
suffix = (Word8 -> Bool) -> ByteString -> ByteString
ByteString.dropWhile (\Word8
w-> Word8
w forall a. Ord a => a -> a -> Bool
< Word8
0x80 Bool -> Bool -> Bool
&& ByteStringUTF8 -> Bool
p (ByteString -> ByteStringUTF8
ByteStringUTF8 forall a b. (a -> b) -> a -> b
$ Word8 -> ByteString
ByteString.singleton Word8
w)) ByteString
bs
in if ByteString -> Bool
ByteString.null ByteString
suffix Bool -> Bool -> Bool
|| ByteString -> Word8
unsafeHead ByteString
suffix forall a. Ord a => a -> a -> Bool
< Word8
0x80
then ByteString -> ByteStringUTF8
ByteStringUTF8 ByteString
suffix
else ByteString -> ByteStringUTF8
dropMultiByte ByteString
suffix
dropMultiByte :: ByteString -> ByteStringUTF8
dropMultiByte ByteString
bs =
let utf8 :: ByteStringUTF8
utf8 = ByteString -> ByteStringUTF8
ByteStringUTF8 ByteString
bs
in case (Word8 -> Bool) -> ByteString -> Maybe Int
ByteString.findIndex Word8 -> Bool
byteStartsCharacter (ByteString -> ByteString
unsafeTail ByteString
bs)
of Maybe Int
Nothing -> if ByteStringUTF8 -> Bool
p ByteStringUTF8
utf8 then ByteString -> ByteStringUTF8
ByteStringUTF8 ByteString
ByteString.empty else ByteStringUTF8
utf8
Just Int
i -> let (ByteString
hd, ByteString
tl) = Int -> ByteString -> (ByteString, ByteString)
ByteString.splitAt (forall a. Enum a => a -> a
succ Int
i) ByteString
bs
in if ByteStringUTF8 -> Bool
p (ByteString -> ByteStringUTF8
ByteStringUTF8 ByteString
hd)
then ByteString -> ByteStringUTF8
dropASCII ByteString
tl
else ByteStringUTF8
utf8
{-# INLINE dropWhile #-}
takeWhile :: (ByteStringUTF8 -> Bool) -> ByteStringUTF8 -> ByteStringUTF8
takeWhile ByteStringUTF8 -> Bool
p utf8 :: ByteStringUTF8
utf8@(ByteStringUTF8 ByteString
bs) =
ByteString -> ByteStringUTF8
ByteStringUTF8 forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> ByteString
ByteString.take (ByteString -> Int
ByteString.length ByteString
bs forall a. Num a => a -> a -> a
- ByteString -> Int
ByteString.length ByteString
s) ByteString
bs
where (ByteStringUTF8 ByteString
s) = forall m. FactorialMonoid m => (m -> Bool) -> m -> m
Factorial.dropWhile ByteStringUTF8 -> Bool
p ByteStringUTF8
utf8
{-# INLINE takeWhile #-}
span :: (ByteStringUTF8 -> Bool)
-> ByteStringUTF8 -> (ByteStringUTF8, ByteStringUTF8)
span ByteStringUTF8 -> Bool
p utf8 :: ByteStringUTF8
utf8@(ByteStringUTF8 ByteString
bs) =
(ByteString -> ByteStringUTF8
ByteStringUTF8 forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> ByteString
ByteString.take (ByteString -> Int
ByteString.length ByteString
bs forall a. Num a => a -> a -> a
- ByteString -> Int
ByteString.length ByteString
s) ByteString
bs, ByteStringUTF8
suffix)
where suffix :: ByteStringUTF8
suffix@(ByteStringUTF8 ByteString
s) = forall m. FactorialMonoid m => (m -> Bool) -> m -> m
Factorial.dropWhile ByteStringUTF8 -> Bool
p ByteStringUTF8
utf8
{-# INLINE span #-}
break :: (ByteStringUTF8 -> Bool)
-> ByteStringUTF8 -> (ByteStringUTF8, ByteStringUTF8)
break ByteStringUTF8 -> Bool
p = forall m. FactorialMonoid m => (m -> Bool) -> m -> (m, m)
Factorial.span (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteStringUTF8 -> Bool
p)
{-# INLINE break #-}
spanMaybe :: forall s.
s
-> (s -> ByteStringUTF8 -> Maybe s)
-> ByteStringUTF8
-> (ByteStringUTF8, ByteStringUTF8, s)
spanMaybe s
s0 s -> ByteStringUTF8 -> Maybe s
f (ByteStringUTF8 ByteString
bs0) = (ByteString -> ByteStringUTF8
ByteStringUTF8 forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> ByteString
ByteString.take (ByteString -> Int
ByteString.length ByteString
bs0 forall a. Num a => a -> a -> a
- ByteString -> Int
ByteString.length ByteString
dropped) ByteString
bs0,
ByteString -> ByteStringUTF8
ByteStringUTF8 ByteString
dropped,
s
s')
where (ByteString
dropped, s
s') = s -> ByteString -> (ByteString, s)
dropASCII s
s0 ByteString
bs0
dropASCII :: s -> ByteString -> (ByteString, s)
dropASCII s
s ByteString
bs =
let suffix :: ByteString
suffix = Int -> ByteString -> ByteString
ByteString.drop Int
index ByteString
bs
(Int
index, s
s1) = forall a. (Word8 -> a -> a) -> a -> ByteString -> a
ByteString.foldr Word8 -> ((Int, s) -> (Int, s)) -> (Int, s) -> (Int, s)
f8 forall a. a -> a
id ByteString
bs (Int
0, s
s)
f8 :: Word8 -> ((Int, s) -> (Int, s)) -> (Int, s) -> (Int, s)
f8 Word8
w (Int, s) -> (Int, s)
cont (Int
i, s
s2)
| Word8
w forall a. Ord a => a -> a -> Bool
< Word8
0x80, Just s
s3 <- s -> ByteStringUTF8 -> Maybe s
f s
s2 (ByteString -> ByteStringUTF8
ByteStringUTF8 forall a b. (a -> b) -> a -> b
$ Word8 -> ByteString
ByteString.singleton Word8
w) =
let i' :: Int
i' = forall a. Enum a => a -> a
succ Int
i :: Int in seq :: forall a b. a -> b -> b
seq Int
i' forall a b. (a -> b) -> a -> b
$ (Int, s) -> (Int, s)
cont (Int
i', s
s3)
| Bool
otherwise = (Int
i, s
s2)
in if ByteString -> Bool
ByteString.null ByteString
suffix Bool -> Bool -> Bool
|| ByteString -> Word8
unsafeHead ByteString
suffix forall a. Ord a => a -> a -> Bool
< Word8
0x80
then (ByteString
suffix, s
s1)
else s -> ByteString -> (ByteString, s)
dropMultiByte s
s1 ByteString
suffix
dropMultiByte :: s -> ByteString -> (ByteString, s)
dropMultiByte s
s ByteString
bs =
case (Word8 -> Bool) -> ByteString -> Maybe Int
ByteString.findIndex Word8 -> Bool
byteStartsCharacter (ByteString -> ByteString
unsafeTail ByteString
bs)
of Maybe Int
Nothing -> case s -> ByteStringUTF8 -> Maybe s
f s
s (ByteString -> ByteStringUTF8
ByteStringUTF8 ByteString
bs)
of Just s
s1 -> (ByteString
ByteString.empty, s
s1)
Maybe s
Nothing -> (ByteString
bs, s
s)
Just Int
i -> let (ByteString
hd, ByteString
tl) = Int -> ByteString -> (ByteString, ByteString)
ByteString.splitAt (forall a. Enum a => a -> a
succ Int
i) ByteString
bs
in case s -> ByteStringUTF8 -> Maybe s
f s
s (ByteString -> ByteStringUTF8
ByteStringUTF8 ByteString
hd)
of Just s
s1 -> s -> ByteString -> (ByteString, s)
dropASCII s
s1 ByteString
tl
Maybe s
Nothing -> (ByteString
bs, s
s)
{-# INLINE spanMaybe #-}
spanMaybe' :: forall s.
s
-> (s -> ByteStringUTF8 -> Maybe s)
-> ByteStringUTF8
-> (ByteStringUTF8, ByteStringUTF8, s)
spanMaybe' s
s0 s -> ByteStringUTF8 -> Maybe s
f (ByteStringUTF8 ByteString
bs0) = (ByteString -> ByteStringUTF8
ByteStringUTF8 forall a b. (a -> b) -> a -> b
$
Int -> ByteString -> ByteString
ByteString.take (ByteString -> Int
ByteString.length ByteString
bs0 forall a. Num a => a -> a -> a
- ByteString -> Int
ByteString.length ByteString
dropped) ByteString
bs0,
ByteString -> ByteStringUTF8
ByteStringUTF8 ByteString
dropped,
s
s')
where (ByteString
dropped, s
s') = s -> ByteString -> (ByteString, s)
dropASCII s
s0 ByteString
bs0
dropASCII :: s -> ByteString -> (ByteString, s)
dropASCII s
s ByteString
bs =
let suffix :: ByteString
suffix = Int -> ByteString -> ByteString
ByteString.drop Int
index ByteString
bs
(Int
index, s
s1) = forall a. (Word8 -> a -> a) -> a -> ByteString -> a
ByteString.foldr Word8 -> ((Int, s) -> (Int, s)) -> (Int, s) -> (Int, s)
f8 forall a. a -> a
id ByteString
bs (Int
0, s
s)
f8 :: Word8 -> ((Int, s) -> (Int, s)) -> (Int, s) -> (Int, s)
f8 Word8
w (Int, s) -> (Int, s)
cont (Int
i, s
s2)
| Word8
w forall a. Ord a => a -> a -> Bool
< Word8
0x80, Just s
s3 <- s -> ByteStringUTF8 -> Maybe s
f s
s2 (ByteString -> ByteStringUTF8
ByteStringUTF8 forall a b. (a -> b) -> a -> b
$ Word8 -> ByteString
ByteString.singleton Word8
w) =
let i' :: Int
i' = forall a. Enum a => a -> a
succ Int
i :: Int in seq :: forall a b. a -> b -> b
seq Int
i' forall a b. (a -> b) -> a -> b
$ seq :: forall a b. a -> b -> b
seq s
s3 forall a b. (a -> b) -> a -> b
$ (Int, s) -> (Int, s)
cont (Int
i', s
s3)
| Bool
otherwise = (Int
i, s
s)
in if ByteString -> Bool
ByteString.null ByteString
suffix Bool -> Bool -> Bool
|| ByteString -> Word8
unsafeHead ByteString
suffix forall a. Ord a => a -> a -> Bool
< Word8
0x80
then (ByteString
suffix, s
s1)
else s -> ByteString -> (ByteString, s)
dropMultiByte s
s1 ByteString
suffix
dropMultiByte :: s -> ByteString -> (ByteString, s)
dropMultiByte s
s ByteString
bs =
case (Word8 -> Bool) -> ByteString -> Maybe Int
ByteString.findIndex Word8 -> Bool
byteStartsCharacter (ByteString -> ByteString
unsafeTail ByteString
bs)
of Maybe Int
Nothing -> case s -> ByteStringUTF8 -> Maybe s
f s
s (ByteString -> ByteStringUTF8
ByteStringUTF8 ByteString
bs)
of Just s
s1 -> seq :: forall a b. a -> b -> b
seq s
s1 (ByteString
ByteString.empty, s
s1)
Maybe s
Nothing -> (ByteString
bs, s
s)
Just Int
i -> let (ByteString
hd, ByteString
tl) = Int -> ByteString -> (ByteString, ByteString)
ByteString.splitAt (forall a. Enum a => a -> a
succ Int
i) ByteString
bs
in case s -> ByteStringUTF8 -> Maybe s
f s
s (ByteString -> ByteStringUTF8
ByteStringUTF8 ByteString
hd)
of Just s
s1 -> seq :: forall a b. a -> b -> b
seq s
s1 (s -> ByteString -> (ByteString, s)
dropASCII s
s1 ByteString
tl)
Maybe s
Nothing -> (ByteString
bs, s
s)
{-# INLINE spanMaybe' #-}
instance TextualMonoid ByteStringUTF8 where
singleton :: Char -> ByteStringUTF8
singleton = ByteString -> ByteStringUTF8
ByteStringUTF8 forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> ByteString
fromChar
{-# INLINE singleton #-}
splitCharacterPrefix :: ByteStringUTF8 -> Maybe (Char, ByteStringUTF8)
splitCharacterPrefix (ByteStringUTF8 ByteString
bs) = ByteString -> Maybe (Word8, ByteString)
ByteString.uncons ByteString
bs forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry Word8 -> ByteString -> Maybe (Char, ByteStringUTF8)
toChar
{-# INLINE splitCharacterPrefix #-}
foldl :: forall a.
(a -> ByteStringUTF8 -> a)
-> (a -> Char -> a) -> a -> ByteStringUTF8 -> a
foldl a -> ByteStringUTF8 -> a
ft a -> Char -> a
fc a
a0 (ByteStringUTF8 ByteString
bs) = case forall a. (a -> Char -> a) -> a -> ByteString -> a
ByteString.Char8.foldl (a, [Word8]) -> Char -> (a, [Word8])
f (a
a0, []) ByteString
bs
of (a
a, []) -> a
a
(a
a, [Word8]
acc) -> a -> [Word8] -> a
multiByte a
a [Word8]
acc
where f :: (a, [Word8]) -> Char -> (a, [Word8])
f (a
a, []) Char
c | Char
c forall a. Ord a => a -> a -> Bool
< Char
'\x80' = (a -> Char -> a
fc a
a Char
c, [])
| Bool
otherwise = (a
a, [forall a b. (Integral a, Num b) => a -> b
fromIntegral forall a b. (a -> b) -> a -> b
$ Char -> Int
ord Char
c])
f (a
a, [Word8]
acc) Char
c | Char
c forall a. Ord a => a -> a -> Bool
< Char
'\x80' = (a -> Char -> a
fc (a -> [Word8] -> a
multiByte a
a [Word8]
acc) Char
c, [])
| Char
c forall a. Ord a => a -> a -> Bool
< Char
'\xC0' = (a
a, forall a b. (Integral a, Num b) => a -> b
fromIntegral (Char -> Int
ord Char
c) forall a. a -> [a] -> [a]
: [Word8]
acc)
| Bool
otherwise = (a -> [Word8] -> a
multiByte a
a [Word8]
acc, [forall a b. (Integral a, Num b) => a -> b
fromIntegral forall a b. (a -> b) -> a -> b
$ Char -> Int
ord Char
c])
multiByte :: a -> [Word8] -> a
multiByte a
a [Word8]
acc = forall a. (ByteString -> a) -> (Char -> a) -> [Word8] -> a
reverseBytesToChar (a -> ByteStringUTF8 -> a
ft a
a forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ByteStringUTF8
ByteStringUTF8) (a -> Char -> a
fc a
a) [Word8]
acc
{-# INLINE foldl #-}
foldl' :: forall a.
(a -> ByteStringUTF8 -> a)
-> (a -> Char -> a) -> a -> ByteStringUTF8 -> a
foldl' a -> ByteStringUTF8 -> a
ft a -> Char -> a
fc a
a0 (ByteStringUTF8 ByteString
bs) = case forall a. (a -> Char -> a) -> a -> ByteString -> a
ByteString.Char8.foldl' (a, [Word8]) -> Char -> (a, [Word8])
f (a
a0, []) ByteString
bs
of (a
a, []) -> a
a
(a
a, [Word8]
acc) -> a -> [Word8] -> a
multiByte a
a [Word8]
acc
where f :: (a, [Word8]) -> Char -> (a, [Word8])
f (a
a, []) Char
c | Char
c forall a. Ord a => a -> a -> Bool
< Char
'\x80' = (a -> Char -> a
fc a
a Char
c, [])
| Bool
otherwise = seq :: forall a b. a -> b -> b
seq a
a (a
a, [forall a b. (Integral a, Num b) => a -> b
fromIntegral forall a b. (a -> b) -> a -> b
$ Char -> Int
ord Char
c])
f (a
a, [Word8]
acc) Char
c | seq :: forall a b. a -> b -> b
seq a
a Char
c forall a. Ord a => a -> a -> Bool
< Char
'\x80' = let a' :: a
a' = a -> [Word8] -> a
multiByte a
a [Word8]
acc in seq :: forall a b. a -> b -> b
seq a
a' (a -> Char -> a
fc a
a' Char
c, [])
| Char
c forall a. Ord a => a -> a -> Bool
< Char
'\xC0' = (a
a, forall a b. (Integral a, Num b) => a -> b
fromIntegral (Char -> Int
ord Char
c) forall a. a -> [a] -> [a]
: [Word8]
acc)
| Bool
otherwise = let a' :: a
a' = a -> [Word8] -> a
multiByte a
a [Word8]
acc in seq :: forall a b. a -> b -> b
seq a
a' (a
a', [forall a b. (Integral a, Num b) => a -> b
fromIntegral forall a b. (a -> b) -> a -> b
$ Char -> Int
ord Char
c])
multiByte :: a -> [Word8] -> a
multiByte a
a [Word8]
acc = forall a. (ByteString -> a) -> (Char -> a) -> [Word8] -> a
reverseBytesToChar (a -> ByteStringUTF8 -> a
ft a
a forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ByteStringUTF8
ByteStringUTF8) (a -> Char -> a
fc a
a) [Word8]
acc
{-# INLINE foldl' #-}
foldr :: forall a.
(ByteStringUTF8 -> a -> a)
-> (Char -> a -> a) -> a -> ByteStringUTF8 -> a
foldr ByteStringUTF8 -> a -> a
ft Char -> a -> a
fc a
a0 (ByteStringUTF8 ByteString
bs) = case forall a. (Char -> a -> a) -> a -> ByteString -> a
ByteString.Char8.foldr Char -> (a, [Word8]) -> (a, [Word8])
f (a
a0, []) ByteString
bs
of (a
a, []) -> a
a
(a
a, [Word8]
acc) -> a -> [Word8] -> a
multiByte a
a [Word8]
acc
where f :: Char -> (a, [Word8]) -> (a, [Word8])
f Char
c (a
a, []) | Char
c forall a. Ord a => a -> a -> Bool
< Char
'\x80' = (Char -> a -> a
fc Char
c a
a, [])
| Char
c forall a. Ord a => a -> a -> Bool
< Char
'\xC0' = (a
a, [forall a b. (Integral a, Num b) => a -> b
fromIntegral forall a b. (a -> b) -> a -> b
$ Char -> Int
ord Char
c])
| Bool
otherwise = (ByteStringUTF8 -> a -> a
ft (ByteString -> ByteStringUTF8
ByteStringUTF8 forall a b. (a -> b) -> a -> b
$ Char -> ByteString
ByteString.Char8.singleton Char
c) a
a, [])
f Char
c (a
a, [Word8]
acc) | Char
c forall a. Ord a => a -> a -> Bool
< Char
'\x80' = (Char -> a -> a
fc Char
c (ByteStringUTF8 -> a -> a
ft (ByteString -> ByteStringUTF8
ByteStringUTF8 forall a b. (a -> b) -> a -> b
$ [Word8] -> ByteString
ByteString.pack [Word8]
acc) a
a), [])
| Char
c forall a. Ord a => a -> a -> Bool
< Char
'\xC0' = (a
a, forall a b. (Integral a, Num b) => a -> b
fromIntegral (Char -> Int
ord Char
c) forall a. a -> [a] -> [a]
: [Word8]
acc)
| Bool
otherwise = (a -> [Word8] -> a
multiByte a
a (forall a b. (Integral a, Num b) => a -> b
fromIntegral (Char -> Int
ord Char
c) forall a. a -> [a] -> [a]
: [Word8]
acc), [])
multiByte :: a -> [Word8] -> a
multiByte a
a [Word8]
acc = forall a. (ByteString -> a) -> (Char -> a) -> [Word8] -> a
bytesToChar ((ByteStringUTF8 -> a -> a
`ft` a
a) forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ByteStringUTF8
ByteStringUTF8) (Char -> a -> a
`fc` a
a) [Word8]
acc
{-# INLINE foldr #-}
dropWhile :: (ByteStringUTF8 -> Bool)
-> (Char -> Bool) -> ByteStringUTF8 -> ByteStringUTF8
dropWhile ByteStringUTF8 -> Bool
pb Char -> Bool
pc (ByteStringUTF8 ByteString
bs) = ByteString -> ByteStringUTF8
ByteStringUTF8 forall a b. (a -> b) -> a -> b
$ ByteString -> ByteString
dropASCII ByteString
bs
where dropASCII :: ByteString -> ByteString
dropASCII ByteString
rest = case (Char -> Bool) -> ByteString -> Maybe Int
ByteString.Char8.findIndex (\Char
c-> Char
c forall a. Ord a => a -> a -> Bool
> Char
'\x7f' Bool -> Bool -> Bool
|| Bool -> Bool
not (Char -> Bool
pc Char
c)) ByteString
rest
of Maybe Int
Nothing -> ByteString
ByteString.empty
Just Int
j -> let rest' :: ByteString
rest' = Int -> ByteString -> ByteString
unsafeDrop Int
j ByteString
rest
in if ByteString -> Word8
unsafeHead ByteString
rest' forall a. Ord a => a -> a -> Bool
> Word8
0x7f
then ByteString -> ByteString
dropMultiByte ByteString
rest'
else ByteString
rest'
dropMultiByte :: ByteString -> ByteString
dropMultiByte ByteString
rest = case forall t. TextualMonoid t => t -> Maybe (Char, t)
splitCharacterPrefix (ByteString -> ByteStringUTF8
ByteStringUTF8 ByteString
rest)
of Just (Char
c, ByteStringUTF8 ByteString
rest') | Char -> Bool
pc Char
c -> ByteString -> ByteString
dropASCII ByteString
rest'
Maybe (Char, ByteStringUTF8)
Nothing -> let j :: Int
j = forall a. Enum a => a -> a
succ (ByteString -> Int
headIndex forall a b. (a -> b) -> a -> b
$ forall m. FactorialMonoid m => Int -> m -> m
drop Int
1 ByteString
rest)
in if ByteStringUTF8 -> Bool
pb (ByteString -> ByteStringUTF8
ByteStringUTF8 forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> ByteString
ByteString.take Int
j ByteString
rest)
then ByteString -> ByteString
dropASCII (Int -> ByteString -> ByteString
unsafeDrop Int
j ByteString
rest)
else ByteString
rest
Maybe (Char, ByteStringUTF8)
_ -> ByteString
rest
{-# INLINE dropWhile #-}
takeWhile :: (ByteStringUTF8 -> Bool)
-> (Char -> Bool) -> ByteStringUTF8 -> ByteStringUTF8
takeWhile ByteStringUTF8 -> Bool
pb Char -> Bool
pc utf8 :: ByteStringUTF8
utf8@(ByteStringUTF8 ByteString
bs) =
ByteString -> ByteStringUTF8
ByteStringUTF8 forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> ByteString
unsafeTake (ByteString -> Int
ByteString.length ByteString
bs forall a. Num a => a -> a -> a
- ByteString -> Int
ByteString.length ByteString
suffix) ByteString
bs
where ByteStringUTF8 ByteString
suffix = forall t.
TextualMonoid t =>
(t -> Bool) -> (Char -> Bool) -> t -> t
Textual.dropWhile ByteStringUTF8 -> Bool
pb Char -> Bool
pc ByteStringUTF8
utf8
{-# INLINE takeWhile #-}
span :: (ByteStringUTF8 -> Bool)
-> (Char -> Bool)
-> ByteStringUTF8
-> (ByteStringUTF8, ByteStringUTF8)
span ByteStringUTF8 -> Bool
pb Char -> Bool
pc utf8 :: ByteStringUTF8
utf8@(ByteStringUTF8 ByteString
bs) = (ByteString -> ByteStringUTF8
ByteStringUTF8 forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> ByteString
unsafeTake (ByteString -> Int
ByteString.length ByteString
bs forall a. Num a => a -> a -> a
- ByteString -> Int
ByteString.length ByteString
suffix') ByteString
bs, ByteStringUTF8
suffix)
where suffix :: ByteStringUTF8
suffix@(ByteStringUTF8 ByteString
suffix') = forall t.
TextualMonoid t =>
(t -> Bool) -> (Char -> Bool) -> t -> t
Textual.dropWhile ByteStringUTF8 -> Bool
pb Char -> Bool
pc ByteStringUTF8
utf8
{-# INLINE span #-}
break :: (ByteStringUTF8 -> Bool)
-> (Char -> Bool)
-> ByteStringUTF8
-> (ByteStringUTF8, ByteStringUTF8)
break ByteStringUTF8 -> Bool
pb Char -> Bool
pc = forall t.
TextualMonoid t =>
(t -> Bool) -> (Char -> Bool) -> t -> (t, t)
Textual.span (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteStringUTF8 -> Bool
pb) (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
pc)
{-# INLINE break #-}
spanMaybe :: forall s.
s
-> (s -> ByteStringUTF8 -> Maybe s)
-> (s -> Char -> Maybe s)
-> ByteStringUTF8
-> (ByteStringUTF8, ByteStringUTF8, s)
spanMaybe s
s0 s -> ByteStringUTF8 -> Maybe s
ft s -> Char -> Maybe s
fc (ByteStringUTF8 ByteString
bs) =
let inner :: Int -> s -> (ByteStringUTF8, ByteStringUTF8, s)
inner Int
i s
s
| Int
i forall a. Ord a => a -> a -> Bool
< Int
len =
let w :: Word8
w = ByteString -> Int -> Word8
unsafeIndex ByteString
bs Int
i
in if Word8
w forall a. Ord a => a -> a -> Bool
< Word8
0x80
then case s -> Char -> Maybe s
fc s
s (Word8 -> Char
w2c Word8
w)
of Just s
s' -> Int -> s -> (ByteStringUTF8, ByteStringUTF8, s)
inner (Int
i forall a. Num a => a -> a -> a
+ Int
1) s
s'
Maybe s
Nothing -> forall {c}. Int -> c -> (ByteStringUTF8, ByteStringUTF8, c)
done Int
i s
s
else case forall t. TextualMonoid t => t -> Maybe (Char, t)
splitCharacterPrefix (ByteString -> ByteStringUTF8
ByteStringUTF8 forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> ByteString
unsafeDrop Int
i ByteString
bs)
of Just (Char
c, ByteStringUTF8 ByteString
rest) | Just s
s' <- s -> Char -> Maybe s
fc s
s Char
c -> Int -> s -> (ByteStringUTF8, ByteStringUTF8, s)
inner (Int
len forall a. Num a => a -> a -> a
- ByteString -> Int
ByteString.length ByteString
rest) s
s'
Maybe (Char, ByteStringUTF8)
Nothing -> let j :: Int
j = forall a. Enum a => a -> a
succ (ByteString -> Int
headIndex forall a b. (a -> b) -> a -> b
$ forall m. FactorialMonoid m => Int -> m -> m
drop (Int
i forall a. Num a => a -> a -> a
+ Int
1) ByteString
bs)
in case s -> ByteStringUTF8 -> Maybe s
ft s
s (ByteString -> ByteStringUTF8
ByteStringUTF8 forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> ByteString
ByteString.take Int
j forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> ByteString
unsafeDrop Int
i ByteString
bs)
of Just s
s' -> Int -> s -> (ByteStringUTF8, ByteStringUTF8, s)
inner (Int
i forall a. Num a => a -> a -> a
+ Int
j) s
s'
Maybe s
Nothing -> forall {c}. Int -> c -> (ByteStringUTF8, ByteStringUTF8, c)
done Int
i s
s
Maybe (Char, ByteStringUTF8)
_ -> forall {c}. Int -> c -> (ByteStringUTF8, ByteStringUTF8, c)
done Int
i s
s
| Bool
otherwise = forall {c}. Int -> c -> (ByteStringUTF8, ByteStringUTF8, c)
done Int
i s
s
done :: Int -> c -> (ByteStringUTF8, ByteStringUTF8, c)
done Int
i c
s = Int
i seq :: forall a b. a -> b -> b
`seq` c
s seq :: forall a b. a -> b -> b
`seq` (ByteString -> ByteStringUTF8
ByteStringUTF8 forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> ByteString
unsafeTake Int
i ByteString
bs, ByteString -> ByteStringUTF8
ByteStringUTF8 forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> ByteString
unsafeDrop Int
i ByteString
bs, c
s)
len :: Int
len = ByteString -> Int
ByteString.length ByteString
bs
in Int -> s -> (ByteStringUTF8, ByteStringUTF8, s)
inner Int
0 s
s0
{-# INLINE spanMaybe #-}
spanMaybe' :: forall s.
s
-> (s -> ByteStringUTF8 -> Maybe s)
-> (s -> Char -> Maybe s)
-> ByteStringUTF8
-> (ByteStringUTF8, ByteStringUTF8, s)
spanMaybe' s
s0 s -> ByteStringUTF8 -> Maybe s
ft s -> Char -> Maybe s
fc (ByteStringUTF8 ByteString
bs) =
let inner :: Int -> s -> (ByteStringUTF8, ByteStringUTF8, s)
inner Int
i s
s
| Int
i forall a. Ord a => a -> a -> Bool
< Int
len =
s
s seq :: forall a b. a -> b -> b
`seq`
let w :: Word8
w = ByteString -> Int -> Word8
unsafeIndex ByteString
bs Int
i
in if Word8
w forall a. Ord a => a -> a -> Bool
< Word8
0x80
then case s -> Char -> Maybe s
fc s
s (Word8 -> Char
w2c Word8
w)
of Just s
s' -> Int -> s -> (ByteStringUTF8, ByteStringUTF8, s)
inner (Int
i forall a. Num a => a -> a -> a
+ Int
1) s
s'
Maybe s
Nothing -> forall {c}. Int -> c -> (ByteStringUTF8, ByteStringUTF8, c)
done Int
i s
s
else case forall t. TextualMonoid t => t -> Maybe (Char, t)
splitCharacterPrefix (ByteString -> ByteStringUTF8
ByteStringUTF8 forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> ByteString
unsafeDrop Int
i ByteString
bs)
of Just (Char
c, ByteStringUTF8 ByteString
rest) | Just s
s' <- s -> Char -> Maybe s
fc s
s Char
c -> Int -> s -> (ByteStringUTF8, ByteStringUTF8, s)
inner (Int
len forall a. Num a => a -> a -> a
- ByteString -> Int
ByteString.length ByteString
rest) s
s'
Maybe (Char, ByteStringUTF8)
Nothing -> let j :: Int
j = forall a. Enum a => a -> a
succ (ByteString -> Int
headIndex forall a b. (a -> b) -> a -> b
$ forall m. FactorialMonoid m => Int -> m -> m
drop (Int
i forall a. Num a => a -> a -> a
+ Int
1) ByteString
bs)
in case s -> ByteStringUTF8 -> Maybe s
ft s
s (ByteString -> ByteStringUTF8
ByteStringUTF8 forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> ByteString
ByteString.take Int
j forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> ByteString
unsafeDrop Int
i ByteString
bs)
of Just s
s' -> Int -> s -> (ByteStringUTF8, ByteStringUTF8, s)
inner (Int
i forall a. Num a => a -> a -> a
+ Int
j) s
s'
Maybe s
Nothing -> forall {c}. Int -> c -> (ByteStringUTF8, ByteStringUTF8, c)
done Int
i s
s
Maybe (Char, ByteStringUTF8)
_ -> forall {c}. Int -> c -> (ByteStringUTF8, ByteStringUTF8, c)
done Int
i s
s
| Bool
otherwise = forall {c}. Int -> c -> (ByteStringUTF8, ByteStringUTF8, c)
done Int
i s
s
done :: Int -> c -> (ByteStringUTF8, ByteStringUTF8, c)
done Int
i c
s = Int
i seq :: forall a b. a -> b -> b
`seq` c
s seq :: forall a b. a -> b -> b
`seq` (ByteString -> ByteStringUTF8
ByteStringUTF8 forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> ByteString
unsafeTake Int
i ByteString
bs, ByteString -> ByteStringUTF8
ByteStringUTF8 forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> ByteString
unsafeDrop Int
i ByteString
bs, c
s)
len :: Int
len = ByteString -> Int
ByteString.length ByteString
bs
in Int -> s -> (ByteStringUTF8, ByteStringUTF8, s)
inner Int
0 s
s0
{-# INLINE spanMaybe' #-}
find :: (Char -> Bool) -> ByteStringUTF8 -> Maybe Char
find Char -> Bool
p (ByteStringUTF8 ByteString
bs0) = ByteString -> Maybe Char
loop ByteString
bs0
where loop :: ByteString -> Maybe Char
loop ByteString
bs = case (Char -> Bool) -> ByteString -> Maybe Int
ByteString.Char8.findIndex (\Char
c-> Char
c forall a. Ord a => a -> a -> Bool
>= Char
'\x80' Bool -> Bool -> Bool
|| Char -> Bool
p Char
c) ByteString
bs
of Maybe Int
Nothing -> forall a. Maybe a
Nothing
Just Int
i -> let x :: Word8
x = ByteString -> Int -> Word8
unsafeIndex ByteString
bs Int
i
bs' :: ByteString
bs' = Int -> ByteString -> ByteString
unsafeDrop (Int
i forall a. Num a => a -> a -> a
+ Int
1) ByteString
bs
in if Word8
x forall a. Ord a => a -> a -> Bool
< Word8
0x80
then forall a. a -> Maybe a
Just (Word8 -> Char
w2c Word8
x)
else case Word8 -> ByteString -> Maybe (Char, ByteStringUTF8)
toChar Word8
x ByteString
bs'
of Just (Char
c, ByteStringUTF8 ByteString
rest) | Char -> Bool
p Char
c -> forall a. a -> Maybe a
Just Char
c
| Bool
otherwise -> ByteString -> Maybe Char
loop ByteString
rest
Maybe (Char, ByteStringUTF8)
Nothing -> ByteString -> Maybe Char
loop ((Word8 -> Bool) -> ByteString -> ByteString
ByteString.dropWhile (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word8 -> Bool
byteStartsCharacter) ByteString
bs')
{-# INLINE find #-}
any :: (Char -> Bool) -> ByteStringUTF8 -> Bool
any Char -> Bool
p ByteStringUTF8
utf8 = forall a. Maybe a -> Bool
isJust (forall t. TextualMonoid t => (Char -> Bool) -> t -> Maybe Char
find Char -> Bool
p ByteStringUTF8
utf8)
{-# INLINE any #-}
all :: (Char -> Bool) -> ByteStringUTF8 -> Bool
all Char -> Bool
p ByteStringUTF8
utf8 = forall a. Maybe a -> Bool
isNothing (forall t. TextualMonoid t => (Char -> Bool) -> t -> Maybe Char
find (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
p) ByteStringUTF8
utf8)
{-# INLINE all #-}
elem :: Char -> ByteStringUTF8 -> Bool
elem Char
c utf8 :: ByteStringUTF8
utf8@(ByteStringUTF8 ByteString
bs)
| Char
c forall a. Ord a => a -> a -> Bool
< Char
'\x80' = Char -> ByteString -> Bool
ByteString.Char8.elem Char
c ByteString
bs
| Bool
otherwise = forall t. TextualMonoid t => (Char -> Bool) -> t -> Bool
any (forall a. Eq a => a -> a -> Bool
== Char
c) ByteStringUTF8
utf8
{-# INLINE elem #-}
fromText :: Text -> ByteStringUTF8
fromText = ByteString -> ByteStringUTF8
ByteStringUTF8 forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> ByteString
encodeUtf8
toText :: (ByteStringUTF8 -> Text) -> ByteStringUTF8 -> Text
toText ByteStringUTF8 -> Text
f t :: ByteStringUTF8
t@(ByteStringUTF8 ByteString
bs) = forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (forall a b. a -> b -> a
const forall a b. (a -> b) -> a -> b
$ String -> Text
pack forall a b. (a -> b) -> a -> b
$ forall t. TextualMonoid t => (t -> String) -> t -> String
toString (Text -> String
unpack forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteStringUTF8 -> Text
f) ByteStringUTF8
t) forall a. a -> a
id (ByteString -> Either UnicodeException Text
decodeUtf8' ByteString
bs)
reverseBytesToChar :: (ByteString -> a) -> (Char -> a) -> [Word8] -> a
reverseBytesToChar :: forall a. (ByteString -> a) -> (Char -> a) -> [Word8] -> a
reverseBytesToChar ByteString -> a
ft Char -> a
fc [Word8
w] = if Word8
w forall a. Ord a => a -> a -> Bool
< Word8
0x80 then Char -> a
fc (Word8 -> Char
w2c Word8
w) else ByteString -> a
ft (Word8 -> ByteString
ByteString.singleton Word8
w)
reverseBytesToChar ByteString -> a
ft Char -> a
fc [Word8
b0, Word8
b1] =
forall a. HasCallStack => Bool -> a -> a
assert (Word8
0x80 forall a. Ord a => a -> a -> Bool
<= Word8
b0 Bool -> Bool -> Bool
&& Word8
b0 forall a. Ord a => a -> a -> Bool
< Word8
0xC0) forall a b. (a -> b) -> a -> b
$
if Word8
0xC2 forall a. Ord a => a -> a -> Bool
<= Word8
b1 Bool -> Bool -> Bool
&& Word8
b1 forall a. Ord a => a -> a -> Bool
< Word8
0xE0
then Char -> a
fc (Int -> Char
chr (forall a. Bits a => a -> Int -> a
shiftL (forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
b1 forall a. Bits a => a -> a -> a
.&. Int
0x1F) Int
6 forall a. Bits a => a -> a -> a
.|. forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
b0 forall a. Bits a => a -> a -> a
.&. Int
0x3F))
else ByteString -> a
ft ([Word8] -> ByteString
ByteString.pack [Word8
b1, Word8
b0])
reverseBytesToChar ByteString -> a
ft Char -> a
fc [Word8
b0, Word8
b1, Word8
b2] =
forall a. HasCallStack => Bool -> a -> a
assert (Word8
0x80 forall a. Ord a => a -> a -> Bool
<= Word8
b0 Bool -> Bool -> Bool
&& Word8
b0 forall a. Ord a => a -> a -> Bool
< Word8
0xC0 Bool -> Bool -> Bool
&& Word8
0x80 forall a. Ord a => a -> a -> Bool
<= Word8
b1 Bool -> Bool -> Bool
&& Word8
b1 forall a. Ord a => a -> a -> Bool
< Word8
0xC0) forall a b. (a -> b) -> a -> b
$
if (Word8
0xE0 forall a. Ord a => a -> a -> Bool
< Word8
b2 Bool -> Bool -> Bool
|| Word8
0xE0 forall a. Eq a => a -> a -> Bool
== Word8
b2 Bool -> Bool -> Bool
&& Word8
0xA0 forall a. Ord a => a -> a -> Bool
<= Word8
b1) Bool -> Bool -> Bool
&& Word8
b2 forall a. Ord a => a -> a -> Bool
< Word8
0xF0
then Char -> a
fc (Int -> Char
chr (forall a. Bits a => a -> Int -> a
shiftL (forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
b2 forall a. Bits a => a -> a -> a
.&. Int
0xF) Int
12
forall a. Bits a => a -> a -> a
.|. forall a. Bits a => a -> Int -> a
shiftL (forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
b1 forall a. Bits a => a -> a -> a
.&. Int
0x3F) Int
6
forall a. Bits a => a -> a -> a
.|. forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
b0 forall a. Bits a => a -> a -> a
.&. Int
0x3F))
else ByteString -> a
ft ([Word8] -> ByteString
ByteString.pack [Word8
b2, Word8
b1, Word8
b0])
reverseBytesToChar ByteString -> a
ft Char -> a
fc [Word8
b0, Word8
b1, Word8
b2, Word8
b3] =
forall a. HasCallStack => Bool -> a -> a
assert (Word8
0x80 forall a. Ord a => a -> a -> Bool
<= Word8
b0 Bool -> Bool -> Bool
&& Word8
b0 forall a. Ord a => a -> a -> Bool
< Word8
0xC0 Bool -> Bool -> Bool
&& Word8
0x80 forall a. Ord a => a -> a -> Bool
<= Word8
b1 Bool -> Bool -> Bool
&& Word8
b1 forall a. Ord a => a -> a -> Bool
< Word8
0xC0 Bool -> Bool -> Bool
&& Word8
0x80 forall a. Ord a => a -> a -> Bool
<= Word8
b2 Bool -> Bool -> Bool
&& Word8
b2 forall a. Ord a => a -> a -> Bool
< Word8
0xC0) forall a b. (a -> b) -> a -> b
$
if (Word8
0xF0 forall a. Ord a => a -> a -> Bool
< Word8
b3 Bool -> Bool -> Bool
|| Word8
0xF0 forall a. Eq a => a -> a -> Bool
== Word8
b3 Bool -> Bool -> Bool
&& Word8
0x90 forall a. Ord a => a -> a -> Bool
<= Word8
b2) Bool -> Bool -> Bool
&& Word8
b3 forall a. Ord a => a -> a -> Bool
< Word8
0xF5 Bool -> Bool -> Bool
&& (Word8
b3 forall a. Ord a => a -> a -> Bool
< Word8
0xF4 Bool -> Bool -> Bool
|| Word8
b2 forall a. Ord a => a -> a -> Bool
< Word8
0x90)
then Char -> a
fc (Int -> Char
chr (forall a. Bits a => a -> Int -> a
shiftL (forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
b3 forall a. Bits a => a -> a -> a
.&. Int
0x7) Int
18
forall a. Bits a => a -> a -> a
.|. forall a. Bits a => a -> Int -> a
shiftL (forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
b2 forall a. Bits a => a -> a -> a
.&. Int
0x3F) Int
12
forall a. Bits a => a -> a -> a
.|. forall a. Bits a => a -> Int -> a
shiftL (forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
b1 forall a. Bits a => a -> a -> a
.&. Int
0x3F) Int
6
forall a. Bits a => a -> a -> a
.|. forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
b0 forall a. Bits a => a -> a -> a
.&. Int
0x3F))
else ByteString -> a
ft ([Word8] -> ByteString
ByteString.pack [Word8
b3, Word8
b2, Word8
b1, Word8
b0])
reverseBytesToChar ByteString -> a
ft Char -> a
_fc [Word8]
bytes = ByteString -> a
ft (ByteString -> ByteString
ByteString.reverse forall a b. (a -> b) -> a -> b
$ [Word8] -> ByteString
ByteString.pack [Word8]
bytes)
bytesToChar :: (ByteString -> a) -> (Char -> a) -> [Word8] -> a
bytesToChar :: forall a. (ByteString -> a) -> (Char -> a) -> [Word8] -> a
bytesToChar ByteString -> a
ft Char -> a
fc [Word8
w] = if Word8
w forall a. Ord a => a -> a -> Bool
< Word8
0x80 then Char -> a
fc (Word8 -> Char
w2c Word8
w) else ByteString -> a
ft (Word8 -> ByteString
ByteString.singleton Word8
w)
bytesToChar ByteString -> a
ft Char -> a
fc bytes :: [Word8]
bytes@[Word8
b1, Word8
b0] =
forall a. HasCallStack => Bool -> a -> a
assert (Word8
0x80 forall a. Ord a => a -> a -> Bool
<= Word8
b0 Bool -> Bool -> Bool
&& Word8
b0 forall a. Ord a => a -> a -> Bool
< Word8
0xC0) forall a b. (a -> b) -> a -> b
$
if Word8
0xC2 forall a. Ord a => a -> a -> Bool
<= Word8
b1 Bool -> Bool -> Bool
&& Word8
b1 forall a. Ord a => a -> a -> Bool
< Word8
0xE0
then Char -> a
fc (Int -> Char
chr (forall a. Bits a => a -> Int -> a
shiftL (forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
b1 forall a. Bits a => a -> a -> a
.&. Int
0x1F) Int
6 forall a. Bits a => a -> a -> a
.|. forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
b0 forall a. Bits a => a -> a -> a
.&. Int
0x3F))
else ByteString -> a
ft ([Word8] -> ByteString
ByteString.pack [Word8]
bytes)
bytesToChar ByteString -> a
ft Char -> a
fc bytes :: [Word8]
bytes@[Word8
b2, Word8
b1, Word8
b0] =
forall a. HasCallStack => Bool -> a -> a
assert (Word8
0x80 forall a. Ord a => a -> a -> Bool
<= Word8
b0 Bool -> Bool -> Bool
&& Word8
b0 forall a. Ord a => a -> a -> Bool
< Word8
0xC0 Bool -> Bool -> Bool
&& Word8
0x80 forall a. Ord a => a -> a -> Bool
<= Word8
b1 Bool -> Bool -> Bool
&& Word8
b1 forall a. Ord a => a -> a -> Bool
< Word8
0xC0) forall a b. (a -> b) -> a -> b
$
if (Word8
0xE0 forall a. Ord a => a -> a -> Bool
< Word8
b2 Bool -> Bool -> Bool
|| Word8
0xE0 forall a. Eq a => a -> a -> Bool
== Word8
b2 Bool -> Bool -> Bool
&& Word8
0xA0 forall a. Ord a => a -> a -> Bool
<= Word8
b1) Bool -> Bool -> Bool
&& Word8
b2 forall a. Ord a => a -> a -> Bool
< Word8
0xF0
then Char -> a
fc (Int -> Char
chr (forall a. Bits a => a -> Int -> a
shiftL (forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
b2 forall a. Bits a => a -> a -> a
.&. Int
0xF) Int
12
forall a. Bits a => a -> a -> a
.|. forall a. Bits a => a -> Int -> a
shiftL (forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
b1 forall a. Bits a => a -> a -> a
.&. Int
0x3F) Int
6
forall a. Bits a => a -> a -> a
.|. forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
b0 forall a. Bits a => a -> a -> a
.&. Int
0x3F))
else ByteString -> a
ft ([Word8] -> ByteString
ByteString.pack [Word8]
bytes)
bytesToChar ByteString -> a
ft Char -> a
fc bytes :: [Word8]
bytes@[Word8
b3, Word8
b2, Word8
b1, Word8
b0] =
forall a. HasCallStack => Bool -> a -> a
assert (Word8
0x80 forall a. Ord a => a -> a -> Bool
<= Word8
b0 Bool -> Bool -> Bool
&& Word8
b0 forall a. Ord a => a -> a -> Bool
< Word8
0xC0 Bool -> Bool -> Bool
&& Word8
0x80 forall a. Ord a => a -> a -> Bool
<= Word8
b1 Bool -> Bool -> Bool
&& Word8
b1 forall a. Ord a => a -> a -> Bool
< Word8
0xC0 Bool -> Bool -> Bool
&& Word8
0x80 forall a. Ord a => a -> a -> Bool
<= Word8
b2 Bool -> Bool -> Bool
&& Word8
b2 forall a. Ord a => a -> a -> Bool
< Word8
0xC0) forall a b. (a -> b) -> a -> b
$
if (Word8
0xF0 forall a. Ord a => a -> a -> Bool
< Word8
b3 Bool -> Bool -> Bool
|| Word8
0xF0 forall a. Eq a => a -> a -> Bool
== Word8
b3 Bool -> Bool -> Bool
&& Word8
0x90 forall a. Ord a => a -> a -> Bool
<= Word8
b2) Bool -> Bool -> Bool
&& Word8
b3 forall a. Ord a => a -> a -> Bool
< Word8
0xF5 Bool -> Bool -> Bool
&& (Word8
b3 forall a. Ord a => a -> a -> Bool
< Word8
0xF4 Bool -> Bool -> Bool
|| Word8
b2 forall a. Ord a => a -> a -> Bool
< Word8
0x90)
then Char -> a
fc (Int -> Char
chr (forall a. Bits a => a -> Int -> a
shiftL (forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
b3 forall a. Bits a => a -> a -> a
.&. Int
0x7) Int
18
forall a. Bits a => a -> a -> a
.|. forall a. Bits a => a -> Int -> a
shiftL (forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
b2 forall a. Bits a => a -> a -> a
.&. Int
0x3F) Int
12
forall a. Bits a => a -> a -> a
.|. forall a. Bits a => a -> Int -> a
shiftL (forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
b1 forall a. Bits a => a -> a -> a
.&. Int
0x3F) Int
6
forall a. Bits a => a -> a -> a
.|. forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
b0 forall a. Bits a => a -> a -> a
.&. Int
0x3F))
else ByteString -> a
ft ([Word8] -> ByteString
ByteString.pack [Word8]
bytes)
bytesToChar ByteString -> a
ft Char -> a
_fc [Word8]
bytes = ByteString -> a
ft ([Word8] -> ByteString
ByteString.pack [Word8]
bytes)
wrapPair :: (ByteString, ByteString) -> (ByteStringUTF8, ByteStringUTF8)
wrapPair :: (ByteString, ByteString) -> (ByteStringUTF8, ByteStringUTF8)
wrapPair (ByteString
bs1, ByteString
bs2) = (ByteString -> ByteStringUTF8
ByteStringUTF8 ByteString
bs1, ByteString -> ByteStringUTF8
ByteStringUTF8 ByteString
bs2)
{-# INLINE wrapPair #-}
wrapTriple :: (ByteString, ByteString, ByteString) -> (ByteStringUTF8, ByteStringUTF8, ByteStringUTF8)
wrapTriple :: (ByteString, ByteString, ByteString)
-> (ByteStringUTF8, ByteStringUTF8, ByteStringUTF8)
wrapTriple (ByteString
bs1, ByteString
bs2, ByteString
bs3) = (ByteString -> ByteStringUTF8
ByteStringUTF8 ByteString
bs1, ByteString -> ByteStringUTF8
ByteStringUTF8 ByteString
bs2, ByteString -> ByteStringUTF8
ByteStringUTF8 ByteString
bs3)
{-# INLINE wrapTriple #-}
fromChar :: Char -> ByteString
fromChar :: Char -> ByteString
fromChar Char
c | Char
c forall a. Ord a => a -> a -> Bool
< Char
'\x80' = Char -> ByteString
ByteString.Char8.singleton Char
c
| Char
c forall a. Ord a => a -> a -> Bool
< Char
'\x800' = [Word8] -> ByteString
ByteString.pack [Word8
0xC0 forall a. Num a => a -> a -> a
+ forall a b. (Integral a, Num b) => a -> b
fromIntegral (forall a. Bits a => a -> Int -> a
shiftR Int
n Int
6),
Word8
0x80 forall a. Num a => a -> a -> a
+ forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int
n forall a. Bits a => a -> a -> a
.&. Int
0x3F)]
| Char
c forall a. Ord a => a -> a -> Bool
< Char
'\x10000' = [Word8] -> ByteString
ByteString.pack [Word8
0xE0 forall a. Num a => a -> a -> a
+ forall a b. (Integral a, Num b) => a -> b
fromIntegral (forall a. Bits a => a -> Int -> a
shiftR Int
n Int
12),
Word8
0x80 forall a. Num a => a -> a -> a
+ forall a b. (Integral a, Num b) => a -> b
fromIntegral (forall a. Bits a => a -> Int -> a
shiftR Int
n Int
6 forall a. Bits a => a -> a -> a
.&. Int
0x3F),
Word8
0x80 forall a. Num a => a -> a -> a
+ forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int
n forall a. Bits a => a -> a -> a
.&. Int
0x3F)]
| Int
n forall a. Ord a => a -> a -> Bool
< Int
0x200000 = [Word8] -> ByteString
ByteString.pack [Word8
0xF0 forall a. Num a => a -> a -> a
+ forall a b. (Integral a, Num b) => a -> b
fromIntegral (forall a. Bits a => a -> Int -> a
shiftR Int
n Int
18),
Word8
0x80 forall a. Num a => a -> a -> a
+ forall a b. (Integral a, Num b) => a -> b
fromIntegral (forall a. Bits a => a -> Int -> a
shiftR Int
n Int
12 forall a. Bits a => a -> a -> a
.&. Int
0x3F),
Word8
0x80 forall a. Num a => a -> a -> a
+ forall a b. (Integral a, Num b) => a -> b
fromIntegral (forall a. Bits a => a -> Int -> a
shiftR Int
n Int
6 forall a. Bits a => a -> a -> a
.&. Int
0x3F),
Word8
0x80 forall a. Num a => a -> a -> a
+ forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int
n forall a. Bits a => a -> a -> a
.&. Int
0x3F)]
| Bool
otherwise = forall a. HasCallStack => String -> a
error (String
"Data.Char.ord '" forall a. [a] -> [a] -> [a]
++ (Char
c forall a. a -> [a] -> [a]
: String
"' >=0x200000"))
where n :: Int
n = Char -> Int
ord Char
c
toChar :: Word8 -> ByteString -> Maybe (Char, ByteStringUTF8)
toChar :: Word8 -> ByteString -> Maybe (Char, ByteStringUTF8)
toChar Word8
hd ByteString
tl | Word8
hd forall a. Ord a => a -> a -> Bool
< Word8
0x80 = forall a. a -> Maybe a
Just (Word8 -> Char
w2c Word8
hd, ByteString -> ByteStringUTF8
ByteStringUTF8 ByteString
tl)
| Word8
hd forall a. Ord a => a -> a -> Bool
< Word8
0xC2 = forall a. Maybe a
Nothing
| Word8
hd forall a. Ord a => a -> a -> Bool
< Word8
0xE0 = do (Word8
b0, ByteString
t0) <- ByteString -> Maybe (Word8, ByteString)
ByteString.uncons ByteString
tl
if ByteString -> Int
headIndex ByteString
tl forall a. Eq a => a -> a -> Bool
== Int
1
then forall (m :: * -> *) a. Monad m => a -> m a
return (Int -> Char
chr (forall a. Bits a => a -> Int -> a
shiftL (forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
hd forall a. Bits a => a -> a -> a
.&. Int
0x1F) Int
6
forall a. Bits a => a -> a -> a
.|. forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
b0 forall a. Bits a => a -> a -> a
.&. Int
0x3F),
ByteString -> ByteStringUTF8
ByteStringUTF8 ByteString
t0)
else forall a. Maybe a
Nothing
| Word8
hd forall a. Ord a => a -> a -> Bool
< Word8
0xF0 = do (Word8
b1, ByteString
t1) <- ByteString -> Maybe (Word8, ByteString)
ByteString.uncons ByteString
tl
(Word8
b0, ByteString
t0) <- ByteString -> Maybe (Word8, ByteString)
ByteString.uncons ByteString
t1
if (Word8
hd forall a. Ord a => a -> a -> Bool
> Word8
0xE0 Bool -> Bool -> Bool
|| Word8
b1 forall a. Ord a => a -> a -> Bool
>= Word8
0xA0) Bool -> Bool -> Bool
&& ByteString -> Int
headIndex ByteString
tl forall a. Eq a => a -> a -> Bool
== Int
2
then forall (m :: * -> *) a. Monad m => a -> m a
return (Int -> Char
chr (forall a. Bits a => a -> Int -> a
shiftL (forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
hd forall a. Bits a => a -> a -> a
.&. Int
0xF) Int
12
forall a. Bits a => a -> a -> a
.|. forall a. Bits a => a -> Int -> a
shiftL (forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
b1 forall a. Bits a => a -> a -> a
.&. Int
0x3F) Int
6
forall a. Bits a => a -> a -> a
.|. forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
b0 forall a. Bits a => a -> a -> a
.&. Int
0x3F),
ByteString -> ByteStringUTF8
ByteStringUTF8 ByteString
t0)
else forall a. Maybe a
Nothing
| Word8
hd forall a. Ord a => a -> a -> Bool
< Word8
0xF5 = do (Word8
b2, ByteString
t2) <- ByteString -> Maybe (Word8, ByteString)
ByteString.uncons ByteString
tl
(Word8
b1, ByteString
t1) <- ByteString -> Maybe (Word8, ByteString)
ByteString.uncons ByteString
t2
(Word8
b0, ByteString
t0) <- ByteString -> Maybe (Word8, ByteString)
ByteString.uncons ByteString
t1
if (Word8
hd forall a. Ord a => a -> a -> Bool
> Word8
0xF0 Bool -> Bool -> Bool
|| Word8
b2 forall a. Ord a => a -> a -> Bool
>= Word8
0x90) Bool -> Bool -> Bool
&& (Word8
hd forall a. Ord a => a -> a -> Bool
< Word8
0xF4 Bool -> Bool -> Bool
|| Word8
b2 forall a. Ord a => a -> a -> Bool
< Word8
0x90) Bool -> Bool -> Bool
&& ByteString -> Int
headIndex ByteString
tl forall a. Eq a => a -> a -> Bool
== Int
3
then forall (m :: * -> *) a. Monad m => a -> m a
return (Int -> Char
chr (forall a. Bits a => a -> Int -> a
shiftL (forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
hd forall a. Bits a => a -> a -> a
.&. Int
0x7) Int
18
forall a. Bits a => a -> a -> a
.|. forall a. Bits a => a -> Int -> a
shiftL (forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
b2 forall a. Bits a => a -> a -> a
.&. Int
0x3F) Int
12
forall a. Bits a => a -> a -> a
.|. forall a. Bits a => a -> Int -> a
shiftL (forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
b1 forall a. Bits a => a -> a -> a
.&. Int
0x3F) Int
6
forall a. Bits a => a -> a -> a
.|. forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
b0 forall a. Bits a => a -> a -> a
.&. Int
0x3F),
ByteString -> ByteStringUTF8
ByteStringUTF8 ByteString
t0)
else forall a. Maybe a
Nothing
| Bool
otherwise = forall a. Maybe a
Nothing
groupASCII :: ByteString -> [ByteString]
groupASCII :: ByteString -> [ByteString]
groupASCII = (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
ByteString.groupBy forall {a} {a}. (Ord a, Ord a, Num a, Num a) => a -> a -> Bool
continued
where continued :: a -> a -> Bool
continued a
a a
b = (a
a forall a. Ord a => a -> a -> Bool
< a
0x80) forall a. Eq a => a -> a -> Bool
== (a
b forall a. Ord a => a -> a -> Bool
< a
0x80) Bool -> Bool -> Bool
&& a
b forall a. Ord a => a -> a -> Bool
< a
0xC0
{-# INLINE groupASCII #-}
headIndex :: ByteString -> Int
headIndex :: ByteString -> Int
headIndex ByteString
bs = forall a. a -> Maybe a -> a
fromMaybe (ByteString -> Int
ByteString.length ByteString
bs) forall a b. (a -> b) -> a -> b
$ (Word8 -> Bool) -> ByteString -> Maybe Int
ByteString.findIndex Word8 -> Bool
byteStartsCharacter ByteString
bs
{-# INLINE headIndex #-}
byteStartsCharacter :: Word8 -> Bool
byteStartsCharacter :: Word8 -> Bool
byteStartsCharacter Word8
b = Word8
b forall a. Ord a => a -> a -> Bool
< Word8
0x80 Bool -> Bool -> Bool
|| Word8
b forall a. Ord a => a -> a -> Bool
>= Word8
0xC0
{-# INLINE byteStartsCharacter #-}
charStartIndex :: Int -> ByteString -> Int
charStartIndex :: Int -> ByteString -> Int
charStartIndex Int
n ByteString
_ | Int
n forall a. Ord a => a -> a -> Bool
<= Int
0 = Int
0
charStartIndex Int
n0 ByteString
bs = forall a. (Word8 -> a -> a) -> a -> ByteString -> a
ByteString.foldr forall {a} {a} {c}.
(Ord a, Num a, Num a, Enum a, Enum c, Eq a) =>
a -> ((a, Bool, c) -> c) -> (a, Bool, c) -> c
count (forall a b. a -> b -> a
const forall a b. (a -> b) -> a -> b
$ ByteString -> Int
ByteString.length ByteString
bs) ByteString
bs (Int
n0, Bool
False, Int
0)
where count :: a -> ((a, Bool, c) -> c) -> (a, Bool, c) -> c
count a
byte (a, Bool, c) -> c
_ (a
0, Bool
high, c
i) | a
byte forall a. Ord a => a -> a -> Bool
< a
0x80 Bool -> Bool -> Bool
|| a
byte forall a. Ord a => a -> a -> Bool
>= a
0xC0 Bool -> Bool -> Bool
|| Bool -> Bool
not Bool
high = c
i
count a
byte (a, Bool, c) -> c
cont (a
n, Bool
high, c
i) | a
byte forall a. Ord a => a -> a -> Bool
< a
0x80 = (a, Bool, c) -> c
cont (forall a. Enum a => a -> a
pred a
n, Bool
False, forall a. Enum a => a -> a
succ c
i)
| a
byte forall a. Ord a => a -> a -> Bool
< a
0xC0 = (a, Bool, c) -> c
cont (if Bool
high then a
n else forall a. Enum a => a -> a
pred a
n, Bool
True, forall a. Enum a => a -> a
succ c
i)
| Bool
otherwise = (a, Bool, c) -> c
cont (forall a. Enum a => a -> a
pred a
n, Bool
True, forall a. Enum a => a -> a
succ c
i)
{-# INLINE charStartIndex #-}