Copyright	(c) Iavor S. Diatchki 2009
License	BSD3-style (see LICENSE)
Maintainer	emertens@galois.com
Stability	experimental
Portability	portable
Safe Haskell	Trustworthy
Language	Haskell2010

Data.String.UTF8

Contents

Representation
Character based operations
Representation based operations

Description

Synopsis

data UTF8 string
class (Num s, Ord s) => UTF8Bytes b s | b -> s
fromString :: UTF8Bytes string index => String -> UTF8 string
toString :: UTF8Bytes string index => UTF8 string -> String
fromRep :: string -> UTF8 string
toRep :: UTF8 string -> string
replacement_char :: Char
uncons :: UTF8Bytes string index => UTF8 string -> Maybe (Char, UTF8 string)
splitAt :: UTF8Bytes string index => index -> UTF8 string -> (UTF8 string, UTF8 string)
take :: UTF8Bytes string index => index -> UTF8 string -> UTF8 string
drop :: UTF8Bytes string index => index -> UTF8 string -> UTF8 string
span :: UTF8Bytes string index => (Char -> Bool) -> UTF8 string -> (UTF8 string, UTF8 string)
break :: UTF8Bytes string index => (Char -> Bool) -> UTF8 string -> (UTF8 string, UTF8 string)
foldl :: UTF8Bytes string index => (a -> Char -> a) -> a -> UTF8 string -> a
foldr :: UTF8Bytes string index => (Char -> a -> a) -> a -> UTF8 string -> a
length :: UTF8Bytes string index => UTF8 string -> index
lines :: UTF8Bytes string index => UTF8 string -> [UTF8 string]
lines' :: UTF8Bytes string index => UTF8 string -> [UTF8 string]
null :: UTF8Bytes string index => UTF8 string -> Bool
decode :: UTF8Bytes string index => UTF8 string -> Maybe (Char, index)
byteSplitAt :: UTF8Bytes string index => index -> UTF8 string -> (UTF8 string, UTF8 string)
byteTake :: UTF8Bytes string index => index -> UTF8 string -> UTF8 string
byteDrop :: UTF8Bytes string index => index -> UTF8 string -> UTF8 string

Representation

data UTF8 string Source #

The type of strings that are represented using the UTF8 encoding. The parameter is the type of the container for the representation.

Instances

Instances details

Eq string => Eq (UTF8 string) Source #
Instance details Defined in Data.String.UTF8 Methods (==) :: UTF8 string -> UTF8 string -> Bool # (/=) :: UTF8 string -> UTF8 string -> Bool #
Ord string => Ord (UTF8 string) Source #
Instance details Defined in Data.String.UTF8 Methods compare :: UTF8 string -> UTF8 string -> Ordering # (<) :: UTF8 string -> UTF8 string -> Bool # (<=) :: UTF8 string -> UTF8 string -> Bool # (>) :: UTF8 string -> UTF8 string -> Bool # (>=) :: UTF8 string -> UTF8 string -> Bool # max :: UTF8 string -> UTF8 string -> UTF8 string # min :: UTF8 string -> UTF8 string -> UTF8 string #
UTF8Bytes string index => Show (UTF8 string) Source #
Instance details Defined in Data.String.UTF8 Methods showsPrec :: Int -> UTF8 string -> ShowS # show :: UTF8 string -> String # showList :: [UTF8 string] -> ShowS #
UTF8Bytes string index => IsString (UTF8 string) Source #
Instance details Defined in Data.String.UTF8 Methods fromString :: String -> UTF8 string #

class (Num s, Ord s) => UTF8Bytes b s | b -> s Source #

Minimal complete definition

bsplit, bdrop, buncons, elemIndex, empty, null, pack, tail

Instances

Instances details

UTF8Bytes ByteString Int64 Source #
Instance details Defined in Codec.Binary.UTF8.Generic Methods bsplit :: Int64 -> ByteString -> (ByteString, ByteString) Source # bdrop :: Int64 -> ByteString -> ByteString Source # buncons :: ByteString -> Maybe (Word8, ByteString) Source # elemIndex :: Word8 -> ByteString -> Maybe Int64 Source # empty :: ByteString Source # null :: ByteString -> Bool Source # pack :: [Word8] -> ByteString Source # tail :: ByteString -> ByteString Source #
UTF8Bytes ByteString Int Source #
Instance details Defined in Codec.Binary.UTF8.Generic Methods bsplit :: Int -> ByteString -> (ByteString, ByteString) Source # bdrop :: Int -> ByteString -> ByteString Source # buncons :: ByteString -> Maybe (Word8, ByteString) Source # elemIndex :: Word8 -> ByteString -> Maybe Int Source # empty :: ByteString Source # null :: ByteString -> Bool Source # pack :: [Word8] -> ByteString Source # tail :: ByteString -> ByteString Source #
UTF8Bytes [Word8] Int Source #
Instance details Defined in Codec.Binary.UTF8.Generic Methods bsplit :: Int -> [Word8] -> ([Word8], [Word8]) Source # bdrop :: Int -> [Word8] -> [Word8] Source # buncons :: [Word8] -> Maybe (Word8, [Word8]) Source # elemIndex :: Word8 -> [Word8] -> Maybe Int Source # empty :: [Word8] Source # null :: [Word8] -> Bool Source # pack :: [Word8] -> [Word8] Source # tail :: [Word8] -> [Word8] Source #

fromString :: UTF8Bytes string index => String -> UTF8 string Source #

Converts a Haskell string into a UTF8 encoded string. Complexity: linear.

toString :: UTF8Bytes string index => UTF8 string -> String Source #

Convert a UTF8 encoded string into a Haskell string. Invalid characters are replaced by replacement_char. Complexity: linear.

fromRep :: string -> UTF8 string Source #

toRep :: UTF8 string -> string Source #

replacement_char :: Char Source #

This character is used to mark errors in a UTF8 encoded string.

Character based operations

uncons :: UTF8Bytes string index => UTF8 string -> Maybe (Char, UTF8 string) Source #

Get the first character of a byte string, if any. Invalid characters are replaced by replacement_char.

splitAt :: UTF8Bytes string index => index -> UTF8 string -> (UTF8 string, UTF8 string) Source #

Split after a given number of characters. Negative values are treated as if they are 0.

take :: UTF8Bytes string index => index -> UTF8 string -> UTF8 string Source #

take n s returns the first n characters of s. If s has fewer than n characters, then we return the whole of s.

drop :: UTF8Bytes string index => index -> UTF8 string -> UTF8 string Source #

drop n s returns s without its first n characters. If s has fewer than n characters, then we return an empty string.

span :: UTF8Bytes string index => (Char -> Bool) -> UTF8 string -> (UTF8 string, UTF8 string) Source #

Split a string into two parts: the first is the longest prefix that contains only characters that satisfy the predicate; the second part is the rest of the string. Invalid characters are passed as replacement_char ('\xFFFD') to the predicate.

break :: UTF8Bytes string index => (Char -> Bool) -> UTF8 string -> (UTF8 string, UTF8 string) Source #

Split a string into two parts: the first is the longest prefix that contains only characters that do not satisfy the predicate; the second part is the rest of the string. Invalid characters are passed as replacement_char to the predicate.

foldl :: UTF8Bytes string index => (a -> Char -> a) -> a -> UTF8 string -> a Source #

Traverse a bytestring (left biased). This function is strict in the accumulator.

foldr :: UTF8Bytes string index => (Char -> a -> a) -> a -> UTF8 string -> a Source #

Traverse a bytestring (right biased).

length :: UTF8Bytes string index => UTF8 string -> index Source #

Counts the number of characters encoded in the bytestring. Note that this includes replacement characters. The function is linear in the number of bytes in the representation.

lines :: UTF8Bytes string index => UTF8 string -> [UTF8 string] Source #

Split a string into a list of lines. Lines are terminated by '\n' or the end of the string. An empty line must be terminated by '\n'; the end of the string alone does not terminate an empty line. See also lines'.

lines' :: UTF8Bytes string index => UTF8 string -> [UTF8 string] Source #

Split a string into a list of lines. Lines are terminated by '\n' or the end of the string. An empty line must be terminated by '\n'; the end of the string alone does not terminate an empty line. This function preserves the terminators. See also lines.

Representation based operations

null :: UTF8Bytes string index => UTF8 string -> Bool Source #

Checks if there are no more bytes in the underlying representation.

decode :: UTF8Bytes string index => UTF8 string -> Maybe (Char, index) Source #

Extract the first character from the underlying representation, if one is available. It also returns the number of bytes used in the representation of the character. See also uncons.

byteSplitAt :: UTF8Bytes string index => index -> UTF8 string -> (UTF8 string, UTF8 string) Source #

Split after a given number of bytes in the underlying representation. See also splitAt.

byteTake :: UTF8Bytes string index => index -> UTF8 string -> UTF8 string Source #

Take only the given number of bytes from the underlying representation. See also take.

byteDrop :: UTF8Bytes string index => index -> UTF8 string -> UTF8 string Source #

Drop the given number of bytes from the underlying representation. See also drop.