License	BSD-style
Maintainer	Vincent Hanquez <vincent@snarc.org>
Stability	experimental
Portability	portable
Safe Haskell	None
Language	Haskell2010

Basement.String

Contents

Binary conversion
Legacy utility

Description

A String type backed by a UTF8 encoded byte array and all the necessary functions to manipulate the string.

You can think of String as a specialization of a byte array that have element of type Char.

The String data must contain UTF8 valid data.

Synopsis

newtype String = String (UArray Word8)
newtype MutableString st = MutableString (MUArray Word8 st)
create :: PrimMonad prim => CountOf Word8 -> (MutableString (PrimState prim) -> prim (Offset Word8)) -> prim String
replicate :: CountOf Char -> Char -> String
length :: String -> CountOf Char
data Encoding
- = ASCII7
- | UTF8
- | UTF16
- | UTF32
- | ISO_8859_1
fromBytes :: Encoding -> UArray Word8 -> (String, Maybe ValidationFailure, UArray Word8)
fromChunkBytes :: [UArray Word8] -> [String]
fromBytesUnsafe :: UArray Word8 -> String
fromBytesLenient :: UArray Word8 -> (String, UArray Word8)
toBytes :: Encoding -> String -> UArray Word8
mutableValidate :: PrimMonad prim => MUArray Word8 (PrimState prim) -> Offset Word8 -> CountOf Word8 -> prim (Offset Word8, Maybe ValidationFailure)
copy :: String -> String
data ValidationFailure
- = InvalidHeader
- | InvalidContinuation
- | MissingByte
- | BuildingFailure
index :: String -> Offset Char -> Maybe Char
null :: String -> Bool
drop :: CountOf Char -> String -> String
take :: CountOf Char -> String -> String
splitAt :: CountOf Char -> String -> (String, String)
revDrop :: CountOf Char -> String -> String
revTake :: CountOf Char -> String -> String
revSplitAt :: CountOf Char -> String -> (String, String)
splitOn :: (Char -> Bool) -> String -> [String]
sub :: String -> Offset8 -> Offset8 -> String
elem :: Char -> String -> Bool
indices :: String -> String -> [Offset8]
intersperse :: Char -> String -> String
span :: (Char -> Bool) -> String -> (String, String)
spanEnd :: (Char -> Bool) -> String -> (String, String)
break :: (Char -> Bool) -> String -> (String, String)
breakEnd :: (Char -> Bool) -> String -> (String, String)
breakElem :: Char -> String -> (String, String)
breakLine :: String -> Either Bool (String, String)
dropWhile :: (Char -> Bool) -> String -> String
singleton :: Char -> String
charMap :: (Char -> Char) -> String -> String
snoc :: String -> Char -> String
cons :: Char -> String -> String
unsnoc :: String -> Maybe (String, Char)
uncons :: String -> Maybe (Char, String)
find :: (Char -> Bool) -> String -> Maybe Char
findIndex :: (Char -> Bool) -> String -> Maybe (Offset Char)
sortBy :: (Char -> Char -> Ordering) -> String -> String
filter :: (Char -> Bool) -> String -> String
reverse :: String -> String
replace :: String -> String -> String -> String
builderAppend :: PrimMonad state => Char -> Builder String MutableString Word8 state err ()
builderBuild :: PrimMonad m => Int -> Builder String MutableString Word8 m err () -> m (Either err String)
builderBuild_ :: PrimMonad m => Int -> Builder String MutableString Word8 m () () -> m String
readInteger :: String -> Maybe Integer
readIntegral :: (HasNegation i, IntegralUpsize Word8 i, Additive i, Multiplicative i, IsIntegral i) => String -> Maybe i
readNatural :: String -> Maybe Natural
readDouble :: String -> Maybe Double
readRational :: String -> Maybe Rational
readFloatingExact :: String -> ReadFloatingCallback a -> Maybe a
upper :: String -> String
lower :: String -> String
caseFold :: String -> String
isPrefixOf :: String -> String -> Bool
isSuffixOf :: String -> String -> Bool
isInfixOf :: String -> String -> Bool
stripPrefix :: String -> String -> Maybe String
stripSuffix :: String -> String -> Maybe String
all :: (Char -> Bool) -> String -> Bool
any :: (Char -> Bool) -> String -> Bool
lines :: String -> [String]
words :: String -> [String]
toBase64 :: String -> String
toBase64URL :: Bool -> String -> String
toBase64OpenBSD :: String -> String

Documentation

newtype String Source #

Opaque packed array of characters in the UTF8 encoding

Constructors

String (UArray Word8)

Instances

IsList String Source #
Instance details Defined in Basement.UTF8.Base Associated Types type Item String :: Type # Methods fromList :: [Item String] -> String # fromListN :: Int -> [Item String] -> String # toList :: String -> [Item String] #
Eq String Source #
Instance details Defined in Basement.UTF8.Base Methods (==) :: String -> String -> Bool # (/=) :: String -> String -> Bool #
Data String Source #
Instance details Defined in Basement.UTF8.Base Methods gfoldl :: (forall d b. Data d => c (d -> b) -> d -> c b) -> (forall g. g -> c g) -> String -> c String # gunfold :: (forall b r. Data b => c (b -> r) -> c r) -> (forall r. r -> c r) -> Constr -> c String # toConstr :: String -> Constr # dataTypeOf :: String -> DataType # dataCast1 :: Typeable t => (forall d. Data d => c (t d)) -> Maybe (c String) # dataCast2 :: Typeable t => (forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c String) # gmapT :: (forall b. Data b => b -> b) -> String -> String # gmapQl :: (r -> r' -> r) -> r -> (forall d. Data d => d -> r') -> String -> r # gmapQr :: (r' -> r -> r) -> r -> (forall d. Data d => d -> r') -> String -> r # gmapQ :: (forall d. Data d => d -> u) -> String -> [u] # gmapQi :: Int -> (forall d. Data d => d -> u) -> String -> u # gmapM :: Monad m => (forall d. Data d => d -> m d) -> String -> m String # gmapMp :: MonadPlus m => (forall d. Data d => d -> m d) -> String -> m String # gmapMo :: MonadPlus m => (forall d. Data d => d -> m d) -> String -> m String #
Ord String Source #
Instance details Defined in Basement.UTF8.Base Methods compare :: String -> String -> Ordering # (<) :: String -> String -> Bool # (<=) :: String -> String -> Bool # (>) :: String -> String -> Bool # (>=) :: String -> String -> Bool # max :: String -> String -> String # min :: String -> String -> String #
Show String Source #
Instance details Defined in Basement.UTF8.Base Methods showsPrec :: Int -> String -> ShowS # show :: String -> String0 # showList :: [String] -> ShowS #
IsString String Source #
Instance details Defined in Basement.UTF8.Base Methods fromString :: String0 -> String #
Semigroup String Source #
Instance details Defined in Basement.UTF8.Base Methods (<>) :: String -> String -> String # sconcat :: NonEmpty String -> String # stimes :: Integral b => b -> String -> String #
Monoid String Source #
Instance details Defined in Basement.UTF8.Base Methods mempty :: String # mappend :: String -> String -> String # mconcat :: [String] -> String #
NormalForm String Source #
Instance details Defined in Basement.UTF8.Base Methods toNormalForm :: String -> () Source #
From AsciiString String Source #
Instance details Defined in Basement.From Methods from :: AsciiString -> String Source #
From String (UArray Word8) Source #
Instance details Defined in Basement.From Methods from :: String -> UArray Word8 Source #
TryFrom (UArray Word8) String Source #
Instance details Defined in Basement.From Methods tryFrom :: UArray Word8 -> Maybe String Source #
type Item String Source #
Instance details Defined in Basement.UTF8.Base type Item String = Char

newtype MutableString st Source #

Mutable String Buffer.

Use as an *append* buffer, as UTF8 variable encoding doesn't really allow to change previously written character without potentially shifting bytes.

Constructors

MutableString (MUArray Word8 st)

create :: PrimMonad prim => CountOf Word8 -> (MutableString (PrimState prim) -> prim (Offset Word8)) -> prim String Source #

Unsafely create a string of up to sz bytes.

The callback f needs to return the number of bytes filled in the underlaying bytes buffer. No check is made on the callback return values, and if it's not contained without the bounds, bad things will happen.

replicate :: CountOf Char -> Char -> String Source #

Replicate a character c n times to create a string of length n

length :: String -> CountOf Char Source #

Length of a String using CountOf

this size is available in o(n)

Binary conversion

data Encoding Source #

Various String Encoding that can be use to convert to and from bytes

Constructors

ASCII7
UTF8
UTF16
UTF32
ISO_8859_1

Instances

Bounded Encoding Source #
Instance details Defined in Basement.String Methods minBound :: Encoding # maxBound :: Encoding #
Enum Encoding Source #
Instance details Defined in Basement.String Methods succ :: Encoding -> Encoding # pred :: Encoding -> Encoding # toEnum :: Int -> Encoding # fromEnum :: Encoding -> Int # enumFrom :: Encoding -> [Encoding] # enumFromThen :: Encoding -> Encoding -> [Encoding] # enumFromTo :: Encoding -> Encoding -> [Encoding] # enumFromThenTo :: Encoding -> Encoding -> Encoding -> [Encoding] #
Eq Encoding Source #
Instance details Defined in Basement.String Methods (==) :: Encoding -> Encoding -> Bool # (/=) :: Encoding -> Encoding -> Bool #
Data Encoding Source #
Instance details Defined in Basement.String Methods gfoldl :: (forall d b. Data d => c (d -> b) -> d -> c b) -> (forall g. g -> c g) -> Encoding -> c Encoding # gunfold :: (forall b r. Data b => c (b -> r) -> c r) -> (forall r. r -> c r) -> Constr -> c Encoding # toConstr :: Encoding -> Constr # dataTypeOf :: Encoding -> DataType # dataCast1 :: Typeable t => (forall d. Data d => c (t d)) -> Maybe (c Encoding) # dataCast2 :: Typeable t => (forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c Encoding) # gmapT :: (forall b. Data b => b -> b) -> Encoding -> Encoding # gmapQl :: (r -> r' -> r) -> r -> (forall d. Data d => d -> r') -> Encoding -> r # gmapQr :: (r' -> r -> r) -> r -> (forall d. Data d => d -> r') -> Encoding -> r # gmapQ :: (forall d. Data d => d -> u) -> Encoding -> [u] # gmapQi :: Int -> (forall d. Data d => d -> u) -> Encoding -> u # gmapM :: Monad m => (forall d. Data d => d -> m d) -> Encoding -> m Encoding # gmapMp :: MonadPlus m => (forall d. Data d => d -> m d) -> Encoding -> m Encoding # gmapMo :: MonadPlus m => (forall d. Data d => d -> m d) -> Encoding -> m Encoding #
Ord Encoding Source #
Instance details Defined in Basement.String Methods compare :: Encoding -> Encoding -> Ordering # (<) :: Encoding -> Encoding -> Bool # (<=) :: Encoding -> Encoding -> Bool # (>) :: Encoding -> Encoding -> Bool # (>=) :: Encoding -> Encoding -> Bool # max :: Encoding -> Encoding -> Encoding # min :: Encoding -> Encoding -> Encoding #
Show Encoding Source #
Instance details Defined in Basement.String Methods showsPrec :: Int -> Encoding -> ShowS # show :: Encoding -> String # showList :: [Encoding] -> ShowS #

fromBytes :: Encoding -> UArray Word8 -> (String, Maybe ValidationFailure, UArray Word8) Source #

Convert a ByteArray to a string assuming a specific encoding.

It returns a 3-tuple of:

The string that has been succesfully converted without any error
An optional validation error
The remaining buffer that hasn't been processed (either as a result of an error, or because the encoded sequence is not fully available)

Considering a stream of data that is fetched chunk by chunk, it's valid to assume that some sequence might fall in a chunk boundary. When converting chunks, if the error is Nothing and the remaining buffer is not empty, then this buffer need to be prepended to the next chunk

fromChunkBytes :: [UArray Word8] -> [String] Source #

Decode a stream of binary chunks containing UTF8 encoding in a list of valid String

Chunk not necessarily contains a valid string, as a UTF8 sequence could be split over 2 chunks.

fromBytesUnsafe :: UArray Word8 -> String Source #

Convert a Byte Array representing UTF8 data directly to a string without checking for UTF8 validity

If the input contains invalid sequences, it will trigger runtime async errors when processing data.

In doubt, use fromBytes

fromBytesLenient :: UArray Word8 -> (String, UArray Word8) Source #

Convert a UTF8 array of bytes to a String.

If there's any error in the stream, it will automatically insert replacement bytes to replace invalid sequences.

In the case of sequence that fall in the middle of 2 chunks, the remaining buffer is supposed to be preprended to the next chunk, and resume the parsing.

toBytes :: Encoding -> String -> UArray Word8 Source #

Convert a String to a bytearray in a specific encoding

if the encoding is UTF8, the underlying buffer is returned without extra allocation or any processing

In any other encoding, some allocation and processing are done to convert.

mutableValidate :: PrimMonad prim => MUArray Word8 (PrimState prim) -> Offset Word8 -> CountOf Word8 -> prim (Offset Word8, Maybe ValidationFailure) Source #

Similar to validate but works on a MutableByteArray

copy :: String -> String Source #

Copy the String

The slice of memory is copied to a new slice, making the new string independent from the original string..

data ValidationFailure Source #

Possible failure related to validating bytes of UTF8 sequences.

Constructors

InvalidHeader
InvalidContinuation
MissingByte
BuildingFailure

Instances

Eq ValidationFailure Source #
Instance details Defined in Basement.UTF8.Types Methods (==) :: ValidationFailure -> ValidationFailure -> Bool # (/=) :: ValidationFailure -> ValidationFailure -> Bool #
Show ValidationFailure Source #
Instance details Defined in Basement.UTF8.Types Methods showsPrec :: Int -> ValidationFailure -> ShowS # show :: ValidationFailure -> String # showList :: [ValidationFailure] -> ShowS #
Exception ValidationFailure Source #
Instance details Defined in Basement.UTF8.Types Methods toException :: ValidationFailure -> SomeException # fromException :: SomeException -> Maybe ValidationFailure # displayException :: ValidationFailure -> String #

index :: String -> Offset Char -> Maybe Char Source #

Return the nth character in a String

Compared to an array, the string need to be scanned from the beginning since the UTF8 encoding is variable.

null :: String -> Bool Source #

Check if a String is null

drop :: CountOf Char -> String -> String Source #

Create a string with the remaining Chars after dropping @n Chars from the beginning

take :: CountOf Char -> String -> String Source #

Create a string composed of a number @n of Chars (Unicode code points).

if the input @s contains less characters than required, then the input string is returned.

splitAt :: CountOf Char -> String -> (String, String) Source #

Split a string at the Offset specified (in Char) returning both the leading part and the remaining part.

revDrop :: CountOf Char -> String -> String Source #

Similar to drop but from the end

revTake :: CountOf Char -> String -> String Source #

Similar to take but from the end

revSplitAt :: CountOf Char -> String -> (String, String) Source #

Similar to splitAt but from the end

splitOn :: (Char -> Bool) -> String -> [String] Source #

Split on the input string using the predicate as separator

e.g.

splitOn (== ',') ","          == ["",""]
splitOn (== ',') ",abc,"      == ["","abc",""]
splitOn (== ':') "abc"        == ["abc"]
splitOn (== ':') "abc::def"   == ["abc","","def"]
splitOn (== ':') "::abc::def" == ["","","abc","","def"]

sub :: String -> Offset8 -> Offset8 -> String Source #

Internal call to make a substring given offset in bytes.

This is unsafe considering that one can create a substring starting and/or ending on the middle of a UTF8 sequence.

elem :: Char -> String -> Bool Source #

Return whereas the string contains a specific character or not

indices :: String -> String -> [Offset8] Source #

intersperse :: Char -> String -> String Source #

Intersperse the character sep between each character in the string

intersperse ' ' "Hello Foundation"

"H e l l o F o u n d a t i o n"

span :: (Char -> Bool) -> String -> (String, String) Source #

Apply a predicate to the string to return the longest prefix that satisfy the predicate and the remaining

spanEnd :: (Char -> Bool) -> String -> (String, String) Source #

Apply a predicate to the string to return the longest suffix that satisfy the predicate and the remaining

break :: (Char -> Bool) -> String -> (String, String) Source #

Break a string into 2 strings at the location where the predicate return True

breakEnd :: (Char -> Bool) -> String -> (String, String) Source #

breakElem :: Char -> String -> (String, String) Source #

Break a string into 2 strings at the first occurence of the character

breakLine :: String -> Either Bool (String, String) Source #

Same as break but cut on a line feed with an optional carriage return.

This is the same operation as 'breakElem LF' dropping the last character of the string if it's a CR.

Also for efficiency reason (streaming), it returns if the last character was a CR character.

dropWhile :: (Char -> Bool) -> String -> String Source #

Drop character from the beginning while the predicate is true

singleton :: Char -> String Source #

Create a single element String

charMap :: (Char -> Char) -> String -> String Source #

Monomorphically map the character in a string and return the transformed one

snoc :: String -> Char -> String Source #

Append a Char to the end of the String and return this new String

cons :: Char -> String -> String Source #

Prepend a Char to the beginning of the String and return this new String

unsnoc :: String -> Maybe (String, Char) Source #

Extract the String stripped of the last character and the last character if not empty

If empty, Nothing is returned

uncons :: String -> Maybe (Char, String) Source #

Extract the First character of a string, and the String stripped of the first character.

If empty, Nothing is returned

find :: (Char -> Bool) -> String -> Maybe Char Source #

Look for a predicate in the String and return the matched character, if any.

findIndex :: (Char -> Bool) -> String -> Maybe (Offset Char) Source #

Return the index in unit of Char of the first occurence of the predicate returning True

If not found, Nothing is returned

sortBy :: (Char -> Char -> Ordering) -> String -> String Source #

Sort the character in a String using a specific sort function

TODO: optimise not going through a list

filter :: (Char -> Bool) -> String -> String Source #

Filter characters of a string using the predicate

reverse :: String -> String Source #

Reverse a string

replace :: String -> String -> String -> String Source #

Replace all the occurrencies of needle with replacement in the haystack string.

builderAppend :: PrimMonad state => Char -> Builder String MutableString Word8 state err () Source #

Append a character to a String builder

builderBuild :: PrimMonad m => Int -> Builder String MutableString Word8 m err () -> m (Either err String) Source #

Create a new String builder using chunks of sizeChunksI

builderBuild_ :: PrimMonad m => Int -> Builder String MutableString Word8 m () () -> m String Source #

readInteger :: String -> Maybe Integer Source #

readIntegral :: (HasNegation i, IntegralUpsize Word8 i, Additive i, Multiplicative i, IsIntegral i) => String -> Maybe i Source #

Read an Integer from a String

Consume an optional minus sign and many digits until end of string.

readNatural :: String -> Maybe Natural Source #

Read a Natural from a String

Consume many digits until end of string.

readDouble :: String -> Maybe Double Source #

Try to read a Double

readRational :: String -> Maybe Rational Source #

Try to read a floating number as a Rational

Note that for safety reason, only exponent between -10000 and 10000 is allowed as otherwise DoS/OOM is very likely. if you don't want this behavior, switching to a scientific type (not provided yet) that represent the exponent separately is the advised solution.

readFloatingExact :: String -> ReadFloatingCallback a -> Maybe a Source #

Read an Floating like number of the form:

-: numbers [ . numbers ] [ ( e | E ) [ - ] number ]

Call a function with:

A boolean representing if the number is negative
The digits part represented as a single natural number (123.456 is represented as 123456)
The number of digits in the fractional part (e.g. 123.456 => 3)
The exponent if any

The code is structured as a simple state machine that:

Optionally Consume a - sign
Consume number for the integral part
Optionally
Consume .
Consume remaining digits if not already end of string
Optionally Consume a e or E follow by an optional - and a number

upper :: String -> String Source #

Convert a String to the upper-case equivalent.

lower :: String -> String Source #

Convert a String to the upper-case equivalent.

caseFold :: String -> String Source #

Convert a String to the unicode case fold equivalent.

Case folding is mostly used for caseless comparison of strings.

isPrefixOf :: String -> String -> Bool Source #

Check whether the first string is a prefix of the second string.

isSuffixOf :: String -> String -> Bool Source #

Check whether the first string is a suffix of the second string.

isInfixOf :: String -> String -> Bool Source #

Check whether the first string is contains within the second string.

TODO: implemented the naive way and thus terribly inefficient, reimplement properly

stripPrefix :: String -> String -> Maybe String Source #

Try to strip a prefix from the start of a String.

If the prefix is not starting the string, then Nothing is returned, otherwise the striped string is returned

stripSuffix :: String -> String -> Maybe String Source #

Try to strip a suffix from the end of a String.

If the suffix is not ending the string, then Nothing is returned, otherwise the striped string is returned

all :: (Char -> Bool) -> String -> Bool Source #

any :: (Char -> Bool) -> String -> Bool Source #

Legacy utility

lines :: String -> [String] Source #

Split lines in a string using newline as separation.

Note that carriage return preceding a newline are also strip for maximum compatibility between Windows and Unix system.

words :: String -> [String] Source #

Split words in a string using spaces as separation

words "Hello Foundation"

Hello, Foundation

toBase64 :: String -> String Source #

Transform string src to base64 binary representation.

toBase64URL :: Bool -> String -> String Source #

Transform string src to URL-safe base64 binary representation. The result will be either padded or unpadded, depending on the boolean padded argument.

toBase64OpenBSD :: String -> String Source #

Transform string src to OpenBSD base64 binary representation.