Copyright	(c) The University of Glasgow 2001 David Roundy 2003-2005
License	GPL (I'm happy to also license this file BSD style but don't want to bother distributing two license files with darcs.
Maintainer	droundy@abridgegame.org
Stability	experimental
Portability	portable
Safe Haskell	Safe-Inferred
Language	Haskell2010

Darcs.Util.ByteString

Contents

IO with mmap or gzip
gzip handling
list utilities
encoding and unicode utilities
properties

Description

GZIp and MMap IO for ByteStrings, encoding utilities, and miscellaneous functions for Data.ByteString

Synopsis

gzReadFilePS :: FilePath -> IO ByteString
mmapFilePS :: FilePath -> IO ByteString
gzWriteFilePS :: FilePath -> ByteString -> IO ()
gzWriteFilePSs :: FilePath -> [ByteString] -> IO ()
gzReadStdin :: IO ByteString
gzWriteHandle :: Handle -> [ByteString] -> IO ()
type FileSegment = (FilePath, Maybe (Int64, Int))
readSegment :: FileSegment -> IO ByteString
isGZFile :: FilePath -> IO (Maybe Int)
gzDecompress :: Maybe Int -> ByteString -> ([ByteString], Bool)
dropSpace :: ByteString -> ByteString
linesPS :: ByteString -> [ByteString]
unlinesPS :: [ByteString] -> ByteString
hashPS :: ByteString -> Int32
breakFirstPS :: Char -> ByteString -> Maybe (ByteString, ByteString)
breakLastPS :: Char -> ByteString -> Maybe (ByteString, ByteString)
substrPS :: ByteString -> ByteString -> Maybe Int
isFunky :: ByteString -> Bool
fromHex2PS :: ByteString -> Either String ByteString
fromPS2Hex :: ByteString -> ByteString
betweenLinesPS :: ByteString -> ByteString -> ByteString -> Maybe ByteString
intercalate :: ByteString -> [ByteString] -> ByteString
isAscii :: ByteString -> Bool
decodeLocale :: ByteString -> String
encodeLocale :: String -> ByteString
unpackPSFromUTF8 :: ByteString -> String
packStringToUTF8 :: String -> ByteString
prop_unlinesPS_linesPS_left_inverse :: ByteString -> Bool
prop_linesPS_length :: ByteString -> Bool
prop_unlinesPS_length :: [ByteString] -> Bool
propHexConversion :: ByteString -> Bool
spec_betweenLinesPS :: ByteString -> ByteString -> ByteString -> Maybe ByteString

IO with mmap or gzip

gzReadFilePS :: FilePath -> IO ByteString Source #

Read an entire file, which may or may not be gzip compressed, directly into a ByteString.

mmapFilePS :: FilePath -> IO ByteString Source #

Like readFilePS, this reads an entire file directly into a ByteString, but it is even more efficient. It involves directly mapping the file to memory. This has the advantage that the contents of the file never need to be copied. Also, under memory pressure the page may simply be discarded, wile in the case of readFilePS it would need to be written to swap. If you read many small files, mmapFilePS will be less memory-efficient than readFilePS, since each mmapFilePS takes up a separate page of memory. Also, you can run into bus errors if the file is modified.

gzWriteFilePS :: FilePath -> ByteString -> IO () Source #

gzWriteFilePSs :: FilePath -> [ByteString] -> IO () Source #

gzReadStdin :: IO ByteString Source #

Read standard input, which may or may not be gzip compressed, directly into a ByteString.

gzWriteHandle :: Handle -> [ByteString] -> IO () Source #

type FileSegment = (FilePath, Maybe (Int64, Int)) Source #

Pointer to a filesystem, possibly with start/end offsets. Supposed to be fed to (uncurry mmapFileByteString) or similar.

readSegment :: FileSegment -> IO ByteString Source #

Read in a FileSegment into a Lazy ByteString. Implemented using mmap.

gzip handling

isGZFile :: FilePath -> IO (Maybe Int) Source #

gzDecompress :: Maybe Int -> ByteString -> ([ByteString], Bool) Source #

Decompress the given bytestring into a lazy list of chunks, along with a boolean flag indicating (if True) that the CRC was corrupted. Inspecting the flag will cause the entire list of chunks to be evaluated (but if you throw away the list immediately this should run in constant space).

list utilities

dropSpace :: ByteString -> ByteString Source #

Drop leading white space, where white space is defined as consisting of ' ', 't', 'n', or 'r'.

linesPS :: ByteString -> [ByteString] Source #

Split the input into lines, that is, sections separated by 'n' bytes, unless it is empty, in which case the result has one empty line.

unlinesPS :: [ByteString] -> ByteString Source #

Concatenate the inputs with 'n' bytes in interspersed.

hashPS :: ByteString -> Int32 Source #

breakFirstPS :: Char -> ByteString -> Maybe (ByteString, ByteString) Source #

breakLastPS :: Char -> ByteString -> Maybe (ByteString, ByteString) Source #

substrPS :: ByteString -> ByteString -> Maybe Int Source #

isFunky :: ByteString -> Bool Source #

fromHex2PS :: ByteString -> Either String ByteString Source #

fromPS2Hex :: ByteString -> ByteString Source #

betweenLinesPS :: ByteString -> ByteString -> ByteString -> Maybe ByteString Source #

Return the B.ByteString between the two lines given, or Nothing if either of them does not appear.

Precondition: the first two arguments (start and end line) must be non-empty and contain no newline bytes.

intercalate :: ByteString -> [ByteString] -> ByteString #

O(n) The intercalate function takes a ByteString and a list of ByteStrings and concatenates the list after interspersing the first argument between each element of the list.

encoding and unicode utilities

isAscii :: ByteString -> Bool Source #

Test if a ByteString is made of ascii characters

decodeLocale :: ByteString -> String Source #

Decode a ByteString to a String according to the current locale, using lone surrogates for un-decodable bytes.

encodeLocale :: String -> ByteString Source #

Encode a String to a ByteString according to the current locale, converting lone surrogates back to the original byte. If that fails (because the locale does not support the full unicode range) then encode using utf-8, assuming that the un-ecodable characters come from patch meta data.

properties

prop_unlinesPS_linesPS_left_inverse :: ByteString -> Bool Source #

prop_linesPS_length :: ByteString -> Bool Source #

prop_unlinesPS_length :: [ByteString] -> Bool Source #

propHexConversion :: ByteString -> Bool Source #

spec_betweenLinesPS :: ByteString -> ByteString -> ByteString -> Maybe ByteString Source #

Simpler but less efficient variant of betweenLinesPS. Note that this is only equivalent under the stated preconditions.