text-containers-0.1.0.0: Memory-efficient string-indexed container types.

Copyright© 2017 Herbert Valerio Riedel
LicenseGPLv3
Safe HaskellTrustworthy
LanguageHaskell2010

Data.TextSet.Unboxed

Contents

Description

This module provides the TextSet container for storing sets of text strings.

This module is intended to be imported qualified, e.g.

import           Data.TextSet.Unboxed (TextSet)
import qualified Data.TextSet.Unboxed as TextSet

Synopsis

Documentation

data TextSet Source #

A set of unboxed ShortText strings

The memory footprint of this data-structure is a single heap object (an unlifted ByteArray#) with the size expressed in words

\[ 3 + n + \left\lceil \frac{1}{w} \sum_{i=0}^{n-1} len(s_i) \right\rceil \]

where the word-size \(w\) is either \(w = 4\) or \(w = 8\) bytes; and where \(len(s_i)\) denotes the UTF-8 size in bytes of the \(i\)-th text string.

NOTE: Depending on whether you UNPACK the TextSet wrapper, you need at least one additional word for the pointer to the internal ByteArray# heap object.

type Key = ShortText Source #

Querying & lookup

size :: TextSet -> Int Source #

\(\mathcal{O}(1)\). Report number of elements in TextSet.

>>> size empty
0
>>> size (singleton "")
1
>>> size (fromList ["Hey","Hey","Jude"])
2

null :: TextSet -> Bool Source #

\(\mathcal{O}(1)\). Check for empty set.

>>> null empty
True
>>> null (singleton "")
False

member :: Key -> TextSet -> Bool Source #

\(\mathcal{O}(\log n)\). Test whether set contains a string.

>>> member "empty" empty
False
>>> member "a" (fromList ["a","b","c"])
True
>>> member "d" (fromList ["a","b","c"])
False

lookupMin :: TextSet -> Maybe Key Source #

\(\mathcal{O}(1)\). Extract minimal element from set.

>>> lookupMin empty
Nothing
>>> lookupMin (fromList ["a","b","c"])
Just "a"

lookupMax :: TextSet -> Maybe Key Source #

\(\mathcal{O}(1)\). Extract maximal element from set.

>>> lookupMax empty
Nothing
>>> lookupMax (fromList ["a","b","c"])
Just "c"

lookupLE :: Key -> TextSet -> Maybe (Int, Key) Source #

\(\mathcal{O}(\log n)\). Look up "greatest" string (together with its index) in set less or equal to given string.

>>> lookupLE "a" (fromList ["bb","cc"])
Nothing
>>> lookupLE "c" (fromList ["bb","cc"])
Just (0,"bb")
>>> lookupLE "cc" (fromList ["bb","cc"])
Just (1,"cc")
>>> lookupLE "z" (fromList ["bb","cc"])
Just (1,"cc")

lookupGE :: Key -> TextSet -> Maybe (Int, Key) Source #

\(\mathcal{O}(\log n)\). Look up "least" string (together with its index) in set greater or equal to given string.

>>> lookupGE "a" (fromList ["bb","cc"])
Just (0,"bb")
>>> lookupGE "c" (fromList ["bb","cc"])
Just (1,"cc")
>>> lookupGE "cc" (fromList ["bb","cc"])
Just (1,"cc")
>>> lookupGE "z" (fromList ["bb","cc"])
Nothing

(!?) :: TextSet -> Int -> Maybe Key Source #

\(\mathcal{O}(1)\). Retrieve \(i\)-th element in the sorted sequence of elements.

>>> fromList ["Hey","","Jude"] !? 0
Just ""
>>> fromList ["Hey","","Jude"] !? 1
Just "Hey"
>>> fromList ["Hey","","Jude"] !? 3
Nothing

See also lookupIndex.

lookupIndex :: Key -> TextSet -> Maybe Int Source #

\(\mathcal{O}(\log n)\). Look up element in set and report its zero-based index in the sorted sequence elements.

>>> lookupIndex "" (fromList ["Hey","","Jude"])
Just 0
>>> lookupIndex "Hey" (fromList ["Hey","","Jude"])
Just 1
>>> lookupIndex "Law" (fromList ["Hey","","Jude"])
Nothing

See also !?.

Construction

empty :: TextSet Source #

\(\mathcal{O}(1)\). An empty TextSet.

singleton :: Key -> TextSet Source #

\(\mathcal{O}(1)\). Construct set containing one element.

>>> toList (singleton "alone")
["alone"]

fromList :: [Key] -> TextSet Source #

\(\mathcal{O}(n \log n)\). Construct set from list of elements.

>>> toList (fromList ["Hey","Jude","Hey","Law","Hey",""])
["","Hey","Jude","Law"]

fromDistinctAscList :: [Key] -> TextSet Source #

\(\mathcal{O}(n)\). Construct set from list of distinct elements in ascending order.

NOTE: If the input list is not strictly ascending, an error is thrown.

fromSet :: Set Key -> TextSet Source #

\(\mathcal{O}(n)\). Convert Set to TextSet.

Deconstruction

toList :: TextSet -> [Key] Source #

\(\mathcal{O}(n)\). Convert TextSet to list of ShortText in ascending order.

toArray :: TextSet -> TextArray Source #

\(\mathcal{O}(1)\). Convert TextSet to TextArray of ShortText in ascending order.

>>> toList (fromList ["Hey","Jude","Hey","Law","Hey",""])
["","Hey","Jude","Law"]

toSet :: TextSet -> Set Key Source #

\(\mathcal{O}(n)\). Convert TextSet to Set.