{-# LANGUAGE BangPatterns #-} -- ---------------------------------------------------------------------------- {- | Module : Data.StringMap.Base Copyright : Copyright (C) 2009-2012 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : experimental Portability: not portable An efficient implementation of maps from strings to arbitrary values. Values can associated with an arbitrary byte key. Searching for keys is very fast, but the prefix tree probably consumes more memory than "Data.Map". The main differences are the special 'prefixFind' functions, which can be used to perform prefix queries. The interface is heavily borrowed from "Data.Map" and "Data.IntMap". Most other function names clash with "Prelude" names, therefore this module is usually imported @qualified@, e.g. > import Data.StringMap (StringMap) > import qualified Data.StringMap as T Many functions have a worst-case complexity of /O(min(n,L))/. This means that the operation can become linear with the number of elements with a maximum of /L/, the length of the key (the number of bytes in the list). The functions for searching a prefix have a worst-case complexity of /O(max(L,R))/. This means that the operation can become linear with /R/, the number of elements found for the prefix, with a minimum of /L/. The module export list includes the internal data types, their constructors and access functions for ultimate flexibility. Derived modules should not export these (as shown in "Data.StringMap") to provide only a restricted interface. -} -- ---------------------------------------------------------------------------- module Data.StringMap.Base ( -- * Map type StringMap (Empty, Val, Branch) -- the constructors are exported for pattern matching only -- use cases occur in Data.StringMap.Strict , Key -- * Operators , (!) -- * Query , value , valueWithDefault , null , size , member , lookup , findWithDefault , prefixFind , prefixFindWithKey , prefixFindWithKeyBF , lookupGE , lookupLE , lookupRange -- * Construction , empty , singleton -- ** Insertion , insert , insertWith , insertWithKey -- ** Delete\/Update , adjust , adjustWithKey , delete , update , updateWithKey -- * Combine -- ** Union , union , unionWith , unionWithKey -- ** Difference , difference , differenceWith , differenceWithKey -- * Traversal -- ** Map , map , mapWithKey , mapM , mapWithKeyM , mapMaybe -- * Folds , fold , foldWithKey -- * Conversion , keys , elems -- ** Lists , fromList , toList , toListBF -- ** Maps , fromMap , toMap -- * Debugging , space , stat , keyChars -- Internal , cutPx' , cutAllPx' , branch , val , siseq , fromKey , norm , normError' , deepNorm ) where import Prelude hiding (lookup, map, mapM, null, succ) import Control.Arrow import Control.DeepSeq import qualified Data.Foldable as F import Data.Binary import qualified Data.List as L import qualified Data.Map as M import Data.Maybe hiding (mapMaybe) import Data.StringMap.StringSet import Data.StringMap.Types -- ---------------------------------------- data StringMap v = Empty | Val { value' :: v , tree :: ! (StringMap v) } | Branch { sym :: {-# UNPACK #-} ! Sym , child :: ! (StringMap v) , next :: ! (StringMap v) } -- the space optimisation nodes, these -- will be normalized during access into -- the three constructors Empty, Val and Branch | Leaf { value' :: v -- a value at a leaf of the tree } | Last { sym :: {-# UNPACK #-} ! Sym -- the last entry in a branch list , child :: ! (StringMap v) -- or no branch but a single child } | LsSeq { syms :: ! Key1 -- a sequence of single childs , child :: ! (StringMap v) -- in a last node } | BrSeq { syms :: ! Key1 -- a sequence of single childs , child :: ! (StringMap v) -- in a branch node , next :: ! (StringMap v) } | LsSeL { syms :: ! Key1 -- a sequence of single childs , value' :: v -- with a leaf } | BrSeL { syms :: ! Key1 -- a sequence of single childs , value' :: v -- with a leaf in a branch node , next :: ! (StringMap v) } | BrVal { sym :: {-# UNPACK #-} ! Sym -- a branch with a single char , value' :: v -- and a value , next :: ! (StringMap v) } | LsVal { sym :: {-# UNPACK #-} ! Sym -- a last node with a single char , value' :: v -- and a value } deriving (Show, Eq, Ord) -- ---------------------------------------- -- | strict list of chars with unpacked fields -- and packing of 2 or 3 chars into a single object -- -- for internal use in prefix tree to optimize space efficiency data Key1 = Nil | S1 {-# UNPACK #-} ! Sym | S2 {-# UNPACK #-} ! Sym {-# UNPACK #-} ! Sym | S3 {-# UNPACK #-} ! Sym {-# UNPACK #-} ! Sym {-# UNPACK #-} ! Sym | C1 {-# UNPACK #-} ! Sym ! Key1 | C2 {-# UNPACK #-} ! Sym {-# UNPACK #-} ! Sym ! Key1 | C3 {-# UNPACK #-} ! Sym {-# UNPACK #-} ! Sym {-# UNPACK #-} ! Sym ! Key1 deriving (Eq, Ord) instance Show Key1 where show k = show (toKey k) mk1 :: Sym -> Key1 mk1 s1 = S1 s1 -- mk1 s1 = C1 s1 Nil mk2 :: Sym -> Sym -> Key1 mk2 s1 s2 = S2 s1 s2 -- mk2 s1 s2 = C1 s1 (mk1 s2) -- mk2 s1 s2 = C1 s1 (C1 s2 Nil) mk3 :: Sym -> Sym -> Sym -> Key1 mk3 s1 s2 s3 = S3 s1 s2 s3 cons1 :: Sym -> Key1 -> Key1 cons1 s Nil = mk1 s cons1 s (S1 s2) = mk2 s s2 cons1 s (S2 s2 s3) = mk3 s s2 s3 cons1 s (C1 s2 k2) = C2 s s2 k2 cons1 s (C2 s2 s3 k3) = C3 s s2 s3 k3 cons1 s k = C1 s k uncons1 :: Key1 -> (Sym, Key1) uncons1 (S1 s) = (s, Nil) uncons1 (S2 s s2) = (s, mk1 s2) uncons1 (S3 s s2 s3) = (s, mk2 s2 s3) uncons1 (C1 s k1) = (s, k1) uncons1 (C2 s s2 k1) = (s, C1 s2 k1) uncons1 (C3 s s2 s3 k1) = (s, C2 s2 s3 k1) uncons1 Nil = error "uncons1 with Nil" {-# INLINE mk1 #-} {-# INLINE mk2 #-} {-# INLINE mk3 #-} {-# INLINE cons1 #-} {-# INLINE uncons1 #-} toKey :: Key1 -> Key toKey (C3 s1 s2 s3 k) = s1 : s2 : s3 : toKey k toKey (C2 s1 s2 k) = s1 : s2 : toKey k toKey (C1 s1 k) = s1 : toKey k toKey (S3 s1 s2 s3) = s1 : s2 : s3 : [] toKey (S2 s1 s2 ) = s1 : s2 : [] toKey (S1 s1 ) = s1 : [] toKey Nil = [] fromKey :: Key -> Key1 fromKey k1 = foldr cons1 Nil k1 length1 :: Key1 -> Int length1 = length . toKey space1 :: Key1 -> Int space1 Nil = 0 space1 (S1 _) = 2 space1 (S2 _ _) = 3 space1 (S3 _ _ _) = 4 space1 (C1 _ k1) = 3 + space1 k1 space1 (C2 _ _ k1) = 4 + space1 k1 space1 (C3 _ _ _ k1) = 5 + space1 k1 -- ---------------------------------------- -- smart constructors empty :: StringMap v empty = Empty {-# INLINE empty #-} val :: v -> StringMap v -> StringMap v val v Empty = Leaf v val v t = Val v t {-# INLINE val #-} branch :: Sym -> StringMap v -> StringMap v -> StringMap v branch !_k Empty n = n branch !k (Leaf v ) Empty = LsVal k v branch !k (LsVal k1 v) Empty = LsSeL (mk2 k k1) v branch !k (LsSeL ks v) Empty = LsSeL (cons1 k ks) v branch !k (Last k1 c) Empty = lsseq (mk2 k k1) c branch !k (LsSeq ks c) Empty = lsseq (cons1 k ks) c branch !k c Empty = Last k c branch !k (Leaf v ) n = BrVal k v n branch !k (LsVal k1 v) n = BrSeL (mk2 k k1) v n branch !k (LsSeL ks v) n = BrSeL (cons1 k ks) v n branch !k (Last k1 c) n = brseq (mk2 k k1) c n branch !k (LsSeq ks c) n = brseq (cons1 k ks) c n branch !k c n = Branch k c n lsseq :: Key1 -> StringMap v -> StringMap v lsseq !k (Leaf v) = LsSeL k v lsseq !k c = LsSeq k c {-# INLINE lsseq #-} brseq :: Key1 -> StringMap v -> StringMap v -> StringMap v brseq !k (Leaf v) n = BrSeL k v n brseq !k c n = BrSeq k c n {-# INLINE brseq #-} siseq :: Key1 -> StringMap v -> StringMap v siseq Nil c = c siseq k c = case uncons1 k of (k1, Nil) -> Last k1 c _ -> LsSeq k c {-# INLINE siseq #-} anyseq :: Key1 -> StringMap v -> StringMap v anyseq Nil c = c anyseq k (Leaf v) = case uncons1 k of (k1, Nil) -> LsVal k1 v _ -> LsSeL k v anyseq k c = case uncons1 k of (k1, Nil) -> Last k1 c _ -> LsSeq k c {-# INLINE anyseq #-} -- smart selectors norm :: StringMap v -> StringMap v norm (Leaf v) = Val v empty norm (Last k c) = Branch k c empty norm (LsSeq k' c) = case uncons1 k' of (k, Nil) -> Branch k c empty (k, ks) -> Branch k (siseq ks c) empty norm (BrSeq k' c n) = case uncons1 k' of (k, Nil) -> Branch k c n (k, ks) -> Branch k (siseq ks c) n norm (LsSeL ks v) = norm (LsSeq ks (val v empty)) norm (BrSeL ks v n) = norm (BrSeq ks (val v empty) n) norm (LsVal k v) = norm (LsSeq (mk1 k) (val v empty)) norm (BrVal k v n) = norm (BrSeq (mk1 k) (val v empty) n) norm t = t -- ---------------------------------------- deepNorm :: StringMap v -> StringMap v deepNorm t0 = case norm t0 of Empty -> Empty Val v t -> Val v (deepNorm t) Branch c s n -> Branch c (deepNorm s) (deepNorm n) _ -> normError "deepNorm" -- ---------------------------------------- normError' :: String -> String -> a normError' m f = error (m ++ "." ++ f ++ ": pattern match error, prefix tree not normalized") normError :: String -> a normError = normError' "Data.StringMap.Base" -- ---------------------------------------- -- | /O(1)/ Is the map empty? null :: StringMap a -> Bool null Empty = True null _ = False {-# INLINE null #-} -- | /O(1)/ Create a map with a single element. singleton :: Key -> a -> StringMap a singleton k v = anyseq (fromKey k) (val v empty) {-# INLINE singleton #-} -- | /O(1)/ Extract the value of a node (if there is one) value :: Monad m => StringMap a -> m a value t = case norm t of Val v _ -> return v _ -> fail "StringMap.value: no value at this node" {-# INLINE value #-} -- | /O(1)/ Extract the value of a node or return a default value if no value exists. valueWithDefault :: a -> StringMap a -> a valueWithDefault d t = fromMaybe d . value $ t -- | /O(1)/ Extract the successors of a node succ :: StringMap a -> StringMap a succ t = case norm t of Val _ t' -> succ t' t' -> t' {-# INLINE succ #-} -- ---------------------------------------- -- | /O(min(n,L))/ Find the value associated with a key. The function will @return@ the result in -- the monad or @fail@ in it if the key isn't in the map. lookup :: Monad m => Key -> StringMap a -> m a lookup k t = case lookup' k t of Just v -> return v Nothing -> fail "StringMap.lookup: Key not found" {-# INLINE lookup #-} -- | /O(min(n,L))/ Find the value associated with a key. The function will @return@ the result in -- the monad or @fail@ in it if the key isn't in the map. findWithDefault :: a -> Key -> StringMap a -> a findWithDefault v0 k = fromMaybe v0 . lookup' k {-# INLINE findWithDefault #-} -- | /O(min(n,L))/ Is the key a member of the map? member :: Key -> StringMap a -> Bool member k = isJust . lookup k {-# INLINE member #-} -- | /O(min(n,L))/ Find the value at a key. Calls error when the element can not be found. (!) :: StringMap a -> Key -> a (!) = flip $ findWithDefault (error "StringMap.! : element not in the map") -- | /O(min(n,L))/ Insert a new key and value into the map. If the key is already present in -- the map, the associated value will be replaced with the new value. insert :: Key -> a -> StringMap a -> StringMap a insert = insertWith const {-# INLINE insert #-} -- | /O(min(n,L))/ Insert with a combining function. If the key is already present in the map, -- the value of @f new_value old_value@ will be inserted. insertWith :: (a -> a -> a) -> Key -> a -> StringMap a -> StringMap a insertWith f = flip $ insert' f {-# INLINE insertWith #-} -- | /O(min(n,L))/ Insert with a combining function. If the key is already present in the map, -- the value of @f key new_value old_value@ will be inserted. insertWithKey :: (Key -> a -> a -> a) -> Key -> a -> StringMap a -> StringMap a insertWithKey f k = insertWith (f k) k {-# INLINE insertWithKey #-} -- | /O(min(n,L))/ Updates a value at a given key (if that key is in the trie) or deletes the -- element if the result of the updating function is 'Nothing'. If the key is not found, the trie -- is returned unchanged. update :: (a -> Maybe a) -> Key -> StringMap a -> StringMap a update = update' {-# INLINE update #-} -- | /O(min(n,L))/ Updates a value at a given key (if that key is in the trie) or deletes the -- element if the result of the updating function is 'Nothing'. If the key is not found, the trie -- is returned unchanged. updateWithKey :: (Key -> a -> Maybe a) -> Key -> StringMap a -> StringMap a updateWithKey f k = update' (f k) k {-# INLINE updateWithKey #-} -- | /O(min(n,L))/ Delete an element from the map. If no element exists for the key, the map -- remains unchanged. delete :: Key -> StringMap a -> StringMap a delete = update' (const Nothing) {-# INLINE delete #-} adjust :: (a -> a) -> Key -> StringMap a -> StringMap a adjust f = update' (Just . f) {-# INLINE adjust #-} adjustWithKey :: (Key -> a -> a) -> Key -> StringMap a -> StringMap a adjustWithKey f k = update' (Just . f k) k {-# INLINE adjustWithKey #-} -- ---------------------------------------- lookupPx' :: Key -> StringMap a -> StringMap a lookupPx' k0 = look k0 . norm where look [] t = t look k@(c : k1) (Branch c' s' n') | c < c' = empty | c == c' = lookupPx' k1 s' | otherwise = lookupPx' k n' look _ Empty = empty look k (Val _v' t') = lookupPx' k t' look _ _ = normError "lookupPx'" -- Internal lookup function lookup' :: Key -> StringMap a -> Maybe a lookup' k t = case lookupPx' k t of Val v _ -> Just v _ -> Nothing -- ---------------------------------------- -- | remove all entries from the map with key less than the argument key lookupGE :: Key -> StringMap a -> StringMap a lookupGE k0 = look k0 . norm where look [] t = t look k@(c : k1) t@(Branch c' s' n') | c < c' = t | c == c' = branch c' (lookupGE k1 s') n' | otherwise = lookupGE k n' look _ Empty = empty look k (Val _v' t') = lookupGE k t' look _ _ = normError "lookupGE" -- | remove all entries from the map with keys not having the argument key -- as prefix and are larger than the argument key lookupLE :: Key -> StringMap a -> StringMap a lookupLE k0 = look k0 . norm where look [] (Val v' _t') = (Val v' empty) look [] _t = empty look k@(c : k1) (Branch c' s' n') | c < c' = empty | c == c' = branch c' (lookupLE k1 s') empty | otherwise = branch c' s' (lookupLE k n') look _ Empty = empty look k (Val v' t') = val v' (lookupLE k t') look _ _ = normError "lookupLE" -- | Combination of 'lookupLE' and 'lookupGE' -- @keys $ lookupRange "a" "b" $ fromList $ zip ["", "a", "ab", "b", "ba", "c"] [1..] = ["a","ab","b"]@ -- For all keys in @k = keys $ lookupRange lb ub m@, this property holts true: @k >= ub && k <= lb@ lookupRange :: Key -> Key -> StringMap a -> StringMap a lookupRange lb ub = lookupLE ub . lookupGE lb -- ---------------------------------------- -- | /O(max(L,R))/ Find all values where the string is a prefix of the key. prefixFind :: Key -> StringMap a -> [a] prefixFind k = elems . lookupPx' k -- | /O(max(L,R))/ Find all values where the string is a prefix of the key and include the keys -- in the result. prefixFindWithKey :: Key -> StringMap a -> [(Key, a)] prefixFindWithKey k = fmap (first (k ++)) . toList . lookupPx' k -- ---------------------------------------- insert' :: (a -> a -> a) -> a -> Key -> StringMap a -> StringMap a insert' f v k0 = ins k0 . norm where ins' = insert' f v ins k (Branch c' s' n') = case k of [] -> val v (branch c' s' n') (c : k1) | c < c' -> branch c (singleton k1 v) (branch c' s' n') | c == c' -> branch c (ins' k1 s') n' | otherwise -> branch c' s' (ins' k n') ins k Empty = singleton k v ins k (Val v' t') = case k of [] -> val (f v v') t' _ -> val v' (ins' k t') ins _ _ = normError "insert'" -- ---------------------------------------- update' :: (a -> Maybe a) -> Key -> StringMap a -> StringMap a update' f k0 = upd k0 . norm where upd' = update' f upd k (Branch c' s' n') = case k of [] -> branch c' s' n' (c : k1) | c < c' -> branch c' s' n' | c == c' -> branch c (upd' k1 s') n' | otherwise -> branch c' s' (upd' k n') upd _ Empty = empty upd k (Val v' t') = case k of [] -> maybe t' (flip val t') $ f v' _ -> val v' (upd' k t') upd _ _ = normError "update'" -- ---------------------------------------- -- | /O(n+m)/ Left-biased union of two maps. It prefers the first map when duplicate keys are -- encountered, i.e. ('union' == 'unionWith' 'const'). union :: StringMap a -> StringMap a -> StringMap a union = union' const -- | /O(n+m)/ Union with a combining function. unionWith :: (a -> a -> a) -> StringMap a -> StringMap a -> StringMap a unionWith = union' union' :: (a -> a -> a) -> StringMap a -> StringMap a -> StringMap a union' f pt1 pt2 = uni (norm pt1) (norm pt2) where uni' t1' t2' = union' f (norm t1') (norm t2') uni Empty Empty = empty uni Empty (Val v2 t2) = val v2 t2 uni Empty (Branch c2 s2 n2) = branch c2 s2 n2 uni (Val v1 t1) Empty = val v1 t1 uni (Val v1 t1) (Val v2 t2) = val (f v1 v2) (uni' t1 t2) uni (Val v1 t1) t2@(Branch _ _ _) = val v1 (uni' t1 t2) uni (Branch c1 s1 n1) Empty = branch c1 s1 n1 uni t1@(Branch _ _ _ ) (Val v2 t2) = val v2 (uni' t1 t2) uni t1@(Branch c1 s1 n1) t2@(Branch c2 s2 n2) | c1 < c2 = branch c1 s1 (uni' n1 t2) | c1 > c2 = branch c2 s2 (uni' t1 n2) | otherwise = branch c1 (uni' s1 s2) (uni' n1 n2) uni _ _ = normError "union'" -- ---------------------------------------- -- | /O(n+m)/ Union with a combining function, including the key. unionWithKey :: (Key -> a -> a -> a) -> StringMap a -> StringMap a -> StringMap a unionWithKey f = union'' f id union'' :: (Key -> a -> a -> a) -> (Key -> Key) -> StringMap a -> StringMap a -> StringMap a union'' f kf pt1 pt2 = uni (norm pt1) (norm pt2) where uni' t1' t2' = union'' f kf (norm t1') (norm t2') uni Empty Empty = empty uni Empty (Val v2 t2) = val v2 t2 uni Empty (Branch c2 s2 n2) = branch c2 s2 n2 uni (Val v1 t1) Empty = val v1 t1 uni (Val v1 t1) (Val v2 t2) = val (f (kf []) v1 v2) (uni' t1 t2) uni (Val v1 t1) t2@(Branch _ _ _) = val v1 (uni' t1 t2) uni (Branch c1 s1 n1) Empty = branch c1 s1 n1 uni t1@(Branch _ _ _ ) (Val v2 t2) = val v2 (uni' t1 t2) uni t1@(Branch c1 s1 n1) t2@(Branch c2 s2 n2) | c1 < c2 = branch c1 s1 (uni' n1 t2) | c1 > c2 = branch c2 s2 (uni' t1 n2) | otherwise = branch c1 (union'' f (kf . (c1:)) s1 s2) (uni' n1 n2) uni _ _ = normError "union''" -- ---------------------------------------- -- -- | /(O(min(n,m))/ Difference between two tries (based on keys). difference :: StringMap a -> StringMap b -> StringMap a difference = differenceWith (const (const Nothing)) -- | /(O(min(n,m))/ Difference with a combining function. If the combining function always returns -- 'Nothing', this is equal to proper set difference. differenceWith :: (a -> b -> Maybe a) -> StringMap a -> StringMap b -> StringMap a differenceWith f = differenceWithKey (const f) -- | /O(min(n,m))/ Difference with a combining function, including the key. If two equal keys are -- encountered, the combining function is applied to the key and both values. If it returns -- 'Nothing', the element is discarded, if it returns 'Just' a value, the element is updated -- with the new value. differenceWithKey :: (Key -> a -> b -> Maybe a) -> StringMap a -> StringMap b -> StringMap a differenceWithKey f = diff'' f id diff'' :: (Key -> a -> b -> Maybe a) -> (Key -> Key) -> StringMap a -> StringMap b -> StringMap a diff'' f kf pt1 pt2 = dif (norm pt1) (norm pt2) where dif' t1' t2' = diff'' f kf (norm t1') (norm t2') dif Empty _ = empty dif (Val v1 t1) Empty = val v1 t1 dif (Val v1 t1) (Val v2 t2) = case f (kf []) v1 v2 of Nothing -> dif' t1 t2 Just nv -> val nv (dif' t1 t2) dif (Val v1 t1) t2@(Branch _ _ _) = val v1 (dif' t1 t2) dif (Branch c1 s1 n1) Empty = branch c1 s1 n1 dif t1@(Branch _ _ _ ) (Val _ t2) = dif' t1 t2 dif t1@(Branch c1 s1 n1) t2@(Branch c2 s2 n2) | c1 < c2 = branch c1 s1 (dif' n1 t2) | c1 > c2 = dif' t1 n2 | otherwise = branch c1 (diff'' f (kf . (c1:)) s1 s2) (dif' n1 n2) dif _ _ = normError "diff''" -- ---------------------------------------- -- | cut off all branches from a tree @t2@ that are not part of set @t1@ -- -- the following laws must holds -- -- @lookup' k' . cutPx' (singlePS k) $ t == lookup' k t@ for every @k'@ with @k@ prefix of @k'@ -- -- @lookup' k' . cutPx' (singlePS k) $ t == Nothing@ for every @k'@ with @k@ not being a prefix of @k'@ cutPx'' :: (StringMap a -> StringMap a) -> StringSet -> StringMap a -> StringMap a cutPx'' cf s1' t2' = cut s1' (norm t2') where cut PSempty _t2 = empty cut (PSelem _s1) t2 = cf t2 cut (PSnext _ _ _ ) Empty = empty cut t1@(PSnext _ _ _ ) (Val _ t2) = cut t1 (norm t2) cut t1@(PSnext c1 s1 n1) t2@(Branch c2 s2 n2) | c1 < c2 = cut n1 t2 | c1 > c2 = cut t1 (norm n2) | otherwise = branch c1 (cutPx'' cf s1 s2) (cutPx'' cf n1 n2) cut _ _ = normError "cutPx''" cutPx' :: StringSet -> StringMap a -> StringMap a cutPx' = cutPx'' id cutAllPx' :: StringSet -> StringMap a -> StringMap a cutAllPx' = cutPx'' (cv . norm) where cv (Val v _) = val v empty cv _ = empty -- ---------------------------------------- -- | /O(n)/ Map a function over all values in the prefix tree. map :: (a -> b) -> StringMap a -> StringMap b map f = mapWithKey (const f) mapWithKey :: (Key -> a -> b) -> StringMap a -> StringMap b mapWithKey f = map' f id map' :: (Key -> a -> b) -> (Key -> Key) -> StringMap a -> StringMap b map' _ _ (Empty) = Empty map' f k (Val v t) = Val (f (k []) v) (map' f k t) map' f k (Branch c s n) = Branch c (map' f ((c :) . k) s) (map' f k n) map' f k (Leaf v) = Leaf (f (k []) v) map' f k (Last c s) = Last c (map' f ((c :) . k) s) map' f k (LsSeq cs s) = LsSeq cs (map' f ((toKey cs ++) . k) s) map' f k (BrSeq cs s n) = BrSeq cs (map' f ((toKey cs ++) . k) s) (map' f k n) map' f k (LsSeL cs v) = LsSeL cs (f (k []) v) map' f k (BrSeL cs v n) = BrSeL cs (f (k []) v) (map' f k n) map' f k (LsVal c v) = LsVal c (f (k []) v) map' f k (BrVal c v n) = BrVal c (f (k []) v) (map' f k n) -- | /O(n)/ Updates a value or deletes the element if the result of the updating function is 'Nothing'. mapMaybe :: (a -> Maybe b) -> StringMap a -> StringMap b mapMaybe = mapMaybe' {-# INLINE mapMaybe #-} mapMaybe' :: (a -> Maybe b) -> StringMap a -> StringMap b mapMaybe' f = upd . norm where upd' = mapMaybe' f upd (Branch c' s' n') = branch c' (upd' s') (upd' n') upd Empty = empty upd (Val v' t') = maybe t (flip val t) $ f v' where t = upd' t' upd _ = normError "update'" -- ---------------------------------------- {- not yet used -- | Variant of map that works on normalized trees mapN :: (a -> b) -> StringMap a -> StringMap b mapN f = mapWithKeyN (const f) mapWithKeyN :: (Key -> a -> b) -> StringMap a -> StringMap b mapWithKeyN f = map'' f id map'' :: (Key -> a -> b) -> (Key -> Key) -> StringMap a -> StringMap b map'' f k = mapn . norm where mapn Empty = empty mapn (Val v t) = val (f (k []) v) (map'' f k t) mapn (Branch c s n) = branch c (map'' f ((c :) . k) s) (map'' f k n) mapn _ = normError "map''" -- -} -- ---------------------------------------- -- | Monadic map mapM :: Monad m => (a -> m b) -> StringMap a -> m (StringMap b) mapM f = mapWithKeyM (const f) -- | Monadic mapWithKey mapWithKeyM :: Monad m => (Key -> a -> m b) -> StringMap a -> m (StringMap b) mapWithKeyM f = mapM'' f id mapM'' :: Monad m => (Key -> a -> m b) -> (Key -> Key) -> StringMap a -> m (StringMap b) mapM'' f k = mapn . norm where mapn Empty = return $ empty mapn (Val v t) = do v' <- f (k []) v t' <- mapM'' f k t return $ val v' t' mapn (Branch c s n) = do s' <- mapM'' f ((c :) . k) s n' <- mapM'' f k n return $ branch c s' n' mapn _ = normError "mapM''" -- ---------------------------------------- -- -- A prefix tree visitor data StringMapVisitor a b = PTV { v_empty :: b , v_val :: a -> b -> b , v_branch :: Sym -> b -> b -> b , v_leaf :: a -> b , v_last :: Sym -> b -> b , v_lsseq :: Key1 -> b -> b , v_brseq :: Key1 -> b -> b -> b , v_lssel :: Key1 -> a -> b , v_brsel :: Key1 -> a -> b -> b , v_lsval :: Sym -> a -> b , v_brval :: Sym -> a -> b -> b } visit :: StringMapVisitor a b -> StringMap a -> b visit v (Empty) = v_empty v visit v (Val v' t) = v_val v v' (visit v t) visit v (Branch c s n) = v_branch v c (visit v s) (visit v n) visit v (Leaf v') = v_leaf v v' visit v (Last c s) = v_last v c (visit v s) visit v (LsSeq cs s) = v_lsseq v cs (visit v s) visit v (BrSeq cs s n) = v_brseq v cs (visit v s) (visit v n) visit v (LsSeL cs v') = v_lssel v cs v' visit v (BrSeL cs v' n) = v_brsel v cs v' (visit v n) visit v (LsVal c v') = v_lsval v c v' visit v (BrVal c v' n) = v_brval v c v' (visit v n) -- ---------------------------------------- -- -- | space required by a prefix tree (logically) -- -- Singletons are counted as 0, all other n-ary constructors -- are counted as n+1 (1 for the constructor and 1 for every field) -- cons nodes of char lists are counted 2, 1 for the cons, 1 for the char -- for values only the ref to the value is counted, not the space for the value itself -- key chars are assumed to be unboxed space :: StringMap a -> Int space = visit $ PTV { v_empty = 0 , v_val = const (3 +) , v_branch = const $ \ s n -> 4 + s + n , v_leaf = const 2 , v_last = const (3 +) , v_lsseq = \ cs s -> space1 cs + s , v_brseq = \ cs s n -> 4 + space1 cs + s + n , v_lssel = \ cs _ -> 3 + space1 cs , v_brsel = \ cs _ n -> 4 + space1 cs + n , v_lsval = \ _ _ -> 3 , v_brval = \ _ _ n -> 4 + n } keyChars :: StringMap a -> Int keyChars = visit $ PTV { v_empty = 0 , v_val = \ _ t -> t , v_branch = \ _ s n -> 1 + s + n , v_leaf = \ _ -> 0 , v_last = \ _ s -> 1 + s , v_lsseq = \ cs s -> length1 cs + s , v_brseq = \ cs s n -> length1 cs + s + n , v_lssel = \ cs _ -> length1 cs , v_brsel = \ cs _ n -> length1 cs + n , v_lsval = \ _ _ -> 1 , v_brval = \ _ _ n -> 1 + n } -- ---------------------------------------- -- -- | statistics about the # of different nodes in an optimized prefix tree stat :: StringMap a -> StringMap Int stat = visit $ PTV { v_empty = singleton "empty" 1 , v_val = \ _ t -> singleton "val" 1 `add` t , v_branch = \ _ s n -> singleton "branch" 1 `add` (s `add` n) , v_leaf = \ _ -> singleton "leaf" 1 , v_last = \ _ s -> singleton "last" 1 `add` s , v_lsseq = \ cs s -> singleton (show' "lsseq" cs) 1 `add` singleton "size-key1" (length1 cs) `add` singleton "space-key1" (space1 cs) `add` s , v_brseq = \ cs s n -> singleton (show' "brseq" cs) 1 `add` singleton "size-key1" (length1 cs) `add` singleton "space-key1" (space1 cs) `add` (s `add` n) , v_lssel = \ cs _ -> singleton (show' "lssel" cs) 1 `add` singleton "size-key1" (length1 cs) `add` singleton "space-key1" (space1 cs) , v_brsel = \ cs _ n -> singleton (show' "brseq" cs) 1 `add` singleton "size-key1" (length1 cs) `add` singleton "space-key1" (space1 cs) `add` n , v_lsval = \ _ _ -> singleton "lsval" 1 , v_brval = \ _ _ n -> singleton "brval" 1 `add` n } where add = unionWith (+) show' c k1 = c ++ "-" ++ show (length1 k1) -- ---------------------------------------- -- | /O(n)/ Fold over all key\/value pairs in the map. foldWithKey :: (Key -> a -> b -> b) -> b -> StringMap a -> b foldWithKey f e = fold' f e id {-# INLINE foldWithKey #-} -- | /O(n)/ Fold over all values in the map. fold :: (a -> b -> b) -> b -> StringMap a -> b fold f = foldWithKey $ const f {-# INLINE fold #-} {- not yet used foldTopDown :: (Key -> a -> b -> b) -> b -> (Key -> Key) -> StringMap a -> b foldTopDown f r k0 = fo k0 . norm where fo kf (Branch c' s' n') = let r' = foldTopDown f r ((c' :) . kf) s' in foldTopDown f r' kf n' fo _ (Empty) = r fo kf (Val v' t') = let r' = f (kf []) v' r in foldTopDown f r' kf t' fo _ _ = normError "foldTopDown" -- -} fold' :: (Key -> a -> b -> b) -> b -> (Key -> Key) -> StringMap a -> b fold' f r k0 = fo k0 . norm where fo kf (Branch c' s' n') = let r' = fold' f r kf n' in fold' f r' (kf . (c':)) s' fo _ (Empty) = r fo kf (Val v' t') = let r' = fold' f r kf t' in f (kf []) v' r' fo _ _ = normError "fold'" -- | /O(n)/ Convert into an ordinary map. toMap :: StringMap a -> M.Map Key a toMap = foldWithKey M.insert M.empty -- | /O(n)/ Convert an ordinary map into a Prefix tree fromMap :: M.Map Key a -> StringMap a fromMap = M.foldrWithKey insert empty -- | /O(n)/ Returns all elements as list of key value pairs, toList :: StringMap a -> [(Key, a)] toList = foldWithKey (\k v r -> (k, v) : r) [] -- | /O(n)/ Creates a trie from a list of key\/value pairs. fromList :: [(Key, a)] -> StringMap a fromList = L.foldl' (\p (k, v) -> insert k v p) empty -- | /O(n)/ The number of elements. size :: StringMap a -> Int size = fold (const (+1)) 0 -- | /O(n)/ Returns all values. elems :: StringMap a -> [a] elems = fold (:) [] -- | /O(n)/ Returns all values. keys :: StringMap a -> [Key] keys = foldWithKey (\ k _v r -> k : r) [] -- ---------------------------------------- -- | returns all key-value pairs in breadth first order (short words first) -- this enables prefix search with upper bounds on the size of the result set -- e.g. @ search ... >>> toListBF >>> take 1000 @ will give the 1000 shortest words -- found in the result set and will ignore all long words -- -- toList is derived from the following code found in the net when searching haskell breadth first search -- -- Haskell Standard Libraray Implementation -- -- > br :: Tree a -> [a] -- > br t = map rootLabel $ -- > concat $ -- > takeWhile (not . null) $ -- > iterate (concatMap subForest) [t] toListBF :: StringMap v -> [(Key, v)] toListBF = (\ t0 -> [(id, t0)]) >>> iterate (concatMap (second norm >>> uncurry subForest)) >>> takeWhile (not . L.null) >>> concat >>> concatMap (second norm >>> uncurry rootLabel) rootLabel :: (Key -> Key) -> StringMap v -> [(Key, v)] rootLabel kf (Val v _) = [(kf [], v)] rootLabel _ _ = [] subForest :: (Key -> Key) -> StringMap v -> [(Key -> Key, StringMap v)] subForest kf (Branch c s n) = (kf . (c:), s) : subForest kf (norm n) subForest _ Empty = [] subForest kf (Val _ t) = subForest kf (norm t) subForest _ _ = error "StringMap.Base.subForest: Pattern match failure" -- ---------------------------------------- -- | /O(max(L,R))/ Find all values where the string is a prefix of the key and include the keys -- in the result. The result list contains short words first prefixFindWithKeyBF :: Key -> StringMap a -> [(Key, a)] prefixFindWithKeyBF k = fmap (first (k ++)) . toListBF . lookupPx' k -- ---------------------------------------- instance Functor StringMap where fmap = map instance F.Foldable StringMap where foldr = fold {- for debugging not yet enabled instance Show a => Show (StringMap a) where showsPrec d m = showParen (d > 10) $ showString "fromList " . shows (toList m) -- -} -- ---------------------------------------- instance Read a => Read (StringMap a) where readsPrec p = readParen (p > 10) $ \ r -> do ("fromList",s) <- lex r (xs,t) <- reads s return (fromList xs,t) -- ---------------------------------------- instance NFData a => NFData (StringMap a) where rnf (Empty) = () rnf (Val v t) = rnf v `seq` rnf t rnf (Branch _c s n) = rnf s `seq` rnf n rnf (Leaf v) = rnf v rnf (Last _c s) = rnf s rnf (LsSeq _ks s) = rnf s rnf (BrSeq _ks s n) = rnf s `seq` rnf n rnf (LsSeL _ks v) = rnf v rnf (BrSeL _ks v n) = rnf v `seq` rnf n rnf (LsVal k v) = rnf k `seq` rnf v rnf (BrVal k v n) = rnf k `seq` rnf v `seq` rnf n -- ---------------------------------------- -- -- Provide native binary serialization (not via to-/fromList). instance (Binary a) => Binary (StringMap a) where put (Empty) = put (0::Word8) put (Val v t) = put (1::Word8) >> put v >> put t put (Branch c s n) = put (2::Word8) >> put c >> put s >> put n put (Leaf v) = put (3::Word8) >> put v put (Last c s) = put (4::Word8) >> put c >> put s put (LsSeq k s) = put (5::Word8) >> put (toKey k) >> put s put (BrSeq k s n) = put (6::Word8) >> put (toKey k) >> put s >> put n put (LsSeL k v) = put (7::Word8) >> put (toKey k) >> put v put (BrSeL k v n) = put (8::Word8) >> put (toKey k) >> put v >> put n put (LsVal k v) = put (9::Word8) >> put k >> put v put (BrVal k v n) = put (10::Word8) >> put k >> put v >> put n get = do !tag <- getWord8 case tag of 0 -> return Empty 1 -> do !v <- get !t <- get return $! Val v t 2 -> do !c <- get !s <- get !n <- get return $! Branch c s n 3 -> do !v <- get return $! Leaf v 4 -> do !c <- get !s <- get return $! Last c s 5 -> do !k <- get !s <- get return $! LsSeq (fromKey k) s 6 -> do !k <- get !s <- get !n <- get return $! BrSeq (fromKey k) s n 7 -> do !k <- get !v <- get return $! LsSeL (fromKey k) v 8 -> do !k <- get !v <- get !n <- get return $! BrSeL (fromKey k) v n 9 -> do !k <- get !v <- get return $! LsVal k v 10 -> do !k <- get !v <- get !n <- get return $! BrVal k v n _ -> fail "StringMap.get: error while decoding StringMap" -- ----------------------------------------