{-# LANGUAGE CPP, BangPatterns, PatternGuards #-}
{-# LANGUAGE DeriveDataTypeable, ScopedTypeVariables #-}
module Codec.Archive.Tar.Index.IntTrie (
IntTrie,
construct,
toList,
IntTrieBuilder,
empty,
insert,
finalise,
unfinalise,
lookup,
TrieLookup(..),
serialise,
serialiseSize,
deserialise,
#ifdef TESTS
test1, test2, test3,
ValidPaths(..),
prop_lookup,
prop_completions,
prop_lookup_mono,
prop_completions_mono,
prop_construct_toList,
prop_finalise_unfinalise,
prop_serialise_deserialise,
prop_serialiseSize,
#endif
) where
import Prelude hiding (lookup)
import Data.Typeable (Typeable)
import qualified Data.Array.Unboxed as A
import Data.Array.IArray ((!))
import qualified Data.Bits as Bits
import Data.Word (Word32)
import Data.Bits
import Data.Monoid (Monoid(..))
#if (MIN_VERSION_base(4,5,0))
import Data.Monoid ((<>))
#endif
import qualified Data.ByteString as BS
import qualified Data.ByteString.Lazy as LBS
import qualified Data.ByteString.Unsafe as BS
#if MIN_VERSION_bytestring(0,10,2) || defined(MIN_VERSION_bytestring_builder)
import Data.ByteString.Builder as BS
#else
import Data.ByteString.Lazy.Builder as BS
#endif
import Control.Exception (assert)
#if MIN_VERSION_containers(0,5,0)
import qualified Data.Map.Strict as Map
import qualified Data.IntMap.Strict as IntMap
import Data.IntMap.Strict (IntMap)
#else
import qualified Data.Map as Map
import qualified Data.IntMap as IntMap
import Data.IntMap (IntMap)
#endif
import Data.List hiding (lookup, insert)
import Data.Function (on)
#ifdef TESTS
import Test.QuickCheck
import Control.Applicative ((<$>), (<*>))
#endif
newtype IntTrie k v = IntTrie (A.UArray Word32 Word32)
deriving (Eq, Show, Typeable)
#ifdef TESTS
example0 :: [(FilePath, Int)]
example0 =
[("foo-1.0/foo-1.0.cabal", 512)
,("foo-1.0/LICENSE", 2048)
,("foo-1.0/Data/Foo.hs", 4096)]
example1 :: [([Word32], Word32)]
example1 =
[([1,2], 512)
,([1,3], 2048)
,([1,4,5], 4096)]
mktrie :: [(Int, TrieNode k v)] -> IntTrieBuilder k v
mkleaf :: (Enum k, Enum v) => k -> v -> (Int, TrieNode k v)
mknode :: Enum k => k -> IntTrieBuilder k v -> (Int, TrieNode k v)
mktrie = IntTrieBuilder . IntMap.fromList
mkleaf k v = (fromEnum k, TrieLeaf (enumToWord32 v))
mknode k t = (fromEnum k, TrieNode t)
example2 :: IntTrieBuilder Word32 Word32
example2 = mktrie [ mknode 1 t1 ]
where
t1 = mktrie [ mkleaf 2 512, mkleaf 3 2048, mknode 4 t2 ]
t2 = mktrie [ mkleaf 5 4096 ]
example2' :: IntTrieBuilder Word32 Word32
example2' = mktrie [ mknode 0 t1 ]
where
t1 = mktrie [ mknode 3 t2 ]
t2 = mktrie [ mknode 1 t3, mknode 2 t4 ]
t3 = mktrie [ mkleaf 4 10608 ]
t4 = mktrie [ mkleaf 4 10612 ]
example2'' :: IntTrieBuilder Word32 Word32
example2'' = mktrie [ mknode 1 t1, mknode 2 t2 ]
where
t1 = mktrie [ mkleaf 4 10608 ]
t2 = mktrie [ mkleaf 4 10612 ]
example2''' :: IntTrieBuilder Word32 Word32
example2''' = mktrie [ mknode 0 t3 ]
where
t3 = mktrie [ mknode 4 t8, mknode 6 t11 ]
t8 = mktrie [ mknode 1 t14 ]
t11 = mktrie [ mkleaf 5 10605 ]
t14 = mktrie [ mknode 2 t19, mknode 3 t22 ]
t19 = mktrie [ mkleaf 7 10608 ]
t22 = mktrie [ mkleaf 7 10612 ]
test1 = example2 == inserts example1 empty
#endif
tagLeaf, tagNode, untag :: Word32 -> Word32
tagLeaf = id
tagNode = flip Bits.setBit 31
untag = flip Bits.clearBit 31
isNode :: Word32 -> Bool
isNode = flip Bits.testBit 31
#ifdef TESTS
example3 :: [Word32]
example3 =
[1, tagNode 1,
3,
3, tagLeaf 2, tagLeaf 3, tagNode 4,
512, 2048, 10,
1, tagLeaf 5,
4096
]
test2 = example3 == flattenTrie example2
example4 :: IntTrie Int Int
example4 = IntTrie (mkArray example3)
mkArray :: [Word32] -> A.UArray Word32 Word32
mkArray xs = A.listArray (0, fromIntegral (length xs) - 1) xs
test3 = case lookup example4 [1] of
Just (Completions [(2,_),(3,_),(4,_)]) -> True
_ -> False
test1, test2, test3 :: Bool
#endif
completionsFrom :: (Enum k, Enum v) => IntTrie k v -> Word32 -> Completions k v
completionsFrom trie@(IntTrie arr) nodeOff =
[ (word32ToEnum (untag key), next)
| keyOff <- [keysStart..keysEnd]
, let key = arr ! keyOff
entry = arr ! (keyOff + nodeSize)
next | isNode key = Completions (completionsFrom trie entry)
| otherwise = Entry (word32ToEnum entry)
]
where
nodeSize = arr ! nodeOff
keysStart = nodeOff + 1
keysEnd = nodeOff + nodeSize
toList :: forall k v. (Enum k, Enum v) => IntTrie k v -> [([k], v)]
toList = concatMap (aux []) . (`completionsFrom` 0)
where
aux :: [k] -> (k, TrieLookup k v) -> [([k], v)]
aux ks (k, Entry v) = [(reverse (k:ks), v)]
aux ks (k, Completions cs) = concatMap (aux (k:ks)) cs
construct :: (Enum k, Enum v) => [([k], v)] -> IntTrie k v
construct = finalise . flip inserts empty
data TrieLookup k v = Entry !v | Completions (Completions k v) deriving Show
type Completions k v = [(k, TrieLookup k v)]
lookup :: forall k v. (Enum k, Enum v) => IntTrie k v -> [k] -> Maybe (TrieLookup k v)
lookup trie@(IntTrie arr) = go 0
where
go :: Word32 -> [k] -> Maybe (TrieLookup k v)
go nodeOff [] = Just (completions nodeOff)
go nodeOff (k:ks) = case search nodeOff (tagLeaf k') of
Just entryOff
| null ks -> Just (entry entryOff)
| otherwise -> Nothing
Nothing -> case search nodeOff (tagNode k') of
Nothing -> Nothing
Just entryOff -> go (arr ! entryOff) ks
where
k' = enumToWord32 k
entry entryOff = Entry (word32ToEnum (arr ! entryOff))
completions nodeOff = Completions (completionsFrom trie nodeOff)
search :: Word32 -> Word32 -> Maybe Word32
search nodeOff key = fmap (+nodeSize) (bsearch keysStart keysEnd key)
where
nodeSize = arr ! nodeOff
keysStart = nodeOff + 1
keysEnd = nodeOff + nodeSize
bsearch :: Word32 -> Word32 -> Word32 -> Maybe Word32
bsearch a b key
| a > b = Nothing
| otherwise = case compare key (arr ! mid) of
LT -> bsearch a (mid-1) key
EQ -> Just mid
GT -> bsearch (mid+1) b key
where mid = (a + b) `div` 2
enumToWord32 :: Enum n => n -> Word32
enumToWord32 = fromIntegral . fromEnum
word32ToEnum :: Enum n => Word32 -> n
word32ToEnum = toEnum . fromIntegral
newtype IntTrieBuilder k v = IntTrieBuilder (IntMap (TrieNode k v))
deriving (Show, Eq)
data TrieNode k v = TrieLeaf {-# UNPACK #-} !Word32
| TrieNode !(IntTrieBuilder k v)
deriving (Show, Eq)
empty :: IntTrieBuilder k v
empty = IntTrieBuilder IntMap.empty
insert :: (Enum k, Enum v) => [k] -> v
-> IntTrieBuilder k v -> IntTrieBuilder k v
insert [] _v t = t
insert (k:ks) v t = insertTrie (fromEnum k) (map fromEnum ks) (enumToWord32 v) t
insertTrie :: Int -> [Int] -> Word32
-> IntTrieBuilder k v -> IntTrieBuilder k v
insertTrie k ks v (IntTrieBuilder t) =
IntTrieBuilder $
IntMap.alter (\t' -> Just $! maybe (freshTrieNode ks v)
(insertTrieNode ks v) t')
k t
insertTrieNode :: [Int] -> Word32 -> TrieNode k v -> TrieNode k v
insertTrieNode [] v _ = TrieLeaf v
insertTrieNode (k:ks) v (TrieLeaf _) = TrieNode (freshTrie k ks v)
insertTrieNode (k:ks) v (TrieNode t) = TrieNode (insertTrie k ks v t)
freshTrie :: Int -> [Int] -> Word32 -> IntTrieBuilder k v
freshTrie k [] v =
IntTrieBuilder (IntMap.singleton k (TrieLeaf v))
freshTrie k (k':ks) v =
IntTrieBuilder (IntMap.singleton k (TrieNode (freshTrie k' ks v)))
freshTrieNode :: [Int] -> Word32 -> TrieNode k v
freshTrieNode [] v = TrieLeaf v
freshTrieNode (k:ks) v = TrieNode (freshTrie k ks v)
inserts :: (Enum k, Enum v) => [([k], v)]
-> IntTrieBuilder k v -> IntTrieBuilder k v
inserts kvs t = foldl' (\t' (ks, v) -> insert ks v t') t kvs
finalise :: IntTrieBuilder k v -> IntTrie k v
finalise trie =
IntTrie $
A.listArray (0, fromIntegral (flatTrieLength trie) - 1)
(flattenTrie trie)
unfinalise :: (Enum k, Enum v) => IntTrie k v -> IntTrieBuilder k v
unfinalise trie =
go (completionsFrom trie 0)
where
go kns =
IntTrieBuilder $
IntMap.fromList
[ (fromEnum k, t)
| (k, n) <- kns
, let t = case n of
Entry v -> TrieLeaf (enumToWord32 v)
Completions kns' -> TrieNode (go kns')
]
type Offset = Int
flatTrieLength :: IntTrieBuilder k v -> Int
flatTrieLength (IntTrieBuilder tns) =
1
+ 2 * IntMap.size tns
+ sum [ flatTrieLength n | TrieNode n <- IntMap.elems tns ]
flattenTrie :: IntTrieBuilder k v -> [Word32]
flattenTrie trie = go (queue [trie]) (size trie)
where
size (IntTrieBuilder tns) = 1 + 2 * IntMap.size tns
go :: Q (IntTrieBuilder k v) -> Offset -> [Word32]
go todo !offset =
case dequeue todo of
Nothing -> []
Just (IntTrieBuilder tnodes, tries) ->
flat ++ go tries' offset'
where
!count = IntMap.size tnodes
flat = fromIntegral count
: Map.keys keysValues
++ Map.elems keysValues
(!offset', !keysValues, !tries') =
#if MIN_VERSION_containers(0,4,2)
IntMap.foldlWithKey' accumNodes
(offset, Map.empty, tries)
tnodes
#else
foldl' (\a (k,v) -> accumNodes a k v)
(offset, Map.empty, tries)
(IntMap.toList tnodes)
#endif
accumNodes :: (Offset, Map.Map Word32 Word32, Q (IntTrieBuilder k v))
-> Int -> TrieNode k v
-> (Offset, Map.Map Word32 Word32, Q (IntTrieBuilder k v))
accumNodes (!off, !kvs, !tries) !k (TrieLeaf v) =
(off, kvs', tries)
where
kvs' = Map.insert (tagLeaf (int2Word32 k)) v kvs
accumNodes (!off, !kvs, !tries) !k (TrieNode t) =
(off + size t, kvs', tries')
where
kvs' = Map.insert (tagNode (int2Word32 k)) (int2Word32 off) kvs
tries' = enqueue tries t
data Q a = Q [a] [a]
queue :: [a] -> Q a
queue xs = Q xs []
enqueue :: Q a -> a -> Q a
enqueue (Q front back) x = Q front (x : back)
dequeue :: Q a -> Maybe (a, Q a)
dequeue (Q (x:xs) back) = Just (x, Q xs back)
dequeue (Q [] back) = case reverse back of
x:xs -> Just (x, Q xs [])
[] -> Nothing
int2Word32 :: Int -> Word32
int2Word32 = fromIntegral
serialise :: IntTrie k v -> BS.Builder
serialise (IntTrie arr) =
let (_, !ixEnd) = A.bounds arr in
BS.word32BE (ixEnd+1)
<> foldr (\n r -> BS.word32BE n <> r) mempty (A.elems arr)
serialiseSize :: IntTrie k v -> Int
serialiseSize (IntTrie arr) =
let (_, ixEnd) = A.bounds arr in
4
+ 4 * (fromIntegral ixEnd + 1)
deserialise :: BS.ByteString -> Maybe (IntTrie k v, BS.ByteString)
deserialise bs
| BS.length bs >= 4
, let lenArr = readWord32BE bs 0
lenTotal = 4 + 4 * fromIntegral lenArr
, BS.length bs >= 4 + 4 * fromIntegral lenArr
, let !arr = A.array (0, lenArr-1)
[ (i, readWord32BE bs off)
| (i, off) <- zip [0..lenArr-1] [4,8 .. lenTotal - 4] ]
!bs' = BS.drop lenTotal bs
= Just (IntTrie arr, bs')
| otherwise
= Nothing
readWord32BE :: BS.ByteString -> Int -> Word32
readWord32BE bs i =
assert (i >= 0 && i+3 <= BS.length bs - 1) $
fromIntegral (BS.unsafeIndex bs (i + 0)) `shiftL` 24
+ fromIntegral (BS.unsafeIndex bs (i + 1)) `shiftL` 16
+ fromIntegral (BS.unsafeIndex bs (i + 2)) `shiftL` 8
+ fromIntegral (BS.unsafeIndex bs (i + 3))
#ifdef TESTS
prop_lookup :: (Ord k, Enum k, Eq v, Enum v, Show k, Show v)
=> [([k], v)] -> Bool
prop_lookup paths =
flip all paths $ \(key, value) ->
case lookup trie key of
Just (Entry value') | value' == value -> True
Just (Entry value') -> error $ "IntTrie: " ++ show (key, value, value')
Nothing -> error $ "IntTrie: didn't find " ++ show key
Just (Completions xs) -> error $ "IntTrie: " ++ show xs
where
trie = construct paths
prop_completions :: forall k v. (Ord k, Enum k, Eq v, Enum v) => [([k], v)] -> Bool
prop_completions paths =
inserts paths empty
== convertCompletions (completionsFrom (construct paths) 0)
where
convertCompletions :: Ord k => Completions k v -> IntTrieBuilder k v
convertCompletions kls =
IntTrieBuilder $
IntMap.fromList
[ case l of
Entry v -> mkleaf k v
Completions kls' -> mknode k (convertCompletions kls')
| (k, l) <- sortBy (compare `on` fst) kls ]
prop_lookup_mono :: ValidPaths -> Bool
prop_lookup_mono (ValidPaths paths) = prop_lookup paths
prop_completions_mono :: ValidPaths -> Bool
prop_completions_mono (ValidPaths paths) = prop_completions paths
prop_construct_toList :: ValidPaths -> Bool
prop_construct_toList (ValidPaths paths) =
sortBy (compare `on` fst) (toList (construct paths))
== sortBy (compare `on` fst) paths
prop_finalise_unfinalise :: ValidPaths -> Bool
prop_finalise_unfinalise (ValidPaths paths) =
builder == unfinalise (finalise builder)
where
builder :: IntTrieBuilder Char Char
builder = inserts paths empty
prop_serialise_deserialise :: ValidPaths -> Bool
prop_serialise_deserialise (ValidPaths paths) =
Just (trie, BS.empty) == (deserialise
. toStrict . BS.toLazyByteString
. serialise) trie
where
trie :: IntTrie Char Char
trie = construct paths
prop_serialiseSize :: ValidPaths -> Bool
prop_serialiseSize (ValidPaths paths) =
(fromIntegral . LBS.length . BS.toLazyByteString . serialise) trie
== serialiseSize trie
where
trie :: IntTrie Char Char
trie = construct paths
newtype ValidPaths = ValidPaths [([Char], Char)] deriving Show
instance Arbitrary ValidPaths where
arbitrary =
ValidPaths . makeNoPrefix <$> listOf ((,) <$> listOf1 arbitrary <*> arbitrary)
where
makeNoPrefix [] = []
makeNoPrefix ((k,v):kvs)
| all (\(k', _) -> not (isPrefixOfOther k k')) kvs
= (k,v) : makeNoPrefix kvs
| otherwise = makeNoPrefix kvs
shrink (ValidPaths kvs) =
map ValidPaths . filter noPrefix . filter nonEmpty . shrink $ kvs
where
noPrefix [] = True
noPrefix ((k,_):kvs') = all (\(k', _) -> not (isPrefixOfOther k k')) kvs'
&& noPrefix kvs'
nonEmpty = all (not . null . fst)
isPrefixOfOther a b = a `isPrefixOf` b || b `isPrefixOf` a
toStrict :: LBS.ByteString -> BS.ByteString
#if MIN_VERSION_bytestring(0,10,0)
toStrict = LBS.toStrict
#else
toStrict = BS.concat . LBS.toChunks
#endif
#endif
#if !(MIN_VERSION_base(4,5,0))
(<>) :: Monoid m => m -> m -> m
(<>) = mappend
#endif