module NLP.Adict.DAWG
( DAWGD
, DAWG (..)
, fromTrie
, fromDAWG
, size
, row
, Row (..)
, entry
, charOn
, valueIn
, edges
, edgeOn
, serialize
, deserialize
) where
import Control.Applicative ((<$>))
import Data.Maybe (listToMaybe)
import Data.Binary (Binary, get, put)
import qualified Data.Vector as V
import NLP.Adict.DAWG.Node
import qualified NLP.Adict.Trie as Trie
type DAWGD a b = DAWG a (Maybe b)
data DAWG a b = DAWG
{ root :: Int
, array :: V.Vector (Row a b)
}
fromTrie :: (Ord a, Ord b) => Trie.Trie a b -> DAWG a b
fromTrie = deserialize . Trie.serialize
fromDAWG :: Ord a => DAWG a b -> Trie.Trie a b
fromDAWG = Trie.deserialize . serialize
size :: DAWG a b -> Int
size = V.length . array
row :: DAWG a b -> Int -> Row a b
row dag k = array dag V.! k
data Row a b = Row {
rowValue :: b,
rowEdges :: V.Vector (a, Int)
}
valueIn :: DAWG a b -> Int -> b
valueIn dag k = rowValue (array dag V.! k)
edges :: DAWG a b -> Int -> [(a, Int)]
edges dag k = V.toList . rowEdges $ row dag k
edgeOn :: Eq a => DAWG a b -> Int -> a -> Maybe Int
edgeOn DAWG{..} k x =
let r = array V.! k
in snd <$> V.find ((x==).fst) (rowEdges r)
entry :: DAWG a (Maybe b) -> [Int] -> Maybe ([a], b)
entry dag xs = do
x <- mapM (charOn dag) (zip (root dag:xs) xs)
r <- maybeLast xs >>= valueIn dag
return (x, r)
where
maybeLast [] = Nothing
maybeLast ys = Just $ last ys
charOn :: DAWG a b -> (Int, Int) -> Maybe a
charOn dag (root, x) = listToMaybe
[c | (c, y) <- edges dag root, x == y]
serialize :: Ord a => DAWG a b -> [Node a b]
serialize = map unRow . V.toList . array
deserialize :: Ord a => [Node a b] -> DAWG a b
deserialize xs =
let arr = V.fromList $ map mkRow xs
in DAWG (V.length arr 1) arr
unRow :: Ord a => Row a b -> Node a b
unRow Row{..} = mkNode rowValue (V.toList rowEdges)
mkRow :: Ord a => Node a b -> Row a b
mkRow n = Row (nodeValue n) (V.fromList $ nodeEdges n)
instance (Ord a, Binary a, Binary b) => Binary (DAWG a b) where
put = put . serialize
get = deserialize <$> get