module NLP.Partage.Gen
( Gram
, generateAll
, generateRand
, GenConf (..)
) where
import Control.Applicative ((<$>), (<*>))
import qualified Control.Monad.State.Strict as E
import Control.Monad.Trans.Maybe (MaybeT (..))
import Pipes
import qualified Pipes.Prelude as Pipes
import System.Random (randomRIO)
import qualified Data.Foldable as F
import Data.Maybe (maybeToList)
import qualified Data.Set as S
import qualified Data.Map.Strict as M
import qualified Data.PSQueue as Q
import Data.PSQueue (Binding(..))
import qualified Data.Tree as R
import NLP.Partage.Tree.Other
deriving instance (Ord n, Ord t) => (Ord (Tree n t))
type Gram n t = S.Set (Tree n t)
treeSize :: Tree n t -> Int
treeSize = length . R.flatten
type DoneMap n t = M.Map Int (S.Set (Tree n t))
data GenST n t = GenST {
waiting :: Q.PSQ (Tree n t) Int
, doneFinal :: DoneMap n t
, doneActive :: DoneMap n t
}
newGenST :: (Ord n, Ord t) => Gram n t -> GenST n t
newGenST gramSet = GenST {
waiting = Q.fromList
[ t :-> treeSize t
| t <- S.toList gramSet ]
, doneFinal = M.empty
, doneActive = M.empty }
pop
:: (E.MonadState (GenST n t) m, Ord n, Ord t)
=> ListT m (Tree n t)
pop = do
mayTree <- E.state $ \s@GenST{..} -> case Q.minView waiting of
Nothing -> (Nothing, s)
Just (t :-> _, q) -> (Just t, s {waiting=q})
some $ maybeToList mayTree
push :: (E.MonadState (GenST n t) m, Ord n, Ord t) => Tree n t -> m ()
push t = E.modify $ \s -> s
{waiting = Q.insert t (treeSize t) (waiting s)}
save :: (E.MonadState (GenST n t) m, Ord n, Ord t) => Tree n t -> m ()
save t = if isFinal t
then E.modify $ \s -> s
{ doneFinal = M.insertWith S.union
(treeSize t) (S.singleton t) (doneFinal s) }
else E.modify $ \s -> s
{ doneActive = M.insertWith S.union
(treeSize t) (S.singleton t) (doneActive s) }
visited
:: (E.MonadState (GenST n t) m, Ord n, Ord t)
=> Tree n t -> m Bool
visited t = if isFinal t
then isVisited doneFinal
else isVisited doneActive
where
isVisited doneMap = do
done <- E.gets doneMap
return $ case M.lookup (treeSize t) done of
Just ts -> S.member t ts
Nothing -> False
visitedWith
:: (E.MonadState (GenST n t) m, Ord n, Ord t)
=> (GenST n t -> DoneMap n t)
-> (Int -> Bool)
-> ListT m (Tree n t)
visitedWith doneMap cond = do
done <- E.gets doneMap
some [ t
| (k, treeSet) <- M.toList done
, cond k, t <- S.toList treeSet ]
data GenConf = GenConf {
genAllSize :: Int
, adjProb :: Double
} deriving (Show, Eq, Ord)
generateRand
:: (MonadIO m, Ord n, Ord t)
=> Gram n t
-> GenConf
-> Producer (Tree n t) m ()
generateRand gramSet cfg = E.forever $ do
finalSet <- collect basePipe
mayTree <- drawTree gramSet finalSet cfg
F.forM_ mayTree yield
where
basePipe = generateAll gramSet (genAllSize cfg)
>-> Pipes.filter isFinal
drawTree
:: (MonadIO m, Ord n, Ord t)
=> Gram n t
-> Gram n t
-> GenConf
-> m (Maybe (Tree n t))
drawTree gramSet finalSet GenConf{..} = runMaybeT $ do
t0 <- drawFrom $ limitTo isInitial gramSet
modify t0
where
modify t@(R.Node (Term _) []) =
return t
modify (R.Node (NonTerm x) []) =
let cond = (&&) <$> hasRoot x <*> isInitial
in drawFrom (limitTo cond finalSet)
modify (R.Node (NonTerm x) xs0) = do
xs <- mapM modify xs0
let t = R.Node (NonTerm x) xs
lottery adjProb (return t) $ do
let cond = (&&) <$> hasRoot x <*> isAuxiliary
auxTree <- drawFrom $ limitTo cond finalSet
return $ replaceFoot t auxTree
modify _ = error "drawTree.modify: unhandled node type"
drawFrom s = do
E.guard $ S.size s > 0
i <- liftIO $ randomRIO (0, S.size s 1)
return $ S.toList s !! i
limitTo f = S.fromList . filter f . S.toList
type Gen m n t = E.StateT (GenST n t) (Producer (Tree n t) m) ()
generateAll
:: (MonadIO m, Ord n, Ord t)
=> Gram n t -> Int -> Producer (Tree n t) m ()
generateAll gram0 sizeMax =
E.evalStateT
(genPipe sizeMax)
(newGenST gram0)
genPipe :: (MonadIO m, Ord n, Ord t) => Int -> Gen m n t
genPipe sizeMax = runListT $ do
t <- pop
lift $ do
genStep sizeMax t
genPipe sizeMax
genStep
:: (MonadIO m, Ord n, Ord t)
=> Int
-> Tree n t
-> Gen m n t
genStep sizeMax t = runListT $ do
E.guard . not =<< visited t
save t
lift . lift $ yield t
let doneMap = if isFinal t
then doneActive
else doneFinal
u <- visitedWith doneMap $
let n = treeSize t
in \k -> k + n <= sizeMax + 1
let combine x y = some $
inject x y ++
inject y x
v <- combine t u
E.guard $ treeSize v <= sizeMax
push v
inject :: (Eq n, Eq t) => Tree n t -> Tree n t -> [Tree n t]
inject s t = if isAuxiliary s
then adjoin s t
else subst s t
adjoin :: (Eq n, Eq t) => Tree n t -> Tree n t -> [Tree n t]
adjoin _ (R.Node (NonTerm _) []) = []
adjoin s (R.Node n ts) =
here ++ below
where
here = [replaceFoot (R.Node n ts) s | R.rootLabel s == n]
below = map (R.Node n) (doit ts)
doit [] = []
doit (x:xs) =
[u : xs | u <- adjoin s x] ++
[x : us | us <- doit xs]
replaceFoot :: Tree n t -> Tree n t -> Tree n t
replaceFoot t (R.Node (Foot _) []) = t
replaceFoot t (R.Node x xs) = R.Node x $ map (replaceFoot t) xs
subst :: (Eq n, Eq t) => Tree n t -> Tree n t -> [Tree n t]
subst s = take 1 . _subst s
_subst :: (Eq n, Eq t) => Tree n t -> Tree n t -> [Tree n t]
_subst s (R.Node n []) =
[s | R.rootLabel s == n]
_subst s (R.Node n ts) =
map (R.Node n) (doit ts)
where
doit [] = []
doit (x:xs) =
[u : xs | u <- subst s x] ++
[x : us | us <- doit xs]
some :: Monad m => [a] -> ListT m a
some = Select . each
collect :: (Monad m, Ord a) => Producer a m () -> m (S.Set a)
collect inputPipe =
flip E.execStateT S.empty
$ runEffect
$ hoist lift inputPipe >-> collectPipe
where
collectPipe = E.forever $ do
x <- await
lift . E.modify $ S.insert x
lottery :: (MonadIO m, MonadPlus m) => Double -> m a -> m a -> m a
lottery probMax mx my = do
p <- liftIO $ randomRIO (0, 1)
if p > probMax
then mx
else my