module Data.CRF.Chain2.Tiers.Dataset.Codec
( Codec
, CodecM
, obMax
, lbMax
, encodeWord'Cu
, encodeWord'Cn
, encodeSent'Cu
, encodeSent'Cn
, encodeSent
, encodeWordL'Cu
, encodeWordL'Cn
, encodeSentL'Cu
, encodeSentL'Cn
, encodeSentL
, decodeLabel
, decodeLabels
, mkCodec
, encodeData
, encodeDataL
, unJust
) where
import Control.Applicative ((<$>), (<*>), pure)
import Control.Comonad.Trans.Store (store)
import Data.Maybe (catMaybes, fromJust)
import Data.Lens.Common (Lens(..), fstLens)
import qualified Data.Set as S
import qualified Data.Map as M
import qualified Data.Vector as V
import qualified Control.Monad.Codec as C
import Data.CRF.Chain2.Tiers.Dataset.Internal
import Data.CRF.Chain2.Tiers.Dataset.External
type Codec a b =
( C.AtomCodec a
, V.Vector (C.AtomCodec (Maybe b)) )
obMax :: Codec a b -> Ob
obMax =
let idMax m = M.size m 1
in mkOb . idMax . C.to . fst
lbMax :: Codec a b -> [Lb]
lbMax =
let idMax m = M.size m 1
in map (mkLb . idMax . C.to) . V.toList . snd
obLens :: Lens (a, b) a
obLens = fstLens
lbLens :: Int -> Lens (a, V.Vector b) b
lbLens k = Lens $ \(a, b) -> store
(\x -> (a, b V.// [(k, x)]))
(b V.! k)
empty :: Ord b => Int -> Codec a b
empty n =
let withNo = C.execCodec C.empty (C.encode C.idLens Nothing)
in (C.empty, V.replicate n withNo)
type CodecM a b c = C.Codec (Codec a b) c
encodeObU :: Ord a => a -> CodecM a b Ob
encodeObU = fmap mkOb . C.encode' obLens
encodeObN :: Ord a => a -> CodecM a b (Maybe Ob)
encodeObN = fmap (fmap mkOb) . C.maybeEncode obLens
encodeLbU :: Ord b => [b] -> CodecM a b Cb
encodeLbU xs = mkCb <$> sequence
[ mkLb <$> C.encode (lbLens k) (Just x)
| (x, k) <- zip xs [0..] ]
encodeLbN :: Ord b => [b] -> CodecM a b Cb
encodeLbN xs =
let encode lens x = C.maybeEncode lens (Just x) >>= \mx -> case mx of
Just x' -> return x'
Nothing -> fromJust <$> C.maybeEncode lens Nothing
in mkCb <$> sequence
[ mkLb <$> encode (lbLens k) x
| (x, k) <- zip xs [0..] ]
decodeLbC :: Ord b => Cb -> CodecM a b (Maybe [b])
decodeLbC xs = sequence <$> sequence
[ C.decode (lbLens k) (unLb x)
| (x, k) <- zip (unCb xs) [0..] ]
hasLabel :: Ord b => Codec a b -> [b] -> Bool
hasLabel cdc xs = and
[ M.member
(Just x)
(C.to $ snd cdc V.! k)
| (x, k) <- zip xs [0..] ]
encodeWordL'Cu :: (Ord a, Ord b) => WordL a b -> CodecM a b (X, Y)
encodeWordL'Cu (word, choice) = do
x' <- mapM encodeObU (S.toList (obs word))
r' <- mapM encodeLbU (S.toList (lbs word))
let x = mkX x' r'
y <- mkY <$> sequence
[ (,) <$> encodeLbU lb <*> pure pr
| (lb, pr) <- (M.toList . unProb) choice ]
return (x, y)
encodeWordL'Cn :: (Ord a, Ord b) => WordL a b -> CodecM a b (X, Y)
encodeWordL'Cn (word, choice) = do
x' <- catMaybes <$> mapM encodeObN (S.toList (obs word))
r' <- mapM encodeLbN (S.toList (lbs word))
let x = mkX x' r'
y <- mkY <$> sequence
[ (,) <$> encodeLbN lb <*> pure pr
| (lb, pr) <- (M.toList . unProb) choice ]
return (x, y)
encodeWord'Cu :: (Ord a, Ord b) => Word a b -> CodecM a b X
encodeWord'Cu word = do
x' <- mapM encodeObU (S.toList (obs word))
r' <- mapM encodeLbU (S.toList (lbs word))
return $ mkX x' r'
encodeWord'Cn :: (Ord a, Ord b) => Word a b -> CodecM a b X
encodeWord'Cn word = do
x' <- catMaybes <$> mapM encodeObN (S.toList (obs word))
r' <- mapM encodeLbN (S.toList (lbs word))
return $ mkX x' r'
encodeSentL'Cu :: (Ord a, Ord b) => SentL a b -> CodecM a b (Xs, Ys)
encodeSentL'Cu sent = do
ps <- mapM (encodeWordL'Cu) sent
return (V.fromList (map fst ps), V.fromList (map snd ps))
encodeSentL'Cn :: (Ord a, Ord b) => SentL a b -> CodecM a b (Xs, Ys)
encodeSentL'Cn sent = do
ps <- mapM (encodeWordL'Cn) sent
return (V.fromList (map fst ps), V.fromList (map snd ps))
encodeSentL :: (Ord a, Ord b) => Codec a b -> SentL a b -> (Xs, Ys)
encodeSentL codec = C.evalCodec codec . encodeSentL'Cn
encodeSent'Cu :: (Ord a, Ord b) => Sent a b -> CodecM a b Xs
encodeSent'Cu = fmap V.fromList . mapM encodeWord'Cu
encodeSent'Cn :: (Ord a, Ord b) => Sent a b -> CodecM a b Xs
encodeSent'Cn = fmap V.fromList . mapM encodeWord'Cn
encodeSent :: (Ord a, Ord b) => Codec a b -> Sent a b -> Xs
encodeSent codec = C.evalCodec codec . encodeSent'Cn
mkCodec :: (Ord a, Ord b) => Int -> [SentL a b] -> Codec a b
mkCodec n = C.execCodec (empty n) . mapM_ encodeSentL'Cu
encodeDataL :: (Ord a, Ord b) => Codec a b -> [SentL a b] -> [(Xs, Ys)]
encodeDataL = map . encodeSentL
encodeData :: (Ord a, Ord b) => Codec a b -> [Sent a b] -> [Xs]
encodeData = map . encodeSent
decodeLabel :: Ord b => Codec a b -> Cb -> Maybe [b]
decodeLabel codec = C.evalCodec codec . decodeLbC
decodeLabels :: Ord b => Codec a b -> [Cb] -> [Maybe [b]]
decodeLabels codec = C.evalCodec codec . mapM decodeLbC
unJust :: Ord b => Codec a b -> Word a b -> Maybe [b] -> [b]
unJust _ _ (Just x) = x
unJust codec word Nothing = case allUnk of
(x:_) -> x
[] -> error "unJust: Nothing and all values known"
where
allUnk = filter (not . hasLabel codec) (S.toList $ lbs word)