module Data.Tagset.Positional
( Tagset (..)
, Attr
, POS
, Optional
, domain
, rule
, Tag (..)
, expand
, tagSim
) where
import Control.Arrow (first)
import qualified Data.Text as T
import qualified Data.Map as M
import qualified Data.Set as S
type Attr = T.Text
type POS = T.Text
type Optional = Bool
data Tagset = Tagset
{ domains :: M.Map Attr (S.Set T.Text)
, rules :: M.Map POS [(Attr, Optional)]
} deriving (Show)
domain :: Tagset -> Attr -> S.Set T.Text
domain Tagset{..} x =
case x `M.lookup` domains of
Just y -> y
Nothing -> error $ "domain: unknown attribute " ++ T.unpack x
rule :: Tagset -> POS -> [(Attr, Optional)]
rule Tagset{..} x =
case x `M.lookup` rules of
Just y -> y
Nothing -> error $ "rule: unknown POS " ++ T.unpack x
data Tag = Tag
{ pos :: POS
, atts :: M.Map Attr T.Text
} deriving (Show, Read, Eq, Ord)
expand :: Tagset -> Tag -> [Tag]
expand tagset tag = do
values <- sequence (map attrVal rl)
let attrMap = M.fromList $ zip (map fst rl) values
return $ Tag (pos tag) attrMap
where
rl = rule tagset (pos tag)
attrVal (attr, False) = [atts tag M.! attr]
attrVal (attr, True)
| Just x <- M.lookup attr (atts tag) = [x]
| otherwise = S.toList $ domain tagset attr
tagSim :: Tag -> Tag -> Int
tagSim t t' =
S.size (xs `S.intersection` xs')
where
xs = S.fromList $ (Nothing, pos t) : assocs t
xs' = S.fromList $ (Nothing, pos t') : assocs t'
assocs = map (first Just) . M.assocs . atts