module Data.MultextEastMsd (
Msd,
msd,
PoS (..),
Attribute,
get,
set,
unset,
check,
features,
pos,
(=~=),
toString,
fromString,
validString,
Feature (..),
AType (..),
Aspect (..),
Case (..),
Class (..),
CoordType (..),
CType (..),
Definiteness (..),
Degree (..),
Formation (..),
Gender (..),
MForm (..),
MType (..),
NType (..),
Number (..),
Person (..),
SType (..),
SubType (..),
Tense (..),
VForm (..),
Voice (..),
VType (..)) where
import Data.List (find,sort,delete,findIndex,deleteBy,intersectBy,nubBy)
import Data.Maybe (catMaybes,isJust)
import Control.Monad (liftM)
data Msd = Msd PoS [Feature]
deriving (Show)
instance Eq Msd where
Msd p1 fs1 == Msd p2 fs2 = p1==p2 && sort fs1==sort fs2
data PoS = Noun | Verb | Adjective | Adposition | Conjunction | Numeral
deriving (Eq,Enum,Show)
data Feature
= Animate Bool
| AType AType
| Aspect Aspect
| Case Case
| Class Class
| Clitic Bool
| CliticS Bool
| CoordType CoordType
| Courtesy Bool
| CType CType
| Definiteness Definiteness
| Degree Degree
| Formation Formation
| Gender Gender
| MForm MForm
| MType MType
| Negative Bool
| NType NType
| Number Number
| OwnedNumber Number
| OwnerNumber Number
| OwnerPerson Person
| Person Person
| SType SType
| SubType SubType
| Tense Tense
| VForm VForm
| Voice Voice
| VType VType
deriving (Show,Ord,Eq)
data NType =
Common | Proper
deriving (Eq,Enum,Ord,Show)
data Gender =
Masculine | Feminine | Neuter
deriving (Eq,Enum,Ord,Show)
data Number =
Singular | Plural | Dual | Count | Collective
deriving (Eq,Enum,Ord,Show)
data Case =
Nominative | Genitive | Dative | Accusative | Vocative |
Locative | Instrumental | Direct | Oblique | Partitive |
Illative | Inessive | Elative | Allative | Adessive | Ablative |
Translative | Terminative | Essive | Abessive | Komitative |
Aditive | Temporalis | Causalis | Sublative | Delative |
Sociative | Factive | Superessive | Distributive | EssiveFormal |
Multiplicative
deriving (Eq,Enum,Ord,Show)
data Definiteness =
No | Yes | ShortArt | FullArt | OneSTwoS
deriving (Eq,Enum,Ord,Show)
data VType =
Main | Auxiliary | Modal | Copula | Base
deriving (Eq,Enum,Ord,Show)
data VForm =
Indicative | Subjunctive | Imperative | Conditional |
Infinitive | Participle | Gerund | Supine | Transgressive | Quotative
deriving (Eq,Enum,Ord,Show)
data Tense =
Present | Imperfect | Future | Past | Pluperfect | Aorist
deriving (Eq,Enum,Ord,Show)
data Person =
First | Second | Third
deriving (Eq,Enum,Ord,Show)
data Voice =
Active | Passive
deriving (Eq,Enum,Ord,Show)
data Aspect =
Progressive | Perfective
deriving (Eq,Enum,Ord,Show)
data AType =
Qualificative | Indefinite | Possessive | OrdinalT
deriving (Eq,Enum,Ord,Show)
data Degree =
Positive | Comparative | Superlative | ElativeD | Diminutive
deriving (Eq,Enum,Ord,Show)
data Formation =
Nominal | Simple | Compound
deriving (Eq,Enum,Ord,Show)
data MType =
Cardinal | Ordinal | Fractal | Multiple | Collect | Special
deriving (Eq,Enum,Ord,Show)
data MForm =
Digit | Roman | Letter | Both | MForm_ | Approx
deriving (Eq,Enum,Ord,Show)
data Class =
Definite1 | Definite2 | Definite34 | Definite | Demonstrative |
IndefiniteC | Interrogative | Relative
deriving (Eq,Enum,Ord,Show)
data SType =
Preposition | Postposition
deriving (Eq,Enum,Ord,Show)
data CType =
Coordinating | Subordinating | Portmanteau
deriving (Eq,Enum,Ord,Show)
data CoordType =
CTSimple | CTRepetit | CTCorrelat | CTSentence | CTWords |
Initial | NonInitial
deriving (Eq,Enum,Ord,Show)
data SubType =
STNegative | STPositive
deriving (Eq,Enum,Ord,Show)
eq (Animate _) (Animate _) = True
eq (AType _) (AType _) = True
eq (Aspect _) (Aspect _) = True
eq (Case _) (Case _) = True
eq (Class _) (Class _) = True
eq (Clitic _) (Clitic _) = True
eq (CoordType _) (CoordType _) = True
eq (Courtesy _) (Courtesy _) = True
eq (CType _) (CType _) = True
eq (Definiteness _) (Definiteness _) = True
eq (Degree _) (Degree _) = True
eq (Formation _) (Formation _) = True
eq (Gender _) (Gender _) = True
eq (MForm _) (MForm _) = True
eq (MType _) (MType _) = True
eq (Negative _) (Negative _) = True
eq (NType _) (NType _) = True
eq (Number _) (Number _) = True
eq (OwnedNumber _) (OwnedNumber _) = True
eq (OwnerNumber _) (OwnerNumber _) = True
eq (OwnerPerson _) (OwnerPerson _) = True
eq (Person _) (Person _) = True
eq (SType _) (SType _) = True
eq (SubType _) (SubType _) = True
eq (Tense _) (Tense _) = True
eq (VForm _) (VForm _) = True
eq (Voice _) (Voice _) = True
eq (VType _) (VType _) = True
eq _ _ = False
msd :: PoS -> [Feature] -> Msd
msd p fs = set fs $ Msd p []
class MsdPattern a where
(=~=) :: a -> a -> Bool
infix 4 =~=
instance MsdPattern Msd where
Msd p1 fs1 =~= Msd p2 fs2 =
Msd p1 (intersectBy eq fs1 fs2) == Msd p2 (intersectBy eq fs2 fs1)
instance MsdPattern a => MsdPattern (Maybe a) where
(Just x) =~= (Just y) = x =~= y
_ =~= _ = False
instance MsdPattern a => MsdPattern [a] where
xs =~= ys = and (zipWith (=~=) xs ys) && length xs == length ys
type Attribute a = a -> Feature
x_ :: (Enum a) => a
x_ = toEnum 0
get :: (Enum a) => Attribute a -> Msd -> Maybe Feature
get a (Msd _ fs) = find (`eq` a x_) fs
set :: [Feature] -> Msd -> Msd
set fs2 (Msd p fs1) = Msd p $ nubBy eq fs3
where fs3 = intersectBy eq fs2 (posFeatures p) ++ fs1
unset :: (Enum a) => Attribute a -> Msd -> Msd
unset a (Msd p fs) = Msd p $ deleteBy eq (a x_) fs
check :: [Feature] -> Msd -> Bool
check fs2 (Msd _ fs1) = all (\av -> isJust . find (==av) $ fs1) fs2
features :: Msd -> [Feature]
features (Msd _ fs) = fs
pos :: Msd -> PoS
pos (Msd pos _) = pos
posCodes = "NVASCM"
decodeWith :: (Enum a) => String -> Char -> Maybe a
decodeWith cs c = toEnum `liftM` findIndex (==c) cs
encodeWith :: (Enum a) => String -> a -> Char
encodeWith cs x = cs !! fromEnum x
toString :: Msd -> String
toString (Msd p fs) =
trim $ c : map (\av -> enc $ get av fs) (posFeatures p)
where trim = reverse . dropWhile (=='-') . reverse
enc Nothing = '-'
enc (Just x) = encode x
c = encodeWith posCodes p
get av = find (`eq` av)
fromString :: String -> Maybe Msd
fromString (c:cs) = do
p <- decodeWith posCodes c
let fs = zipWith decode (posFeatures p) cs
if (all isJust fs)
then Just $ Msd p (catMaybes . catMaybes $ fs)
else Nothing
validString :: String -> Bool
validString = isJust . fromString
posFeatures Noun =
[NType x_,Gender x_,Number x_,Case x_,Clitic x_,Definiteness x_,
Animate x_,OwnerNumber x_,OwnerPerson x_,OwnedNumber x_]
posFeatures Verb =
[VType x_,VForm x_,Tense x_,Person x_,Number x_,Gender x_,Voice x_,
Negative x_,Definiteness x_,Clitic x_,Case x_,Animate x_,CliticS x_,
Aspect x_,Courtesy x_]
posFeatures Adjective =
[AType x_,Degree x_,Gender x_,Number x_,Case x_,Definiteness x_,
Clitic x_,Animate x_,Formation x_,OwnerNumber x_,OwnerPerson x_,
OwnedNumber x_]
posFeatures Adposition =
[SType x_,Formation x_,Case x_,Clitic x_]
posFeatures Conjunction =
[CType x_,Formation x_,CoordType x_,SubType x_,Clitic x_,Number x_,
Person x_]
posFeatures Numeral =
[MType x_,Gender x_,Number x_,Case x_,MForm x_,Definiteness x_,
Clitic x_,Class x_,Animate x_,OwnerNumber x_,OwnerPerson x_,
OwnedNumber x_]
encode (NType v) = enc NType v
encode (Gender v) = enc Gender v
encode (Number v) = enc Number v
encode (Case v) = enc Case v
encode (Definiteness v) = enc Definiteness v
encode (Clitic v) = enc Clitic v
encode (CliticS v) = enc CliticS v
encode (VType v) = enc VType v
encode (VForm v) = enc VForm v
encode (Tense v) = enc Tense v
encode (Person v) = enc Person v
encode (AType v) = enc AType v
encode (Voice v) = enc Voice v
encode (Aspect v) = enc Aspect v
encode (Degree v) = enc Degree v
encode (Formation v) = enc Formation v
encode (MType v) = enc MType v
encode (MForm v) = enc MForm v
encode (Class v) = enc Class v
encode (SType v) = enc SType v
encode (CType v) = enc CType v
encode (CoordType v) = enc CoordType v
encode (SubType v) = enc SubType v
encode (Animate v) = enc Animate v
encode (OwnerNumber v) = enc OwnerNumber v
encode (OwnerPerson v) = enc OwnerPerson v
encode (OwnedNumber v) = enc OwnedNumber v
encode (Negative v) = enc Negative v
encode (Courtesy v) = enc Courtesy v
decode (NType _) = dec NType
decode (Gender _) = dec Gender
decode (Number _) = dec Number
decode (Case _) = dec Case
decode (Definiteness _) = dec Definiteness
decode (Clitic _) = dec Clitic
decode (CliticS _) = dec CliticS
decode (VType _) = dec VType
decode (VForm _) = dec VForm
decode (Tense _) = dec Tense
decode (Person _) = dec Person
decode (AType _) = dec AType
decode (Voice _) = dec Voice
decode (Aspect _) = dec Aspect
decode (Degree _) = dec Degree
decode (Formation _) = dec Formation
decode (MType _) = dec MType
decode (MForm _) = dec MForm
decode (Class _) = dec Class
decode (SType _) = dec SType
decode (CType _) = dec CType
decode (CoordType _) = dec CoordType
decode (SubType _) = dec SubType
decode (Animate _) = dec Animate
decode (OwnerNumber _) = dec OwnerNumber
decode (OwnerPerson _) = dec OwnerPerson
decode (OwnedNumber _) = dec OwnedNumber
decode (Negative _) = dec Negative
decode (Courtesy _) = dec Courtesy
enc :: (Enum a) => Attribute a -> a -> Char
enc a v = encodeWith (codes $ a x_) v
dec :: (Enum a) => Attribute a -> Char -> Maybe (Maybe Feature)
dec _ '-' = Just Nothing
dec a c = decodeWith (codes $ a x_) c >>= return . Just . a
codes :: Feature -> String
codes (NType x) = "cp"
codes (Gender x_) = "mfn"
codes (Number x_) = "spdtl"
codes (Case x_) = "ngdavliro1x2et3b49w5k7mcshqypuf6"
codes (Definiteness x_) = "nysf2"
codes (Clitic x_) = "ny"
codes (CliticS x_) = "ny"
codes (VType x_) = "maoc"
codes (VForm x_) = "ismcnpgutq"
codes (Tense x_) = "pifsla"
codes (Person x_) = "123"
codes (AType x_) = "fiso"
codes (Voice x_) = "ap"
codes (Aspect x_) = "pe"
codes (Degree x_) = "pcsed"
codes (Formation x_) = "nsc"
codes (MType x_) = "cofmls"
codes (MForm x_) = "drlbma"
codes (Class x_) = "123fdiqr"
codes (SType x_) = "pt"
codes (CType x_) = "csr"
codes (CoordType x_) = "srcpwin"
codes (SubType x_) = "zp"
codes (Animate x_) = "ny"
codes (Courtesy x_) = "ny"
codes (Negative x_) = "ny"
codes (OwnerNumber x_) = "spdtl"
codes (OwnerPerson x_) = "123"
codes (OwnedNumber x_) = "spdtl"