# Croatian MULTEXT-East v4 tagset definition.
#
# MSDs for Croatian are a single entity (ex. Ncmsnn).
# For concraft the MSD string must be intercalated with ':' (ex. N:c:m:s:n:n).
#
# Notation used in this file:
#   [attribute] <- means the attribute is optional (ex. Md - numeral tag)
#   the digit 9 represents the '-' (dash)
#
# Attributes in need of a 9:
#   person, number, gender, number1, gender1, pclitic, preferenttype
# NOTE(review): adefiniteness below also lists 9 although it is not named
# in the list above -- confirm which of the two is intended.
#
# Haskell (GHCi) snippet, presumably the one used to produce the two
# listings below -- TODO confirm:
#   f <- read `fmap` readFile "full-dictionary.txt" :: IO (M.Map String (S.Set String))
#   let train = S.toList . S.fromList . concat . map (S.toList snd) $ M.toList f
#   let f = S.fromList . concat . map (L.findIndices (=='-')) . filter (elem '-')
#   zip poss $ map f . map (\x -> filter ((==x).head) train) $ poss
#
# Appearance of '-' in the corpus (unknown parameter):
#   ('N',[gender])
#   ('V',[person,gender])
#   ('A',[])
#   ('P',[person,gender,number,number1,gender1,pclitic,preferenttype])
#   ('R',[])
#   ('S',[])
#   ('C',[])
#   ('M',[gender])
#   ('Q',[])
#   ('Y',[])
#   ('X',[])
#   ('Z',[])
#
# Appearance of '-' in construction for MA:
#   ('N',fromList [])
#   ('V',fromList [person,gender])
#   ('A',fromList [adefiniteness])
#   ('P',fromList [person,gender,number,number1,gender1,preferenttype]) -- !!! source of eval errors
#   ('R',fromList [])
#   ('S',fromList [])
#   ('C',fromList [])
#   ('M',fromList [gender])
#   ('Q',fromList [])
#   ('Y',fromList [])
#   ('X',fromList [])
#   ('Z',fromList [])

[ATTR]
# Attributes which appear in MSDs follow.
# Each line has the form: attribute name = its allowed one-character values.

# Global attributes (referenced by the rules of more than one category)
case = n g d a v l i
gender = m f n 9
animate = n y
number = s p 9
person = 1 2 3 9
degree = p c s
number1 = s p 9
gender1 = m f n 9

# Noun attributes (N)
ntype = c p

# Verb attributes (V)
vtype = m a c
vform = n p r f m a e
vnegative = n y

# Adjective attributes (A)
atype = g s p
adefiniteness = n y 9

# Pronoun attributes (P)
ptype = p d i s q r x
pclitic = n y 9
preferenttype = p s 9
psyntactictype = n a

# Adverb attributes (R)
rtype = g r

# Adposition attributes (S)
# The dedicated attribute is left disabled; the S rule below uses the
# global `case` attribute instead.
# scase = g d a l i

# Conjunction attributes (C)
ctype = c s
cformation = s c

# Numeral attributes (M)
mform = d r l
mtype = c o m s

# Particle attributes (Q)
qtype = z q o r

# Interjection attributes (I)
# none

# Abbreviation attributes (Y)
# none

# Residual attributes (X)
xtype = f t p

# Special characters attributes (Z)
# none

[RULE]
# Parsing rules definitions.
# One rule per category letter; the right-hand side lists that category's
# attributes in positional order, with [name] marking an optional attribute.
# An empty right-hand side means the category has no attributes.
# Category letters: N noun, V verb, A adjective, P pronoun, R adverb,
# S adposition, C conjunction, M numeral, Q particle, I interjection,
# Y abbreviation, X residual, Z special characters.
N = ntype gender number case [animate]
V = vtype vform [person] [number] [gender] [vnegative]
A = atype degree gender number case [adefiniteness] [animate]
P = ptype person gender number case number1 gender1 pclitic preferenttype psyntactictype [animate]
R = rtype [degree]
S = case
C = ctype [cformation]
M = mform [mtype] [gender] [number] [case] [animate]
Q = qtype
I =
Y =
X = [xtype]
Z =