% GenI surface realiser
% Copyright (C) 2005 Carlos Areces and Eric Kow
%
% This program is free software; you can redistribute it and/or
% modify it under the terms of the GNU General Public License
% as published by the Free Software Foundation; either version 2
% of the License, or (at your option) any later version.
%
% This program is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with this program; if not, write to the Free Software
% Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
\chapter{Morphology}
\label{cha:Morphology}
This module handles almost everything to do with morphology in GenI.
There are two basic tasks: morphological input and output.
GenI farms out morphology to whatever third party program you
specify on the command line. Note that a deliberately simple-minded
``sillymorph'' realiser is provided either in the GenI repository
or on Hackage.
\begin{code}
module NLP.GenI.Morphology
(
MorphFn
, LemmaPlus(..), LemmaPlusSentence
, readMorph, stripMorphSem, attachMorph, setMorphAnchor
, inflectSentencesUsingCmd, sansMorph
) where
\end{code}
\ignore{
\begin{code}
import Data.Maybe (isNothing)
import Data.Tree
import qualified Data.Map as Map
import System.IO
import System.Process
import Text.JSON
import Text.JSON.Pretty
import NLP.GenI.Btypes
import NLP.GenI.General
import NLP.GenI.Tags
import NLP.GenI.Builder
\end{code}
}
\begin{code}
-- | A morphological lookup function: given a literal, it returns the
--   features associated with it, or 'Nothing' when the literal has no
--   morphological entry.
type MorphFn = Pred -> Maybe Flist
\end{code}
\section{Morphological input}
Morphological input means attaching morphological features on trees. The
user specifies morphological input through the input semantics. Our job
is to identify morphological predicates like \semexpr{plural(x)} and
apply features like \fs{\it num:pl} on the relevant trees.
\begin{code}
-- | Build a 'MorphFn' from a table of morphological predicates.
--
--   Each entry of @minfo@ pairs a key with the features it stands for.
--   The resulting function looks a literal up by the shown form of its
--   second component and returns 'Nothing' when there is no such entry.
--
--   The table is turned into a map once, as soon as @minfo@ is supplied,
--   and that map is shared by every lookup made through the returned
--   function.  (Taking the literal as a second top-level argument would
--   rebuild the map for each literal when the function is partially
--   applied.)
readMorph :: [(String,[AvPair])] -> MorphFn
readMorph minfo = \pred_ -> Map.lookup (show (snd3 pred_)) fm
  where
    fm = Map.fromList minfo
-- | Remove the morphological literals from a semantics: only the literals
--   for which the lookup function yields 'Nothing' are kept, in their
--   original order.
stripMorphSem :: MorphFn -> Sem -> Sem
stripMorphSem morphfn = filter (isNothing . morphfn)
-- | Apply the morphological information found in a semantics to a list of
--   candidate trees.
--
--   For every literal of @sem@ that @morphfn@ maps to some features, those
--   features are attached (via 'attachMorphHelper') to each candidate whose
--   own semantics contains a literal with the same first argument.
--   Literals without a morphological entry leave the candidates untouched.
attachMorph :: MorphFn -> Sem -> [TagElem] -> [TagElem]
attachMorph morphfn sem cands = foldr attach cands sem
  where
    -- handle a single literal of the input semantics
    attach :: Pred -> [TagElem] -> [TagElem]
    attach lit trees =
      case morphfn lit of
        Nothing  -> trees
        Just mfs -> map (decorate (firstArg lit) mfs) trees
    -- the index a literal is about; anonymous if it has no arguments
    firstArg :: Pred -> GeniVal
    firstArg lit =
      case thd3 lit of
        []    -> GAnon
        (a:_) -> a
    -- only trees that talk about the index get the features
    decorate :: GeniVal -> Flist -> TagElem -> TagElem
    decorate idx mfs t
      | mentions idx t = attachMorphHelper mfs t
      | otherwise      = t
    -- does some literal of the tree semantics have idx as first argument?
    mentions idx = any (startsWith idx) . tsemantics
    startsWith idx lit =
      case thd3 lit of
        []    -> False
        (a:_) -> a == idx
-- | Unify the given morphological features with the top feature structure
--   of the lexical anchor of a tree.
--
--   The substitution produced by the unification is applied to the whole
--   tree and to the bottom features of the anchor; the anchor node is then
--   written back with 'setMorphAnchor'.
--
--   Calls 'error' if the unification fails or if the tree has no lexical
--   anchor node; in both cases the message names the offending tree.
attachMorphHelper :: Flist -> TagElem -> TagElem
attachMorphHelper mfs te =
  case unifyFeat mfs (gup anchor) of
    Nothing -> error ("Morphological unification failure on " ++ idname te)
    Just (unf, subst) ->
      let te2  = replace subst te
          newa = anchor { gup = unf, gdown = replace subst (gdown anchor) }
      in te2 { ttree = setMorphAnchor newa (ttree te2) }
  where
    isLexAnchor a = ganchor a && gtype a == Lex
    -- an explicit match instead of 'head', so that a tree without a lexical
    -- anchor is reported by name rather than as an anonymous empty-list crash
    anchor =
      case filterTree isLexAnchor (ttree te) of
        (a:_) -> a
        []    -> error ("No lexical anchor node found in " ++ idname te)
-- | Put the node @n@ in place of the lexical anchor of the tree @t@,
--   keeping the children of the replaced node.  Locating the node and
--   rebuilding the tree is delegated to 'listRepNode' (defined elsewhere).
setMorphAnchor :: GNode -> Tree GNode -> Tree GNode
setMorphAnchor n t = head (fst (listRepNode swapIn isLexAnchor [t]))
  where
    -- the node to replace: a lexical node flagged as anchor
    isLexAnchor (Node a _) = gtype a == Lex && ganchor a
    -- keep the subtrees, change only the label
    swapIn (Node _ kids) = Node n kids
\end{code}
\section{Morphological realisation}
\jargon{Morphological realisation} refers to the actual process
of converting lemmas and morphological information into inflected forms.
We do this by calling some third party software specified by the user.
The morphological software must accept a JSON list of \jargon{lemma sentences}
where each lemma sentence is itself a list of objects containing a lemma and
a feature structure.
\begin{verbatim}
[
[{"lemma": "le", "lemma-features": "[num:sg gen:f]"},
{"lemma": "fille", "lemma-features": "[num:sg]"},
{"lemma": "detester", "lemma-features": "[num:sg tense:past]"},
{"lemma": "le", "lemma-features": "[num:pl gen:m]"},
{"lemma": "garcon", "lemma-features": "[num:pl]"}
],
[{"lemma": "ce", "lemma-features": "[]"},
{"lemma": "etre", "lemma-features": "[]"},
{"lemma": "le", "lemma-features": "[]"},
{"lemma": "garcon", "lemma-features": "[]"},
{"lemma": "que", "lemma-features": "[]"},
{"lemma": "le", "lemma-features": "[num:sg gen:f]"},
{"lemma": "fille", "lemma-features": "[num:sg]"},
{"lemma": "detester", "lemma-features": "[num:sg tense:past]"}
]
]
\end{verbatim}
NB: I recommend using a JSON library instead of parsing and writing this by
hand.
The morphological realiser may return more than one output per sentence.
Indeed, we expect a JSON-formatted list (a) of lists (b), where each (b)
provides a number of candidate morphological realisations for a sentence in
(a). The list (a) must have the same length as the input because each item in
(a) is expected to correspond to a sentence from the input.
Notice that the morphological generator can choose to delete spaces or do other
orthographical tricks in between words:
\begin{verbatim}
[
["la fille detestait les garcons"],
["c'est le garcon que la fille detestait"
,"c'est les garcons que la fille detestait"]
]
\end{verbatim}
If your morphological software does not do this, you could wrap it with a
simple script.
\begin{code}
-- | Realisation without any morphology: the lemmas of the sentence joined
--   by single spaces, returned as the one and only candidate.
sansMorph :: LemmaPlusSentence -> [String]
sansMorph sentence = singleton (unwords (map lemmaOf sentence))
  where
    lemmaOf (LemmaPlus lemma _) = lemma
-- | Inflect a batch of lemma sentences with an external program.
--
--   The sentences are sent to @morphcmd@ as pretty-printed JSON on its
--   standard input; its standard output is decoded as JSON and paired with
--   the input sentences.
--
--   If the command cannot be run, its output cannot be decoded, or it
--   returns a different number of results than there were sentences, a
--   message is printed with 'ePutStrLn' and every sentence is paired with
--   its 'sansMorph' realisation instead.
--
--   Once the output has been decoded, the pipes are closed and the child
--   process is waited for, so that it does not linger as a zombie.
--
--   NOTE(review): the whole input is written before any output is read; a
--   generator that streams a large output while still reading its input
--   could presumably block on a full pipe -- confirm whether this matters
--   for the generators in use.
inflectSentencesUsingCmd :: String -> [LemmaPlusSentence] -> IO [(LemmaPlusSentence,[String])]
inflectSentencesUsingCmd morphcmd sentences =
  do (toP, fromP, errP, pid) <- runInteractiveCommand morphcmd
     -- send the batch, then close the pipe to signal the end of the input
     hPutStrLn toP . render . pp_value . showJSON $ sentences
     hClose toP
     mResults <- (resultToEither . decode) `fmap` hGetContents fromP
     -- hGetContents is lazy: the parse has to be forced before the pipe is
     -- closed, otherwise the output would be cut short
     mResults `seq` reap fromP errP pid
     case mResults of
       Left err -> fallback $ "Could not parse morphological generator output: " ++ err
       Right res ->
         do let lenResults   = length res
                lenSentences = length sentences
            if lenResults == lenSentences
               then return $ zip sentences res
               else fallback $ "Morphological generator returned "
                            ++ show lenResults ++ " results for "
                            ++ show lenSentences ++ " inputs"
  `catch` \e -> fallback $ "Error calling morphological generator:\n" ++ show e
  where
    -- report the problem and fall back on the uninflected lemmas
    fallback err =
      do ePutStrLn err
         return $ map (\x -> (x, sansMorph x)) sentences
    -- close the remaining pipes and collect the child; this is best effort,
    -- a failure here must not throw away results that were already decoded
    reap outH errH ph =
      (hClose outH >> hClose errH >> waitForProcess ph >> return ())
        `catch` \_ -> return ()
-- | Wrap a value in a one-element list.
singleton :: a -> [a]
singleton = (: [])
\end{code}