module NLP.GenI.Morphology
(
module NLP.GenI.Morphology.Types
, readMorph, stripMorphSem, attachMorph, setMorphAnchor
, inflectSentencesUsingCmd, sansMorph
) where
import Control.Applicative
import Control.Concurrent (forkIO)
import Control.Exception (IOException, bracket, catch,
evaluate)
import qualified Data.Map as Map
import Data.Maybe (fromMaybe, isNothing)
import Data.Text (Text)
import qualified Data.Text as T
import Data.Tree
import Data.Typeable
import Prelude hiding (catch)
import System.Exit
import System.IO
import System.Process
import System.Log.Logger
import Text.JSON
import qualified Text.JSON as J
import Text.JSON.Pretty hiding ((<+>), (<>))
import NLP.GenI.FeatureStructure
import NLP.GenI.General
import NLP.GenI.GeniVal (GeniVal, mkGAnon, replace)
import NLP.GenI.Morphology.Types
import NLP.GenI.Parser
import NLP.GenI.Pretty
import NLP.GenI.Semantics (Literal (..), Sem)
import NLP.GenI.Tag
import NLP.GenI.TreeSchema (GNode (..), GType (..))
readMorph :: [(Text,[AvPair GeniVal])] -> MorphInputFn
readMorph minfo lit =
Map.lookup key fm
where
fm = Map.fromList minfo
key = pretty (lPredicate lit)
stripMorphSem :: MorphInputFn -> Sem -> Sem
stripMorphSem morphfn tsem =
[ l | l <- tsem, (isNothing.morphfn) l ]
attachMorph :: MorphInputFn -> Sem -> [TagElem] -> [TagElem]
attachMorph morphfn sem cands =
let
relTree i = not.null.relfilt.tsemantics
where relfilt = filter (relLit i)
relLit i l = case lArgs l of
[] -> False
(x:_) -> x == i
attachHelper :: GeniVal -> Flist GeniVal -> TagElem -> TagElem
attachHelper i mfs t =
if relTree i t then attachMorphHelper mfs t else t
attach :: Literal GeniVal -> [TagElem] -> [TagElem]
attach l cs =
case morphfn l of
Nothing -> cs
Just mfs -> map (attachHelper i mfs) cs
where i = case lArgs l of
[] -> mkGAnon
(x:_) -> x
in foldr attach cands sem
attachMorphHelper :: Flist GeniVal -> TagElem -> TagElem
attachMorphHelper mfs te =
let
tt = ttree te
anchor = head $ filterTree fn tt
where fn a = (ganchor a && gtype a == Lex)
in case unifyFeat mfs (gup anchor) of
Left err -> error . T.unpack $
"Morphological unification failure on" <+> idname te <> ":" <+> err
Right (unf,subst) ->
let
te2 = replace subst te
tt2 = ttree te2
newgdown = replace subst (gdown anchor)
newa = anchor { gup = unf, gdown = newgdown }
in te2 { ttree = setMorphAnchor newa tt2 }
setMorphAnchor :: GNode GeniVal -> Tree (GNode GeniVal) -> Tree (GNode GeniVal)
setMorphAnchor n t =
fromMaybe (error oops) $ repNode fn filt t
where
filt (Node a _) = gtype a == Lex && ganchor a
fn (Node _ l) = Node n l
oops = "NLP.GenI.Morphology.setMorphAnchor did not anticipate failure was possible"
sansMorph :: LemmaPlusSentence -> MorphOutput
sansMorph =
MorphOutput [] . singleton . T.unwords . map lem
where
lem (LemmaPlus l _) = l
inflectSentencesUsingCmd :: String -> [LemmaPlusSentence] -> IO [(LemmaPlusSentence,MorphOutput)]
inflectSentencesUsingCmd morphcmd sentences =
doit `catch` \e -> let _ = e :: IOException in (fallback (show e))
where
hCloseSloppy h = hClose h `catch` \err -> let _ = err :: IOException in warningM logname (show err)
doit = bracket
(do debugM logname $ "Starting morph generator: " ++ morphcmd
runInteractiveCommand morphcmd)
(\(inh,outh,errh,_) -> do
debugM logname $ "Closing output handles from morph generator"
mapM hCloseSloppy [inh, outh, errh])
$ \(toP,fromP,errP,pid) -> do
debugM logname $ "Sending " ++ show (length sentences) ++ " sentences to morph generator"
hPutStrLn toP . render . pp_value . showJSON $ sentences
debugM logname $ "Closing input handle to morph generator"
hClose toP
err <- hGetContents errP
_ <- forkIO (evaluate (length err) >> warningM logname err)
output <- hGetContents fromP
_ <- evaluate (length output)
exitcode <- waitForProcess pid
debugM logname $ "Morph command exited"
if exitcode == ExitSuccess
then case resultToEither (decode output) of
Left jerr -> fallback $ "Could not parse morphological generator output: " ++ jerr
Right res -> do let lenResults = length res
lenSentences = length sentences
if lenResults == lenSentences
then return $ zip sentences res
else fallback $ "Morphological generator returned "
++ show lenResults ++ " results for "
++ show lenSentences ++ " inputs"
`catch` \e -> let _ = e :: IOException
in fallback ("Error calling morphological generator:\n" ++ show e)
else fallback "Morph generator failed"
fallback err = do
errorM logname err
return $ map (\x -> (x, sansMorph x)) sentences
instance JSON MorphOutput where
readJSON j =
case fromJSObject `fmap` readJSON j of
J.Error _ -> MorphOutput [] <$> readJSON j
J.Ok jo -> do
let field x = maybe (fail $ "Could not find: " ++ x) readJSON
$ lookup x jo
warnings = maybe (return []) readJSON (lookup "warnings" jo)
MorphOutput <$> warnings
<*> field "realisations"
showJSON _ = error "Don't know how to render MorphOutput"
instance JSON LemmaPlus where
readJSON j = do
jo <- fromJSObject `fmap` readJSON j
let field x = maybe (fail $ "Could not find: " ++ x) readJSON
$ lookup x jo
tfield = fmap T.pack . field
LemmaPlus <$> field "lemma"
<*> (parsecToJSON "lemma-features" geniFeats =<<
tfield "lemma-features")
showJSON (LemmaPlus l fs) = JSObject . toJSObject $
[ ("lemma" , showJSON l)
, ("lemma-features", showJSON $ prettyStr fs)
]
parsecToJSON :: Monad m => String -> Parser b -> Text -> m b
parsecToJSON description p str =
case runParser p () "" str of
Left err -> fail $ "Couldn't parse " ++ description ++ " because " ++ show err
Right res -> return res
singleton :: a -> [a]
singleton x = [x]
data MNAME = MNAME deriving Typeable
logname :: String
logname = mkLogname MNAME