-- | Preparation of named-entity (NE) dictionaries: the source resources
-- are parsed, restricted to single-word entries and written to disk in
-- a binary form.
module NLP.Nerf.Dict
( preparePNEG
, prepareNELexicon
, module NLP.Nerf.Dict.Base
) where

import Control.Applicative ((<$>))
import Control.Arrow (first)
import Data.Binary (encodeFile)
import qualified Data.PoliMorf as Poli
import qualified Data.Map as M
import qualified Data.Text as T

import NLP.Nerf.Dict.Base
import NLP.Nerf.Dict.PNEG (readPNEG)
import NLP.Nerf.Dict.NELexicon (readNELexicon)
import qualified NLP.Adict.Trie as Trie

-- | Build a dictionary that contains only one-word NEs: the entries are
-- first turned into a dictionary with 'mkDict', and every form for which
-- 'isMultiWord' holds is then sifted out.
mkDictW1 :: [Entry] -> NeDict
mkDictW1 = siftDict keepSingle . mkDict
  where
    -- Only the form is inspected; the second argument handed over by
    -- 'siftDict' is ignored.
    keepSingle form _ = not (isMultiWord form)

-- | Parse the PNEG dictionary and store its one-word part, in a binary
-- form, in the output file.
preparePNEG
    :: FilePath     -- ^ Path to PNEG in the LMF format
    -> FilePath     -- ^ Output file
    -> IO ()
preparePNEG lmfPath outPath =
    readPNEG lmfPath >>= saveDict outPath . mkDictW1

-- | Parse the NELexicon, merge it with the PoliMorf and serialize the
-- result into a binary, DAWG form.
prepareNELexicon
    :: FilePath     -- ^ Path to NELexicon
    -> FilePath     -- ^ Path to PoliMorf
    -> FilePath     -- ^ Output file
    -> IO ()
prepareNELexicon nePath poliPath outPath = do
    -- The NELexicon is read before the PoliMorf, as both are file reads.
    lexicon <- mkDictW1 <$> readNELexicon nePath
    bases   <- Poli.mkBaseMap <$> Poli.readPoliMorf poliPath
    encodeFile outPath . Trie.serialize $ toTrie (Poli.merge bases lexicon)
  where
    -- The trie is built from 'String' keys, so the 'T.Text' keys of the
    -- merged map are unpacked first.
    toTrie dict = Trie.fromList $ map (first T.unpack) (M.assocs dict)