{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE FlexibleInstances #-}
module NLP.Concraft.Morphosyntax
(
Seg (..)
, mapSeg
, interpsSet
, interps
, Word (..)
, Sent
, mapSent
, SentO (..)
, mapSentO
, module NLP.Concraft.Morphosyntax.WMap
) where
import Prelude hiding (Word)
import Control.Applicative ((<$>), (<*>))
import Control.Arrow (first)
import Data.Aeson
import Data.Binary (Binary)
import qualified Data.Set as S
import qualified Data.Map as M
import qualified Data.Text as T
import qualified Data.Text.Lazy as L
import NLP.Concraft.Morphosyntax.WMap
data Seg w t = Seg {
word :: w
, tags :: WMap t }
deriving (Show)
instance ToJSON w => ToJSON (Seg w T.Text) where
toJSON Seg{..} = object
[ "word" .= word
, "tags" .= unWMap tags ]
instance FromJSON w => FromJSON (Seg w T.Text) where
parseJSON (Object v) = Seg
<$> v .: "word"
<*> (mkWMap <$> v .: "tags")
parseJSON _ = error "parseJSON (segment): absurd"
mapSeg :: Ord b => (a -> b) -> Seg w a -> Seg w b
mapSeg f w = w { tags = mapWMap f (tags w) }
interpsSet :: Seg w t -> S.Set t
interpsSet = M.keysSet . unWMap . tags
interps :: Seg w t -> [t]
interps = S.toList . interpsSet
class Word a where
orth :: a -> T.Text
oov :: a -> Bool
instance Word w => Word (Seg w t) where
orth = orth . word
{-# INLINE orth #-}
oov = oov . word
{-# INLINE oov #-}
type Sent w t = [Seg w t]
mapSent :: Ord b => (a -> b) -> Sent w a -> Sent w b
mapSent = map . mapSeg
data SentO w t = SentO
{ segs :: Sent w t
, orig :: L.Text }
deriving (Show)
mapSentO :: Ord b => (a -> b) -> SentO w a -> SentO w b
mapSentO f x =
let segs' = mapSent f (segs x)
in x { segs = segs' }