module NLP.Corpora.Parsing where
import qualified Data.Text as T
import Data.Text (Text)
import NLP.Types (Tag(..), parseTag, tagUNK, TaggedSentence)
-- | Turn whitespace-separated @token/TAG@ text into a tagged sentence.
--
-- The input is split into whitespace-delimited chunks. Each chunk is cut at
-- its /last/ @/@: the text before that slash becomes the token and the text
-- after it is passed to 'parseTag'. A chunk containing no @/@ at all is kept
-- whole and paired with 'tagUNK'.
readPOS :: Text -> TaggedSentence
readPOS = map tagChunk . T.words
  where
    -- The token/tag separator, built without relying on string-literal
    -- overloading.
    slash :: Text
    slash = T.singleton '/'

    -- Convert one whitespace-free chunk into a (token, tag) pair.
    tagChunk :: Text -> (Text, Tag)
    tagChunk chunk =
      case T.breakOnEnd slash chunk of
        (prefix, rawTag)
          -- 'T.breakOnEnd' yields an empty prefix exactly when the
          -- separator does not occur in the chunk.
          | T.null prefix -> (chunk, tagUNK)
          -- Otherwise the prefix still ends with the separator; drop it.
          | otherwise -> (T.dropEnd 1 prefix, parseTag rawTag)
-- | Remove the final character of a 'Text', returning empty input unchanged.
--
-- This is a total counterpart to 'T.init', which fails on empty text.
-- 'T.dropEnd' already returns the empty text when there is nothing to drop,
-- so no separate emptiness test (and no full-length count) is needed.
safeInit :: Text -> Text
safeInit = T.dropEnd 1