Safe Haskell | None |
---|---|
Language | Haskell2010 |
This module provides a handy wrapper around the CoreNLP project's command-line utility (https://nlp.stanford.edu/software/corenlp.html), and a parser for some of its output formats.
- launchCoreNLP :: FilePath -> [Text] -> IO [Either String Document]
- parseJsonDoc :: Text -> Either String Document
- data Dependency = Dependency {
- dep :: Text
- governor :: Int
- governorGloss :: Text
- dependent :: Int
- dependentGloss :: Text
- data Entitymention = Entitymention {
- docTokenBegin :: Int
- docTokenEnd :: Int
- tokenBegin :: Int
- tokenEnd :: Int
- text :: Text
- characterOffsetBegin :: Int
- characterOffsetEnd :: Int
- ner :: Text
- normalizedNER :: Maybe Text
- data Token = Token {}
- data Sentence = Sentence {
- index :: Int
- parse :: Text
- basicDependencies :: [Dependency]
- enhancedDependencies :: [Dependency]
- enhancedPlusPlusDependencies :: [Dependency]
- entitymentions :: [Entitymention]
- tokens :: [Token]
- data PennPOS
- data Coref = Coref {}
- type CorefsId = Text
- type Corefs = HashMap CorefsId [Coref]
- data Document = Document {}
- data NamedEntity
- = PERSON
- | LOCATION
- | ORGANIZATION
- | MISC
- | MONEY
- | NUMBER
- | ORDINAL
- | PERCENT
- | DATE
- | TIME
- | DURATION
- | SET
- | URL
- | CITY
- | STATE_OR_PROVINCE
- | COUNTRY
- | NATIONALITY
- | RELIGION
- | TITLE
- | IDEOLOGY
- | CRIMINAL_CHARGE
- | CAUSE_OF_DEATH
- | O
- test :: IO ()
Documentation
:: FilePath | Path to the directory where you extracted the CoreNLP project |
-> [Text] | List of inputs |
-> IO [Either String Document] | List of parsed results |
Launch CoreNLP with your inputs. This function will put every piece of Text
in a separate file, launch a CoreNLP subprocess, and parse the results.
parseJsonDoc :: Text -> Either String Document Source #
Parse the JSON output of CoreNLP. See the source of headlines
for an example JSON input.
data Dependency Source #
Dependency | |
|
data Entitymention Source #
Entitymention | |
|
Sentence | |
|
CC | Coordinating conjunction |
CD | Cardinal number |
DT | Determiner |
EX | Existential *there* |
FW | Foreign word |
IN | Preposition or subordinating conjunction |
JJ | Adjective |
JJR | Adjective, comparative |
JJS | Adjective, superlative |
LS | List item marker |
MD | Modal |
NN | Noun, singular or mass |
NNS | Noun, plural |
NNP | Proper noun, singular |
NNPS | Proper noun, plural |
PDT | Predeterminer |
POS | Possessive ending |
PRP | Personal pronoun |
PRPDollar | Possessive pronoun |
RB | Adverb |
RBR | Adverb, comparative |
RBS | Adverb, superlative |
RP | Particle |
SYM | Symbol |
TO | *to* |
UH | Interjection |
VB | Verb, base form |
VBD | Verb, past tense |
VBG | Verb, gerund or present participle |
VBN | Verb, past participle |
VBP | Verb, non-3rd person singular present |
VBZ | Verb, 3rd person singular present |
WDT | Wh-determiner |
WP | Wh-pronoun |
WPDollar | Possessive wh-pronoun |
WRB | Wh-adverb |
LRB | "-LRB-", the Penn Treebank escape for a left round bracket "(" |
RRB | "-RRB-", the Penn Treebank escape for a right round bracket ")" |
PosPunctuation Text | anyOf ".:,''$#$,", sometimes a few together
data NamedEntity Source #
See https://stanfordnlp.github.io/CoreNLP/ner.html
PERSON | |
LOCATION | |
ORGANIZATION | |
MISC | |
MONEY | |
NUMBER | |
ORDINAL | |
PERCENT | |
DATE | |
TIME | |
DURATION | |
SET | |
URL | |
CITY | |
STATE_OR_PROVINCE | |
COUNTRY | |
NATIONALITY | |
RELIGION | |
TITLE | Job title |
IDEOLOGY | |
CRIMINAL_CHARGE | |
CAUSE_OF_DEATH | |
O | Not a named entity? TODO: check somehow |