chatter-0.9.1.0: A library of simple NLP algorithms.

Safe Haskell: None
Language: Haskell2010

NLP.Types.IOB

Synopsis

Documentation

data IOBChunk chunk tag Source #

Data type to indicate IOB tags for chunking

Constructors

BChunk (POS tag) chunk

Begin marker.

IChunk (POS tag) chunk

Inside-chunk marker.

OChunk (POS tag)

Not in a chunk.

Instances

(Eq chunk, Eq tag) => Eq (IOBChunk chunk tag) Source # 

Methods

(==) :: IOBChunk chunk tag -> IOBChunk chunk tag -> Bool #

(/=) :: IOBChunk chunk tag -> IOBChunk chunk tag -> Bool #

(Read chunk, Read tag) => Read (IOBChunk chunk tag) Source # 

Methods

readsPrec :: Int -> ReadS (IOBChunk chunk tag) #

readList :: ReadS [IOBChunk chunk tag] #

readPrec :: ReadPrec (IOBChunk chunk tag) #

readListPrec :: ReadPrec [IOBChunk chunk tag] #

(Show chunk, Show tag) => Show (IOBChunk chunk tag) Source # 

Methods

showsPrec :: Int -> IOBChunk chunk tag -> ShowS #

show :: IOBChunk chunk tag -> String #

showList :: [IOBChunk chunk tag] -> ShowS #

(ChunkTag c, Arbitrary c, Arbitrary t, Tag t) => Arbitrary (IOBChunk c t) Source # 

Methods

arbitrary :: Gen (IOBChunk c t) #

shrink :: IOBChunk c t -> [IOBChunk c t] #

getPOS :: (ChunkTag c, Tag t) => IOBChunk c t -> POS t Source #

parseIOBLine :: (ChunkTag chunk, Tag tag) => Text -> Either Error (IOBChunk chunk tag) Source #

Parse an IOB-chunk encoded line of text.

Assumes that the line has three space-delimited entries, in the format:

> token POSTag IOBChunk

For example:

> > parseIOBLine "We PRP B-NP" :: Either Error (IOBChunk B.Chunk B.Tag)
> Right (BChunk (POS B.PRP (Token "We")) B.C_NP)

iobBuilder :: (ChunkTag c, Tag t) => Text -> POS t -> Either Error (IOBChunk c t) Source #

toChunkTree :: (ChunkTag c, Tag t) => [IOBChunk c t] -> ChunkedSentence c t Source #

Turn an IOB result into a tree.

parseIOB :: (ChunkTag chunk, Tag tag) => Text -> Either Error [[IOBChunk chunk tag]] Source #

Parse an IOB-encoded corpus.

parseSentence :: (ChunkTag chunk, Tag tag) => [Text] -> Either Error [IOBChunk chunk tag] Source #

getSentences :: Text -> [[Text]] Source #

Split a body of text into lines, and then group those lines into "paragraphs". Each resulting sublist holds a run of consecutive lines that was delimited by empty lines in the original text.

e.g.:

> > getSentences "He\njumped\n.\n\nShe\njumped\n."
> [["He", "jumped", "."], ["She", "jumped", "."]]