Safe Haskell | None |
---|---|
Language | Haskell2010 |
- data Analysis = Analysis {}
- newtype Tokenizer = Tokenizer Text
- data AnalyzerDefinition = AnalyzerDefinition {}
- data CharFilterDefinition
- newtype TokenizerDefinition = TokenizerDefinitionNgram Ngram
- data Ngram = Ngram {
- ngramMinGram :: Int
- ngramMaxGram :: Int
- ngramTokenChars :: [TokenChar]
- }
- data TokenChar
- data TokenFilterDefinition
- = TokenFilterDefinitionLowercase (Maybe Language)
- | TokenFilterDefinitionUppercase (Maybe Language)
- | TokenFilterDefinitionApostrophe
- | TokenFilterDefinitionReverse
- | TokenFilterDefinitionSnowball Language
- | TokenFilterDefinitionShingle Shingle
- | TokenFilterDefinitionStemmer Language
- | TokenFilterDefinitionStop (Either Language [StopWord])
- data Language
- = Arabic
- | Armenian
- | Basque
- | Bengali
- | Brazilian
- | Bulgarian
- | Catalan
- | Cjk
- | Czech
- | Danish
- | Dutch
- | English
- | Finnish
- | French
- | Galician
- | German
- | German2
- | Greek
- | Hindi
- | Hungarian
- | Indonesian
- | Irish
- | Italian
- | Kp
- | Latvian
- | Lithuanian
- | Lovins
- | Norwegian
- | Persian
- | Porter
- | Portuguese
- | Romanian
- | Russian
- | Sorani
- | Spanish
- | Swedish
- | Thai
- | Turkish
- languageToText :: Language -> Text
- languageFromText :: Text -> Maybe Language
- data Shingle = Shingle {}
Documentation
data CharFilterDefinition Source #
Character filters are used to preprocess the stream of characters before it is passed to the tokenizer.
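For illustration, a character filter that rewrites ligatures before tokenization might be built as in the sketch below. The constructors of `CharFilterDefinition` are not listed on this page, so `CharFilterDefinitionMapping` is an assumed constructor name here; check your version of the library for the exact API.

```haskell
{-# LANGUAGE OverloadedStrings #-}

import qualified Data.Map.Strict as M
-- Assumes the types documented on this page are in scope.

-- Sketch only: CharFilterDefinitionMapping is an assumed constructor
-- wrapping a substitution map applied to the raw character stream
-- before the tokenizer runs.
ligatureFilter :: CharFilterDefinition
ligatureFilter = CharFilterDefinitionMapping $
  M.fromList [("æ", "ae"), ("œ", "oe")]
```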
newtype TokenizerDefinition Source #

Constructors

TokenizerDefinitionNgram Ngram
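As a sketch, a tokenizer emitting 2- and 3-character ngrams over letters and digits could be defined as below. The constructor and the `Ngram` record fields are as shown in the synopsis above; `TokenLetter` and `TokenDigit` are assumed constructor names for `TokenChar`, whose constructors are not listed on this page.

```haskell
-- 2-3 character ngrams over letters and digits. TokenLetter and
-- TokenDigit are assumed TokenChar constructors (not listed here).
trigramTokenizer :: TokenizerDefinition
trigramTokenizer = TokenizerDefinitionNgram Ngram
  { ngramMinGram    = 2
  , ngramMaxGram    = 3
  , ngramTokenChars = [TokenLetter, TokenDigit]
  }
```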
data TokenFilterDefinition Source #
Token filters are used to create custom analyzers.
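Using only the constructors listed in the synopsis above, a typical English filter chain can be sketched as: lowercase, stem, then drop stop words.

```haskell
-- Lowercase with the language-independent default (Nothing), stem
-- English words, then remove English stop words.
englishFilters :: [TokenFilterDefinition]
englishFilters =
  [ TokenFilterDefinitionLowercase Nothing
  , TokenFilterDefinitionStemmer English
  , TokenFilterDefinitionStop (Left English)
  ]
```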
data Language Source #

The set of languages that can be passed to various analyzers, filters, etc. in Elasticsearch. Most data types in this module that have a Language field can actually handle only a subset of these languages. Consult the official Elasticsearch documentation to see what is actually supported.
languageToText :: Language -> Text Source #
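languageFromText :: Text -> Maybe Language Source #

`languageFromText` is the partial inverse of `languageToText`, so a round trip through the textual name should recover the original constructor. A minimal property sketch, assuming the usual derived `Eq` instance for `Language`:

```haskell
-- Holds for every Language constructor provided languageFromText
-- inverts languageToText (the exact textual renderings, e.g.
-- "english", are not listed on this page).
roundTrips :: Language -> Bool
roundTrips lang = languageFromText (languageToText lang) == Just lang
```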