{-# LANGUAGE OverloadedStrings #-} {- | Module : Glider.NLP.Language.English.StopWords Copyright : Copyright (C) 2013-2014 Krzysztof Langner License : BSD3 Maintainer : Krzysztof Langner Stability : alpha Portability : portable Module contains stop words for English language -} module Glider.NLP.Language.English.StopWords (isStopWord) where import qualified Data.Set as S import Data.Text -- | Predicate to check if given words is a stop word isStopWord :: Text -> Bool isStopWord w = S.member w stopWords stopWords :: S.Set Text stopWords = S.fromList [ "i" , "me" , "my" , "myself" , "we" , "our" , "ours" , "ourselves" , "you" , "your" , "yours" , "yourself" , "yourselves" , "he" , "him" , "his" , "himself" , "she" , "her" , "hers" , "herself" , "it" , "its" , "itself" , "they" , "them" , "their" , "theirs" , "themselves" , "what" , "which" , "who" , "whom" , "this" , "that" , "these" , "those" , "am" , "is" , "are" , "was" , "were" , "be" , "been" , "being" , "have" , "has" , "had" , "having" , "do" , "does" , "did" , "doing" , "a" , "an" , "the" , "and" , "but" , "if" , "or" , "because" , "as" , "until" , "while" , "of" , "at" , "by" , "for" , "with" , "about" , "against" , "between" , "into" , "through" , "during" , "before" , "after" , "above" , "below" , "to" , "from" , "up" , "down" , "in" , "out" , "on" , "off" , "over" , "under" , "again" , "further" , "then" , "once" , "here" , "there" , "when" , "where" , "why" , "how" , "all" , "any" , "both" , "each" , "few" , "more" , "most" , "other" , "some" , "such" , "no" , "nor" , "not" , "only" , "own" , "same" , "so" , "than" , "too" , "very" , "s" , "t" , "can" , "will" , "just" , "don" , "should" , "now" ]