module Text.Wordlint.Words where
import Prelude hiding (Word)
import Data.Char (isPunctuation, toLower)
import Data.List
data Word a = Word
{ lemma :: String
,position :: a
,line :: Int
,column :: Int }
type Words a = [Word a]
instance Eq (Word a) where
x == y = lemma x == lemma y
instance Ord (Word a) where
compare x y = compare (lemma x) (lemma y)
class NumOps a where
createPos :: String -> String -> [(String, a)]
instance NumOps Double where
createPos text _ = createPercentPos text
instance NumOps Int where
createPos = wordOrLine
wordOrLine :: String -> String -> [(String, Int)]
wordOrLine s t = case t of
"word" -> createWordPos s
"line" -> createLinePos s
_ -> createWordPos s
createWordPos :: String -> [(String, Int)]
createWordPos s = zip (words s) [1..]
createLinePos :: String -> [(String, Int)]
createLinePos = createWordLinePos
createPercentPos :: String -> [(String, Double)]
createPercentPos s = getWordPercentPos wrdlst wrdln
where wrdln = length $ words s
wrdlst = createWordPos s
getWordPercentPos :: [(String, Int)] -> Int -> [(String, Double)]
getWordPercentPos [] _ = []
getWordPercentPos (x:xs) y = divWordPercentPos x y : getWordPercentPos xs y
divWordPercentPos :: (String, Int) -> Int -> (String, Double)
divWordPercentPos (s,x) y = (s,p)
where xi = fromIntegral x
yi = fromIntegral y
p = (xi/yi)*100
createWordLinePos :: String -> [(String, Int)]
createWordLinePos xs = setWordLines $ zip (getWordLines xs) [1..]
getWordLines :: String -> [[String]]
getWordLines xs = fmap words (lines xs)
setWordLine :: ([String], Int) -> [(String, Int)]
setWordLine (([],_)) = []
setWordLine ((x:xs,i)) = (x,i) : setWordLine (xs,i)
setWordLines :: [([String], Int)] -> [(String, Int)]
setWordLines [] = []
setWordLines (x:xs) = setWordLine x ++ setWordLines xs
createWordColPos :: String -> [(String, Int)]
createWordColPos xs = setWordCols lin $ numWordCols lin
where lin = lines xs
numWordCols :: [String] -> [[(Char,Int)]]
numWordCols [[]] = [[]]
numWordCols [] = [[]]
numWordCols (x:xs) = zip x [1..] : numWordCols xs
setWordCols :: [String] -> [[(Char,Int)]] -> [(String,Int)]
setWordCols [] [] = []
setWordCols (_:_) [] = []
setWordCols [] (_:_) = []
setWordCols (x:xs) (y:ys) = filtWordCols (words x) y ++ setWordCols xs ys
filtWordCols :: [String] -> [(Char,Int)] -> [(String,Int)]
filtWordCols [] _ = []
filtWordCols _ [] = []
filtWordCols w@(x:xs) c@(y:ys) = if (fst y == ' ') || (fst y /= head x)
then filtWordCols w ys
else (x,snd y) : filtWordCols xs (drop (length x) c )
zipWords :: (NumOps a) => String -> String -> Words a
zipWords s t = zipWith4 Word (words s) (wordpos s t) linepos colpos
where
linepos = snd . unzip $ createWordLinePos s
colpos = snd . unzip $ createWordColPos s
wordpos u v = snd . unzip $ createPos u v
isCheckWordLong :: (NumOps a) => Word a -> Int -> Bool
isCheckWordLong (Word w _ _ _) x = length w > x
checkWordList :: (NumOps a) => Words a -> Int -> Words a
checkWordList [] _ = []
checkWordList (x:xs) i = if isCheckWordLong x i
then x : checkWordList xs i
else checkWordList xs i
checkWordEquality :: (NumOps a) => Word a -> Word a -> Bool
checkWordEquality (Word a _ b c) (Word x _ y z) = coord1 /= coord2 && a==x
where coord1 = (b,c)
coord2 = (y,z)
checkWordDistance :: (Num a, NumOps a) => Word a -> Word a -> a
checkWordDistance (Word _ x _ _) (Word _ y _ _) = x y
filterMatchingWords :: (NumOps a) => Words a -> Words a
filterMatchingWords [] = []
filterMatchingWords xs = sort $ intersectBy checkWordEquality xs xs
filterWordPunctuation :: (NumOps a) => Words a -> Words a
filterWordPunctuation xs = [Word [a | a <- lemma x, not $ isPunctuation a]
(position x) (line x) (column x) | x <- xs]
filterWordCapitalization :: (NumOps a) => Words a -> Words a
filterWordCapitalization xs = [Word [toLower a | a <- lemma x]
(position x) (line x) (column x) | x <- xs]
filterWordBlacklist :: (NumOps a) => Words a -> [String] -> Words a
filterWordBlacklist xs blacklist = [x | x <- xs, let w = lemma x, w `notElem` blacklist]
filterWordWhitelist :: (NumOps a) => Words a -> [String] -> Words a
filterWordWhitelist xs whitelist = [x | x <- xs, let w = lemma x, w `elem` whitelist]