module PStemmer.Ru where
import qualified Data.Text as T
import PStemmer.Internal.Function
type ListG = [(T.Text, [T.Text])]
runPorter' :: [T.Text] -> [(T.Text, T.Text)]
runPorter' = map (id &&& runPorter)
runPorter :: T.Text -> T.Text
runPorter txt =
let
rv :: RV
rv = mkRV txt
in
fst rv `T.append` applySteps (snd rv) [step1, step2, step3, step4]
step4 :: Step
step4 txt
| T.null txt = txt
| otherwise = getFirstSR [step4_1, step4_2, step4_3] txt
where
step4_1 :: Substep
step4_1 txt' =
if "нн" `T.isSuffixOf` txt'
then (T.init txt', True)
else (txt', False)
step4_2 :: Substep
step4_2 txt' =
case cutList mapSuperl txt' of
(t, True) -> (fst $ step4_1 t, True)
_ -> (txt', False)
step4_3 :: Substep
step4_3 txt' =
if T.last txt' == 'ь'
then (T.init txt', True)
else (txt', False)
step3 :: Step
step3 txt
| T.null txt = txt
| otherwise =
let
r2 :: Maybe T.Text
r2 = snd . mkR1R2 $ txt
in
case r2 of
Just t ->
if any (`T.isSuffixOf` t) mapDerivat
then fst . cutList mapDerivat $ txt
else txt
_ -> txt
step2 :: Step
step2 txt
| T.null txt = txt
| otherwise =
if T.last txt == 'и'
then T.init txt
else txt
step1 :: Step
step1 txt
| T.null txt = txt
| otherwise = getFirstSR [step1_1, step1_2, step1_3, step1_4, step1_5, step1_6, step1_7, step1_8, step1_9] txt
where
step1_1 :: Substep
step1_1 = cutList mapPG_G2
step1_2 :: Substep
step1_2 = cutListG mapPG_G1
step1_3 :: Substep
step1_3 t = second (const False) $ cutList mapRefl t
step1_4 :: Substep
step1_4 = cutList mapPartic_G2
step1_5 :: Substep
step1_5 = cutListG mapPartic_G1
step1_6 :: Substep
step1_6 = cutList mapADJ
step1_7 :: Substep
step1_7 = cutList mapVerb_G2
step1_8 :: Substep
step1_8 = cutListG mapVerb_G1
step1_9 :: Substep
step1_9 = cutList mapNoun
cutListG :: ListG -> Substep
cutListG ((w, ws):ls) txt =
if any (`T.isSuffixOf` txt) ws
then fromMaybe (txt, False) (flip (,) True <$> T.stripSuffix w txt)
else cutListG ls txt
cutListG _ txt = (txt, False)
prepareList :: List -> List
prepareList = sortByLengthDown
prepareListWith :: List -> List -> List
prepareListWith sl al = sortByLengthDown . concatMap (\w -> map (w `T.append`) al) $ sl
prepareListG :: List -> ListG
prepareListG = map (\w -> (w, map (`T.cons` w) neccSuff)) . sortByLengthDown
prepareListGWith :: List -> List -> ListG
prepareListGWith sl al = map (\w -> (w, map (`T.cons` w) neccSuff)) .
sortByLengthDown . concatMap (\w -> map (w `T.append`) al) $ sl
mkRV :: T.Text -> RV
mkRV txt =
let
(p1,p2) = T.break (`elem` vowels) txt
in
case T.uncons p2 of
Just (ch, p2') -> (p1 `T.snoc` ch, p2')
_ -> (p1,p2)
mkR1R2 :: T.Text -> R1R2
mkR1R2 txt
| T.null txt = r1r2null
| otherwise = mkR' (T.tails txt) r1r2null
where
mkR' :: [T.Text] -> R1R2 -> R1R2
mkR' _ r1r2@(Just _, Just _) = r1r2
mkR' (t1:t2:xs) r1r2@(Nothing, r2@Nothing)
| T.head t1 `notElem` vowels = mkR' (t2:xs) (tNull t2, r2)
| otherwise = mkR' (t2:xs) r1r2
mkR' (t1:t2:t3:xs) r1r2@(r1@(Just _), Nothing)
| chkV t1 t2 = mkR' (t3:xs) (r1, tNull t3)
| otherwise = mkR' (t2:t3:xs) r1r2
mkR' _ r1r2 = r1r2
r1r2null :: R1R2
r1r2null = (Nothing, Nothing)
tNull :: T.Text -> Maybe T.Text
tNull t
| T.null t = Nothing
| otherwise = Just t
chkV :: T.Text -> T.Text -> Bool
chkV t1 t2 = (T.head t1 `elem` vowels) && (T.head t2 `notElem` vowels)
neccSuff :: String
neccSuff = "ая"
mapPG_G1 :: ListG
mapPG_G1 = prepareListG ["в", "вши", "вшись"]
mapPG_G2 :: List
mapPG_G2 = prepareList ["ив", "ивши", "ившись", "ыв", "ывши", "ывшись"]
mapADJ :: List
mapADJ = prepareList ["ее", "её", "ёе", "ёё", "ие", "иё", "ые", "ыё", "ое", "оё",
"ими", "ыми", "ей", "ёй", "ий", "ый", "ой", "ем", "ём", "им", "ым", "ом", "его",
"ёго", "ого", "ему", "ёму", "ому", "их", "ых", "ую", "юю", "ая", "яя", "ою",
"ею", "ёю"]
mapPartic_G1 :: ListG
mapPartic_G1 = prepareListGWith ["ем", "ём", "нн", "вш", "ющ", "щ"] mapADJ
mapPartic_G2 :: List
mapPartic_G2 = prepareListWith ["ивш", "ывш", "ующ"] mapADJ
mapRefl :: List
mapRefl = prepareList ["ся", "сь"]
mapVerb_G1 :: ListG
mapVerb_G1 = prepareListG ["ла", "на", "ете", "ёте", "етё", "ётё", "йте", "йтё", "ли",
"й", "л", "ем", "ём", "н", "ло", "но", "ет", "ёт", "ют", "ны", "ть", "ешь", "ёшь",
"нно"]
mapVerb_G2 :: List
mapVerb_G2 = prepareList ["ила", "ыла", "ена", "ёна", "ейте", "ёйте", "ейтё", "ёйтё",
"уйте", "уйтё", "ите", "итё", "или", "ыли", "ей", "ёй", "уй", "ил", "ыл", "им", "ым",
"ен", "ён", "ило", "ыло", "ено", "ёно", "ят", "ует", "уёт", "уют", "ит", "ыт", "ены",
"ёны", "ить", "ыть", "ишь", "ую", "ю"]
mapNoun :: List
mapNoun = prepareList ["а", "ев", "ёв", "ов", "ие", "иё", "ье", "е", "ё", "иями",
"ями", "ами", "еи", "ёи", "ии", "и", "ией", "иёй", "ей", "ёй", "ой", "ий", "й",
"иям", "ям", "ием", "иём", "ем", "ём", "ам", "ом", "о", "у", "ах", "иях", "ях",
"ы", "ь", "ию", "ью", "ю", "ия", "ья", "я"]
mapSuperl :: List
mapSuperl = prepareList ["ейш", "ёйш", "ейше", "ёйше", "ейшё", "ёйшё"]
mapDerivat :: List
mapDerivat = prepareList ["ост", "ость"]
vowels :: String
vowels = "аеёиоуыэюя"