-----------------------------------------------------------------------------
-- Module      :   Convert umlaut written as ae, oe or ue into ä, ö and ü
--              deals with lines, preserving the leading spaces and tabs.
-- could be improved to use span to break on first non-space character
-----------------------------------------------------------------------------
{-# LANGUAGE FlexibleContexts      #-}
{-# LANGUAGE FlexibleInstances     #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE ScopedTypeVariables   #-}
{-# LANGUAGE TypeFamilies          #-}
{-# LANGUAGE TypeSynonymInstances  #-}
{-# LANGUAGE OverloadedStrings     #-}

module Lib.ProcWord  -- (openMain, htf_thisModuelsTests)
                    where
-- import           Uniform.Strings
-- import           Uniform.FileIO
import UniformBase
-- import Lib.FileHandling
import qualified Data.Text   as T (commonPrefixes)
-- import Control.Monad
import Control.Monad.Trans.Writer.Strict
 


-- procTxt2 :: [Text] ->  Text -> Text  -- called from OneMDfile, not from ProcTxt
-- -- change all umlaut in text - yaml header and markdown text
-- -- preserve leading blanks of line, or tabs, but not mixture of these
-- procTxt2 erl2  = unlines' . map (procLine2 erl2) . lines' 


procLine2 :: [Text] ->  Text -> Text
-- process one line preserving spaces or tabs (but not a mix) at start
-- improve to use span break on first non-space 
-- assumes that text is not \n terminated!
procLine2 :: [Text] -> Text -> Text
procLine2 [Text]
erl2 Text
t = Text
ld forall a. Semigroup a => a -> a -> a
<> ([Text] -> Text -> Text
procLine [Text]
erl2 Text
t1) 
    where
        (Text
ld, Text
t1) = Text -> (Text, Text)
auxProcLine2 Text
t
        -- case mb1 t of
        --         Nothing -> case mb2 t of 
        --                         Nothing -> ("", t)
        --                         Just (lead2, _, t02) -> (lead2,t02)
        --         Just (lead, _, t0) ->  (lead,t0)
        -- mb1 tx = T.commonPrefixes "                  " tx
        -- mb2 ty = T.commonPrefixes "\t\t\t\t\t\t\t" ty

auxProcLine2 :: Text -> (Text,Text)
auxProcLine2 :: Text -> (Text, Text)
auxProcLine2 Text
t =  case Text -> Maybe (Text, Text, Text)
mb1 Text
t of
                Maybe (Text, Text, Text)
Nothing -> case Text -> Maybe (Text, Text, Text)
mb2 Text
t of 
                                Maybe (Text, Text, Text)
Nothing -> (Text
"", Text
t)
                                Just (Text
lead2, Text
_, Text
t02) -> (Text
lead2,Text
t02)
                Just (Text
lead, Text
_, Text
t0) ->  (Text
lead,Text
t0)
    where
        mb1 :: Text -> Maybe (Text, Text, Text)
mb1 Text
tx = Text -> Text -> Maybe (Text, Text, Text)
T.commonPrefixes Text
"                  " Text
tx
        mb2 :: Text -> Maybe (Text, Text, Text)
mb2 Text
ty = Text -> Text -> Maybe (Text, Text, Text)
T.commonPrefixes Text
"\t\t\t\t\t\t\t" Text
ty

procLine2Rep :: [Text] -> Text -> Writer Text Text
-- accumulates the changed woerds for checking
procLine2Rep :: [Text] -> Text -> Writer Text Text
procLine2Rep [Text]
erl2 Text
t = do 
        Text
t2 <- [Text] -> Text -> Writer Text Text
procLineRep [Text]
erl2 Text
t1
        forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Text
ld forall a. Semigroup a => a -> a -> a
<> Text
t2
    where (Text
ld,Text
t1) = Text -> (Text, Text)
auxProcLine2 Text
t 



procLine :: [Text] -> Text -> Text
procLine :: [Text] -> Text -> Text
procLine [Text]
erlaubt Text
ln = forall a. CharChains a => [a] -> a
unwords' forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a -> b) -> [a] -> [b]
map ([Text] -> Text -> Text
procWord2 [Text]
erlaubt) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. CharChains a => a -> [a]
words' forall a b. (a -> b) -> a -> b
$ Text
ln
-- process all words in a line
-- should be idempotent, as long as text is not n\ terminated

procLineRep :: [Text] -> Text -> Writer Text Text 
procLineRep :: [Text] -> Text -> Writer Text Text
procLineRep [Text]
erlaubt Text
ln = do 
    [Text]
ln2rep <- forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM ([Text] -> Text -> Writer Text Text
procWord2Rep [Text]
erlaubt) (forall a. CharChains a => a -> [a]
words' Text
ln)
    forall (m :: * -> *) a. Monad m => a -> m a
return forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. CharChains a => [a] -> a
unwords' forall a b. (a -> b) -> a -> b
$ [Text]
ln2rep 

procWord2 :: [Text] -> Text -> Text
-- replace umlaut unless it is an permitted group
procWord2 :: [Text] -> Text -> Text
procWord2 [Text]
erlaubt Text
word =
  if [Text] -> Text -> Bool
checkErlaubt [Text]
erlaubt Text
word then Text
word else Text -> Text
procWord1 Text
word

procWord2Rep :: [Text] -> Text -> Writer Text Text 
procWord2Rep :: [Text] -> Text -> Writer Text Text
procWord2Rep [Text]
erlaubt Text
word = do
    let word1 :: Text
word1 = [Text] -> Text -> Text
procWord2 [Text]
erlaubt Text
word
    if Text
word1 forall a. Eq a => a -> a -> Bool
== Text
word 
        then forall (m :: * -> *) a. Monad m => a -> m a
return Text
word 
        else do
            forall (m :: * -> *) w. Monad m => w -> WriterT w m ()
tell  ((Text
word1forall a. Semigroup a => a -> a -> a
<>Text
" ")::Text)
            forall (m :: * -> *) a. Monad m => a -> m a
return Text
word1 
    


procWord1 :: Text -> Text
-- ^ convert the umlaut in a single word
-- no test, no exclusions
-- preserve capitalization
procWord1 :: Text -> Text
procWord1 Text
t =
  forall a. CharChains a => a -> a -> a -> a
replace' Text
"AE" Text
"Ä"
    forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. CharChains a => a -> a -> a -> a
replace' Text
"OE" Text
"Ö"
    forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. CharChains a => a -> a -> a -> a
replace' Text
"UE" Text
"Ü"
    forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. CharChains a => a -> a -> a -> a
replace' Text
"Ae" Text
"Ä"
    forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. CharChains a => a -> a -> a -> a
replace' Text
"Oe" Text
"Ö"
    forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. CharChains a => a -> a -> a -> a
replace' Text
"Ue" Text
"Ü"
    forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. CharChains a => a -> a -> a -> a
replace' Text
"ae" Text
"ä"
    forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. CharChains a => a -> a -> a -> a
replace' Text
"oe" Text
"ö"
    forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. CharChains a => a -> a -> a -> a
replace' Text
"ue" Text
"ü"
    forall a b. (a -> b) -> a -> b
$ Text
t

erlaubt1 :: [Text]  -- erlaubte Gruppen - for test only
erlaubt1 :: [Text]
erlaubt1 = forall a b. (a -> b) -> [a] -> [b]
map forall a. CharChains a => a -> a
toLower' [Text
"koef", Text
"poet", Text
"poes", Text
"neue", Text
"freue"] 

checkErlaubt :: [Text] -> Text -> Bool
-- ^ enthaelt das Wort eine erlaubte kombination
checkErlaubt :: [Text] -> Text -> Bool
checkErlaubt [Text]
erlaubt Text
word = forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any (\Text
e -> forall a. CharChains a => a -> a -> Bool
isInfixOf' Text
e forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. CharChains a => a -> a
toLower' forall a b. (a -> b) -> a -> b
$ Text
word) [Text]
erlaubt

checkErlaubt1 :: Text -> Bool 
checkErlaubt1 :: Text -> Bool
checkErlaubt1 = [Text] -> Text -> Bool
checkErlaubt [Text]
erlaubt1
-- mit fester Liste der erlaubten - for test