{-# LANGUAGE OverloadedLists #-}

module Data.Gibberish.Gen.Trigraph
  ( genTrigraph,
    loadTrigraph,
  ) where

import Data.Gibberish.Errors (GibberishErr (..))
import Data.Gibberish.Types
import Data.Gibberish.Utils (toQwertyKey)
import Paths_gibberish (getDataFileName)

import Control.Exception (throwIO)
import Control.Monad (unless)
import Data.Aeson qualified as Aeson
import Data.Char (isPunctuation, toLower)
import Data.Map.Strict (Map ())
import Data.Map.Strict qualified as Map
import Data.Maybe (fromJust)
import Data.Text (Text ())
import Data.Text qualified as Text
import System.Directory (doesFileExist)
import System.FilePath ((</>))

-- | Generate trigraphs from a list of words
genTrigraph :: [Text] -> Trigraph
genTrigraph :: [Text] -> Trigraph
genTrigraph = Map Digram Frequencies -> Trigraph
Trigraph (Map Digram Frequencies -> Trigraph)
-> ([Text] -> Map Digram Frequencies) -> [Text] -> Trigraph
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Text -> Map Digram Frequencies -> Map Digram Frequencies)
-> Map Digram Frequencies -> [Text] -> Map Digram Frequencies
forall a b. (a -> b -> b) -> b -> [a] -> b
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr (Text -> Map Digram Frequencies -> Map Digram Frequencies
foldWord (Text -> Map Digram Frequencies -> Map Digram Frequencies)
-> (Text -> Text)
-> Text
-> Map Digram Frequencies
-> Map Digram Frequencies
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
normalizeWord) Map Digram Frequencies
forall k a. Map k a
Map.empty
  where
    foldWord :: Text -> Map Digram Frequencies -> Map Digram Frequencies
foldWord = (Frequencies -> Frequencies -> Frequencies)
-> Map Digram Frequencies
-> Map Digram Frequencies
-> Map Digram Frequencies
forall k a. Ord k => (a -> a -> a) -> Map k a -> Map k a -> Map k a
Map.unionWith Frequencies -> Frequencies -> Frequencies
combine (Map Digram Frequencies
 -> Map Digram Frequencies -> Map Digram Frequencies)
-> (Text -> Map Digram Frequencies)
-> Text
-> Map Digram Frequencies
-> Map Digram Frequencies
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Map Digram Frequencies
mkTrigraph
    combine :: Frequencies -> Frequencies -> Frequencies
combine (Frequencies Map Unigram Frequency
f1) (Frequencies Map Unigram Frequency
f2) = Map Unigram Frequency -> Frequencies
Frequencies (Map Unigram Frequency -> Frequencies)
-> Map Unigram Frequency -> Frequencies
forall a b. (a -> b) -> a -> b
$ (Frequency -> Frequency -> Frequency)
-> Map Unigram Frequency
-> Map Unigram Frequency
-> Map Unigram Frequency
forall k a. Ord k => (a -> a -> a) -> Map k a -> Map k a -> Map k a
Map.unionWith Frequency -> Frequency -> Frequency
forall a. Num a => a -> a -> a
(+) Map Unigram Frequency
f1 Map Unigram Frequency
f2

-- | Generate a trigraph from a single word
mkTrigraph :: Text -> Map Digram Frequencies
mkTrigraph :: Text -> Map Digram Frequencies
mkTrigraph Text
word = (Trigram -> Map Digram Frequencies -> Map Digram Frequencies)
-> Map Digram Frequencies -> [Trigram] -> Map Digram Frequencies
forall a b. (a -> b -> b) -> b -> [a] -> b
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr Trigram -> Map Digram Frequencies -> Map Digram Frequencies
insert' Map Digram Frequencies
forall k a. Map k a
Map.empty ([Trigram] -> Map Digram Frequencies)
-> [Trigram] -> Map Digram Frequencies
forall a b. (a -> b) -> a -> b
$ Text -> [Trigram]
scanTrigrams Text
word
  where
    insert' :: Trigram -> Map Digram Frequencies -> Map Digram Frequencies
insert' (Trigram Char
a Char
b Char
c) =
      (Frequencies -> Frequencies -> Frequencies)
-> Digram
-> Frequencies
-> Map Digram Frequencies
-> Map Digram Frequencies
forall k a. Ord k => (a -> a -> a) -> k -> a -> Map k a -> Map k a
Map.insertWith Frequencies -> Frequencies -> Frequencies
combineFrequencies (Char -> Char -> Digram
Digram Char
a Char
b) (Char -> Frequencies
mkFrequencies Char
c)
    combineFrequencies :: Frequencies -> Frequencies -> Frequencies
combineFrequencies (Frequencies Map Unigram Frequency
m1) (Frequencies Map Unigram Frequency
m2) =
      Map Unigram Frequency -> Frequencies
Frequencies ((Frequency -> Frequency -> Frequency)
-> Map Unigram Frequency
-> Map Unigram Frequency
-> Map Unigram Frequency
forall k a. Ord k => (a -> a -> a) -> Map k a -> Map k a -> Map k a
Map.unionWith Frequency -> Frequency -> Frequency
forall a. Num a => a -> a -> a
(+) Map Unigram Frequency
m1 Map Unigram Frequency
m2)
    mkFrequencies :: Char -> Frequencies
mkFrequencies Char
c = Map Unigram Frequency -> Frequencies
Frequencies (Map Unigram Frequency -> Frequencies)
-> Map Unigram Frequency -> Frequencies
forall a b. (a -> b) -> a -> b
$ Unigram -> Frequency -> Map Unigram Frequency
forall k a. k -> a -> Map k a
Map.singleton (Char -> Unigram
Unigram Char
c) Frequency
1

-- | Normalize a word before calculating the trigraph:
--
--  1. Remove punctuation (quotes, dashes, and so on)
--  2. Lower case all letters
--  3. Translate non-qwerty chars to qwerty keys (eg, à -> a)
normalizeWord :: Text -> Text
normalizeWord :: Text -> Text
normalizeWord = (Char -> Char) -> Text -> Text
Text.map Char -> Char
transformChar (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> Text -> Text
Text.filter Char -> Bool
filterChar
  where
    transformChar :: Char -> Char
    transformChar :: Char -> Char
transformChar = Char -> Char
toQwertyKey (Char -> Char) -> (Char -> Char) -> Char -> Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Char
toLower

    filterChar :: Char -> Bool
    filterChar :: Char -> Bool
filterChar = Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
isPunctuation

scanTrigrams :: Text -> [Trigram]
scanTrigrams :: Text -> [Trigram]
scanTrigrams Text
word = case Int -> Text -> Text
Text.take Int
3 Text
word of
  [Item Text
a, Item Text
b, Item Text
c] -> Char -> Char -> Char -> Trigram
Trigram Char
Item Text
a Char
Item Text
b Char
Item Text
c Trigram -> [Trigram] -> [Trigram]
forall a. a -> [a] -> [a]
: Text -> [Trigram]
scanTrigrams (HasCallStack => Text -> Text
Text -> Text
Text.tail Text
word)
  Text
_ -> []

loadTrigraph :: Language -> IO Trigraph
loadTrigraph :: Language -> IO Trigraph
loadTrigraph Language
English = FilePath -> IO Trigraph
loadBuiltinTrigraph FilePath
"wamerican.json"
loadTrigraph Language
Spanish = FilePath -> IO Trigraph
loadBuiltinTrigraph FilePath
"wspanish.json"
loadTrigraph (CustomTrigraph TrigraphConfig
cfg) = FilePath -> IO Trigraph
loadTrigraphFromFile (TrigraphConfig -> FilePath
unTrigraphConfig TrigraphConfig
cfg)

loadBuiltinTrigraph :: FilePath -> IO Trigraph
loadBuiltinTrigraph :: FilePath -> IO Trigraph
loadBuiltinTrigraph FilePath
file' = FilePath -> IO Trigraph
loadTrigraphFromFile (FilePath -> IO Trigraph) -> IO FilePath -> IO Trigraph
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< FilePath -> IO FilePath
getBuiltinFilePath FilePath
file'
  where
    getBuiltinFilePath :: FilePath -> IO FilePath
getBuiltinFilePath FilePath
basename = FilePath -> IO FilePath
getDataFileName (FilePath
"data" FilePath -> FilePath -> FilePath
</> FilePath
"trigraphs" FilePath -> FilePath -> FilePath
</> FilePath
basename)

loadTrigraphFromFile :: FilePath -> IO Trigraph
loadTrigraphFromFile :: FilePath -> IO Trigraph
loadTrigraphFromFile FilePath
file' = do
  Bool
exists <- FilePath -> IO Bool
doesFileExist FilePath
file'
  Bool -> IO () -> IO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless Bool
exists (IO () -> IO ()) -> IO () -> IO ()
forall a b. (a -> b) -> a -> b
$
    GibberishErr -> IO ()
forall e a. Exception e => e -> IO a
throwIO (FilePath -> GibberishErr
TrigraphNotFound FilePath
file')

  Maybe Trigraph -> Trigraph
forall a. HasCallStack => Maybe a -> a
fromJust (Maybe Trigraph -> Trigraph) -> IO (Maybe Trigraph) -> IO Trigraph
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> FilePath -> IO (Maybe Trigraph)
forall a. FromJSON a => FilePath -> IO (Maybe a)
Aeson.decodeFileStrict FilePath
file'