{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TypeSynonymInstances #-}
{-# LANGUAGE FlexibleInstances #-}

{-|
Module      : Text.Pronounce
Description : A library for interfacing with the CMU Pronouncing Dictionary
Copyright   : (c) Noah Goodman, 2018
License     : BSD3
Stability   : experimental

This is a library for interpreting the parsed Carnegie Mellon University Pronouncing
Dictionary. It is modelled after Allison Parrish's python library, @pronouncing@.
-}
module Text.Pronounce (
    -- * Fundamentals
    -- ** Basic Datatypes
      CMUdict
    , Entry
    , Phones
    , Stress
    -- ** The Dictionary Computation Monad
    , DictComp
    , dictcomp
    , runPronounce
    -- ** Using Text.Pronounce
    , initDict
    , stdDict
    , DictSource
    -- * Basic Functions
    , phonesForEntry
    , stressesForEntry
    , noStress
    , stresses
    , syllableCount
    -- * Searching the Dictionary
    -- ** Field Selectors
    , entries
    , phoneses
    , pairs
    -- ** Filtering Searches
    , DictField
    , filterDict
    , filterComp
    -- ** Specific Searches
    , search
    , searchStresses
    -- * Rhyming
    , rhymingPart
    , rhymesUsing
    , rhymes
    ) where

import           Text.Pronounce.ParseDict

import           Control.Monad.Reader
import           Control.Monad
import           Data.Char (isDigit)
import           Data.Function (on)
import           Data.List
import           Data.Map (Map)
import qualified Data.Map as Map
import           Data.Maybe (catMaybes)
import           Data.Text (Text)
import qualified Data.Text as T
import           Safe (readMay)

-- | We are using the List monad inside the ReaderT monad to perform nondeterministic computations
-- (due to the possibility of multiple @Phones@ patterns per @Entry@) in the context of the
-- CMU dictionary without having to pass it as an argument to every function.
type DictComp = ReaderT CMUdict []

-- | Contruct a Dictionary Computation based on a selector function on the
-- @CMUdict@ that returns a list of possible results. This is just a synonym for
-- the @ReaderT@ constructor.
dictcomp :: (CMUdict -> [a]) -> DictComp a
dictcomp = ReaderT

-- | Get the possible values resulting from a series of Dictionary Computations by supplying the
-- dictionary to the computation. This is just @runReaderT@.
runPronounce :: DictComp a -> CMUdict -> [a]
runPronounce = runReaderT

-- | Type alias for a stress pattern, which is a list of integers 0-2 indicating
-- stress.
--
--   * 0 -> unstressed
--   * 1 -> primary stress
--   * 2 -> secondary stress
type Stress = [Int]

-- | Look up the pronunciation (list of possible phones) of a word in the
-- dictionary
phonesForEntry :: Entry -> DictComp Phones
phonesForEntry = dictcomp . Map.findWithDefault []

-- | Gives the stress pattern for a given word in the dictionary
stressesForEntry :: Entry -> DictComp Stress
stressesForEntry = fmap stresses . phonesForEntry

-- | Strips the stress-indicating numbers off of a phones
noStress :: Phones -> Phones
noStress = fmap (T.filter (not . isDigit))

-- | Isolates the stress pattern from a sequence of phones
stresses :: Phones -> Stress
stresses = catMaybes . fmap (readMay . filter isDigit . T.unpack)

-- | Gives the syllable count of a given pronunciation
syllableCount :: Phones -> Int
syllableCount = length . stresses

-- | Finds the rhyming part of the given phones, where the rhyming part is
-- defined as everything in a word after and including the last stressed or
-- semistressed phone. Note that this is merely one
-- interpretation of what constitutes a rhyme. There exist both stricter and
-- looser definitions that may be suited to different purposes.
rhymingPart :: Phones -> Phones
rhymingPart = reverse
            . takeWhileInc ((`notElem` ['1','2']) . T.last)
            . reverse
    where takeWhileInc _ [] = []
          takeWhileInc p (x:xs) = x : if p x then takeWhileInc p xs else []

-- | A class that provides a generalized function @filterDict@ for filtering the
-- @CMUdict@ based on a choice of different "fields"
class DictField a where
    filterDict :: (a -> Bool) -> CMUdict -> CMUdict

instance DictField Entry where
    filterDict = Map.filterWithKey . fmap const

instance DictField Phones where
    filterDict = Map.filter . any

instance DictField (Entry, Phones) where
    filterDict = Map.filterWithKey . fmap any . curry

instance DictField [Phones] where
    filterDict = Map.filter

instance DictField (Entry, [Phones]) where
    filterDict = Map.filterWithKey . curry

-- | Filter a the results of a @DictComp@, taking only those whose corresponing
-- entries conform to the selector function
filterComp :: DictField a => (a -> Bool) -> DictComp b -> DictComp b
filterComp = local . filterDict

-- | A Dictionary Computation that returns a list of all the entry words in the
-- @CMUdict@
entries :: DictComp Entry
entries = dictcomp Map.keys

-- | A Dictionary Computation that returns a list of all the lists of phones in
-- the @CMUdict@
phoneses :: DictComp [Phones]
phoneses = dictcomp Map.elems

-- | A Dictionary Computation that returns a list of all the @(key,value) pairs
-- in the @CMUdict@
pairs :: DictComp (Entry,[Phones])
pairs = dictcomp Map.toList

-- | Given a sequence of phones, find all words that contain that sequence of
-- phones
search :: Phones -> DictComp Entry
search subPhones = filterComp (subPhones `isInfixOf`) entries

-- | Given a stress pattern, find all words that satisfy that pattern
searchStresses :: Stress -> DictComp Entry
searchStresses stress = filterComp ((== stress) . stresses) entries

-- | Given a function that tells whether or not two sets of phones rhyme, and an
-- entry, find all words that rhyme with that entry according to the provided
-- definition of a rhyme
rhymesUsing :: (Phones -> Phones -> Bool) -> Entry -> DictComp Entry
rhymesUsing rhymesWith word = do phones <- phonesForEntry word
                                 match <- filterComp (rhymesWith phones) entries
                                 guard (match /= word)
                                 return match

-- | Given a word, finds all other words that rhyme with it, using the default
-- @rhymingPart@ definition
rhymes :: Entry -> DictComp Entry
rhymes = rhymesUsing ((==) `on` rhymingPart)