{-# LANGUAGE GeneralizedNewtypeDeriving #-} 
{-# LANGUAGE ScopedTypeVariables #-} 
{-# LANGUAGE RecordWildCards #-} 
{-# LANGUAGE OverloadedStrings #-} 
{-# LANGUAGE TupleSections #-} 

-- | The module provides functions for working with the binary
-- representation of the historical dictionary of Polish.

module NLP.Polh.Binary
( BinEntry (..)
, Key (..)
, Rule (..)
, proxyForm
, binKey
, between
, apply

, savePolh
, loadPolh

, PolhT
, runPolhT
, PolhM
, runPolh
, index
, withKey
, lookup
) where

import Prelude hiding (lookup)
import Control.Exception (try, SomeException)
import Control.Applicative (Applicative, (<$>), (<*>))
import Control.Monad (when, guard)
import Control.Monad.Trans.Class (MonadTrans)
import Control.Monad.IO.Class (liftIO, MonadIO)
import Control.Monad.Reader (ReaderT (..), ask, lift)
import Control.Monad.Trans.Maybe (MaybeT (..))
import System.IO.Unsafe (unsafeInterleaveIO)
import System.FilePath ((</>))
import System.Directory ( getDirectoryContents, createDirectoryIfMissing
                        , createDirectory, doesDirectoryExist )
import Data.Maybe (catMaybes)
import Data.List (mapAccumL)
import Data.Binary (Binary, get, put, encodeFile, decodeFile)
import qualified Data.Set as S
import qualified Data.Text as T
import qualified Data.DAWG.Dynamic as DD
import qualified Data.DAWG.Static as D

import NLP.Polh.Types
import qualified NLP.Polh.Util as Util

-- | Static DAWG version.
type DAWG a  = D.DAWG Char () a

-- | Path to entries in the binary dictionary.
entryDir :: String
entryDir = "entries"

-- | Path to key map in the binary dictionary.
formMapFile :: String
formMapFile = "forms.bin"

-- | Entry in the binary dictionary consists of the lexical
-- entry and corresponding unique identifier.
data BinEntry = BinEntry {
    -- | Lexical entry.
      entry :: LexEntry
    -- | Unique identifier among lexical entries with the same first form
    -- (see 'Key' data type).
    , uid   :: Int }
    deriving (Show, Eq, Ord)

instance Binary BinEntry where
    put BinEntry{..} = put entry >> put uid
    get = BinEntry <$> get <*> get

-- | A dictionary key which uniquely identifies the lexical entry.
data Key = Key {
    -- | First form (presumably lemma) of the lexical entry.
      keyForm   :: T.Text
    -- | Unique identifier among lexical entries with the same 'keyForm'.
    , keyUid    :: Int }
    deriving (Show, Eq, Ord)

-- | Form representing the lexical entry.
proxyForm :: LexEntry -> T.Text
proxyForm entry = case Util.allForms entry of
    (x:_)   -> x
    []      -> error "proxyForm: entry with no forms"

-- | Key assigned to the binary entry.
binKey :: BinEntry -> Key
binKey BinEntry{..} = Key (proxyForm entry) uid

-- | Convert the key to the path where binary representation of the entry
-- is stored.
showKey :: Key -> String
showKey Key{..} = (T.unpack . T.concat) [T.pack (show keyUid), "-", keyForm]

-- | Parse the key.
parseKey :: String -> Key
parseKey x =
    let (uid'S, (_:form'S)) = break (=='-') x
    in  Key (T.pack form'S) (read uid'S)

-- | Load the directory contents.
loadContents :: FilePath -> IO [FilePath]
loadContents path = do
    xs <- getDirectoryContents path
    return [x | x <- xs, x /= ".", x /= ".."]

-- | Check if the directory is empty.
emptyDirectory :: FilePath -> IO Bool
emptyDirectory path = null <$> loadContents path

-- | Save the binary entry on the disk.
saveLexEntry :: FilePath -> BinEntry -> IO ()
saveLexEntry path x =
    let binPath = showKey . binKey
    in  encodeFile (path </> binPath x) x

withUid :: DD.DAWG Char Int -> LexEntry -> (DD.DAWG Char Int, BinEntry)
withUid m x =
    let path = T.unpack (proxyForm x)
        num  = maybe 0 id (DD.lookup path m) + 1
    in  (DD.insert path num m, BinEntry x num)

withUids :: [LexEntry] -> [BinEntry]
withUids = snd . mapAccumL withUid DD.empty

mapIO'Lazy :: (a -> IO b) -> [a] -> IO [b]
mapIO'Lazy f (x:xs) = (:) <$> f x <*> unsafeInterleaveIO (mapIO'Lazy f xs)
mapIO'Lazy _ []     = return []

-- | Save the polh dictionary in the empty directory.
savePolh :: FilePath -> Polh -> IO ()
savePolh path xs = do
    createDirectoryIfMissing True path
    isEmpty <- emptyDirectory path
    when (not isEmpty) $ do
        error $ "savePolh: directory " ++ path ++ " is not empty"
    let lexPath = path </> entryDir
    createDirectory lexPath
    formMap' <- D.fromListWith S.union . concat
        <$> mapIO'Lazy (saveLex lexPath) (withUids xs)
    encodeFile (path </> formMapFile) formMap'
  where
    saveLex lexPath x = do
        saveLexEntry lexPath x
        return $ rules x
    rules binEntry =
        [ ( T.unpack x
          , S.singleton (between x key) )
        | x <- Util.allForms (entry binEntry) ]
      where
        key = binKey binEntry

maybeErr :: MonadIO m => IO a -> m (Maybe a)
maybeErr io = do
    r <- liftIO (try io)
    case r of
        Left (_e :: SomeException)  -> return Nothing
        Right x                     -> return (Just x)

maybeT :: Monad m => Maybe a -> MaybeT m a
maybeT = MaybeT . return
{-# INLINE maybeT #-}

maybeErrT :: MonadIO m => IO a -> MaybeT m a
maybeErrT io = do
    r <- liftIO (maybeErr io)
    maybeT r

-- | Load lexical entry from disk by its key.
loadLexEntry :: FilePath -> Key -> IO (Maybe BinEntry)
loadLexEntry path key = do
    maybeErr $ decodeFile (path </> showKey key)

-- | A rule for translating a form into a binary dictionary key.
data Rule = Rule {
    -- | Number of characters to cut from the end of the form.
      cut       :: !Int
    -- | A suffix to paste.
    , suffix    :: !T.Text
    -- | Unique identifier of the entry.
    , ruleUid   :: !Int }
    deriving (Show, Eq, Ord)

instance Binary Rule where
    put Rule{..} = put cut >> put suffix >> put ruleUid
    get = Rule <$> get <*> get <*> get

-- | Apply the rule.
apply :: Rule -> T.Text -> Key
apply r x =
    let y = T.take (T.length x - cut r) x `T.append` suffix r
    in  Key y (ruleUid r)

-- | Make a rule which translates between the string and the key.
between :: T.Text -> Key -> Rule
between source dest =
    let k = lcp source (keyForm dest)
    in  Rule (T.length source - k) (T.drop k (keyForm dest)) (keyUid dest)
  where
    lcp a b = case T.commonPrefixes a b of
        Just (c, _, _)  -> T.length c
        Nothing         -> 0

-- | Binary dictionary data kept in program memory.
data MemData = MemData
    { polhPath  :: FilePath
    , formMap   :: DAWG (S.Set Rule) }

-- | A Polh monad transformer.
newtype PolhT m a = PolhT (ReaderT MemData m a)
    deriving (Functor, Applicative, Monad, MonadTrans, MonadIO)

-- | A Polh monad is a Polh monad transformer over the hidden IO monad.
type PolhM a = PolhT IO a

-- | Path to directory with entries.
entryPath :: MemData -> FilePath
entryPath = (</> entryDir) . polhPath

-- | List of dictionary keys.
index :: (Applicative m, MonadIO m) => PolhT m [Key]
index = PolhT $ do
    path <- entryPath <$> ask
    map parseKey <$> liftIO (loadContents path)

-- | Extract lexical entry with the given key.
withKey :: (Applicative m, MonadIO m) => Key -> PolhT m (Maybe BinEntry)
withKey key = PolhT $ do
    path <- entryPath <$> ask
    liftIO . unsafeInterleaveIO $ loadLexEntry path key

-- | Lookup the form in the dictionary.
lookup :: (Applicative m, MonadIO m) => T.Text -> PolhT m [BinEntry]
lookup x = do
    fm <- PolhT $ formMap <$> ask
    keys <- return $ case D.lookup (T.unpack x) fm of
        Nothing -> []
        Just xs -> map (flip apply x) (S.toList xs)
    catMaybes <$> mapM withKey keys

-- | Execute the Polh monad against the binary Polh representation
-- located in the given directory.  Return Nothing if the directory
-- doesnt' exist or if it doesn't look like a Polh dictionary.
runPolh :: FilePath -> PolhM a -> IO (Maybe a)
runPolh path polh = runPolhT path polh

-- | Execute the Polh monad transformer against the binary Polh representation
-- located in the given directory.  Return Nothing if the directory doesnt'
-- exist or if it doesn't look like a Polh dictionary.
runPolhT :: MonadIO m => FilePath -> PolhT m a -> m (Maybe a)
runPolhT path (PolhT r) = runMaybeT $ do
    formMap' <- maybeErrT $ decodeFile (path </> formMapFile)
    doesExist <- liftIO $ doesDirectoryExist (path </> entryDir)
    guard doesExist 
    lift $ runReaderT r (MemData path formMap')

-- | Load dictionary from a disk in a lazy manner.  Return 'Nothing'
-- if the path doesn't correspond to a binary representation of the
-- dictionary. 
loadPolh :: FilePath -> IO (Maybe [BinEntry])
loadPolh path = runPolhT path $ do
    keys <- index
    catMaybes <$> mapM withKey keys

-- We don't provide update functionality since we want only the pure
-- iterface to be visible.  It greatly simplifies the implementation.
--
-- updateLexEntry :: FilePath -> Key -> (LexEntry -> LexEntry) -> IO LexEntry
-- updateLexEntry path lexKey f = do
--     lexEntry <- loadLexEntry path lexKey
--     let lexEntry' = f lexEntry
--     saveLexEntry path lexEntry'
--     return lexEntry'
-- 
-- updateLexEntry_ :: FilePath -> Key -> (LexEntry -> LexEntry) -> IO ()
-- updateLexEntry_ path lexKey f = do
--     lexEntry <- loadLexEntry path lexKey
--     saveLexEntry path (f lexEntry)