-- |
-- Module:      Data.HashCons
-- Description: Hash-consing support
-- Copyright:   © 2018 Andy Morris
-- Licence:     BSD-3-Clause
-- Maintainer:  hello@andy-morris.xyz
-- Stability:   experimental
-- Portability: GHC internals; weak pointers & finalizers; stable names
--
-- Hash-consing, or interning, is a way to gain constant-time equality testing
-- (and hashing) for potentially-large data types such as strings or abstract
-- syntax trees. Internally a table of live values is kept, and
-- newly-constructed values are looked up in the table to check if they already
-- exist. If they do, then the existing one is reused (along with a tag). The
-- table is pruned using finalisers when these tagged values are garbage
-- collected.
--
-- This library should be thread- and exception-safe.

{-# LANGUAGE BangPatterns, FlexibleInstances #-}

module Data.HashCons
  (HashCons, hc,
   HC, getVal, getTag, Tag,
   Hashable (..))
where

import Data.HashCons.ConstRef
import Data.HashCons.MkWeak

import Data.Hashable
import Data.HashTable.IO (BasicHashTable)
import qualified Data.HashTable.IO as HashTable

import Control.DeepSeq

import Control.Concurrent.MVar
import System.Mem.StableName

import System.IO.Unsafe
import Foreign

-- for HashCons instances {{{
import Numeric.Natural
import qualified Data.Text as S
import qualified Data.Text.Lazy as L
import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
-- }}}


-- * Why tags?
--
-- After reading the implementation below, you might come away wondering
-- something like, "why does HC have a Tag? Why can't it just check equality of
-- the ConstRef inside?"
--
-- That would work for the HCs themselves; the purpose of the tag is for
-- memoisation. The memo tables are pruned when a value is garbage collected,
-- but if the values are /in/ the table, then this can never happen. A Tag
-- doesn't actually contain any references to the value itself, so it can be
-- used as the memo table key and allow pruning.
--
-- Less importantly, Tags also have constant-time hashing, however this could
-- also be achieved by storing the original value's hash.
--
-- Tags are also exported to the outside world since other applications may find
-- their non-ownership behaviour useful.


-- | A tag for a value. Tags are unique among values which are simultaneously
-- alive. They also /don't/ keep the corresponding value alive on their own.
newtype Tag a = Tag {fromTag :: StableName a} deriving Eq

instance Hashable (Tag a) where
  hash         = hash . fromTag
  hashWithSalt = hashUsing fromTag

makeTag :: a -> IO (Tag a)
makeTag x = fmap Tag . makeStableName $! x


-- | A value which has been given a unique tag.
data HC a = HC {-# UNPACK #-} !(Tag a) !(ConstRef a)

-- | Make an @HC@ value.
makeHC :: a -> IO (HC a)
makeHC x = HC <$> makeTag x <*> newConstRef x

-- | Retrieves the unique tag for the value.
getTag :: HC a -> Tag a
getTag (HC t _) = t

-- | Retrieves the underlying value.
getVal :: HC a -> a
getVal (HC _ x) = readConstRef x


-- | \(\mathcal{O}(1)\) using the tag
instance Eq (HC a) where
  x == y = getTag x == getTag y

-- | Checks the tag for equality first, and otherwise falls back to the
-- underlying type's ordering
instance Ord a => Ord (HC a) where
  compare x y = if x == y then EQ else compare (getVal x) (getVal y)

-- | Shows the underlying value
instance Show a => Show (HC a) where
  showsPrec d = showsPrec d . getVal

-- | \(\mathcal{O}(1)\) using the tag
instance Hashable (HC a) where
  hash         = hash . getTag
  hashWithSalt = hashUsing getTag

-- | Also evaluates the underlying value
instance NFData a => NFData (HC a) where
  rnf = rnf . getVal

instance MkWeak (HC a) where
  mkWeak (HC _ x) = mkWeak x

-- | Reads an underlying value and caches it
instance (Read a, HashCons a) => Read (HC a) where
  readsPrec d = map (\(x, s) -> (hc x, s)) . readsPrec d

-- | Stores the underlying value, and re-caches it on retrieval
instance (Storable a, HashCons a) => Storable (HC a) where
  sizeOf    = sizeOf . getVal
  alignment = alignment . getVal
  peek      = fmap hc . peek . castPtr
  poke p    = poke (castPtr p) . getVal


type HashTable k v = BasicHashTable k v

newtype Cache a = C (MVar (HashTable (Hashed a) (CacheEntry a)))

type CacheEntry a = Weak (HC a)


newCache :: IO (Cache a)
newCache =
  fmap C $ newMVar =<< HashTable.new

remove :: (Eq a, Hashable a) => a -> Cache a -> IO ()
remove x (C var) =
  let !hx = hashed x in
  withMVar var $ \cache ->
    HashTable.delete cache hx

lookupOrAdd :: (Eq a, Hashable a) => a -> Cache a -> IO (HC a)
lookupOrAdd x c@(C var) =
    withMVar var $ \cache ->
      HashTable.lookup cache hx >>= \ent -> case ent of
        Nothing  -> newHC cache
        Just ptr -> deRefWeak ptr >>= \y' -> case y' of
          Nothing -> newHC cache
          Just y  -> pure y
  where
    !hx = hashed x
    newHC cache = do
      y   <- makeHC x
      ptr <- mkWeakPtr y (Just $ remove x c)
      y <$ HashTable.insert cache hx ptr


-- | Types which support hash-consing.
--
-- There are some restrictions on types for which this class makes sense:
--
-- 1. The type must have no type variables: an instance for @T Int@ would be
--    fine, but not for @T a@. (There must also be no constraints, but that is
--    unlikely to be a problem if all instances are ground.)
-- 2. Equality and hashing must consider all data in a value. It need not
--    necessarily be structural equality, but a subterm should not simply be
--    ignored. (An example of why someone might want to ave equality ignore
--    parts of a type is annotations in an abstract syntax tree.)
class (Eq a, Hashable a) => HashCons a where
  hcCache :: Cache a
  hcCache = unsafePerformIO newCache
  {-# NOINLINE hcCache #-}

-- | Make a hash-consed value.
hc :: HashCons a => a -> HC a
hc x = unsafePerformIO $ lookupOrAdd x hcCache


instance HashCons Integer
instance HashCons Natural
instance HashCons [Char]
-- others?


instance HashCons S.Text
instance HashCons L.Text

instance HashCons S.ByteString
instance HashCons L.ByteString