{-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE DeriveFunctor #-} {-# language GeneralizedNewtypeDeriving #-} {-# LANGUAGE LambdaCase #-} {-# OPTIONS_GHC -Wno-unused-imports -Wno-unused-top-binds #-} module Numeric.AD.DelCont.Internal (rad1, rad2, grad, auto, rad1g, rad2g, radNg, op1Num, op2Num, op1, op2, AD0, AD, AD') where import Control.Monad.ST (ST, runST) import Data.Bifunctor (Bifunctor(..)) import Data.STRef (STRef, newSTRef, readSTRef, modifySTRef') -- transformers import Control.Monad.Trans.Class (MonadTrans(..)) import Control.Monad.Trans.Cont (ContT, shiftT, resetT, evalContT) import Prelude hiding (read) -- | Dual numbers data D a da = D { primal :: a, dual :: da } deriving (Show, Functor) instance Eq a => Eq (D a da) where D x _ == D y _ = x == y instance Ord a => Ord (D a db) where compare (D x _) (D y _) = compare x y instance Bifunctor D where bimap f g (D a b) = D (f a) (g b) -- | Modify the adjoint part of a 'D' withD :: (da -> db) -> D a da -> D a db withD = second -- | Differentiable variable -- -- A (safely) mutable reference to a dual number type DVar s a da = STRef s (D a da) -- | Introduce a fresh DVar var :: a -> da -> ST s (DVar s a da) var x dx = newSTRef (D x dx) -- | Lift a constant value into 'AD' -- -- As one expects from a constant, its value will be used for computing the result, but it will be discarded when computing the sensitivities. auto :: a -> AD s a da auto x = AD0 $ lift $ var x undefined -- NB blows up with -XStrict autoStrict :: a -> da -> AD0 s (DVar s a da) autoStrict x dx = AD0 $ lift $ var x dx -- | Mutable references in the continuation monad newtype AD0 s a = AD0 { unAD0 :: forall x . ContT x (ST s) a } deriving (Functor) instance Applicative (AD0 s) where AD0 f <*> AD0 x = AD0 (f <*> x) pure x = AD0 $ pure x -- instance Monad (AD s) where -- TODO -- | A synonym of 'AD0' for the common case of returning a 'DVar' (which is a 'ST' computation that returns a dual number) -- -- Here the @a@ and @da@ type parameters are respectively the /primal/ and /dual/ quantities tracked by the AD computation. type AD s a da = AD0 s (DVar s a da) -- | Like 'AD' but the types of primal and dual coincide type AD' s a = AD s a a -- | Lift a unary function -- -- This is a polymorphic combinator for tracking how primal and adjoint values are transformed by a function. -- -- How does this work : -- -- 1) Compute the function result and bind the function inputs to the adjoint updating function (the "pullback") -- -- 2) Allocate a fresh STRef @rb@ with the function result and @zero@ adjoint part -- -- 3) @rb@ is passed downstream as an argument to the continuation @k@, with the expectation that the STRef will be mutated -- -- 4) Upon returning from the @k@ (bouncing from the boundary of @resetT@), the mutated STRef is read back in -- -- 5) The adjoint part of the input variable is updated using @rb@ and the result of the continuation is returned. op1_ :: db -- ^ zero -> (da -> da -> da) -- ^ plus -> (a -> (b, db -> da)) -- ^ returns : (function result, pullback) -> ContT x (ST s) (DVar s a da) -> ContT x (ST s) (DVar s b db) op1_ zeroa plusa f ioa = do ra <- ioa (D xa _) <- lift $ readSTRef ra let (xb, g) = f xa -- 1) shiftT $ \ k -> lift $ do rb <- var xb zeroa -- 2) ry <- k rb -- 3) (D _ yd) <- readSTRef rb -- 4) modifySTRef' ra (withD (\rda0 -> rda0 `plusa` g yd)) -- 5) pure ry -- | Lift a unary function -- -- The first two arguments constrain the types of the adjoint values of the output and input variable respectively, see 'op1Num' for an example. -- -- The third argument is the most interesting: it specifies at once how to compute the function value and how to compute the sensitivity with respect to the function parameter. -- -- Note : the type parameters are completely unconstrained. op1 :: db -- ^ zero -> (da -> da -> da) -- ^ plus -> (a -> (b, db -> da)) -- ^ returns : (function result, pullback) -> AD s a da -> AD s b db op1 z plusa f (AD0 ioa) = AD0 $ op1_ z plusa f ioa -- | Helper for constructing unary functions that operate on Num instances (i.e. 'op1' specialized to Num) op1Num :: (Num da, Num db) => (a -> (b, db -> da)) -- ^ returns : (function result, pullback) -> AD s a da -> AD s b db op1Num = op1 0 (+) -- | Lift a binary function op2_ :: dc -- ^ zero -> (da -> da -> da) -- ^ plus -> (db -> db -> db) -- ^ plus -> (a -> b -> (c, dc -> da, dc -> db)) -- ^ returns : (function result, pullbacks) -> ContT x (ST s) (DVar s a da) -> ContT x (ST s) (DVar s b db) -> ContT x (ST s) (DVar s c dc) op2_ zeroa plusa plusb f ioa iob = do ra <- ioa rb <- iob (D xa _) <- lift $ readSTRef ra (D xb _) <- lift $ readSTRef rb let (xc, g, h) = f xa xb shiftT $ \ k -> lift $ do rc <- var xc zeroa ry <- k rc (D _ yd) <- readSTRef rc modifySTRef' ra (withD (\rda0 -> rda0 `plusa` g yd)) modifySTRef' rb (withD (\rdb0 -> rdb0 `plusb` h yd)) pure ry -- | Lift a binary function -- -- See 'op1' for more information. op2 :: dc -- ^ zero -> (da -> da -> da) -- ^ plus -> (db -> db -> db) -- ^ plus -> (a -> b -> (c, dc -> da, dc -> db)) -- ^ returns : (function result, pullbacks) -> (AD s a da -> AD s b db -> AD s c dc) op2 z plusa plusb f (AD0 ioa) (AD0 iob) = AD0 $ op2_ z plusa plusb f ioa iob -- | Helper for constructing binary functions that operate on Num instances (i.e. 'op2' specialized to Num) op2Num :: (Num da, Num db, Num dc) => (a -> b -> (c, dc -> da, dc -> db)) -- ^ returns : (function result, pullback) -> AD s a da -> AD s b db -> AD s c dc op2Num = op2 0 (+) (+) -- | The numerical methods of (Num, Fractional, Floating etc.) can be read off their @backprop@ counterparts : https://hackage.haskell.org/package/backprop-0.2.6.4/docs/src/Numeric.Backprop.Op.html#%2A. instance (Num a) => Num (AD s a a) where (+) = op2Num $ \x y -> (x + y, id, id) (-) = op2Num $ \x y -> (x - y, id, negate) (*) = op2Num $ \x y -> (x*y, (y *), (x *)) fromInteger x = auto (fromInteger x) abs = op1Num $ \x -> (abs x, (* signum x)) signum = op1Num $ \x -> (signum x, const 0) instance (Fractional a) => Fractional (AD s a a) where (/) = op2Num $ \x y -> (x / y, (/ y), (\g -> -g*x/(y*y) )) fromRational x = auto (fromRational x) recip = op1Num $ \x -> (recip x, (/(x*x)) . negate) instance Floating a => Floating (AD s a a) where pi = auto pi exp = op1Num $ \x -> (exp x, (exp x *)) log = op1Num $ \x -> (log x, (/x)) sqrt = op1Num $ \x -> (sqrt x, (/ (2 * sqrt x))) (**) = op2Num $ \x y -> (x ** y, (* (y * x ** (y - 1))), (* (x ** y * log x))) logBase = op2Num $ \x y -> let dx = - logBase x y / (log x * x) in ( logBase x y , ( * dx) , (/(y * log x)) ) sin = op1Num $ \x -> (sin x, (* cos x)) cos = op1Num $ \x -> (cos x, (* (-sin x))) tan = op1Num $ \x -> (tan x, (/ cos x^(2::Int))) asin = op1Num $ \x -> (asin x, (/ sqrt(1 - x*x))) acos = op1Num $ \x -> (acos x, (/ sqrt (1 - x*x)) . negate) atan = op1Num $ \x -> (atan x, (/ (x*x + 1))) sinh = op1Num $ \x -> (sinh x, (* cosh x)) cosh = op1Num $ \x -> (cosh x, (* sinh x)) tanh = op1Num $ \x -> (tanh x, (/ cosh x^(2::Int))) asinh = op1Num $ \x -> (asinh x, (/ sqrt (x*x + 1))) acosh = op1Num $ \x -> (acosh x, (/ sqrt (x*x - 1))) atanh = op1Num $ \x -> (atanh x, (/ (1 - x*x))) -- -- instance Eq a => Eq (AD s a da) where -- ??? likely impossible -- -- instance Ord (AD s a da) where -- ??? see above -- | Evaluate (forward mode) and differentiate (reverse mode) a unary function, without committing to a specific numeric typeclass rad1g :: da -- ^ zero -> db -- ^ one -> (forall s . AD s a da -> AD s b db) -> a -- ^ function argument -> (b, da) -- ^ (result, adjoint) rad1g zeroa oneb f x = runST $ do xr <- var x zeroa zr' <- evalContT $ resetT $ do let z = f (AD0 (pure xr)) zr <- unAD0 z lift $ modifySTRef' zr (withD (const oneb)) pure zr (D z _) <- readSTRef zr' (D _ x_bar) <- readSTRef xr pure (z, x_bar) -- | Evaluate (forward mode) and differentiate (reverse mode) a binary function, without committing to a specific numeric typeclass rad2g :: da -- ^ zero -> db -- ^ zero -> dc -- ^ one -> (forall s . AD s a da -> AD s b db -> AD s c dc) -> a -> b -> (c, (da, db)) -- ^ (result, adjoints) rad2g zeroa zerob onec f x y = runST $ do xr <- var x zeroa yr <- var y zerob zr' <- evalContT $ resetT $ do let z = f (AD0 (pure xr)) (AD0 (pure yr)) zr <- unAD0 z lift $ modifySTRef' zr (withD (const onec)) pure zr (D z _) <- readSTRef zr' (D _ x_bar) <- readSTRef xr (D _ y_bar) <- readSTRef yr pure (z, (x_bar, y_bar)) -- | Evaluate (forward mode) and differentiate (reverse mode) a function of a 'Traversable' -- -- In linear algebra terms, this computes the gradient of a scalar function of vector argument radNg :: Traversable t => da -- ^ zero -> db -- ^ one -> (forall s . t (AD s a da) -> AD s b db) -> t a -- ^ argument vector -> (b, t da) -- ^ (result, gradient vector) radNg zeroa onea f xs = runST $ do xrs <- traverse (`var` zeroa) xs zr' <- evalContT $ resetT $ do let (AD0 z) = f (fmap pure xrs) zr <- z lift $ modifySTRef' zr (withD (const onea)) pure zr (D z _) <- readSTRef zr' xs_bar <- traverse readSTRef xrs let xs_bar_d = dual <$> xs_bar pure (z, xs_bar_d) -- jacg zeroa onea f xs = runST $ do -- -- Jacobian TODO -- xrs <- traverse (`var` zeroa) xs -- zr' <- evalContT $ -- resetT $ do -- let -- zads = f (fmap pure xrs) -- traversable of AD results -- for zads $ \zad -> do -- zr <- zad -- lift $ modifySTRef' zr (withD (const onea)) -- pure zr -- undefined for :: (Applicative f, Traversable t) => t a -> (a -> f b) -> f (t b) for = flip traverse -- | Evaluate (forward mode) and differentiate (reverse mode) a unary function -- -- >>> rad1 (\x -> x * x) 1 -- (1, 2) rad1 :: (Num a, Num b) => (forall s . AD' s a -> AD' s b) -- ^ function to be differentiated -> a -- ^ function argument -> (b, a) -- ^ (result, adjoint) rad1 = rad1g 0 1 -- | Evaluate (forward mode) and differentiate (reverse mode) a binary function -- -- >>> rad2 (\x y -> x + y + y) 1 1 -- (1,2) -- -- >>> rad2 (\x y -> (x + y) * x) 3 2 -- (15,(8,3)) rad2 :: (Num a, Num b, Num c) => (forall s . AD' s a -> AD' s b -> AD' s c) -- ^ function to be differentiated -> a -> b -> (c, (a, b)) -- ^ (result, adjoints) rad2 = rad2g 0 0 1 -- | Evaluate (forward mode) and differentiate (reverse mode) a function of a 'Traversable' -- -- In linear algebra terms, this computes the gradient of a scalar function of vector argument -- -- -- @ -- sqNorm :: Num a => [a] -> a -- sqNorm xs = sum $ zipWith (*) xs xs -- -- p :: [Double] -- p = [4.1, 2] -- @ -- -- >>> grad sqNorm p -- (20.81,[8.2,4.0]) grad :: (Traversable t, Num a, Num b) => (forall s . t (AD' s a) -> AD' s b) -> t a -- ^ argument vector -> (b, t a) -- ^ (result, gradient vector) grad = radNg 0 1 -- ======================== EXPERIMENTAL ========================== data Backprop a da = Backprop { zero :: a -> da , one :: da -> da , add :: da -> da -> da } bpNum :: (Num a, Num da) => Backprop a da bpNum = Backprop zeroNum oneNum addNum -- | backprop typeclass, adapted from https://hackage.haskell.org/package/backprop-0.2.6.4/docs/src/Numeric.Backprop.Class.html -- -- we use two type parameters to keep the distinction between primal and dual variables -- class Backprop a da where -- zero :: a -> da -- one :: proxy a -> da -> da -- add :: proxy a -> da -> da -> da -- | 'zero' for instances of 'Num'. lazy in its argument. zeroNum :: Num da => a -> da zeroNum _ = 0 {-# INLINE zeroNum #-} -- | 'add' for instances of 'Num'. addNum :: Num da => da -> da -> da addNum = (+) {-# INLINE addNum #-} -- | 'one' for instances of 'Num'. lazy in its argument. oneNum :: Num da => a -> da oneNum _ = 1 {-# INLINE oneNum #-} -- rad1BP :: Backprop a da -- -> Backprop b db -- -> (forall s . AD s a da -> AD s b db) -- -> a -- ^ function argument -- -> (b, da) -- ^ (result, adjoint) -- rad1BP bpa bpb f x = runST $ do -- xr <- var x (zero bpa x) -- zr' <- evalContT $ -- resetT $ do -- let -- z = f (AD (pure xr)) -- zr <- unAD z -- lift $ modifySTRef' zr (withD $ one bpb) -- pure zr -- (D z _) <- readSTRef zr' -- (D _ x_bar) <- readSTRef xr -- pure (z, x_bar) -- -- rad1BP :: (Backprop a da, Backprop b db) -- -- => (forall s . AD s a da -> AD s b db) -- -- -> a -- ^ function argument -- -- -> (b, da) -- ^ (result, adjoint) -- -- rad1BP f x = runST $ do -- -- xr <- var x (zero x) -- -- zr' <- evalContT $ -- -- resetT $ do -- -- let -- -- z = f (AD (pure xr)) -- -- zr <- unAD z -- -- let -- -- oneB :: forall b db . Backprop b db => db -> db -> db -- -- oneB = one (Proxy :: Proxy db) -- -- lift $ modifySTRef' zr (withD oneB) -- -- pure zr -- -- (D z _) <- readSTRef zr' -- -- (D _ x_bar) <- readSTRef xr -- -- pure (z, x_bar) -- -- playground -- -- product type (simplified version of vinyl's Rec) -- data Rec :: [*] -> * where -- RNil :: Rec '[] -- (:*) :: !a -> !(Rec as) -> Rec (a ': as) -- -- dual pairing -- class Dual a where -- dual :: Num r => a -> (a -> r) -- -- | Dual numbers DD (alternative take, using a type family for the first variation) -- data DD a = Dd a (Adj a) -- class Diff a where type Adj a :: * -- instance Diff Double where type Adj Double = Double -- data SDRec s as where -- SDNil :: SDRec s '[] -- (:&) :: DVar s a a -> !(SDRec s as) -> SDRec s (a ': as)