1
0
mirror of https://github.com/tensorflow/haskell.git synced 2024-06-02 11:03:34 +02:00

Added support for minimizing on Tensor Ref besides Variable

This commit is contained in:
Rik van der Kleij 2019-01-29 16:40:50 +01:00
parent e4acd69574
commit 831250c671

View File

@ -11,71 +11,96 @@
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE MonoLocalBinds #-}
module TensorFlow.Minimize
( Minimizer
, minimizeWith
, gradientDescent
, AdamConfig(..)
, adam
, adam'
) where
( Minimizer
, minimizeWith
, gradientDescent
, gradientDescentRef
, AdamConfig(..)
, adam
, adam'
, adamRef
, adamRef'
) where
import Control.Monad (zipWithM)
import Data.Default (Default(..))
import Data.List (zipWith4)
import Data.Maybe (fromMaybe)
import Control.Monad (zipWithM)
import Data.Default (Default (..))
import Data.List (zipWith4)
import Data.Maybe (fromMaybe)
import qualified TensorFlow.Core as TF
import qualified TensorFlow.Core as TF
import qualified TensorFlow.Gradient as TF
import qualified TensorFlow.Ops as TF hiding (assign, initializedVariable)
import qualified TensorFlow.Ops as TF (scalar, mul, zerosLike)
import qualified TensorFlow.Variable as TF
-- | Functions that minimize a loss w.r.t. a set of 'TF.Variable's.
import qualified TensorFlow.Tensor as TF (Rendered, ToTensor)
import qualified TensorFlow.GenOps.Core as TFO (applyAdam, assignAdd, assign)
import qualified TensorFlow.Ops as TFO (assign, initializedVariable,
initializedVariable', scalar,
zeroInitializedVariable, zeros, initializedVariable)
-- | Functions that minimize a loss w.r.t. a set of 'TF.Variable's or 'TF.Tensor TF.Ref's.
--
-- Generally only performs one step of an iterative algorithm.
--
-- 'Minimizer's are defined as a function of the gradients instead of
-- the loss so that users can apply transformations to the gradients.
type Minimizer a =
    forall m. TF.MonadBuild m
    => [TF.Variable a]         -- parameters to update
    -> [TF.Tensor TF.Value a]  -- gradients, paired positionally with the parameters
    -> m TF.ControlNode        -- node standing for the whole update step
newtype Minimizer t a m = Minimizer
    { minimize
        :: ( TF.GradientCompatible a
           , TF.TensorType a
           , TF.MonadBuild m
           , TF.ToTensor t
           , TF.Rendered t
           )
        => [t a]                   -- parameters to update
        -> [TF.Tensor TF.Value a]  -- gradients, paired positionally with the parameters
        -> m TF.ControlNode        -- node standing for the whole update step
    }
-- | Compute the gradients of a loss with 'TF.gradients' and hand them,
-- together with the parameters, to a 'Minimizer'.
minimizeWith
    :: (TF.MonadBuild m, TF.GradientCompatible a)
    => Minimizer a
    -> TF.Tensor v a    -- ^ Loss.
    -> [TF.Variable a]  -- ^ Parameters of the loss function.
    -> m TF.ControlNode
minimizeWith optimizer loss params = do
    grads <- TF.gradients loss params
    optimizer params grads
-- | Apply a single gradient descent update: every parameter is incremented
-- by its gradient scaled with the negated learning rate.
gradientDescent
    :: TF.GradientCompatible a
    => a  -- ^ Learning rate.
    -> Minimizer a
gradientDescent learningRate params grads =
    TF.withNameScope "gradientDescent" $
        zipWithM descend params grads >>= TF.group
  where
    -- Update for one (parameter, gradient) pair.
    descend param grad =
        TF.assignAdd param (TF.scalar (negate learningRate) `TF.mul` grad)
-- TODO: Support more than Float in adam.
data AdamConfig = AdamConfig
{ adamLearningRate :: Float
, adamBeta1 :: Float
, adamBeta2 :: Float
, adamEpsilon :: Float
-- Shared implementation of the gradient descent minimizers. The caller
-- supplies the op that adds an update to a parameter ('assignAdd'); every
-- parameter then receives its gradient scaled by the negated learning rate.
minimizer
    :: forall a m t n. TF.Nodes n
    => (t a -> TF.Tensor TF.Build a -> m n)
    -> a
    -> Minimizer t a m
minimizer assignAdd learningRate = Minimizer
    { minimize = \params grads ->
        let step = TF.scalar (negate learningRate)
            -- Update for one (parameter, gradient) pair.
            descend param grad = assignAdd param (step `TF.mul` grad)
        in TF.withNameScope "gradientDescent" $
            zipWithM descend params grads >>= TF.group
    }
instance Default AdamConfig where
-- | Compute the gradients of a loss with 'TF.gradients' and feed them,
-- together with the parameters, to the 'minimize' function of a 'Minimizer'.
minimizeWith
    :: (TF.MonadBuild m, TF.GradientCompatible a, TF.Rendered t, TF.ToTensor t)
    => Minimizer t a m
    -> TF.Tensor v a  -- ^ Loss.
    -> [t a]          -- ^ Parameters of the loss function.
    -> m TF.ControlNode
minimizeWith optimizer loss params = do
    grads <- TF.gradients loss params
    minimize optimizer params grads
-- | One step of gradient descent over parameters held in 'TF.Variable's.
gradientDescent
    :: (TF.MonadBuild m, TF.GradientCompatible a)
    => a  -- ^ Learning rate.
    -> Minimizer TF.Variable a m
gradientDescent learningRate = minimizer TF.assignAdd learningRate
-- | One step of gradient descent over parameters held in @'TF.Tensor' 'TF.Ref'@s.
gradientDescentRef
    :: (TF.MonadBuild m, TF.GradientCompatible a)
    => a  -- ^ Learning rate.
    -> Minimizer (TF.Tensor TF.Ref) a m
gradientDescentRef learningRate = minimizer TFO.assignAdd learningRate
-- TODO: Support more than Float in adam.

-- | Hyperparameters handed to the adam update op by the adam minimizers.
data AdamConfig = AdamConfig
    { adamLearningRate :: Float  -- ^ Learning rate.
    , adamBeta1        :: Float  -- ^ @beta1@; also the initial value of the beta1 power variable.
    , adamBeta2        :: Float  -- ^ @beta2@; also the initial value of the beta2 power variable.
    , adamEpsilon      :: Float  -- ^ @epsilon@.
    }
-- Default hyperparameters, as recommended in the adam paper.
instance Default AdamConfig where
    def = AdamConfig
        { adamLearningRate = 0.001
        , adamBeta1        = 0.9
        , adamBeta2        = 0.999
        , adamEpsilon      = 1e-8
        }
-- | Perform one step of the adam algorithm.
@ -83,33 +108,64 @@ instance Default AdamConfig where
-- See https://arxiv.org/abs/1412.6980.
--
-- NOTE: Currently requires all 'TF.Variable's to have an 'TF.initializedValue'.
adam :: Minimizer Float
adam :: Minimizer TF.Variable Float TF.Build
-- Runs adam' with the default 'AdamConfig' ('def').
adam = adam' def
-- | Like 'adam', but takes the 'AdamConfig' to use as an argument.
adam' :: AdamConfig -> Minimizer Float
adam' config params grads = TF.withNameScope "adam" $ do
adam' :: AdamConfig -> Minimizer TF.Variable Float TF.Build
adam' config =
-- 'initVal' calls 'error' with 'errorMsg' when 'TF.initializedValue' yields
-- 'Nothing' for a variable.
let errorMsg = "TensorFlow.Minimize.adam requires an initial value for all variables"
initVal = fromMaybe (error errorMsg) . TF.initializedValue
in adam''
config
(TF.initializedVariable . TF.zerosLike . initVal)
TF.initializedVariable
TF.resourceApplyAdam
TF.readValue
TF.assign
-- | Perform one step of the adam algorithm on @'TF.Tensor' 'TF.Ref'@
-- parameters, using the default 'AdamConfig' ('def').
adamRef :: Minimizer (TF.Tensor TF.Ref) Float TF.Build
adamRef = adamRef' def
-- | Like 'adamRef', but takes the 'AdamConfig' to use as an argument.
adamRef' :: AdamConfig -> Minimizer (TF.Tensor TF.Ref) Float TF.Build
adamRef' config =
    adam'' config zeroSlot TFO.initializedVariable TFO.applyAdam TF.expr TFO.assign
  where
    -- State variable initialised with zeros shaped like the parameter's value.
    zeroSlot param = TFO.initializedVariable (TF.zerosLike (TF.value param))
-- Shared implementation of the adam minimizers, parameterised over the
-- representation @t@ of the parameters. The arguments are, in order:
--
-- * the 'AdamConfig' supplying learning rate, beta1, beta2 and epsilon;
-- * @initVar@: creates one state variable per parameter (used for both the
--   @ms@ and the @vs@ lists);
-- * @initV@: creates a state variable from a tensor (used for the two beta
--   power variables);
-- * @aGrad@: the adam update op, run once per parameter;
-- * @rv@: reads the current value of a state variable;
-- * @asi@: assigns a new value to a state variable.
adam'' :: forall t n . (TF.Nodes n, TF.ToTensor t, TF.Rendered t) =>
AdamConfig
-> (t Float -> TF.Build (t Float))
-> (TF.Tensor TF.Build Float -> TF.Build (t Float))
-> (t Float -> t Float -> t Float -> TF.Tensor TF.Build Float -> TF.Tensor TF.Build Float -> TF.Tensor TF.Build Float -> TF.Tensor TF.Build Float -> TF.Tensor TF.Build Float -> TF.Tensor TF.Build Float -> TF.Tensor TF.Value Float -> TF.Build n)
-> (t Float -> TF.Tensor TF.Build Float)
-> (t Float -> TF.Tensor TF.Build Float -> TF.Build n)
-> Minimizer t Float TF.Build
adam'' config initVar initV aGrad rv asi = Minimizer
{ minimize = \params grads -> TF.withNameScope "adam" $ do
let lr = TF.scalar (adamLearningRate config)
beta1 = TF.scalar (adamBeta1 config)
beta2 = TF.scalar (adamBeta2 config)
epsilon = TF.scalar (adamEpsilon config)
-- Create adam state variables.
let errorMsg = "TensorFlow.Minimize.adam requires an initial value for all variables"
initVal = fromMaybe (error errorMsg) . TF.initializedValue
ms <- mapM (TF.initializedVariable . TF.zerosLike . initVal) params
vs <- mapM (TF.initializedVariable . TF.zerosLike . initVal) params
beta1Power <- TF.initializedVariable beta1
beta2Power <- TF.initializedVariable beta2
ms <- mapM initVar params
vs <- mapM initVar params
beta1Power <- initV beta1
beta2Power <- initV beta2
-- Perform adam update.
let applyGrad param m v =
TF.resourceApplyAdam param m v
(TF.readValue beta1Power)
(TF.readValue beta2Power)
let applyGrad param m v = aGrad param m v
(rv beta1Power)
(rv beta2Power)
lr beta1 beta2 epsilon
updateVars <- sequence $ zipWith4 applyGrad params ms vs grads
-- Update beta variables after adam update.
let updateBeta betaPower beta =
TF.withControlDependencies updateVars
(TF.assign betaPower (TF.readValue betaPower `TF.mul` beta))
(asi betaPower (rv betaPower `TF.mul` beta))
updateBeta1 <- updateBeta beta1Power beta1
updateBeta2 <- updateBeta beta2Power beta2
TF.group (updateBeta1:updateBeta2:updateVars)
}