Add Minimize module with gradient descent and adam implementations (#125)

This commit is contained in:
fkm3 2017-05-25 19:19:22 -07:00 committed by Greg Steuck
parent a86d424cac
commit 0603a6987b
8 changed files with 203 additions and 55 deletions

View File

@ -20,14 +20,15 @@ Toy example of a linear regression model
([full code](tensorflow-ops/tests/RegressionTest.hs)):
```haskell
import Control.Monad (replicateM, replicateM_, zipWithM)
import Control.Monad (replicateM, replicateM_)
import System.Random (randomIO)
import Test.HUnit (assertBool)
import qualified TensorFlow.Core as TF
import qualified TensorFlow.GenOps.Core as TF
import qualified TensorFlow.Gradient as TF
import qualified TensorFlow.Ops as TF
import qualified TensorFlow.Minimize as TF
import qualified TensorFlow.Ops as TF hiding (initializedVariable)
import qualified TensorFlow.Variable as TF
main :: IO ()
main = do
@ -48,23 +49,14 @@ fit xData yData = TF.runSession $ do
w <- TF.initializedVariable 0
b <- TF.initializedVariable 0
-- Define the loss function.
let yHat = (x `TF.mul` w) `TF.add` b
let yHat = (x `TF.mul` TF.readValue w) `TF.add` TF.readValue b
loss = TF.square (yHat `TF.sub` y)
-- Optimize with gradient descent.
trainStep <- gradientDescent 0.001 loss [w, b]
trainStep <- TF.minimizeWith (TF.gradientDescent 0.001) loss [w, b]
replicateM_ 1000 (TF.run trainStep)
-- Return the learned parameters.
(TF.Scalar w', TF.Scalar b') <- TF.run (w, b)
(TF.Scalar w', TF.Scalar b') <- TF.run (TF.readValue w, TF.readValue b)
return (w', b')
gradientDescent :: Float
-> TF.Tensor TF.Build Float
-> [TF.Tensor TF.Ref Float]
-> TF.Session TF.ControlNode
gradientDescent alpha loss params = do
let applyGrad param grad =
TF.assign param (param `TF.sub` (TF.scalar alpha `TF.mul` grad))
TF.group =<< zipWithM applyGrad params =<< TF.gradients loss params
```
# Installation Instructions

View File

@ -15,7 +15,7 @@
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedLists #-}
import Control.Monad (zipWithM, when, forM_)
import Control.Monad (forM_, when)
import Control.Monad.IO.Class (liftIO)
import Data.Int (Int32, Int64)
import Data.List (genericLength)
@ -23,9 +23,9 @@ import qualified Data.Text.IO as T
import qualified Data.Vector as V
import qualified TensorFlow.Core as TF
import qualified TensorFlow.Gradient as TF
import qualified TensorFlow.Ops as TF hiding (initializedVariable, zeroInitializedVariable)
import qualified TensorFlow.Variable as TF
import qualified TensorFlow.Minimize as TF
import TensorFlow.Examples.MNIST.InputData
import TensorFlow.Examples.MNIST.Parse
@ -87,11 +87,7 @@ createModel = do
loss =
reduceMean $ fst $ TF.softmaxCrossEntropyWithLogits logits labelVecs
params = [hiddenWeights, hiddenBiases, logitWeights, logitBiases]
grads <- TF.gradients loss params
let lr = TF.scalar 0.00001
applyGrad param grad = TF.assignAdd param (negate $ lr `TF.mul` grad)
trainStep <- TF.group =<< zipWithM applyGrad params grads
trainStep <- TF.minimizeWith TF.adam loss params
let correctPredictions = TF.equal predict labels
errorRateTensor <- TF.render $ 1 - reduceMean (TF.cast correctPredictions)

View File

@ -22,7 +22,8 @@
{-# LANGUAGE ViewPatterns #-}
module TensorFlow.Gradient
( gradients
( GradientCompatible
, gradients
) where
import Control.Monad (forM, zipWithM)

View File

@ -0,0 +1,115 @@
-- Copyright 2016 TensorFlow authors.
--
-- Licensed under the Apache License, Version 2.0 (the "License");
-- you may not use this file except in compliance with the License.
-- You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
module TensorFlow.Minimize
( Minimizer
, minimizeWith
, gradientDescent
, AdamConfig(..)
, adam
, adam'
) where
import Control.Monad (zipWithM)
import Data.Default (Default(..))
import Data.List (zipWith4)
import Data.Maybe (fromMaybe)
import qualified TensorFlow.Core as TF
import qualified TensorFlow.Gradient as TF
import qualified TensorFlow.Ops as TF hiding (assign, initializedVariable)
import qualified TensorFlow.Variable as TF
-- | Functions that minimize a loss w.r.t. a set of 'TF.Variable's.
--
-- Generally only performs one step of an iterative algorithm.
--
-- 'Minimizer's are defined as a function of the gradients instead of
-- the loss so that users can apply transformations to the gradients.
type Minimizer a =
forall m. TF.MonadBuild m =>
[TF.Variable a] -> [TF.Tensor TF.Value a] -> m TF.ControlNode
-- | Convenience wrapper around 'TF.gradients' and a 'Minimizer'.
minimizeWith :: (TF.MonadBuild m, TF.GradientCompatible a)
=> Minimizer a
-> TF.Tensor v a -- ^ Loss.
-> [TF.Variable a] -- ^ Parameters of the loss function.
-> m TF.ControlNode
minimizeWith minimizer loss params =
TF.gradients loss params >>= minimizer params
-- | Perform one step of the gradient descent algorithm.
gradientDescent :: TF.GradientCompatible a
=> a -- ^ Learning rate.
-> Minimizer a
gradientDescent learningRate params grads = TF.withNameScope "gradientDescent" $ do
let applyGrad param grad =
TF.assignAdd param (TF.scalar (-learningRate) `TF.mul` grad)
TF.group =<< zipWithM applyGrad params grads
-- TODO: Support more than Float in adam.
data AdamConfig = AdamConfig
{ adamLearningRate :: Float
, adamBeta1 :: Float
, adamBeta2 :: Float
, adamEpsilon :: Float
}
instance Default AdamConfig where
-- Recommended defaults from the adam paper.
def = AdamConfig 0.001 0.9 0.999 1e-8
-- | Perform one step of the adam algorithm.
--
-- See https://arxiv.org/abs/1412.6980.
--
-- NOTE: Currently requires all 'TF.Variable's to have an 'TF.initializedValue'.
adam :: Minimizer Float
adam = adam' def
adam' :: AdamConfig -> Minimizer Float
adam' config params grads = TF.withNameScope "adam" $ do
let lr = TF.scalar (adamLearningRate config)
beta1 = TF.scalar (adamBeta1 config)
beta2 = TF.scalar (adamBeta2 config)
epsilon = TF.scalar (adamEpsilon config)
-- Create adam state variables.
let errorMsg = "TensorFlow.Minimize.adam requires an initial value for all variables"
initVal = fromMaybe (error errorMsg) . TF.initializedValue
ms <- mapM (TF.initializedVariable . TF.zerosLike . initVal) params
vs <- mapM (TF.initializedVariable . TF.zerosLike . initVal) params
beta1Power <- TF.initializedVariable beta1
beta2Power <- TF.initializedVariable beta2
-- Perform adam update.
let applyGrad param m v =
TF.resourceApplyAdam param m v
(TF.readValue beta1Power)
(TF.readValue beta2Power)
lr beta1 beta2 epsilon
updateVars <- sequence $ zipWith4 applyGrad params ms vs grads
-- Update beta variables after adam update.
let updateBeta betaPower beta =
TF.withControlDependencies updateVars
(TF.assign betaPower (TF.readValue betaPower `TF.mul` beta))
updateBeta1 <- updateBeta beta1Power beta1
updateBeta2 <- updateBeta beta2Power beta2
TF.group (updateBeta1:updateBeta2:updateVars)

View File

@ -6,6 +6,8 @@
-- TODO: given that distinction, figure out a good story around
-- gradients and save/restore. Then, merge this module into
-- TensorFlow.Ops.
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE RecursiveDo #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE OverloadedStrings #-}
@ -23,8 +25,13 @@ module TensorFlow.Variable
, assign'
, assignAdd
, assignAdd'
, resourceApplyAdam
, resourceApplyAdam'
) where
import qualified Data.Complex
import qualified Data.Int
import qualified Data.Word
import Data.Text.Encoding (encodeUtf8)
import Lens.Family2 ((.~), (&))
import TensorFlow.Core
@ -133,3 +140,55 @@ assignAdd = assignAdd' id
assignAdd' :: (MonadBuild m, TensorType a)
=> OpParams -> Variable a -> Tensor v a -> m ControlNode
assignAdd' params (Variable h _) v = CoreOps.assignAddVariableOp' params h v
-- | Update '*var' according to the Adam algorithm.
--
-- lr_t <- learning_rate * sqrt(1 - beta2^t) / (1 - beta1^t)
-- m_t <- beta1 * m_{t-1} + (1 - beta1) * g_t
-- v_t <- beta2 * v_{t-1} + (1 - beta2) * g_t * g_t
-- variable <- variable - lr_t * m_t / (sqrt(v_t) + epsilon)
resourceApplyAdam ::
(MonadBuild m,
OneOf '[(Data.Complex.Complex Double),
(Data.Complex.Complex Float),
Data.Int.Int16,
Data.Int.Int32,
Data.Int.Int64, Data.Int.Int8,
Data.Word.Word16,
Data.Word.Word8, Double,
Float] t)
=> Variable t -- ^ __var__: Should be from a Variable().
-> Variable t -- ^ __m__: Should be from a Variable().
-> Variable t -- ^ __v__: Should be from a Variable().
-> Tensor v1 t -- ^ __beta1_power__: Must be a scalar.
-> Tensor v2 t -- ^ __beta2_power__: Must be a scalar.
-> Tensor v3 t -- ^ __lr__: Scaling factor. Must be a scalar.
-> Tensor v4 t -- ^ __beta1__: Momentum factor. Must be a scalar.
-> Tensor v5 t -- ^ __beta2__: Momentum factor. Must be a scalar.
-> Tensor v6 t -- ^ __epsilon__: Ridge term. Must be a scalar.
-> Tensor v7 t -- ^ __grad__: The gradient.
-> m (ControlNode)
resourceApplyAdam = resourceApplyAdam' id
resourceApplyAdam' ::
(MonadBuild m,
OneOf '[(Data.Complex.Complex Double),
(Data.Complex.Complex Float),
Data.Int.Int16, Data.Int.Int32,
Data.Int.Int64, Data.Int.Int8,
Data.Word.Word16, Data.Word.Word8, Double,
Float] t)
=> OpParams
-> Variable t -- ^ __var__: Should be from a Variable().
-> Variable t -- ^ __m__: Should be from a Variable().
-> Variable t -- ^ __v__: Should be from a Variable().
-> Tensor v1 t -- ^ __beta1_power__: Must be a scalar.
-> Tensor v2 t -- ^ __beta2_power__: Must be a scalar.
-> Tensor v3 t -- ^ __lr__: Scaling factor. Must be a scalar.
-> Tensor v4 t -- ^ __beta1__: Momentum factor. Must be a scalar.
-> Tensor v5 t -- ^ __beta2__: Momentum factor. Must be a scalar.
-> Tensor v6 t -- ^ __epsilon__: Ridge term. Must be a scalar.
-> Tensor v7 t -- ^ __grad__: The gradient.
-> m (ControlNode)
resourceApplyAdam' params (Variable var _) (Variable m _) (Variable v _) =
CoreOps.resourceApplyAdam' params var m v

View File

@ -17,6 +17,7 @@ library
exposed-modules: TensorFlow.Gradient
, TensorFlow.Ops
, TensorFlow.EmbeddingOps
, TensorFlow.Minimize
, TensorFlow.NN
, TensorFlow.Queue
, TensorFlow.Variable

View File

@ -2,13 +2,14 @@
{-# LANGUAGE OverloadedLists #-}
import Control.Monad.IO.Class (liftIO)
import Control.Monad (replicateM_, zipWithM)
import Control.Monad (replicateM_)
import qualified TensorFlow.GenOps.Core as TF (square, rank)
import qualified TensorFlow.Core as TF
import qualified TensorFlow.Gradient as TF
import qualified TensorFlow.Ops as TF
import qualified Data.Vector as V
import qualified TensorFlow.Core as TF
import qualified TensorFlow.GenOps.Core as TF (square, rank)
import qualified TensorFlow.Minimize as TF
import qualified TensorFlow.Ops as TF hiding (initializedVariable)
import qualified TensorFlow.Variable as TF
import Test.Framework (defaultMain, Test)
import Test.Framework.Providers.HUnit (testCase)
@ -26,22 +27,13 @@ fitMatrix = testCase "fitMatrix" $ TF.runSession $ do
v <- TF.initializedVariable =<< randomParam [1, 2]
let ones = [1, 1, 1, 1] :: [Float]
matx = TF.constant [2, 2] ones
diff = matx `TF.sub` (u `TF.matMul` v)
diff = matx `TF.sub` (TF.readValue u `TF.matMul` TF.readValue v)
loss = reduceMean $ TF.square diff
trainStep <- gradientDescent 0.01 loss [u, v]
trainStep <- TF.minimizeWith (TF.gradientDescent 0.01) loss [u, v]
replicateM_ 1000 (TF.run trainStep)
(u',v') <- TF.run (u, v)
(u',v') <- TF.run (TF.readValue u, TF.readValue v)
-- ones = u * v
liftIO $ assertAllClose (V.fromList ones) ((*) <$> u' <*> v')
gradientDescent :: Float
-> TF.Tensor TF.Build Float
-> [TF.Tensor TF.Ref Float]
-> TF.Session TF.ControlNode
gradientDescent alpha loss params = do
let applyGrad param grad =
TF.assign param (param `TF.sub` (TF.scalar alpha `TF.mul` grad))
TF.group =<< zipWithM applyGrad params =<< TF.gradients loss params
main :: IO ()
main = defaultMain [ fitMatrix ]

View File

@ -1,13 +1,14 @@
-- | Simple linear regression example for the README.
import Control.Monad (replicateM, replicateM_, zipWithM)
import Control.Monad (replicateM, replicateM_)
import System.Random (randomIO)
import Test.HUnit (assertBool)
import qualified TensorFlow.Core as TF
import qualified TensorFlow.GenOps.Core as TF
import qualified TensorFlow.Gradient as TF
import qualified TensorFlow.Ops as TF
import qualified TensorFlow.Minimize as TF
import qualified TensorFlow.Ops as TF hiding (initializedVariable)
import qualified TensorFlow.Variable as TF
main :: IO ()
main = do
@ -28,20 +29,11 @@ fit xData yData = TF.runSession $ do
w <- TF.initializedVariable 0
b <- TF.initializedVariable 0
-- Define the loss function.
let yHat = (x `TF.mul` w) `TF.add` b
let yHat = (x `TF.mul` TF.readValue w) `TF.add` TF.readValue b
loss = TF.square (yHat `TF.sub` y)
-- Optimize with gradient descent.
trainStep <- gradientDescent 0.001 loss [w, b]
trainStep <- TF.minimizeWith (TF.gradientDescent 0.001) loss [w, b]
replicateM_ 1000 (TF.run trainStep)
-- Return the learned parameters.
(TF.Scalar w', TF.Scalar b') <- TF.run (w, b)
(TF.Scalar w', TF.Scalar b') <- TF.run (TF.readValue w, TF.readValue b)
return (w', b')
gradientDescent :: Float
-> TF.Tensor TF.Build Float
-> [TF.Tensor TF.Ref Float]
-> TF.Session TF.ControlNode
gradientDescent alpha loss params = do
let applyGrad param grad =
TF.assign param (param `TF.sub` (TF.scalar alpha `TF.mul` grad))
TF.group =<< zipWithM applyGrad params =<< TF.gradients loss params