Repository: https://github.com/tensorflow/haskell.git

Support Variable in TensorFlow.Gradient and use in mnist example (#116)

commit b86945f008 (parent ddb4fe4f90)
Author: fkm3
Committer: Judah Jacobson
Date: 2017-05-17 13:20:51 -07:00

3 changed files with 33 additions and 19 deletions


@@ -24,7 +24,8 @@ import qualified Data.Vector as V
 import qualified TensorFlow.Core as TF
 import qualified TensorFlow.Gradient as TF
-import qualified TensorFlow.Ops as TF
+import qualified TensorFlow.Ops as TF hiding (initializedVariable, zeroInitializedVariable)
+import qualified TensorFlow.Variable as TF
 import TensorFlow.Examples.MNIST.InputData
 import TensorFlow.Examples.MNIST.Parse
@@ -68,13 +69,15 @@ createModel = do
     hiddenWeights <-
         TF.initializedVariable =<< randomParam numPixels [numPixels, numUnits]
     hiddenBiases <- TF.zeroInitializedVariable [numUnits]
-    let hiddenZ = (images `TF.matMul` hiddenWeights) `TF.add` hiddenBiases
+    let hiddenZ = (images `TF.matMul` TF.readValue hiddenWeights)
+                  `TF.add` TF.readValue hiddenBiases
     let hidden = TF.relu hiddenZ
     -- Logits.
     logitWeights <-
         TF.initializedVariable =<< randomParam numUnits [numUnits, numLabels]
     logitBiases <- TF.zeroInitializedVariable [numLabels]
-    let logits = (hidden `TF.matMul` logitWeights) `TF.add` logitBiases
+    let logits = (hidden `TF.matMul` TF.readValue logitWeights)
+                 `TF.add` TF.readValue logitBiases
     predict <- TF.render $ TF.cast $
         TF.argMax (TF.softmax logits) (TF.scalar (1 :: LabelType))
@@ -87,7 +90,7 @@ createModel = do
     grads <- TF.gradients loss params
     let lr = TF.scalar 0.00001
-        applyGrad param grad = TF.assign param $ param `TF.sub` (lr `TF.mul` grad)
+        applyGrad param grad = TF.assignAdd param (negate $ lr `TF.mul` grad)
     trainStep <- TF.group =<< zipWithM applyGrad params grads
     let correctPredictions = TF.equal predict labels
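
The mnist changes above all follow one pattern: model parameters become resource variables from TensorFlow.Variable, the graph reads them with TF.readValue, TF.gradients is applied to the variables directly, and updates go through TF.assignAdd. Below is a minimal, self-contained sketch of that pattern; the toy quadratic loss, the shape, the learning rate, and the step count are illustrative only and not part of this commit.

import Control.Monad (replicateM_, zipWithM)

import qualified TensorFlow.Core as TF
import qualified TensorFlow.Gradient as TF
import qualified TensorFlow.Ops as TF hiding (initializedVariable, zeroInitializedVariable)
import qualified TensorFlow.Variable as TF

main :: IO ()
main = TF.runSession $ do
    -- A parameter vector stored in a resource variable (TensorFlow.Variable).
    w <- TF.zeroInitializedVariable (TF.Shape [4])
    -- The graph sees the variable's current value via TF.readValue.
    let loss = TF.reduceSum (TF.readValue w `TF.mul` TF.readValue w)
        params = [w]
    -- Gradients are taken w.r.t. the variables themselves, as enabled by
    -- this commit's changes to TensorFlow.Gradient.
    grads <- TF.gradients loss params
    -- Gradient-descent update, mirroring applyGrad in the mnist example.
    let lr = TF.scalar (0.001 :: Float)
        applyGrad param grad = TF.assignAdd param (negate $ lr `TF.mul` grad)
    trainStep <- TF.group =<< zipWithM applyGrad params grads
    replicateM_ 10 (TF.run_ trainStep)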


@@ -99,6 +99,7 @@ import TensorFlow.Tensor
     , tensorNodeName
     , renderedOutput
     , renderValue
+    , ToTensor(..)
     )
 import TensorFlow.Types (Attribute, OneOf, TensorType, attrLens)
 import Proto.Tensorflow.Core.Framework.NodeDef
@@ -116,12 +117,13 @@ type GradientCompatible a =

 -- | Gradient of @y@ w.r.t. each element of @xs@.
-gradients :: forall a v1 v2 m . ( MonadBuild m
-                                 , Rendered (Tensor v2)
-                                 , GradientCompatible a
-                                 )
+gradients :: forall a v1 t m . ( MonadBuild m
+                                , Rendered t
+                                , ToTensor t
+                                , GradientCompatible a
+                                )
           => Tensor v1 a  -- ^ The output of the graph.
-          -> [Tensor v2 a]  -- ^ Tensors for which gradients are computed.
+          -> [t a]  -- ^ Tensors for which gradients are computed.
           -> m [Tensor Value a]
 gradients y xs = build $ do
     -- The gradients are computed using "reverse accumulation", similarly to
@@ -171,10 +173,9 @@ gradients y xs = build $ do
     gradientMap <- graphGrads gr initPending
     -- Lookup the gradients for each x.
     forM xs $ \x ->
-        let xName = tensorNodeName x
-        in maybe (render $ zerosLike x) return $ do
+        let Output i xName = renderedOutput x
+        in maybe (render $ zerosLike $ toTensor x) return $ do
             n <- nodeMap ^. at xName
-            let i = outputIndex $ renderedOutput x
             gradientMap ^. at n . nonEmpty . outputIxAt i

 outputIxAt :: OutputIx -> Lens' (IntMap.IntMap v) (Maybe v)
@@ -687,9 +688,16 @@ opGrad "Fill" _ _ [dz] = [Nothing, Just $ sum dz rx]
   where
     rx = rangeOfRank dz

+-- Treat read ops as an identity function on the variable. This allows us to
+-- take gradients w.r.t. the variable handle instead of the result of a read
+-- op. If a variable is read multiple times, the gradients will propagate back
+-- through each read.
+opGrad "ReadVariableOp" _ _ [dz] = [Just $ expr dz]
+
 -- TODO(fmayle): These can go away if we properly prune the graph.
 opGrad "Const" _ _ _ = [Nothing, Nothing]
 opGrad "Placeholder" _ _ _ = []
+opGrad "VarHandleOp" _ _ _ = []
 opGrad "Variable" _ _ _ = []

 opGrad n nodeDef ins grads =
@@ -723,6 +731,7 @@ numOutputs o =
         "Neg" -> 1
         "Placeholder" -> 1
         "OneHot" -> 1
+        "ReadVariableOp" -> 1
         "RefIdentity" -> 1
         "Relu" -> 1
         "ReluGrad" -> 1
@@ -737,6 +746,7 @@ numOutputs o =
         "Tile" -> 1
         "Transpose" -> 1
         "TruncatedNormal" -> 1
+        "VarHandleOp" -> 1
         "Variable" -> 1
         "ZerosLike" -> 1
         "Fill" -> 1


@@ -31,9 +31,10 @@ import Control.Monad.IO.Class (liftIO)
 import qualified TensorFlow.Core as TF
 import qualified TensorFlow.GenOps.Core as TF (max, tile)
 import qualified TensorFlow.Gradient as TF
-import qualified TensorFlow.Ops as TF
+import qualified TensorFlow.Ops as TF hiding (zeroInitializedVariable)
 import qualified TensorFlow.Output as TF
 import qualified TensorFlow.Types as TF
+import qualified TensorFlow.Variable as TF

 import Proto.Tensorflow.Core.Framework.Graph (node)
 import Proto.Tensorflow.Core.Framework.NodeDef (op)
@@ -222,7 +223,7 @@ matMulGradient = testCase "matMulGradients" $ do
     let dfBuild = do
             x <- TF.render $ TF.zeros $ TF.Shape [3, 1 :: Int64]
             w <- TF.zeroInitializedVariable $ TF.Shape [1, 2 :: Int64]
-            let f = x `TF.matMul` w :: TF.Tensor TF.Build Float
+            let f = x `TF.matMul` TF.readValue w :: TF.Tensor TF.Build Float
             dfs <- TF.gradients f [x]
             return (x, dfs)
@@ -242,11 +243,11 @@ matMulGradGrad = testCase "matMulGradGrad" $ do
     let tower = do
             x <- TF.render $ TF.zeros $ TF.Shape [batch, 1]
             w <- TF.zeroInitializedVariable $ TF.Shape [1, width]
-            let f = x `TF.matMul` w
+            let f = x `TF.matMul` TF.readValue w
            [dfdx] <- TF.gradients f [x]
             let f'x = TF.reduceSum dfdx
             [dfdw] <- TF.gradients f'x [w]  -- take gradient again (this time over w)
-            return [TF.value w, dfdw]
+            return [TF.readValue w, TF.expr dfdw]

     TF.runSession $ do
         [w, dfdw] <- TF.build tower
@@ -255,12 +256,12 @@ matMulGradGrad = testCase "matMulGradGrad" $ do
         let step = w `TF.add` dfdw
         w0 <- TF.run step
-        liftIO $ ((V.fromList [4, 4 :: Float]) @=? w0)
+        liftIO $ V.fromList [4, 4 :: Float] @=? w0

 -- test that gradient of matMul deals correctly with transpose_a and transpose_b
 matMulTransposeGradient :: (Bool, Bool) -> Test
-matMulTransposeGradient txw = testCase ("matMulTransposeGradients " ++ (show txw)) $ do
+matMulTransposeGradient txw = testCase ("matMulTransposeGradients " ++ show txw) $ do
     let (transposeX, transposeW) = txw
     let dfBuild = do
@@ -268,7 +269,7 @@ matMulTransposeGradient txw = testCase ("matMulTransposeGradients " ++ (show txw)) $ do
             let xZeros = TF.zeros xShape
             x <- TF.render $ if transposeX then TF.matTranspose xZeros else xZeros
             variable <- TF.zeroInitializedVariable $ TF.Shape [1, 2 :: Int64]
-            let wv = if transposeW then TF.matTranspose variable else TF.expr variable
+            let wv = if transposeW then TF.matTranspose (TF.readValue variable) else TF.readValue variable
             let f = TF.matMul' (transAttrs transposeX transposeW) x wv :: TF.Tensor TF.Build Float
             w <- TF.render wv
             ds <- TF.gradients f [x, w]
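
With the generalized gradients signature, a test can also differentiate directly with respect to a Variable instead of a rendered tensor. A hypothetical companion test in the style of the file above (the test name, the constant input, and the expected values are illustrative and not part of this commit):

import Data.Int (Int64)
import qualified Data.Vector as V
import Test.Framework (Test)
import Test.Framework.Providers.HUnit (testCase)
import Test.HUnit ((@=?))

import qualified TensorFlow.Core as TF
import qualified TensorFlow.Gradient as TF
import qualified TensorFlow.Ops as TF hiding (zeroInitializedVariable)
import qualified TensorFlow.Variable as TF

-- d(sum(x `matMul` w))/dw for x = [[1], [1]] and w :: [1, 2] should be
-- [[2, 2]]: each column of w sees both rows of x.
matMulVariableGradient :: Test
matMulVariableGradient = testCase "matMulVariableGradient" $ do
    dw <- TF.runSession $ do
        let x = TF.constant (TF.Shape [2, 1]) [1, 1 :: Float]
        w <- TF.zeroInitializedVariable $ TF.Shape [1, 2 :: Int64]
        let f = x `TF.matMul` TF.readValue w :: TF.Tensor TF.Build Float
        [dfdw] <- TF.gradients f [w]
        TF.run dfdw
    V.fromList [2, 2 :: Float] @=? dw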