diff --git a/tensorflow-ops/src/TensorFlow/Gradient.hs b/tensorflow-ops/src/TensorFlow/Gradient.hs index dde196d..91fa72c 100644 --- a/tensorflow-ops/src/TensorFlow/Gradient.hs +++ b/tensorflow-ops/src/TensorFlow/Gradient.hs @@ -431,6 +431,22 @@ flatSlice t begin size = CoreOps.slice t (vector [begin]) (vector [size]) nodeDefName :: NodeDef -> NodeName nodeDefName = NodeName . view name +-- | Gradient helper for binary component-wise operations: sums each operand's incoming gradient over the axes returned by 'broadcastGradientArgs' and reshapes the result to that operand's shape. +-- See https://github.com/tensorflow/tensorflow/blob/e9de087fa7f59c39bbe12ac2c83c5547c83f746c/tensorflow/core/ops/math_grad.cc#L329 +gradForBinaryCwise :: ( OneOf '[ Int32, Int64, Float, Double, Complex Float, Complex Double ] t + ) + => (Tensor v1 t, Tensor v1 t) + -> (Tensor v1 t, Tensor v1 t) + -> [ Maybe (Tensor Build t) ] +gradForBinaryCwise (x, gx) (y, gy) = + [ Just dx + , Just dy ] + where + dx = reshape (sum gx rx) sx + dy = reshape (sum gy ry) sy + sx = shape x -- (x :: Tensor Build t) + sy = shape y -- (y :: Tensor Build t) + (rx, ry) = broadcastGradientArgs sx sy -- | The gradient function for an op type. -- @@ -483,6 +499,15 @@ opGrad "Max" _ [toT -> x, toT -> indices] [dz] = -- Min and Max have identical gradient implementations. 
opGrad "Min" u v w = opGrad "Max" u v w +-- Element-wise maximum gradient: dz flows to x where x >= y and to y elsewhere. +-- See https://github.com/tensorflow/tensorflow/blob/e9de087fa7f59c39bbe12ac2c83c5547c83f746c/tensorflow/core/ops/math_grad.cc#L473 +opGrad "Maximum" _ [toT -> x, toT -> y] [dz] = + gradForBinaryCwise (x, gx) (y, gy) + where + xmask = CoreOps.greaterEqual x y + gx = CoreOps.select xmask dz (CoreOps.zerosLike dz) + gy = CoreOps.select (CoreOps.logicalNot xmask) dz (CoreOps.zerosLike dz) + opGrad "Sum" _ [toT -> x, toT -> indices] [dz] = [ Just $ CoreOps.tile grad tileScaling, Nothing ] where @@ -731,6 +756,7 @@ numOutputs o = "Log" -> 1 "MatMul" -> 1 "Max" -> 1 + "Maximum" -> 1 "MaxPool" -> 1 "Mean" -> 1 "Min" -> 1 diff --git a/tensorflow-ops/tests/GradientTest.hs b/tensorflow-ops/tests/GradientTest.hs index 2902297..935d9f2 100644 --- a/tensorflow-ops/tests/GradientTest.hs +++ b/tensorflow-ops/tests/GradientTest.hs @@ -29,7 +29,7 @@ import qualified Data.Vector as V import Control.Monad.IO.Class (liftIO) import qualified TensorFlow.Core as TF -import qualified TensorFlow.GenOps.Core as TF (max, tile) +import qualified TensorFlow.GenOps.Core as TF (max, tile, maximum) import qualified TensorFlow.Gradient as TF import qualified TensorFlow.Ops as TF hiding (zeroInitializedVariable) import qualified TensorFlow.Output as TF @@ -173,6 +173,27 @@ testMaxGradient = testCase "testMaxGradient" $ do TF.gradients y [x] >>= TF.run V.fromList [0, 0, 1, 0, 0 :: Float] @=? dx +-- NOTE(review): x and y below are identical constants, and opGrad "Maximum" routes ties to x only, so gy == 1 presumably relies on both rendering to the same graph node (confirm). Run a single test like this: +-- stack --docker --docker-image=$IMAGE_NAME test tensorflow-ops:GradientTest --test-arguments -t"*MaximumGrad*" +testMaximumGrad :: Test +testMaximumGrad = testCase "testMaximumGrad" $ do + [gx, gy] <- TF.runSession $ do + x <- TF.render $ TF.vector [0 :: Float] + y <- TF.render $ TF.vector [0 :: Float] + let z = TF.maximum x y + TF.gradients z [x, y] >>= TF.run + V.fromList [1] @=? gx + V.fromList [1] @=? 
gy + +testMaximumGradGrad :: Test +testMaximumGradGrad = testCase "testMaximumGradGrad" $ do + [ggx] <- TF.runSession $ do + x <- TF.render $ TF.vector [2 :: Float] + y <- TF.render $ TF.vector [1 :: Float] + let z = TF.maximum x y + [gx, _gy] <- TF.gradients z [x, y] + TF.gradients gx [x] >>= TF.run + V.fromList [0] @=? ggx testReluGrad :: Test testReluGrad = testCase "testReluGrad" $ do @@ -191,7 +212,6 @@ testReluGradGrad = testCase "testReluGradGrad" $ do TF.gradients y' [x] >>= TF.run V.fromList [0] @=? dx - testFillGrad :: Test testFillGrad = testCase "testFillGrad" $ do [dx] <- TF.runSession $ do @@ -309,6 +329,8 @@ main = defaultMain , testDiamond , testAddNGradient , testMaxGradient + , testMaximumGrad + , testMaximumGradGrad , testReluGrad , testReluGradGrad , testFillGrad