mirror of https://github.com/tensorflow/haskell.git synced 2024-06-02 19:13:34 +02:00
Erika Bor 2019-02-28 13:09:44 +01:00
parent 285ffe38c4
commit 1ed871b59a
6 changed files with 130 additions and 76 deletions

View File

@@ -1,6 +1,6 @@
{
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "7098bcac278a2d028036bb3a23508fd1c52155ac",
"sha256": "04m7z7334mjma0ci3vp4js6rbz4s2jxy864s1v4dkdm7860zjc28"
"rev": "dbb9f8818af7cf2ea91b89f7d80a8fb1800cbfb5",
"sha256": "1i8lsiscjzq066p02a5c7azjrvqxqkz6dipzkkyhh9rmhgs1aca3"
}

View File

@@ -7,7 +7,7 @@ set -euo pipefail
cd "$(dirname "$0")" || exit 1
branch=release-17.09
branch=release-18.04
owner=NixOS
repo=nixpkgs

View File

@@ -11,8 +11,8 @@ let
pkgs = import nixpkgs {};
in
pkgs.haskell.lib.buildStackProject {
# Either use specified GHC or use GHC 8.2.2 (which we need for LTS 11.9)
ghc = if isNull ghc then pkgs.haskell.compiler.ghc822 else ghc;
# Either use specified GHC or use GHC 8.4.4 (which we need for LTS 12.26)
ghc = if isNull ghc then pkgs.haskell.compiler.ghc844 else ghc;
extraArgs = "--system-ghc";
name = "tf-env";
buildInputs = with pkgs; [ snappy zlib protobuf libtensorflow ];

View File

@@ -1,4 +1,4 @@
resolver: lts-11.9
resolver: lts-12.26
packages:
- tensorflow
@@ -14,12 +14,7 @@ packages:
- tensorflow-test
extra-deps:
- snappy-framing-0.1.1
- snappy-0.2.0.2
- proto-lens-protobuf-types-0.3.0.1
- proto-lens-protoc-0.3.1.0
- proto-lens-0.3.1.0
- lens-labels-0.2.0.1
- snappy-framing-0.1.2
# For Mac OS X, whose linker doesn't use this path by default
# unless you run `xcode-select --install`.

View File

@@ -71,9 +71,9 @@ import Proto.Tensorflow.Core.Framework.OpDef_Fields
import Proto.Tensorflow.Core.Framework.Types (DataType(..))
import System.FilePath (takeBaseName)
import TensorFlow.OpGen.ParsedOp
import Data.Semigroup ((<>))
import Text.PrettyPrint.Mainland
( Doc
, (<>)
, (<+>)
, (</>)
, (<+/>)

View File

@@ -11,105 +11,164 @@
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
module TensorFlow.Minimize
( Minimizer
, minimizeWith
, gradientDescent
, AdamConfig(..)
, adam
, adam'
) where
( Minimizer
, minimizeWith
, gradientDescent
, gradientDescentRef
, AdamConfig(..)
, adam
, adam'
, adamRef
, adamRef'
) where
import Control.Monad (zipWithM)
import Data.Default (Default(..))
import Data.List (zipWith4)
import Data.Maybe (fromMaybe)
import Control.Monad (zipWithM)
import Data.Default (Default (..))
import Data.List (zipWith4)
import Data.Maybe (fromMaybe)
import qualified TensorFlow.Core as TF
import qualified TensorFlow.Core as TF
import qualified TensorFlow.Gradient as TF
import qualified TensorFlow.Ops as TF hiding (assign, initializedVariable)
import qualified TensorFlow.Ops as TF (scalar, mul, zerosLike)
import qualified TensorFlow.Variable as TF
-- | Functions that minimize a loss w.r.t. a set of 'TF.Variable's.
import qualified TensorFlow.Tensor as TF (Rendered, ToTensor)
import qualified TensorFlow.GenOps.Core as TFO (applyAdam, assignAdd, assign)
import qualified TensorFlow.Ops as TFO (initializedVariable,
zeroInitializedVariable)
-- | Functions that minimize a loss w.r.t. a set of 'TF.Variable's or 'TF.Tensor TF.Ref's.
--
-- Generally only performs one step of an iterative algorithm.
--
-- 'Minimizer's are defined as a function of the gradients instead of
-- the loss so that users can apply transformations to the gradients.
type Minimizer a =
forall m. TF.MonadBuild m =>
[TF.Variable a] -> [TF.Tensor TF.Value a] -> m TF.ControlNode
newtype Minimizer t a m = Minimizer
{ minimize :: (TF.GradientCompatible a, TF.TensorType a, TF.MonadBuild m, TF.ToTensor t, TF.Rendered t) =>
[t a] -> [TF.Tensor TF.Value a] -> m TF.ControlNode
}
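-- For illustration only, a minimal sketch (not part of this change): because a
-- 'Minimizer' receives the gradients explicitly, it can be wrapped to transform
-- them before the underlying update runs, e.g. rescaling every gradient by a
-- constant factor.  The helper name 'scaleGradients' is purely illustrative.
scaleGradients :: Float -> Minimizer TF.Variable Float m -> Minimizer TF.Variable Float m
scaleGradients factor inner = Minimizer
    { minimize = \params grads -> do
        -- Multiply each incoming gradient by 'factor', then delegate.
        scaled <- mapM (TF.render . (TF.scalar factor `TF.mul`)) grads
        minimize inner params scaled
    }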
-- | Convenience wrapper around 'TF.gradients' and a 'Minimizer'.
minimizeWith :: (TF.MonadBuild m, TF.GradientCompatible a)
=> Minimizer a
-> TF.Tensor v a -- ^ Loss.
-> [TF.Variable a] -- ^ Parameters of the loss function.
-> m TF.ControlNode
minimizeWith minimizer loss params =
TF.gradients loss params >>= minimizer params
-- | Perform one step of the gradient descent algorithm.
gradientDescent :: TF.GradientCompatible a
=> a -- ^ Learning rate.
-> Minimizer a
gradientDescent learningRate params grads = TF.withNameScope "gradientDescent" $ do
let applyGrad param grad =
TF.assignAdd param (TF.scalar (-learningRate) `TF.mul` grad)
TF.group =<< zipWithM applyGrad params grads
-- TODO: Support more than Float in adam.
data AdamConfig = AdamConfig
{ adamLearningRate :: Float
, adamBeta1 :: Float
, adamBeta2 :: Float
, adamEpsilon :: Float
minimizer :: forall a m t n. TF.Nodes n => (t a -> TF.Tensor TF.Build a -> m n) -> a -> Minimizer t a m
minimizer assignAdd learningRate =
Minimizer
{ minimize =
\params grads ->
TF.withNameScope "gradientDescent" $ do
let applyGrad param grad = assignAdd param (TF.scalar (-learningRate) `TF.mul` grad)
TF.group =<< zipWithM applyGrad params grads
}
instance Default AdamConfig where
-- | Convenience wrapper around 'TF.gradients' and a 'Minimizer'.
minimizeWith ::
(TF.MonadBuild m, TF.GradientCompatible a, TF.Rendered t, TF.ToTensor t)
=> Minimizer t a m
-> TF.Tensor v a -- ^ Loss.
-> [t a] -- ^ Parameters of the loss function.
-> m TF.ControlNode
minimizeWith m loss params = TF.gradients loss params >>= minimize m params
-- | Perform one step of the gradient descent algorithm for `TF.Variable`.
gradientDescent ::
(TF.MonadBuild m,
TF.GradientCompatible a)
=> a -- ^ Learning rate.
-> Minimizer TF.Variable a m
gradientDescent = minimizer TF.assignAdd
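-- A minimal usage sketch, for illustration only (not part of this change):
-- one gradient-descent step on a single scalar 'TF.Variable' with loss w*w,
-- run inside a 'TF.Session'.  The function name is illustrative.
gradientDescentExample :: TF.Session ()
gradientDescentExample = do
    w <- TF.initializedVariable (TF.scalar (3 :: Float))
    let loss = TF.readValue w `TF.mul` TF.readValue w
    trainStep <- minimizeWith (gradientDescent 0.01) loss [w]
    TF.run_ trainStep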
-- | Perform one step of the gradient descent algorithm for `TF.Tensor TF.Ref`.
gradientDescentRef ::
(TF.MonadBuild m,
TF.GradientCompatible a)
=> a -- ^ Learning rate.
-> Minimizer (TF.Tensor TF.Ref) a m
gradientDescentRef = minimizer TFO.assignAdd
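-- The same sketch for an old-style reference variable (illustration only, not
-- part of this change): 'TFO.initializedVariable' yields a 'TF.Tensor TF.Ref',
-- and 'TF.expr' turns it back into a buildable tensor for the loss expression.
gradientDescentRefExample :: TF.Session ()
gradientDescentRefExample = do
    w <- TFO.initializedVariable (TF.scalar (3 :: Float))
    let loss = TF.expr w `TF.mul` TF.expr w
    trainStep <- minimizeWith (gradientDescentRef 0.01) loss [w]
    TF.run_ trainStep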
-- TODO: Support more than Float in adam.
data AdamConfig = AdamConfig
{ adamLearningRate :: Float
, adamBeta1 :: Float
, adamBeta2 :: Float
, adamEpsilon :: Float
}
-- Recommended defaults from the adam paper.
instance Default AdamConfig where
def = AdamConfig 0.001 0.9 0.999 1e-8
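-- For illustration only (not part of this change): individual fields of the
-- default configuration can be overridden with record update syntax instead of
-- spelling out all four hyper-parameters.
slowAdam :: Minimizer TF.Variable Float TF.Build
slowAdam = adam' def { adamLearningRate = 1e-4 }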
-- | Perform one step of the adam algorithm.
-- | Perform one step of the adam algorithm for `TF.Variable`.
--
-- See https://arxiv.org/abs/1412.6980.
--
-- NOTE: Currently requires all 'TF.Variable's to have an 'TF.initializedValue'.
adam :: Minimizer Float
adam :: Minimizer TF.Variable Float TF.Build
adam = adam' def
adam' :: AdamConfig -> Minimizer Float
adam' config params grads = TF.withNameScope "adam" $ do
adam' :: AdamConfig -> Minimizer TF.Variable Float TF.Build
adam' config =
let errorMsg = "TensorFlow.Minimize.adam requires an initial value for all variables"
initVal = fromMaybe (error errorMsg) . TF.initializedValue
in adam''
config
(mapM (TF.initializedVariable . TF.zerosLike . initVal))
TF.initializedVariable
TF.resourceApplyAdam
TF.readValue
TF.assign
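-- A minimal usage sketch, for illustration only (not part of this change):
-- 'adam' is fixed to the 'TF.Build' monad, so the training step is hoisted
-- into the session with 'TF.build'.  The variable is created with
-- 'TF.initializedVariable' and therefore carries the 'TF.initializedValue'
-- that 'adam' requires.
adamExample :: TF.Session ()
adamExample = do
    w <- TF.initializedVariable (TF.scalar (3 :: Float))
    let loss = TF.readValue w `TF.mul` TF.readValue w
    trainStep <- TF.build (minimizeWith adam loss [w])
    TF.run_ trainStep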
adamRef :: [TF.Shape] -> Minimizer (TF.Tensor TF.Ref) Float TF.Build
adamRef = adamRef' def
-- | Perform one step of the adam algorithm for `TF.Tensor TF.Ref`.
--
-- Initialising the Adam state the same way as for `TF.Variable`, i.e. with
-- `(TFO.initializedVariable . TF.zerosLike . TF.value)`, only works some of
-- the time: it frequently fails at runtime with "attempting to use
-- uninitialized value variable".  The state is therefore zero-initialised
-- from the explicitly supplied shapes instead.
adamRef' :: AdamConfig -> [TF.Shape] -> Minimizer (TF.Tensor TF.Ref) Float TF.Build
adamRef' config shapes =
adam''
config
(\_ -> mapM TFO.zeroInitializedVariable shapes)
TFO.initializedVariable
TFO.applyAdam
TF.expr
TFO.assign
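-- A minimal usage sketch, for illustration only (not part of this change):
-- for `TF.Tensor TF.Ref` parameters the Adam state is built from explicitly
-- supplied shapes, so the shape of every parameter is passed to 'adamRef'
-- (here a single scalar, 'TF.Shape []').
adamRefExample :: TF.Session ()
adamRefExample = do
    w <- TFO.initializedVariable (TF.scalar (3 :: Float))
    let loss = TF.expr w `TF.mul` TF.expr w
    trainStep <- TF.build (minimizeWith (adamRef [TF.Shape []]) loss [w])
    TF.run_ trainStep

-- Shared implementation behind 'adam'' and 'adamRef''.  Its arguments are the
-- configuration, an action building the zero-initialised moment accumulators
-- from the parameters, an action initialising a state variable from a tensor
-- (used for the beta-power accumulators), the raw applyAdam op, a function
-- reading a state variable back as a tensor, and the assign op used to update
-- the beta powers.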
adam'' :: forall t n . (TF.Nodes n, TF.ToTensor t, TF.Rendered t) =>
AdamConfig
-> ([t Float] -> TF.Build [t Float])
-> (TF.Tensor TF.Build Float -> TF.Build (t Float))
-> (t Float -> t Float -> t Float
    -> TF.Tensor TF.Build Float -> TF.Tensor TF.Build Float -> TF.Tensor TF.Build Float
    -> TF.Tensor TF.Build Float -> TF.Tensor TF.Build Float -> TF.Tensor TF.Build Float
    -> TF.Tensor TF.Value Float -> TF.Build n)
-> (t Float -> TF.Tensor TF.Build Float)
-> (t Float -> TF.Tensor TF.Build Float -> TF.Build n)
-> Minimizer t Float TF.Build
adam'' config initVarZero initVar applyAdam readValue assign = Minimizer
{ minimize = \params grads -> TF.withNameScope "adam" $ do
let lr = TF.scalar (adamLearningRate config)
beta1 = TF.scalar (adamBeta1 config)
beta2 = TF.scalar (adamBeta2 config)
epsilon = TF.scalar (adamEpsilon config)
-- Create adam state variables.
let errorMsg = "TensorFlow.Minimize.adam requires an initial value for all variables"
initVal = fromMaybe (error errorMsg) . TF.initializedValue
ms <- mapM (TF.initializedVariable . TF.zerosLike . initVal) params
vs <- mapM (TF.initializedVariable . TF.zerosLike . initVal) params
beta1Power <- TF.initializedVariable beta1
beta2Power <- TF.initializedVariable beta2
ms <- initVarZero params
vs <- initVarZero params
beta1Power <- initVar beta1
beta2Power <- initVar beta2
-- Perform adam update.
let applyGrad param m v =
TF.resourceApplyAdam param m v
(TF.readValue beta1Power)
(TF.readValue beta2Power)
let applyGrad param m v = applyAdam param m v
(readValue beta1Power)
(readValue beta2Power)
lr beta1 beta2 epsilon
updateVars <- sequence $ zipWith4 applyGrad params ms vs grads
-- Update beta variables after adam update.
let updateBeta betaPower beta =
TF.withControlDependencies updateVars
(TF.assign betaPower (TF.readValue betaPower `TF.mul` beta))
(assign betaPower (readValue betaPower `TF.mul` beta))
updateBeta1 <- updateBeta beta1Power beta1
updateBeta2 <- updateBeta beta2Power beta2
TF.group (updateBeta1:updateBeta2:updateVars)
}