tensorflow-haskell/tensorflow-ops/tests/EmbeddingOpsTest.hs

179 lines
7.2 KiB
Haskell
Raw Normal View History

2016-10-24 21:26:42 +02:00
-- Copyright 2016 TensorFlow authors.
--
-- Licensed under the Apache License, Version 2.0 (the "License");
-- you may not use this file except in compliance with the License.
-- You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
Support fetching storable vectors + use them in benchmark (#50) In addition, you can now fetch TensorData directly. This might be useful in scenarios where you feed the result of a computation back in, like RNN. Before: benchmarking feedFetch/4 byte time 83.31 μs (81.88 μs .. 84.75 μs) 0.997 R² (0.994 R² .. 0.998 R²) mean 87.32 μs (86.06 μs .. 88.83 μs) std dev 4.580 μs (3.698 μs .. 5.567 μs) variance introduced by outliers: 55% (severely inflated) benchmarking feedFetch/4 KiB time 114.9 μs (111.5 μs .. 118.2 μs) 0.996 R² (0.994 R² .. 0.998 R²) mean 117.3 μs (116.2 μs .. 118.6 μs) std dev 3.877 μs (3.058 μs .. 5.565 μs) variance introduced by outliers: 31% (moderately inflated) benchmarking feedFetch/4 MiB time 109.0 ms (107.9 ms .. 110.7 ms) 1.000 R² (0.999 R² .. 1.000 R²) mean 108.6 ms (108.2 ms .. 109.2 ms) std dev 740.2 μs (353.2 μs .. 1.186 ms) After: benchmarking feedFetch/4 byte time 82.92 μs (80.55 μs .. 85.24 μs) 0.996 R² (0.993 R² .. 0.998 R²) mean 83.58 μs (82.34 μs .. 84.89 μs) std dev 4.327 μs (3.664 μs .. 5.375 μs) variance introduced by outliers: 54% (severely inflated) benchmarking feedFetch/4 KiB time 85.69 μs (83.81 μs .. 87.30 μs) 0.997 R² (0.996 R² .. 0.999 R²) mean 86.99 μs (86.11 μs .. 88.15 μs) std dev 3.608 μs (2.854 μs .. 5.273 μs) variance introduced by outliers: 43% (moderately inflated) benchmarking feedFetch/4 MiB time 1.582 ms (1.509 ms .. 1.677 ms) 0.970 R² (0.936 R² .. 0.993 R²) mean 1.645 ms (1.554 ms .. 1.981 ms) std dev 490.6 μs (138.9 μs .. 1.067 ms) variance introduced by outliers: 97% (severely inflated)
2016-12-15 03:53:06 +01:00
{-# LANGUAGE FlexibleContexts #-}
2016-10-24 21:26:42 +02:00
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
-- | Tests for EmbeddingOps.
module Main where
import Control.Monad.IO.Class (liftIO)
2016-10-24 21:26:42 +02:00
import Data.Int (Int32, Int64)
import Data.List (genericLength)
import TensorFlow.EmbeddingOps (embeddingLookup)
import Test.Framework (defaultMain, Test)
2016-10-24 21:26:42 +02:00
import Test.Framework.Providers.QuickCheck2 (testProperty)
import Test.HUnit ((@=?))
import Test.Framework.Providers.HUnit (testCase)
2016-10-24 21:26:42 +02:00
import Test.QuickCheck (Arbitrary(..), Property, choose, vectorOf)
import Test.QuickCheck.Monadic (monadicIO, run)
import TensorFlow.Test (assertAllClose)
2016-10-24 21:26:42 +02:00
import qualified Data.Vector as V
import qualified TensorFlow.GenOps.Core as CoreOps
import qualified TensorFlow.Ops as TF
import qualified TensorFlow.Session as TF
import qualified TensorFlow.Tensor as TF
import qualified TensorFlow.Types as TF
import qualified TensorFlow.Gradient as TF
import qualified TensorFlow.Build as TF
-- | Tries to perform a simple embedding lookup, with two partitions.
testEmbeddingLookupHasRightShapeWithPartition :: Test
testEmbeddingLookupHasRightShapeWithPartition =
testCase "testEmbeddingLookupHasRightShapeWithPartition" $ do
let embShape = TF.Shape [1, 3] -- Consider a 3-dim embedding of two items.
let embedding1 = [1, 1, 1 :: Int32]
let embedding2 = [0, 0, 0 :: Int32]
let idValues = [0, 1 :: Int32]
(values, shape) <- TF.runSession $ do
embedding <- mapM TF.render [ TF.constant embShape embedding1
, TF.constant embShape embedding2
]
let ids = TF.constant (TF.Shape [1, 2]) idValues
vs <- embeddingLookup embedding ids
TF.run (vs, TF.shape vs)
-- This is the shape that is returned in the equiv. Python.
shape @=? V.fromList [1, 2, 3]
-- "[0, 1]" should pull out the resulting vector.
values @=? V.fromList [1, 1, 1, 0, 0, 0]
-- | Tries to perform a simple embedding lookup, with only a single partition.
testEmbeddingLookupHasRightShape :: Test
testEmbeddingLookupHasRightShape =
testCase "testEmbeddingLookupHasRightShape" $ do
-- Consider a 3-dim embedding of two items
let embShape = TF.Shape [2, 3]
let embeddingInit = [ 1, 1, 1
, 0, 0, 0 :: Int32
]
let idValues = [0, 1 :: Int32]
(values, shape) <- TF.runSession $ do
embedding <- TF.render $ TF.constant embShape embeddingInit
let ids = TF.constant (TF.Shape [1, 2]) idValues
vs <- embeddingLookup [embedding] ids
TF.run (vs, TF.shape vs)
-- This is the shape that is returned in the equiv. Python.
shape @=? V.fromList [1, 2, 3]
-- "[0, 1]" should pull out the resulting vector.
values @=? V.fromList [1, 1, 1, 0, 0, 0]
-- | Check that we can calculate gradients w.r.t embeddings.
testEmbeddingLookupGradients :: Test
testEmbeddingLookupGradients = testCase "testEmbeddingLookupGradients" $ do
-- Agrees with "embedding", so gradient should be zero.
let xVals = V.fromList ([20, 20 :: Float])
let shape = TF.Shape [2]
gs <- TF.runSession $ do
let embShape = TF.Shape [2, 1]
let embeddingInit = [1, 20 ::Float]
let idValues = [1, 1 :: Int32]
let ids = TF.constant (TF.Shape [1, 2]) idValues
x <- TF.placeholder (TF.Shape [2])
embedding <- TF.initializedVariable
(TF.constant embShape embeddingInit)
op <- embeddingLookup [embedding] ids
let twoNorm = CoreOps.square $ TF.abs (op `TF.sub` x)
loss = TF.mean twoNorm (TF.scalar (0 :: Int32))
grad <- fmap head (TF.gradients loss [embedding])
TF.runWithFeeds
[TF.feed x $ TF.encodeTensorData shape xVals]
grad
-- Gradients should be zero (or close)
assertAllClose gs (V.fromList ([0, 0 :: Float]))
2016-10-24 21:26:42 +02:00
-- Verifies that direct gather is the same as dynamic split into
-- partitions, followed by embedding lookup.
Support fetching storable vectors + use them in benchmark (#50) In addition, you can now fetch TensorData directly. This might be useful in scenarios where you feed the result of a computation back in, like RNN. Before: benchmarking feedFetch/4 byte time 83.31 μs (81.88 μs .. 84.75 μs) 0.997 R² (0.994 R² .. 0.998 R²) mean 87.32 μs (86.06 μs .. 88.83 μs) std dev 4.580 μs (3.698 μs .. 5.567 μs) variance introduced by outliers: 55% (severely inflated) benchmarking feedFetch/4 KiB time 114.9 μs (111.5 μs .. 118.2 μs) 0.996 R² (0.994 R² .. 0.998 R²) mean 117.3 μs (116.2 μs .. 118.6 μs) std dev 3.877 μs (3.058 μs .. 5.565 μs) variance introduced by outliers: 31% (moderately inflated) benchmarking feedFetch/4 MiB time 109.0 ms (107.9 ms .. 110.7 ms) 1.000 R² (0.999 R² .. 1.000 R²) mean 108.6 ms (108.2 ms .. 109.2 ms) std dev 740.2 μs (353.2 μs .. 1.186 ms) After: benchmarking feedFetch/4 byte time 82.92 μs (80.55 μs .. 85.24 μs) 0.996 R² (0.993 R² .. 0.998 R²) mean 83.58 μs (82.34 μs .. 84.89 μs) std dev 4.327 μs (3.664 μs .. 5.375 μs) variance introduced by outliers: 54% (severely inflated) benchmarking feedFetch/4 KiB time 85.69 μs (83.81 μs .. 87.30 μs) 0.997 R² (0.996 R² .. 0.999 R²) mean 86.99 μs (86.11 μs .. 88.15 μs) std dev 3.608 μs (2.854 μs .. 5.273 μs) variance introduced by outliers: 43% (moderately inflated) benchmarking feedFetch/4 MiB time 1.582 ms (1.509 ms .. 1.677 ms) 0.970 R² (0.936 R² .. 0.993 R²) mean 1.645 ms (1.554 ms .. 1.981 ms) std dev 490.6 μs (138.9 μs .. 1.067 ms) variance introduced by outliers: 97% (severely inflated)
2016-12-15 03:53:06 +01:00
testEmbeddingLookupUndoesSplit ::
forall a. (TF.TensorDataType V.Vector a, Show a, Eq a)
=> LookupExample a -> Property
2016-10-24 21:26:42 +02:00
testEmbeddingLookupUndoesSplit
(LookupExample numParts
shape@(TF.Shape (firstDim : restDims))
values
indices) = monadicIO $ run $ TF.runSession $ do
let shapedValues = TF.constant shape values
indicesVector <- TF.render $ TF.vector indices
let directs = CoreOps.gather shapedValues indicesVector
let cyclicCounter :: TF.Tensor TF.Build Int32 =
2016-10-24 21:26:42 +02:00
TF.vector [0..fromIntegral firstDim-1]
`CoreOps.mod` fromIntegral numParts
modShardedValues :: [TF.Tensor TF.Value a] <-
mapM TF.render $ CoreOps.dynamicPartition numParts shapedValues cyclicCounter
embeddings <- embeddingLookup modShardedValues indicesVector
(shapeOut, got, want :: V.Vector a) <-
TF.run (TF.cast (TF.shape embeddings), embeddings, directs)
-- Checks the explicitly documented invariant of embeddingLookup.
liftIO $ shapeOut @=? V.fromList (genericLength indices : restDims)
liftIO $ got @=? want
2016-10-24 21:26:42 +02:00
testEmbeddingLookupUndoesSplit _ = error "Bug in Arbitrary (LookupExample)"
-- | Consistent set of parameters for EmbeddingLookupUndoesSplit.
data LookupExample a = LookupExample
Int64 -- ^ number of ways to split.
TF.Shape -- ^ shape of the generated tensor
[a] -- ^ data for the tensor
[Int32] -- ^ indices to split the tensor by
deriving Show
instance Arbitrary a => Arbitrary (LookupExample a) where
arbitrary = do
rank <- choose (1, 4)
-- Takes rank-th root of 100 to cap the tensor size.
let maxDim = fromIntegral (ceiling doubleMaxDim :: Int64)
doubleMaxDim :: Double
doubleMaxDim = 100 ** (1 / fromIntegral rank)
shape <- vectorOf rank (choose (1, maxDim))
let firstDim = head shape
2016-10-24 21:26:42 +02:00
values <- vectorOf (fromIntegral $ product shape) arbitrary
numParts <- choose (2, 15)
indSize <- choose (0, fromIntegral $ firstDim - 1)
indices <- vectorOf indSize (choose (0, fromIntegral firstDim - 1))
return $ LookupExample numParts (TF.Shape shape) values indices
main :: IO ()
main = defaultMain
2016-10-24 21:26:42 +02:00
[ testProperty "EmbeddingLookupUndoesSplit"
(testEmbeddingLookupUndoesSplit :: LookupExample Double -> Property)
, testEmbeddingLookupHasRightShape
, testEmbeddingLookupHasRightShapeWithPartition
, testEmbeddingLookupGradients
2016-10-24 21:26:42 +02:00
]