-- tensorflow-haskell/tensorflow-mnist/app/Main.hs
--
-- MNIST digit-classification example for the TensorFlow Haskell bindings.
-- Copyright 2016 TensorFlow authors.
--
-- Licensed under the Apache License, Version 2.0 (the "License");
-- you may not use this file except in compliance with the License.
-- You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedLists #-}
{-# LANGUAGE TypeApplications #-}
import Control.Monad (forM_, when)
import Control.Monad.IO.Class (liftIO)
import Data.Int (Int32, Int64)
import Data.List (genericLength)
import qualified Data.Text.IO as T
import qualified Data.Vector as V

import qualified TensorFlow.Core as TF
import qualified TensorFlow.Minimize as TF
import qualified TensorFlow.Ops as TF hiding (initializedVariable, zeroInitializedVariable)
import qualified TensorFlow.Variable as TF

import TensorFlow.Examples.MNIST.InputData
import TensorFlow.Examples.MNIST.Parse
-- | Dimensions fixed by the MNIST dataset: 28x28-pixel images and
-- 10 digit classes.  (The per-binding @:: Int64@ annotations were
-- redundant given the shared signature and have been dropped.)
numPixels, numLabels :: Int64
numPixels = 28 * 28
numLabels = 10
-- | Create a tensor of truncated-normal random values whose standard
-- deviation scales as @1 / sqrt width@, i.e. the classic fan-in
-- initialization that keeps layer activations at a similar scale
-- regardless of input width.
randomParam :: Int64     -- ^ fan-in width used to scale the stddev
            -> TF.Shape  -- ^ shape of the parameter tensor to create
            -> TF.Build (TF.Tensor TF.Build Float)
randomParam width (TF.Shape shape) =
    (`TF.mul` stddev) <$> TF.truncatedNormal (TF.vector shape)
  where
    stddev = TF.scalar (1 / sqrt (fromIntegral width))
-- Types must match due to model structure.
type LabelType = Int32

-- | Entry points into the built graph: feed encoded image (and label)
-- tensor data, run a training step, fetch predictions, or compute the
-- current error rate.
data Model = Model {
      train :: TF.TensorData Float  -- ^ images
            -> TF.TensorData LabelType
            -> TF.Session ()
    , infer :: TF.TensorData Float              -- ^ images
            -> TF.Session (V.Vector LabelType)  -- ^ predictions
    , errorRate :: TF.TensorData Float  -- ^ images
                -> TF.TensorData LabelType
                -> TF.Session Float
    }
-- | Build a single-hidden-layer MLP classifier (784 -> 500 -> 10) and
-- return its training, inference, and error-rate actions as a 'Model'.
createModel :: TF.Build Model
createModel = do
    -- Use -1 batch size to support variable sized batches.
    let batchSize = -1
    -- Inputs.
    images <- TF.placeholder [batchSize, numPixels]
    -- Hidden layer.
    let numUnits = 500
    hiddenWeights <-
        TF.initializedVariable =<< randomParam numPixels [numPixels, numUnits]
    hiddenBiases <- TF.zeroInitializedVariable [numUnits]
    let hiddenZ = (images `TF.matMul` TF.readValue hiddenWeights)
                  `TF.add` TF.readValue hiddenBiases
    let hidden = TF.relu hiddenZ
    -- Logits.
    logitWeights <-
        TF.initializedVariable =<< randomParam numUnits [numUnits, numLabels]
    logitBiases <- TF.zeroInitializedVariable [numLabels]
    let logits = (hidden `TF.matMul` TF.readValue logitWeights)
                 `TF.add` TF.readValue logitBiases
    -- Predicted class = argmax over the class axis (dimension 1).
    predict <- TF.render @TF.Build @LabelType $
               TF.argMax (TF.softmax logits) (TF.scalar (1 :: LabelType))
    -- Create training action.
    labels <- TF.placeholder [batchSize]
    let labelVecs = TF.oneHot labels (fromIntegral numLabels) 1 0
        loss =
            TF.reduceMean $ fst $ TF.softmaxCrossEntropyWithLogits logits labelVecs
        params = [hiddenWeights, hiddenBiases, logitWeights, logitBiases]
    trainStep <- TF.minimizeWith TF.adam loss params
    -- Error rate = 1 - accuracy over the fed batch.
    let correctPredictions = TF.equal predict labels
    errorRateTensor <- TF.render $ 1 - TF.reduceMean (TF.cast correctPredictions)
    return Model {
          train = \imFeed lFeed -> TF.runWithFeeds_ [
                TF.feed images imFeed
              , TF.feed labels lFeed
              ] trainStep
        , infer = \imFeed -> TF.runWithFeeds [TF.feed images imFeed] predict
        , errorRate = \imFeed lFeed -> TF.unScalar <$> TF.runWithFeeds [
                TF.feed images imFeed
              , TF.feed labels lFeed
              ] errorRateTensor
        }
-- | Train the classifier on the MNIST training set, periodically report
-- the training error, then report the test error and draw a few sample
-- predictions.
main :: IO ()
main = TF.runSession $ do
    -- Read training and test data.
    trainingImages <- liftIO (readMNISTSamples =<< trainingImageData)
    trainingLabels <- liftIO (readMNISTLabels =<< trainingLabelData)
    testImages <- liftIO (readMNISTSamples =<< testImageData)
    testLabels <- liftIO (readMNISTLabels =<< testLabelData)
    -- Create the model.
    model <- TF.build createModel
    -- Functions for generating batches.
    let encodeImageBatch xs =
            TF.encodeTensorData [genericLength xs, numPixels]
                                (fromIntegral <$> mconcat xs)
    let encodeLabelBatch xs =
            TF.encodeTensorData [genericLength xs]
                                (fromIntegral <$> V.fromList xs)
    let batchSize = 100
    -- 'cycle' lets batch indices wrap around past the end of the data.
    let selectBatch i xs = take batchSize $ drop (i * batchSize) (cycle xs)
    -- Train.
    forM_ ([0..1000] :: [Int]) $ \i -> do
        let images = encodeImageBatch (selectBatch i trainingImages)
            labels = encodeLabelBatch (selectBatch i trainingLabels)
        train model images labels
        when (i `mod` 100 == 0) $ do
            err <- errorRate model images labels
            liftIO $ putStrLn $ "training error " ++ show (err * 100)
    liftIO $ putStrLn ""
    -- Test.
    testErr <- errorRate model (encodeImageBatch testImages)
                               (encodeLabelBatch testLabels)
    liftIO $ putStrLn $ "test error " ++ show (testErr * 100)
    -- Show some predictions.
    testPreds <- infer model (encodeImageBatch testImages)
    liftIO $ forM_ ([0..3] :: [Int]) $ \i -> do
        putStrLn ""
        T.putStrLn $ drawMNIST $ testImages !! i
        putStrLn $ "expected " ++ show (testLabels !! i)
        putStrLn $ " got " ++ show (testPreds V.! i)