tensorflow-haskell/tensorflow-mnist/app/Main.hs

-- Copyright 2016 TensorFlow authors.
--
-- Licensed under the Apache License, Version 2.0 (the "License");
-- you may not use this file except in compliance with the License.
-- You may obtain a copy of the License at
--
--     http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.

{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedLists #-}

import Control.Monad (zipWithM, when, forM_)
import Control.Monad.IO.Class (liftIO)
import Data.Int (Int32, Int64)
import Data.List (genericLength)
import qualified Data.Text.IO as T
import qualified Data.Vector as V

import qualified TensorFlow.Core as TF
import qualified TensorFlow.Gradient as TF
import qualified TensorFlow.Ops as TF

import TensorFlow.Examples.MNIST.InputData
import TensorFlow.Examples.MNIST.Parse

numPixels, numLabels :: Int64
numPixels = 28*28 :: Int64
numLabels = 10 :: Int64

-- | Create tensor with random values where the stddev depends on the width.
randomParam :: Int64 -> TF.Shape -> TF.Build (TF.Tensor TF.Value Float)
randomParam width (TF.Shape shape) =
    (* stddev) <$> TF.truncatedNormal (TF.vector shape)
  where
    stddev = TF.scalar (1 / sqrt (fromIntegral width))

reduceMean :: TF.Tensor TF.Value Float -> TF.Tensor TF.Value Float
reduceMean xs = TF.mean xs (TF.scalar (0 :: Int32))

-- Types must match due to model structure.
type LabelType = Int32

data Model = Model {
      train :: TF.TensorData Float  -- ^ images
            -> TF.TensorData LabelType
            -> TF.Session ()
    , infer :: TF.TensorData Float  -- ^ images
            -> TF.Session (V.Vector LabelType)  -- ^ predictions
    , errorRate :: TF.TensorData Float  -- ^ images
                -> TF.TensorData LabelType
                -> TF.Session Float
    }

createModel :: TF.Build Model
createModel = do
    -- Use -1 batch size to support variable sized batches.
    let batchSize = -1
    -- Inputs.
    images <- TF.placeholder [batchSize, numPixels]
    -- Hidden layer.
    let numUnits = 500
    hiddenWeights <-
        TF.initializedVariable =<< randomParam numPixels [numPixels, numUnits]
    hiddenBiases <- TF.zeroInitializedVariable [numUnits]
    let hiddenZ = (images `TF.matMul` hiddenWeights) `TF.add` hiddenBiases
    let hidden = TF.relu hiddenZ
    -- Logits.
    logitWeights <-
        TF.initializedVariable =<< randomParam numUnits [numUnits, numLabels]
    logitBiases <- TF.zeroInitializedVariable [numLabels]
    let logits = (hidden `TF.matMul` logitWeights) `TF.add` logitBiases
    predict <- TF.render $ TF.cast $
               TF.argMax (TF.softmax logits) (TF.scalar (1 :: LabelType))

    -- Create training action.
    labels <- TF.placeholder [batchSize]
    let labelVecs = TF.oneHot labels (fromIntegral numLabels) 1 0
        loss =
            reduceMean $ fst $ TF.softmaxCrossEntropyWithLogits logits labelVecs
        params = [hiddenWeights, hiddenBiases, logitWeights, logitBiases]
    grads <- TF.gradients loss params

    let lr = TF.scalar 0.00001
        applyGrad param grad = TF.assign param $ param `TF.sub` (lr * grad)
    trainStep <- TF.group =<< zipWithM applyGrad params grads

    let correctPredictions = TF.equal predict labels
    errorRateTensor <- TF.render $ 1 - reduceMean (TF.cast correctPredictions)

    return Model {
          train = \imFeed lFeed -> TF.runWithFeeds_ [
                TF.feed images imFeed
              , TF.feed labels lFeed
              ] trainStep
        , infer = \imFeed -> TF.runWithFeeds [TF.feed images imFeed] predict
        , errorRate = \imFeed lFeed -> TF.unScalar <$> TF.runWithFeeds [
                TF.feed images imFeed
              , TF.feed labels lFeed
              ] errorRateTensor
        }

main :: IO ()
main = TF.runSession $ do
    -- Read training and test data.
    trainingImages <- liftIO (readMNISTSamples =<< trainingImageData)
    trainingLabels <- liftIO (readMNISTLabels =<< trainingLabelData)
    testImages <- liftIO (readMNISTSamples =<< testImageData)
    testLabels <- liftIO (readMNISTLabels =<< testLabelData)

    -- Create the model.
    model <- TF.build createModel

    -- Functions for generating batches.
    let encodeImageBatch xs =
            TF.encodeTensorData [genericLength xs, numPixels]
                                (fromIntegral <$> mconcat xs)
    let encodeLabelBatch xs =
            TF.encodeTensorData [genericLength xs]
                                (fromIntegral <$> V.fromList xs)
    let batchSize = 100
    let selectBatch i xs = take batchSize $ drop (i * batchSize) (cycle xs)

    -- Train.
    forM_ ([0..1000] :: [Int]) $ \i -> do
        let images = encodeImageBatch (selectBatch i trainingImages)
            labels = encodeLabelBatch (selectBatch i trainingLabels)
        train model images labels
        when (i `mod` 100 == 0) $ do
            err <- errorRate model images labels
            liftIO $ putStrLn $ "training error " ++ show (err * 100)
    liftIO $ putStrLn ""

    -- Test.
    testErr <- errorRate model (encodeImageBatch testImages)
                               (encodeLabelBatch testLabels)
    liftIO $ putStrLn $ "test error " ++ show (testErr * 100)

    -- Show some predictions.
    testPreds <- infer model (encodeImageBatch testImages)
    liftIO $ forM_ ([0..3] :: [Int]) $ \i -> do
        putStrLn ""
        T.putStrLn $ drawMNIST $ testImages !! i
        putStrLn $ "expected " ++ show (testLabels !! i)
        putStrLn $ "     got " ++ show (testPreds V.! i)
Initial commit 2016-10-24 21:26:42 +02:00			`-- Copyright 2016 TensorFlow authors.`
			`--`
			`-- Licensed under the Apache License, Version 2.0 (the "License");`
			`-- you may not use this file except in compliance with the License.`
			`-- You may obtain a copy of the License at`
			`--`
			`-- http://www.apache.org/licenses/LICENSE-2.0`
			`--`
			`-- Unless required by applicable law or agreed to in writing, software`
			`-- distributed under the License is distributed on an "AS IS" BASIS,`
			`-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`-- See the License for the specific language governing permissions and`
			`-- limitations under the License.`

Misc MNIST example cleanup (#9) * Use native oneHot op in the example code. It didn't exist when this was originally written. * Misc cleanup in MNIST example - Use unspecified dimension for batch size in model. This simplifies the code for the test set. - Move error rate calculation into model. 2016-10-26 20:14:38 +02:00			`{-# LANGUAGE FlexibleContexts #-}`
Initial commit 2016-10-24 21:26:42 +02:00			`{-# LANGUAGE OverloadedLists #-}`

Make code --pedantic (#35) * Enforce pedantic build mode in CI. * Our imports drifted really far from where they should be. 2016-11-18 19:42:02 +01:00			`import Control.Monad (zipWithM, when, forM_)`
Initial commit 2016-10-24 21:26:42 +02:00			`import Control.Monad.IO.Class (liftIO)`
			`import Data.Int (Int32, Int64)`
Misc MNIST example cleanup (#9) * Use native oneHot op in the example code. It didn't exist when this was originally written. * Misc cleanup in MNIST example - Use unspecified dimension for batch size in model. This simplifies the code for the test set. - Move error rate calculation into model. 2016-10-26 20:14:38 +02:00			`import Data.List (genericLength)`
Initial commit 2016-10-24 21:26:42 +02:00			`import qualified Data.Text.IO as T`
			`import qualified Data.Vector as V`

Support fetching storable vectors + use them in benchmark (#50) In addition, you can now fetch TensorData directly. This might be useful in scenarios where you feed the result of a computation back in, like RNN. Before: benchmarking feedFetch/4 byte time 83.31 μs (81.88 μs .. 84.75 μs) 0.997 R² (0.994 R² .. 0.998 R²) mean 87.32 μs (86.06 μs .. 88.83 μs) std dev 4.580 μs (3.698 μs .. 5.567 μs) variance introduced by outliers: 55% (severely inflated) benchmarking feedFetch/4 KiB time 114.9 μs (111.5 μs .. 118.2 μs) 0.996 R² (0.994 R² .. 0.998 R²) mean 117.3 μs (116.2 μs .. 118.6 μs) std dev 3.877 μs (3.058 μs .. 5.565 μs) variance introduced by outliers: 31% (moderately inflated) benchmarking feedFetch/4 MiB time 109.0 ms (107.9 ms .. 110.7 ms) 1.000 R² (0.999 R² .. 1.000 R²) mean 108.6 ms (108.2 ms .. 109.2 ms) std dev 740.2 μs (353.2 μs .. 1.186 ms) After: benchmarking feedFetch/4 byte time 82.92 μs (80.55 μs .. 85.24 μs) 0.996 R² (0.993 R² .. 0.998 R²) mean 83.58 μs (82.34 μs .. 84.89 μs) std dev 4.327 μs (3.664 μs .. 5.375 μs) variance introduced by outliers: 54% (severely inflated) benchmarking feedFetch/4 KiB time 85.69 μs (83.81 μs .. 87.30 μs) 0.997 R² (0.996 R² .. 0.999 R²) mean 86.99 μs (86.11 μs .. 88.15 μs) std dev 3.608 μs (2.854 μs .. 5.273 μs) variance introduced by outliers: 43% (moderately inflated) benchmarking feedFetch/4 MiB time 1.582 ms (1.509 ms .. 1.677 ms) 0.970 R² (0.936 R² .. 0.993 R²) mean 1.645 ms (1.554 ms .. 1.981 ms) std dev 490.6 μs (138.9 μs .. 1.067 ms) variance introduced by outliers: 97% (severely inflated) 2016-12-15 03:53:06 +01:00			`import qualified TensorFlow.Core as TF`
Misc MNIST example cleanup (#9) * Use native oneHot op in the example code. It didn't exist when this was originally written. * Misc cleanup in MNIST example - Use unspecified dimension for batch size in model. This simplifies the code for the test set. - Move error rate calculation into model. 2016-10-26 20:14:38 +02:00			`import qualified TensorFlow.Gradient as TF`
Initial commit 2016-10-24 21:26:42 +02:00			`import qualified TensorFlow.Ops as TF`

			`import TensorFlow.Examples.MNIST.InputData`
			`import TensorFlow.Examples.MNIST.Parse`

Make code --pedantic (#35) * Enforce pedantic build mode in CI. * Our imports drifted really far from where they should be. 2016-11-18 19:42:02 +01:00			`numPixels, numLabels :: Int64`
			`numPixels = 28*28 :: Int64`
Initial commit 2016-10-24 21:26:42 +02:00			`numLabels = 10 :: Int64`

			`-- \| Create tensor with random values where the stddev depends on the width.`
			`randomParam :: Int64 -> TF.Shape -> TF.Build (TF.Tensor TF.Value Float)`
			`randomParam width (TF.Shape shape) =`
			`(* stddev) <$> TF.truncatedNormal (TF.vector shape)`
			`where`
			`stddev = TF.scalar (1 / sqrt (fromIntegral width))`

Make code --pedantic (#35) * Enforce pedantic build mode in CI. * Our imports drifted really far from where they should be. 2016-11-18 19:42:02 +01:00			`reduceMean :: TF.Tensor TF.Value Float -> TF.Tensor TF.Value Float`
Misc MNIST example cleanup (#9) * Use native oneHot op in the example code. It didn't exist when this was originally written. * Misc cleanup in MNIST example - Use unspecified dimension for batch size in model. This simplifies the code for the test set. - Move error rate calculation into model. 2016-10-26 20:14:38 +02:00			`reduceMean xs = TF.mean xs (TF.scalar (0 :: Int32))`

			`-- Types must match due to model structure.`
Initial commit 2016-10-24 21:26:42 +02:00			`type LabelType = Int32`

			`data Model = Model {`
			`train :: TF.TensorData Float -- ^ images`
			`-> TF.TensorData LabelType`
			`-> TF.Session ()`
			`, infer :: TF.TensorData Float -- ^ images`
			`-> TF.Session (V.Vector LabelType) -- ^ predictions`
Misc MNIST example cleanup (#9) * Use native oneHot op in the example code. It didn't exist when this was originally written. * Misc cleanup in MNIST example - Use unspecified dimension for batch size in model. This simplifies the code for the test set. - Move error rate calculation into model. 2016-10-26 20:14:38 +02:00			`, errorRate :: TF.TensorData Float -- ^ images`
			`-> TF.TensorData LabelType`
			`-> TF.Session Float`
Initial commit 2016-10-24 21:26:42 +02:00			`}`

Misc MNIST example cleanup (#9) * Use native oneHot op in the example code. It didn't exist when this was originally written. * Misc cleanup in MNIST example - Use unspecified dimension for batch size in model. This simplifies the code for the test set. - Move error rate calculation into model. 2016-10-26 20:14:38 +02:00			`createModel :: TF.Build Model`
			`createModel = do`
			`-- Use -1 batch size to support variable sized batches.`
			`let batchSize = -1`
Initial commit 2016-10-24 21:26:42 +02:00			`-- Inputs.`
			`images <- TF.placeholder [batchSize, numPixels]`
			`-- Hidden layer.`
			`let numUnits = 500`
			`hiddenWeights <-`
			`TF.initializedVariable =<< randomParam numPixels [numPixels, numUnits]`
			`hiddenBiases <- TF.zeroInitializedVariable [numUnits]`
			let hiddenZ = (images `TF.matMul` hiddenWeights) `TF.add` hiddenBiases
			`let hidden = TF.relu hiddenZ`
			`-- Logits.`
			`logitWeights <-`
			`TF.initializedVariable =<< randomParam numUnits [numUnits, numLabels]`
			`logitBiases <- TF.zeroInitializedVariable [numLabels]`
			let logits = (hidden `TF.matMul` logitWeights) `TF.add` logitBiases
			`predict <- TF.render $ TF.cast $`
			`TF.argMax (TF.softmax logits) (TF.scalar (1 :: LabelType))`

			`-- Create training action.`
			`labels <- TF.placeholder [batchSize]`
Misc MNIST example cleanup (#9) * Use native oneHot op in the example code. It didn't exist when this was originally written. * Misc cleanup in MNIST example - Use unspecified dimension for batch size in model. This simplifies the code for the test set. - Move error rate calculation into model. 2016-10-26 20:14:38 +02:00			`let labelVecs = TF.oneHot labels (fromIntegral numLabels) 1 0`
			`loss =`
			`reduceMean $ fst $ TF.softmaxCrossEntropyWithLogits logits labelVecs`
Initial commit 2016-10-24 21:26:42 +02:00			`params = [hiddenWeights, hiddenBiases, logitWeights, logitBiases]`
			`grads <- TF.gradients loss params`

Misc MNIST example cleanup (#9) * Use native oneHot op in the example code. It didn't exist when this was originally written. * Misc cleanup in MNIST example - Use unspecified dimension for batch size in model. This simplifies the code for the test set. - Move error rate calculation into model. 2016-10-26 20:14:38 +02:00			`let lr = TF.scalar 0.00001`
			applyGrad param grad = TF.assign param $ param `TF.sub` (lr * grad)
Initial commit 2016-10-24 21:26:42 +02:00			`trainStep <- TF.group =<< zipWithM applyGrad params grads`

Misc MNIST example cleanup (#9) * Use native oneHot op in the example code. It didn't exist when this was originally written. * Misc cleanup in MNIST example - Use unspecified dimension for batch size in model. This simplifies the code for the test set. - Move error rate calculation into model. 2016-10-26 20:14:38 +02:00			`let correctPredictions = TF.equal predict labels`
			`errorRateTensor <- TF.render $ 1 - reduceMean (TF.cast correctPredictions)`

Initial commit 2016-10-24 21:26:42 +02:00			`return Model {`
Misc MNIST example cleanup (#9) * Use native oneHot op in the example code. It didn't exist when this was originally written. * Misc cleanup in MNIST example - Use unspecified dimension for batch size in model. This simplifies the code for the test set. - Move error rate calculation into model. 2016-10-26 20:14:38 +02:00			`train = \imFeed lFeed -> TF.runWithFeeds_ [`
			`TF.feed images imFeed`
			`, TF.feed labels lFeed`
			`] trainStep`
Initial commit 2016-10-24 21:26:42 +02:00			`, infer = \imFeed -> TF.runWithFeeds [TF.feed images imFeed] predict`
Misc MNIST example cleanup (#9) * Use native oneHot op in the example code. It didn't exist when this was originally written. * Misc cleanup in MNIST example - Use unspecified dimension for batch size in model. This simplifies the code for the test set. - Move error rate calculation into model. 2016-10-26 20:14:38 +02:00			`, errorRate = \imFeed lFeed -> TF.unScalar <$> TF.runWithFeeds [`
			`TF.feed images imFeed`
			`, TF.feed labels lFeed`
			`] errorRateTensor`
Initial commit 2016-10-24 21:26:42 +02:00			`}`

Make code --pedantic (#35) * Enforce pedantic build mode in CI. * Our imports drifted really far from where they should be. 2016-11-18 19:42:02 +01:00			`main :: IO ()`
Initial commit 2016-10-24 21:26:42 +02:00			`main = TF.runSession $ do`
			`-- Read training and test data.`
			`trainingImages <- liftIO (readMNISTSamples =<< trainingImageData)`
			`trainingLabels <- liftIO (readMNISTLabels =<< trainingLabelData)`
			`testImages <- liftIO (readMNISTSamples =<< testImageData)`
			`testLabels <- liftIO (readMNISTLabels =<< testLabelData)`

			`-- Create the model.`
Misc MNIST example cleanup (#9) * Use native oneHot op in the example code. It didn't exist when this was originally written. * Misc cleanup in MNIST example - Use unspecified dimension for batch size in model. This simplifies the code for the test set. - Move error rate calculation into model. 2016-10-26 20:14:38 +02:00			`model <- TF.build createModel`

			`-- Functions for generating batches.`
			`let encodeImageBatch xs =`
			`TF.encodeTensorData [genericLength xs, numPixels]`
			`(fromIntegral <$> mconcat xs)`
			`let encodeLabelBatch xs =`
			`TF.encodeTensorData [genericLength xs]`
			`(fromIntegral <$> V.fromList xs)`
			`let batchSize = 100`
			`let selectBatch i xs = take batchSize $ drop (i * batchSize) (cycle xs)`
Initial commit 2016-10-24 21:26:42 +02:00
			`-- Train.`
			`forM_ ([0..1000] :: [Int]) $ \i -> do`
Misc MNIST example cleanup (#9) * Use native oneHot op in the example code. It didn't exist when this was originally written. * Misc cleanup in MNIST example - Use unspecified dimension for batch size in model. This simplifies the code for the test set. - Move error rate calculation into model. 2016-10-26 20:14:38 +02:00			`let images = encodeImageBatch (selectBatch i trainingImages)`
			`labels = encodeLabelBatch (selectBatch i trainingLabels)`
			`train model images labels`
Initial commit 2016-10-24 21:26:42 +02:00			when (i `mod` 100 == 0) $ do
Misc MNIST example cleanup (#9) * Use native oneHot op in the example code. It didn't exist when this was originally written. * Misc cleanup in MNIST example - Use unspecified dimension for batch size in model. This simplifies the code for the test set. - Move error rate calculation into model. 2016-10-26 20:14:38 +02:00			`err <- errorRate model images labels`
			`liftIO $ putStrLn $ "training error " ++ show (err * 100)`
Initial commit 2016-10-24 21:26:42 +02:00			`liftIO $ putStrLn ""`

			`-- Test.`
Misc MNIST example cleanup (#9) * Use native oneHot op in the example code. It didn't exist when this was originally written. * Misc cleanup in MNIST example - Use unspecified dimension for batch size in model. This simplifies the code for the test set. - Move error rate calculation into model. 2016-10-26 20:14:38 +02:00			`testErr <- errorRate model (encodeImageBatch testImages)`
			`(encodeLabelBatch testLabels)`
			`liftIO $ putStrLn $ "test error " ++ show (testErr * 100)`
Initial commit 2016-10-24 21:26:42 +02:00
			`-- Show some predictions.`
Misc MNIST example cleanup (#9) * Use native oneHot op in the example code. It didn't exist when this was originally written. * Misc cleanup in MNIST example - Use unspecified dimension for batch size in model. This simplifies the code for the test set. - Move error rate calculation into model. 2016-10-26 20:14:38 +02:00			`testPreds <- infer model (encodeImageBatch testImages)`
Initial commit 2016-10-24 21:26:42 +02:00			`liftIO $ forM_ ([0..3] :: [Int]) $ \i -> do`
			`putStrLn ""`
			`T.putStrLn $ drawMNIST $ testImages !! i`
			`putStrLn $ "expected " ++ show (testLabels !! i)`
			`putStrLn $ " got " ++ show (testPreds V.! i)`