-- tensorflow-haskell/tensorflow-mnist/app/Main.hs
--
-- MNIST digit-classification example for the TensorFlow Haskell bindings.
-- Copyright 2016 TensorFlow authors.
--
-- Licensed under the Apache License, Version 2.0 (the "License");
-- you may not use this file except in compliance with the License.
-- You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedLists #-}
{-# LANGUAGE TypeApplications #-}
import Control.Monad (forM_, when)
import Control.Monad.IO.Class (liftIO)
import Data.Int (Int32, Int64)
import Data.List (genericLength)
import qualified Data.Text.IO as T
import qualified Data.Vector as V

import qualified TensorFlow.Core as TF
import qualified TensorFlow.Minimize as TF
import qualified TensorFlow.Ops as TF hiding (initializedVariable, zeroInitializedVariable)
import qualified TensorFlow.Variable as TF

import TensorFlow.Examples.MNIST.InputData
import TensorFlow.Examples.MNIST.Parse
-- | Dimensions fixed by the MNIST dataset: 28x28-pixel images and
-- 10 digit classes.  (The per-binding @:: Int64@ annotations were
-- redundant given the shared signature and have been dropped.)
numPixels, numLabels :: Int64
numPixels = 28 * 28
numLabels = 10
-- | Create a tensor of truncated-normal random values whose standard
-- deviation scales as @1 / sqrt width@, i.e. the classic fan-in
-- initialization that keeps layer activations at a similar scale
-- regardless of input width.
randomParam :: Int64     -- ^ fan-in width used to scale the stddev
            -> TF.Shape  -- ^ shape of the parameter tensor to create
            -> TF.Build (TF.Tensor TF.Build Float)
randomParam width (TF.Shape shape) =
    (`TF.mul` stddev) <$> TF.truncatedNormal (TF.vector shape)
  where
    stddev = TF.scalar (1 / sqrt (fromIntegral width))
-- Types must match due to model structure.
type LabelType = Int32

-- | Entry points into the built graph: feed encoded image (and label)
-- tensor data, run a training step, fetch predictions, or compute the
-- current error rate.
data Model = Model {
      train :: TF.TensorData Float  -- ^ images
            -> TF.TensorData LabelType
            -> TF.Session ()
    , infer :: TF.TensorData Float              -- ^ images
            -> TF.Session (V.Vector LabelType)  -- ^ predictions
    , errorRate :: TF.TensorData Float  -- ^ images
                -> TF.TensorData LabelType
                -> TF.Session Float
    }
-- | Build a single-hidden-layer MLP classifier (784 -> 500 -> 10) and
-- return its training, inference, and error-rate actions as a 'Model'.
createModel :: TF.Build Model
createModel = do
    -- Use -1 batch size to support variable sized batches.
    let batchSize = -1
    -- Inputs.
    images <- TF.placeholder [batchSize, numPixels]
    -- Hidden layer.
    let numUnits = 500
    hiddenWeights <-
        TF.initializedVariable =<< randomParam numPixels [numPixels, numUnits]
    hiddenBiases <- TF.zeroInitializedVariable [numUnits]
    let hiddenZ = (images `TF.matMul` TF.readValue hiddenWeights)
                  `TF.add` TF.readValue hiddenBiases
    let hidden = TF.relu hiddenZ
    -- Logits.
    logitWeights <-
        TF.initializedVariable =<< randomParam numUnits [numUnits, numLabels]
    logitBiases <- TF.zeroInitializedVariable [numLabels]
    let logits = (hidden `TF.matMul` TF.readValue logitWeights)
                 `TF.add` TF.readValue logitBiases
    -- Predicted class = argmax over the class axis (dimension 1).
    predict <- TF.render @TF.Build @LabelType $
               TF.argMax (TF.softmax logits) (TF.scalar (1 :: LabelType))
    -- Create training action.
    labels <- TF.placeholder [batchSize]
    let labelVecs = TF.oneHot labels (fromIntegral numLabels) 1 0
        loss =
            TF.reduceMean $ fst $ TF.softmaxCrossEntropyWithLogits logits labelVecs
        params = [hiddenWeights, hiddenBiases, logitWeights, logitBiases]
    trainStep <- TF.minimizeWith TF.adam loss params
    -- Error rate = 1 - accuracy over the fed batch.
    let correctPredictions = TF.equal predict labels
    errorRateTensor <- TF.render $ 1 - TF.reduceMean (TF.cast correctPredictions)
    return Model {
          train = \imFeed lFeed -> TF.runWithFeeds_ [
                TF.feed images imFeed
              , TF.feed labels lFeed
              ] trainStep
        , infer = \imFeed -> TF.runWithFeeds [TF.feed images imFeed] predict
        , errorRate = \imFeed lFeed -> TF.unScalar <$> TF.runWithFeeds [
                TF.feed images imFeed
              , TF.feed labels lFeed
              ] errorRateTensor
        }
-- | Train the classifier on the MNIST training set, periodically report
-- the training error, then report the test error and draw a few sample
-- predictions.
main :: IO ()
main = TF.runSession $ do
    -- Read training and test data.
    trainingImages <- liftIO (readMNISTSamples =<< trainingImageData)
    trainingLabels <- liftIO (readMNISTLabels =<< trainingLabelData)
    testImages <- liftIO (readMNISTSamples =<< testImageData)
    testLabels <- liftIO (readMNISTLabels =<< testLabelData)
    -- Create the model.
    model <- TF.build createModel
    -- Functions for generating batches.
    let encodeImageBatch xs =
            TF.encodeTensorData [genericLength xs, numPixels]
                                (fromIntegral <$> mconcat xs)
    let encodeLabelBatch xs =
            TF.encodeTensorData [genericLength xs]
                                (fromIntegral <$> V.fromList xs)
    let batchSize = 100
    -- 'cycle' lets batch indices wrap around past the end of the data.
    let selectBatch i xs = take batchSize $ drop (i * batchSize) (cycle xs)
    -- Train.
    forM_ ([0..1000] :: [Int]) $ \i -> do
        let images = encodeImageBatch (selectBatch i trainingImages)
            labels = encodeLabelBatch (selectBatch i trainingLabels)
        train model images labels
        when (i `mod` 100 == 0) $ do
            err <- errorRate model images labels
            liftIO $ putStrLn $ "training error " ++ show (err * 100)
    liftIO $ putStrLn ""
    -- Test.
    testErr <- errorRate model (encodeImageBatch testImages)
                               (encodeLabelBatch testLabels)
    liftIO $ putStrLn $ "test error " ++ show (testErr * 100)
    -- Show some predictions.
    testPreds <- infer model (encodeImageBatch testImages)
    liftIO $ forM_ ([0..3] :: [Int]) $ \i -> do
        putStrLn ""
        T.putStrLn $ drawMNIST $ testImages !! i
        putStrLn $ "expected " ++ show (testLabels !! i)
        putStrLn $ " got " ++ show (testPreds V.! i)