2016-10-24 21:26:42 +02:00
|
|
|
-- Copyright 2016 TensorFlow authors.
|
|
|
|
--
|
|
|
|
-- Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
-- you may not use this file except in compliance with the License.
|
|
|
|
-- You may obtain a copy of the License at
|
|
|
|
--
|
|
|
|
-- http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
--
|
|
|
|
-- Unless required by applicable law or agreed to in writing, software
|
|
|
|
-- distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
-- See the License for the specific language governing permissions and
|
|
|
|
-- limitations under the License.
|
|
|
|
|
2016-10-26 20:14:38 +02:00
|
|
|
{-# LANGUAGE FlexibleContexts #-}
|
2016-10-24 21:26:42 +02:00
|
|
|
{-# LANGUAGE OverloadedLists #-}
|
|
|
|
|
|
|
|
import Control.Monad (zipWithM, when, forM, forM_)
|
|
|
|
import Control.Monad.IO.Class (liftIO)
|
|
|
|
import Data.Int (Int32, Int64)
|
2016-10-26 20:14:38 +02:00
|
|
|
import Data.List (genericLength)
|
2016-10-24 21:26:42 +02:00
|
|
|
import qualified Data.Text.IO as T
|
|
|
|
import qualified Data.Vector as V
|
|
|
|
|
|
|
|
import qualified TensorFlow.Build as TF
|
2016-10-26 20:14:38 +02:00
|
|
|
import qualified TensorFlow.ControlFlow as TF
|
|
|
|
import qualified TensorFlow.Gradient as TF
|
|
|
|
import qualified TensorFlow.Nodes as TF
|
2016-10-24 21:26:42 +02:00
|
|
|
import qualified TensorFlow.Ops as TF
|
|
|
|
import qualified TensorFlow.Session as TF
|
|
|
|
import qualified TensorFlow.Tensor as TF
|
|
|
|
import qualified TensorFlow.Types as TF
|
|
|
|
|
|
|
|
import TensorFlow.Examples.MNIST.InputData
|
|
|
|
import TensorFlow.Examples.MNIST.Parse
|
|
|
|
|
|
|
|
numPixels = 28^2 :: Int64
|
|
|
|
numLabels = 10 :: Int64
|
|
|
|
|
|
|
|
-- | Create tensor with random values where the stddev depends on the width.
|
|
|
|
randomParam :: Int64 -> TF.Shape -> TF.Build (TF.Tensor TF.Value Float)
|
|
|
|
randomParam width (TF.Shape shape) =
|
|
|
|
(* stddev) <$> TF.truncatedNormal (TF.vector shape)
|
|
|
|
where
|
|
|
|
stddev = TF.scalar (1 / sqrt (fromIntegral width))
|
|
|
|
|
2016-10-26 20:14:38 +02:00
|
|
|
reduceMean xs = TF.mean xs (TF.scalar (0 :: Int32))
|
|
|
|
|
|
|
|
-- Types must match due to model structure.
|
2016-10-24 21:26:42 +02:00
|
|
|
type LabelType = Int32
|
|
|
|
|
|
|
|
data Model = Model {
|
|
|
|
train :: TF.TensorData Float -- ^ images
|
|
|
|
-> TF.TensorData LabelType
|
|
|
|
-> TF.Session ()
|
|
|
|
, infer :: TF.TensorData Float -- ^ images
|
|
|
|
-> TF.Session (V.Vector LabelType) -- ^ predictions
|
2016-10-26 20:14:38 +02:00
|
|
|
, errorRate :: TF.TensorData Float -- ^ images
|
|
|
|
-> TF.TensorData LabelType
|
|
|
|
-> TF.Session Float
|
2016-10-24 21:26:42 +02:00
|
|
|
}
|
|
|
|
|
2016-10-26 20:14:38 +02:00
|
|
|
createModel :: TF.Build Model
|
|
|
|
createModel = do
|
|
|
|
-- Use -1 batch size to support variable sized batches.
|
|
|
|
let batchSize = -1
|
2016-10-24 21:26:42 +02:00
|
|
|
-- Inputs.
|
|
|
|
images <- TF.placeholder [batchSize, numPixels]
|
|
|
|
-- Hidden layer.
|
|
|
|
let numUnits = 500
|
|
|
|
hiddenWeights <-
|
|
|
|
TF.initializedVariable =<< randomParam numPixels [numPixels, numUnits]
|
|
|
|
hiddenBiases <- TF.zeroInitializedVariable [numUnits]
|
|
|
|
let hiddenZ = (images `TF.matMul` hiddenWeights) `TF.add` hiddenBiases
|
|
|
|
let hidden = TF.relu hiddenZ
|
|
|
|
-- Logits.
|
|
|
|
logitWeights <-
|
|
|
|
TF.initializedVariable =<< randomParam numUnits [numUnits, numLabels]
|
|
|
|
logitBiases <- TF.zeroInitializedVariable [numLabels]
|
|
|
|
let logits = (hidden `TF.matMul` logitWeights) `TF.add` logitBiases
|
|
|
|
predict <- TF.render $ TF.cast $
|
|
|
|
TF.argMax (TF.softmax logits) (TF.scalar (1 :: LabelType))
|
|
|
|
|
|
|
|
-- Create training action.
|
|
|
|
labels <- TF.placeholder [batchSize]
|
2016-10-26 20:14:38 +02:00
|
|
|
let labelVecs = TF.oneHot labels (fromIntegral numLabels) 1 0
|
|
|
|
loss =
|
|
|
|
reduceMean $ fst $ TF.softmaxCrossEntropyWithLogits logits labelVecs
|
2016-10-24 21:26:42 +02:00
|
|
|
params = [hiddenWeights, hiddenBiases, logitWeights, logitBiases]
|
|
|
|
grads <- TF.gradients loss params
|
|
|
|
|
2016-10-26 20:14:38 +02:00
|
|
|
let lr = TF.scalar 0.00001
|
|
|
|
applyGrad param grad = TF.assign param $ param `TF.sub` (lr * grad)
|
2016-10-24 21:26:42 +02:00
|
|
|
trainStep <- TF.group =<< zipWithM applyGrad params grads
|
|
|
|
|
2016-10-26 20:14:38 +02:00
|
|
|
let correctPredictions = TF.equal predict labels
|
|
|
|
errorRateTensor <- TF.render $ 1 - reduceMean (TF.cast correctPredictions)
|
|
|
|
|
2016-10-24 21:26:42 +02:00
|
|
|
return Model {
|
2016-10-26 20:14:38 +02:00
|
|
|
train = \imFeed lFeed -> TF.runWithFeeds_ [
|
|
|
|
TF.feed images imFeed
|
|
|
|
, TF.feed labels lFeed
|
|
|
|
] trainStep
|
2016-10-24 21:26:42 +02:00
|
|
|
, infer = \imFeed -> TF.runWithFeeds [TF.feed images imFeed] predict
|
2016-10-26 20:14:38 +02:00
|
|
|
, errorRate = \imFeed lFeed -> TF.unScalar <$> TF.runWithFeeds [
|
|
|
|
TF.feed images imFeed
|
|
|
|
, TF.feed labels lFeed
|
|
|
|
] errorRateTensor
|
2016-10-24 21:26:42 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
main = TF.runSession $ do
|
|
|
|
-- Read training and test data.
|
|
|
|
trainingImages <- liftIO (readMNISTSamples =<< trainingImageData)
|
|
|
|
trainingLabels <- liftIO (readMNISTLabels =<< trainingLabelData)
|
|
|
|
testImages <- liftIO (readMNISTSamples =<< testImageData)
|
|
|
|
testLabels <- liftIO (readMNISTLabels =<< testLabelData)
|
|
|
|
|
|
|
|
-- Create the model.
|
2016-10-26 20:14:38 +02:00
|
|
|
model <- TF.build createModel
|
|
|
|
|
|
|
|
-- Functions for generating batches.
|
|
|
|
let encodeImageBatch xs =
|
|
|
|
TF.encodeTensorData [genericLength xs, numPixels]
|
|
|
|
(fromIntegral <$> mconcat xs)
|
|
|
|
let encodeLabelBatch xs =
|
|
|
|
TF.encodeTensorData [genericLength xs]
|
|
|
|
(fromIntegral <$> V.fromList xs)
|
|
|
|
let batchSize = 100
|
|
|
|
let selectBatch i xs = take batchSize $ drop (i * batchSize) (cycle xs)
|
2016-10-24 21:26:42 +02:00
|
|
|
|
|
|
|
-- Train.
|
|
|
|
forM_ ([0..1000] :: [Int]) $ \i -> do
|
2016-10-26 20:14:38 +02:00
|
|
|
let images = encodeImageBatch (selectBatch i trainingImages)
|
|
|
|
labels = encodeLabelBatch (selectBatch i trainingLabels)
|
|
|
|
train model images labels
|
2016-10-24 21:26:42 +02:00
|
|
|
when (i `mod` 100 == 0) $ do
|
2016-10-26 20:14:38 +02:00
|
|
|
err <- errorRate model images labels
|
|
|
|
liftIO $ putStrLn $ "training error " ++ show (err * 100)
|
2016-10-24 21:26:42 +02:00
|
|
|
liftIO $ putStrLn ""
|
|
|
|
|
|
|
|
-- Test.
|
2016-10-26 20:14:38 +02:00
|
|
|
testErr <- errorRate model (encodeImageBatch testImages)
|
|
|
|
(encodeLabelBatch testLabels)
|
|
|
|
liftIO $ putStrLn $ "test error " ++ show (testErr * 100)
|
2016-10-24 21:26:42 +02:00
|
|
|
|
|
|
|
-- Show some predictions.
|
2016-10-26 20:14:38 +02:00
|
|
|
testPreds <- infer model (encodeImageBatch testImages)
|
2016-10-24 21:26:42 +02:00
|
|
|
liftIO $ forM_ ([0..3] :: [Int]) $ \i -> do
|
|
|
|
putStrLn ""
|
|
|
|
T.putStrLn $ drawMNIST $ testImages !! i
|
|
|
|
putStrLn $ "expected " ++ show (testLabels !! i)
|
|
|
|
putStrLn $ " got " ++ show (testPreds V.! i)
|