{"id":2558,"date":"2020-03-04T15:02:44","date_gmt":"2020-03-04T06:02:44","guid":{"rendered":"https:\/\/julialang.kr\/?p=2558"},"modified":"2020-03-04T15:02:46","modified_gmt":"2020-03-04T06:02:46","slug":"flux-mnist-conv-example-updated","status":"publish","type":"post","link":"https:\/\/julialang.kr\/?p=2558","title":{"rendered":"[Flux] MNIST conv example updated!"},"content":{"rendered":"\n<p>loss NaN \ubc29\uc9c0 \ubc0f onecold \ub300\uccb4 \ud568\uc218 \uc0ac\uc6a9 -> \uc774\uc804 MNIST update \ucc38\uc870<\/p>\n\n\n\n<p>conv_gpu_minibatch2.jl<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>#=\nTest Environment\n - Julia : v1.3.1\n - Flux  : v0.10.1\n Usage:\n - julia conv_gpu_minibatch.jl  --help\n - ex) julia conv_gpu_minibatch.jl -e 100 -b 1000 -g 0 -l false\n -     epochs : 100, batch size: 1000, gpu device index : 0 , log file : false\n=#\n\n# Classifies MNIST digits with a convolution network.\n# Writes out saved model to the file \"mnist_conv.bson\".\n# Demonstrates basic model construction, training, saving,\n# conditional early-exits, and learning rate scheduling.\n#\n# This model, while simple, should hit around 99% test\n# accuracy after training for approximately 20 epochs.\n\nusing Flux, Flux.Data.MNIST, Statistics\nusing Flux: onehotbatch, onecold, crossentropy, throttle,OneHotMatrix,@epochs\nusing Base.Iterators: repeated, partition\nusing Printf, BSON\nusing Logging\nusing Dates\nusing CUDAnative: device!\nusing CuArrays\nusing Random\nusing Dates\n\n# loss NaN \ubc29\uc9c0\uc6a9\n\u03f5 = 1.0f-32\n\nworking_path = dirname(@__FILE__)\nfile_path(file_name) = joinpath(working_path,file_name)\ninclude(file_path(\"cmd_parser.jl\"))\n\nmodel_file = file_path(\"conv_gpu_minibatch2.bson\")\n\n# Get arguments\nparsed_args = CmdParser.parse_commandline()\n\nepochs = parsed_args&#91;\"epochs\"]\nbatch_size = parsed_args&#91;\"batch\"]\nuse_saved_model = parsed_args&#91;\"model\"]\ngpu_device = parsed_args&#91;\"gpu\"]\ncreate_log_file = 
parsed_args&#91;\"log\"]\n\nif create_log_file\n    log_file = file_path(\"conv_gpu_minibatch2_$(Dates.format(now(),\"yyyymmdd-HHMMSS\")).log\")\n    log = open(log_file,\"w+\")\nelse\n    log = stdout\nend\nglobal_logger(ConsoleLogger(log))\n\n@info \"Start - $(now())\";flush(log)\n\n@info \"============= Arguments =============\"\n@info \"epochs=$(epochs)\"\n@info \"batch_size=$(batch_size)\"\n@info \"use_saved_model=$(use_saved_model)\"\n@info \"gpu_device=$(gpu_device)\"\n@info \"create_log_file=$(create_log_file)\"\n@info \"=====================================\";flush(log)\n\n# set using GPU device\ndevice!(gpu_device)\nCuArrays.allowscalar(false)\n\n# Load labels and images from Flux.Data.MNIST\n@info \"Loading data set\";flush(log)\n\n# Bundle images together with labels and group them into minibatches\nfunction make_minibatch(imgs,labels,batch_size)\n    # WHCN: width x height x #channel x #batch\n    # transform (28x28) to (28x28x1x#batch)\n    len = length(imgs)\n    sz = size(imgs&#91;1])\n    data_set =\n    &#91;(cat(&#91;reshape(Float32.(imgs&#91;i]),sz...,1,1) for i in idx]...,dims=4),\n      onehotbatch(labels&#91;idx],0:9)) for idx in partition(1:len,batch_size) ]\n    return data_set\nend\n\n# Train data load\n# 60,000 labels\ntrain_labels = MNIST.labels()\n# 60,000 images : ((28x28),...,(28x28))\ntrain_imgs = MNIST.images()\n# Make train data to minibatch\ntrain_set = make_minibatch(train_imgs,train_labels,batch_size)\n\n# Test data load\ntest_labels = MNIST.labels(:test)\ntest_imgs = MNIST.images(:test)\ntest_set = make_minibatch(test_imgs,test_labels,batch_size)\n\n#=\n Define our model. 
We will use a simple convolutional architecture with\n three iterations of Conv -> ReLu -> MaxPool, followed by a final Dense\n layer that feeds into a softmax probability output.\n=#\n@info \"Construncting model...\";flush(log)\nmodel = Chain(\n  # First convolution, operating upon a 28x28 image\n  Conv((3,3), 1=>16, pad=(1,1), relu),\n  MaxPool((2,2)),\n\n  # Second convolution, operating upon a 14x14 image\n  Conv((3,3), 16=>32, pad=(1,1), relu),\n  MaxPool((2,2)),\n\n  # Third convolution, operating upon a 7x7 image\n  Conv((3,3), 32=>32, pad=(1,1), relu),\n  MaxPool((2,2)),\n\n  # Reshape 3d tensor into a 2d one, at this point it should be (3,3,32,N)\n  # which is where we get the 288 in the `Dense` layer below:\n  x -> reshape(x, :, size(x,4)),\n  Dense(288,10),\n\n  # Finally, softmax to get nice probabilities\n  softmax,\n)\n\nm = model |> gpu;\n\n#=\n`loss()` calculates the crossentropy loss between our prediction `y_hat`\n (calculated from `m(x)`) and the ground truth `y`. The data augmentation\n (adding gaussian random noise to the images to make the model more robust)\n is applied in the training loop, not inside `loss()`.\n =#\n\ncompare(y::OneHotMatrix, y\u2032) = maximum(y\u2032, dims = 1) .== maximum(y .* y\u2032, dims = 1)\naccuracy(x, y::OneHotMatrix) = mean(compare(y, m(x)))\n\nfunction loss(x,y)\n  y\u0302 = m(x)\n  return crossentropy(y\u0302 .+ \u03f5,y)\nend\n# Make sure our model is nicely precompiled before starting our training loop\n\nfunction accuracy(data_set)\n  batch_size = size(data_set&#91;1]&#91;1])&#91;end]\n  l = length(data_set)*batch_size\n  s = 0f0\n  for (x,y::OneHotMatrix) in data_set\n    s += sum(compare(y|>gpu, m(x|>gpu)))\n  end\n  return s\/l\nend\n\n# Make sure our model is nicely precompiled before starting our training loop\n# train_set&#91;1]&#91;1] : (28,28,1,batch_size)\n# m(train_set&#91;1]&#91;1] |> gpu)\n\n# Train our model with the given training set using the ADAM optimizer and\n# printing out performance against the test set as we go.\nopt = ADAM(0.001)\n\n@info \"Beginning 
training loop...\";flush(log)\nbest_acc = 0.0\nlast_improvement = 0\n\n@time begin\nfor epoch_idx in 1:epochs\n  global best_acc, last_improvement\n  suffle_idxs = collect(1:length(train_set))\n  shuffle!(suffle_idxs)\n  for idx in suffle_idxs\n    (x,y) = train_set&#91;idx]\n    # We augment `x` a little bit here, adding in random noise\n    x = (x .+ 0.1f0*randn(eltype(x),size(x))) |> gpu;\n    y = y|> gpu;\n    Flux.train!(loss, params(m), &#91;(x, y)],opt)\n  end\n  acc = accuracy(test_set)\n  @info(@sprintf(\"&#91;%d]: Test accuracy: %.4f\",epoch_idx,acc));flush(log)\n\n  # If our accuracy is good enough, quit out.\n  if acc >= 0.999\n    @info \" -> Early-exiting: We reached our target accuracy of 99.9%\";flush(log)\n    break\n  end\n\n  # If this is the best accuracy we've seen so far, save the model out\n  if acc >= best_acc\n    @info \" -> New best accuracy! saving model out to $(model_file)\"; flush(log)\n    model = m |> cpu\n    acc = acc |> cpu\n    BSON.@save model_file model epoch_idx acc\n    best_acc = acc\n    last_improvement = epoch_idx\n  end\n\n  # If we haven't seen improvement in 5 epochs, drop the learning rate:\n  if epoch_idx - last_improvement >= 5 &amp;&amp; opt.eta > 1e-6\n    opt.eta \/= 10.0\n    @warn \" -> Haven't improved in a while, dropping learning rate to $(opt.eta)!\"; flush(log)\n\n    # After dropping learning rate, give it a few epochs to improve\n    last_improvement = epoch_idx\n  end\n\n  if epoch_idx - last_improvement >= 10\n    @warn \" -> We're calling this converged.\";flush(log)\n    break\n  end\nend # for\nend # @time\n@info \"End - $(now())\"\nif create_log_file\n  close(log)\nend<\/code><\/pre>\n","protected":false},"excerpt":{"rendered":"<p>loss NaN \ubc29\uc9c0 \ubc0f onecold \ub300\uccb4 \ud568\uc218 \uc0ac\uc6a9 -> \uc774\uc804 MNIST update \ucc38\uc870 
conv_gpu_minibatch2.jl<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"site-sidebar-layout":"default","site-content-layout":"default","ast-site-content-layout":"","site-content-style":"default","site-sidebar-style":"default","ast-global-header-display":"","ast-banner-title-visibility":"","ast-main-header-display":"","ast-hfb-above-header-display":"","ast-hfb-below-header-display":"","ast-hfb-mobile-header-display":"","site-post-title":"","ast-breadcrumbs-content":"","ast-featured-img":"","footer-sml-layout":"","theme-transparent-header-meta":"default","adv-header-id-meta":"","stick-header-meta":"","header-above-stick-meta":"","header-main-stick-meta":"","header-below-stick-meta":"","astra-migrate-meta-layouts":"default","ast-page-background-enabled":"default","ast-page-background-meta":{"desktop":{"background-color":"","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"tablet":{"background-color":"","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"mobile":{"background-color":"","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""}},"ast-content-background-meta":{"desktop":{"background-color":"var(--ast-global-color-5)","background-image":"","background-repeat":"repeat","background-position":"center 
center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"tablet":{"background-color":"var(--ast-global-color-5)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"mobile":{"background-color":"var(--ast-global-color-5)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""}},"footnotes":""},"categories":[18,21],"tags":[],"_links":{"self":[{"href":"https:\/\/julialang.kr\/index.php?rest_route=\/wp\/v2\/posts\/2558"}],"collection":[{"href":"https:\/\/julialang.kr\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/julialang.kr\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/julialang.kr\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/julialang.kr\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=2558"}],"version-history":[{"count":2,"href":"https:\/\/julialang.kr\/index.php?rest_route=\/wp\/v2\/posts\/2558\/revisions"}],"predecessor-version":[{"id":2560,"href":"https:\/\/julialang.kr\/index.php?rest_route=\/wp\/v2\/posts\/2558\/revisions\/2560"}],"wp:attachment":[{"href":"https:\/\/julialang.kr\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=2558"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/julialang.kr\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=2558"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/julialang.kr\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=2
558"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}