Merge pull request #245 from dmlc/vc/test-v0.2.2
Preparing release v0.2.2
vchuravy authored May 14, 2017
2 parents 0619b55 + 594a8a8 commit 828e4c4
Showing 27 changed files with 645 additions and 211 deletions.
1 change: 1 addition & 0 deletions .travis.yml
@@ -4,6 +4,7 @@ language: julia
os:
- linux
- osx
osx_image: xcode8
julia:
- 0.5
# - nightly 0.6 support depends on #170
12 changes: 11 additions & 1 deletion NEWS.md
@@ -1,7 +1,17 @@
# v0.2.2 (2017.05.14)
* Updated supported version of MXNet to 0.9.4.
* Improved build-system with support for auto-detecting GPU support.
* Several updates to Metrics.
* CI for Windows.
* Verbosity option for `predict` (@rdeits)

# v0.2.1 (2017.01.29)
* Bugfix release for Windows

# v0.2.0 (2017.01.26)
* Drop support for Julia v0.4.
* Added support for NVVM.
* Updated supported version of MXNet to 0.9.3.
* Updated supported version of MXNet to 0.9.2
* New optimizers (@Arkoniak).

# v0.1.0 (2016.09.08)
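The `predict` verbosity option listed for v0.2.2 can be exercised roughly as follows; this is a minimal sketch, assuming the keyword is spelled `verbosity` and that `model` and `eval_provider` are hypothetical stand-ins for a trained `mx.FeedForward` network and a data provider:

```julia
# Hypothetical names: `model` and `eval_provider` are placeholders, not part of
# this release; only the keyword usage is the point of the sketch.
probs = mx.predict(model, eval_provider, verbosity = 0)  # suppress progress output
```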
1 change: 1 addition & 0 deletions README.md
@@ -1,6 +1,7 @@
# MXNet

[![Build Status](https://travis-ci.org/dmlc/MXNet.jl.svg?branch=master)](https://travis-ci.org/dmlc/MXNet.jl)
[![Windows Build](https://ci.appveyor.com/api/projects/status/re90njols2th2ide?svg=true)](https://ci.appveyor.com/project/pluskid/mxnet-jl)
[![codecov.io](https://codecov.io/github/dmlc/MXNet.jl/coverage.svg?branch=master)](https://codecov.io/github/dmlc/MXNet.jl?branch=master)
[![](https://img.shields.io/badge/docs-latest-blue.svg)](https://dmlc.github.io/MXNet.jl/latest)
[![MXNet](http://pkg.julialang.org/badges/MXNet_0.4.svg)](http://pkg.julialang.org/?pkg=MXNet)
12 changes: 7 additions & 5 deletions appveyor.yml
@@ -1,14 +1,11 @@
environment:
matrix:
- JULIAVERSION: "julialang/bin/winnt/x86/0.3/julia-0.3-latest-win32.exe"
- JULIAVERSION: "julialang/bin/winnt/x64/0.3/julia-0.3-latest-win64.exe"
- JULIAVERSION: "julianightlies/bin/winnt/x86/julia-latest-win32.exe"
- JULIAVERSION: "julianightlies/bin/winnt/x64/julia-latest-win64.exe"
- JULIAVERSION: "julialang/bin/winnt/x64/0.5/julia-0.5-latest-win64.exe"

branches:
only:
- master
- /release-.*/
- stable

notifications:
- provider: Email
@@ -17,6 +14,11 @@ notifications:
on_build_status_changed: false

install:
# If there's a newer build queued for the same PR, cancel this one
- ps: if ($env:APPVEYOR_PULL_REQUEST_NUMBER -and $env:APPVEYOR_BUILD_NUMBER -ne ((Invoke-RestMethod `
https://ci.appveyor.com/api/projects/$env:APPVEYOR_ACCOUNT_NAME/$env:APPVEYOR_PROJECT_SLUG/history?recordsNumber=50).builds | `
Where-Object pullRequestId -eq $env:APPVEYOR_PULL_REQUEST_NUMBER)[0].buildNumber) { `
throw "There are newer queued builds for this pull request, failing early." }
# Download most recent Julia Windows binary
- ps: (new-object net.webclient).DownloadFile(
$("http://s3.amazonaws.com/"+$env:JULIAVERSION),
74 changes: 52 additions & 22 deletions deps/build.jl
@@ -5,8 +5,8 @@ import JSON
# First try to detect and load existing libmxnet
################################################################################
libmxnet_detected = false
libmxnet_curr_ver = "v0.9.3"
curr_win = "20170122"
libmxnet_curr_ver = "v0.9.5"
curr_win = "20170502"

if haskey(ENV, "MXNET_HOME")
info("MXNET_HOME environment detected: $(ENV["MXNET_HOME"])")
@@ -20,31 +20,70 @@ if haskey(ENV, "MXNET_HOME")
end
end

# Try to find cuda
CUDAPATHS = String[]
if haskey(ENV, "CUDA_HOME")
push!(CUDAPATHS, joinpath(ENV["CUDA_HOME"], "lib64"))
elseif is_linux()
append!(CUDAPATHS, ["/opt/cuda/lib64", "/usr/local/cuda/lib64"])
end

if is_unix()
try
push!(CUDAPATHS, replace(strip(readstring(`which nvcc`)), "bin/nvcc", "lib64"))
end
end

HAS_CUDA = false
let cudalib = Libdl.find_library(["libcuda", "nvcuda.dll"], CUDAPATHS)
HAS_CUDA = Libdl.dlopen_e(cudalib) != C_NULL
end

if !HAS_CUDA && is_windows()
# TODO: this needs to be improved.
try
run(`nvcc --version`)
HAS_CUDA = true
end
end

if HAS_CUDA
info("Found a CUDA installation.")
else
info("Did not find a CUDA installation, using CPU-only version of MXNet.")
end

using BinDeps
@BinDeps.setup
if !libmxnet_detected
if is_windows()
# TODO: Detect GPU support on Windows
info("Downloading pre-built CPU packages for Windows.")
base_url = "https://github.com/dmlc/mxnet/releases/download/20160531/20160531_win10_x64_cpu.7z"
if Sys.ARCH != :x86_64
info("Prebuilt windows binaries are only available on 64bit. You will have to built MXNet yourself.")
return
end
info("Downloading pre-built packages for Windows.")
base_url = "https://github.com/yajiedesign/mxnet/releases/download/weekly_binary_build/prebuildbase_win10_x64_vc14.7z"

if libmxnet_curr_ver == "master"
# download_cmd uses powershell 2, but we need powershell 3 to do this
run(`powershell -NoProfile -Command Invoke-WebRequest -Uri "https://api.github.com/repos/yajiedesign/mxnet/releases/latest" -OutFile "mxnet.json"`)
run(`powershell -NoProfile -Command Invoke-WebRequest -Uri "https://api.github.com/repos/yajiedesign/mxnet/releases/latest" -OutFile "mxnet.json"`)
curr_win = JSON.parsefile("mxnet.json")["tag_name"]
info("Can't use MXNet master on Windows, using latest binaries from $curr_win.")
end
# TODO: Get url from JSON.
package_url = "https://github.com/yajiedesign/mxnet/releases/download/$(curr_win)/$(curr_win)_mxnet_x64_vc12_cpu.7z"
name = "mxnet_x64_vc14_$(HAS_CUDA ? "gpu" : "cpu").7z"
package_url = "https://github.com/yajiedesign/mxnet/releases/download/$(curr_win)/$(curr_win)_$(name)"

exe7z = joinpath(JULIA_HOME, "7z.exe")

run(download_cmd(base_url, "mxnet_base.7z"))
run(`7z x mxnet_base.7z -y -ousr`)
run(`usr\\setupenv.cmd`)
run(`$exe7z x mxnet_base.7z -y -ousr`)
run(`cmd /c copy "usr\\3rdparty\\openblas\\bin\\*.dll" "usr\\lib"`)
run(`cmd /c copy "usr\\3rdparty\\opencv\\*.dll" "usr\\lib"`)

run(download_cmd(package_url, "mxnet.7z"))
run(`7z x mxnet.7z -y -ousr`)
run(`$exe7z x mxnet.7z -y -ousr`)
run(`cmd /c copy "usr\\build\\*.dll" "usr\\lib"`)

return
end
@@ -55,16 +94,6 @@ if !libmxnet_detected

blas_path = Libdl.dlpath(Libdl.dlopen(Base.libblas_name))

# Try to find cuda
hascuda = false
if haskey(ENV, "CUDA_HOME")
hascuda = Libdl.dlopen_e(joinpath(ENV["CUDA_HOME"], "lib64", "libcuda.so")) != C_NULL
else
cudapaths = String["/opt/cuda/lib64", "/usr/local/cuda/lib64"]
cudalib = Libdl.find_library(["libcuda", "libcuda.so"], cudapaths)
hascuda = Libdl.dlopen_e(cudalib) != C_NULL
end

if VERSION >= v"0.5.0-dev+4338"
blas_vendor = Base.BLAS.vendor()
else
@@ -116,7 +145,8 @@ if !libmxnet_detected
`git -C mshadow checkout -- make/mshadow.mk`
`git fetch`
`git checkout $libmxnet_curr_ver`
`git submodule update`
`git submodule update --init`
`make clean`
`sed -i -s "s/MSHADOW_CFLAGS = \(.*\)/MSHADOW_CFLAGS = \1 $ilp64/" mshadow/make/mshadow.mk`
end
FileRule(joinpath(_mxdir, "config.mk"), @build_steps begin
@@ -127,7 +157,7 @@ if !libmxnet_detected
`cp make/config.mk config.mk`
end
`sed -i -s 's/USE_OPENCV = 1/USE_OPENCV = 0/' config.mk`
if hascuda
if HAS_CUDA
`sed -i -s 's/USE_CUDA = 0/USE_CUDA = 1/' config.mk`
if haskey(ENV, "CUDA_HOME")
`sed -i -s 's/USE_CUDA_PATH = NULL/USE_CUDA_PATH = $(ENV["CUDA_HOME"])/' config.mk`
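The CUDA auto-detection added to `deps/build.jl` above can be reproduced interactively before running the build. A minimal sketch of the same probe, mirroring the paths and `Libdl` calls in the snippet (on Julia 0.7 and later you would need `using Libdl` first):

```julia
# Look for the CUDA driver library in the usual locations and try to dlopen it,
# exactly as the build script does; paths are examples, not an exhaustive list.
cudalib = Libdl.find_library(["libcuda", "nvcuda.dll"],
                             ["/usr/local/cuda/lib64", "/opt/cuda/lib64"])
if Libdl.dlopen_e(cudalib) != C_NULL
    info("CUDA driver found; Pkg.build(\"MXNet\") should configure a GPU build.")
else
    info("No CUDA driver found; the CPU-only configuration will be used.")
end
```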
2 changes: 1 addition & 1 deletion docs/mkdocs.yml
Original file line number Diff line number Diff line change
@@ -12,7 +12,7 @@ extra_css:
- assets/Documenter.css

extra_javascript:
- https://cdn.mathjax.org/mathjax/latest/MathJax.jl?config=TeX-AMS-MML_HTMLorMML
- https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML
- assets/mathjaxhelper.js

markdown_extensions:
28 changes: 14 additions & 14 deletions docs/src/tutorial/mnist.md
@@ -6,7 +6,7 @@ multi-layer perceptron and then a convolutional neural network (the
LeNet architecture) on the [MNIST handwritten digit
dataset](http://yann.lecun.com/exdb/mnist/). The code for this tutorial
can be found in
[examples/mnist](https://github.com/dmlc/MXNet.jl/tree/master/examples/mnist).
[examples/mnist](https://github.com/dmlc/MXNet.jl/tree/master/examples/mnist). There are also two Jupyter notebooks that expand a little more on the [MLP](https://github.com/ultradian/julia_notebooks/blob/master/mxnet/mnistMLP.ipynb) and the [LeNet](https://github.com/ultradian/julia_notebooks/blob/master/mxnet/mnistLenet.ipynb), using the more general `ArrayDataProvider`.

Simple 3-layer MLP
------------------
@@ -29,14 +29,14 @@ data = mx.Variable(:data)
and then cascading fully-connected layers and activation functions:

```julia
fc1 = mx.FullyConnected(data = data, name=:fc1, num_hidden=128)
act1 = mx.Activation(data = fc1, name=:relu1, act_type=:relu)
fc2 = mx.FullyConnected(data = act1, name=:fc2, num_hidden=64)
act2 = mx.Activation(data = fc2, name=:relu2, act_type=:relu)
fc3 = mx.FullyConnected(data = act2, name=:fc3, num_hidden=10)
fc1 = mx.FullyConnected(data, name=:fc1, num_hidden=128)
act1 = mx.Activation(fc1, name=:relu1, act_type=:relu)
fc2 = mx.FullyConnected(act1, name=:fc2, num_hidden=64)
act2 = mx.Activation(fc2, name=:relu2, act_type=:relu)
fc3 = mx.FullyConnected(act2, name=:fc3, num_hidden=10)
```

Note each composition we take the previous symbol as the data argument,
Note that in each composition we take the previous symbol as the first argument,
forming a feedforward chain. The architecture looks like

```
@@ -49,7 +49,7 @@ where the last 10 units correspond to the 10 output classes (digits
classes:

```julia
mlp = mx.SoftmaxOutput(data = fc3, name=:softmax)
mlp = mx.SoftmaxOutput(fc3, name=:softmax)
```

As we can see, the MLP is just a chain of layers. For this case, we can
@@ -148,12 +148,12 @@ listed below:
data = mx.Variable(:data)

# first conv
conv1 = @mx.chain mx.Convolution(data=data, kernel=(5,5), num_filter=20) =>
conv1 = @mx.chain mx.Convolution(data, kernel=(5,5), num_filter=20) =>
mx.Activation(act_type=:tanh) =>
mx.Pooling(pool_type=:max, kernel=(2,2), stride=(2,2))

# second conv
conv2 = @mx.chain mx.Convolution(data=conv1, kernel=(5,5), num_filter=50) =>
conv2 = @mx.chain mx.Convolution(conv1, kernel=(5,5), num_filter=50) =>
mx.Activation(act_type=:tanh) =>
mx.Pooling(pool_type=:max, kernel=(2,2), stride=(2,2))
```
@@ -168,17 +168,17 @@ a tensor of shape `(28,28,1,100)`. The convolution and pooling operate
on the spatial axes, so `kernel=(5,5)` indicates a square region
5 wide and 5 high. The rest of the architecture follows as:

```ulia
```julia
# first fully-connected
fc1 = @mx.chain mx.Flatten(data=conv2) =>
fc1 = @mx.chain mx.Flatten(conv2) =>
mx.FullyConnected(num_hidden=500) =>
mx.Activation(act_type=:tanh)

# second fully-connected
fc2 = mx.FullyConnected(data=fc1, num_hidden=10)
fc2 = mx.FullyConnected(fc1, num_hidden=10)

# softmax loss
lenet = mx.Softmax(data=fc2, name=:softmax)
lenet = mx.Softmax(fc2, name=:softmax)
```

Note a fully-connected operator expects the input to be a matrix.
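The notebooks referenced at the top of this tutorial use the more general `ArrayDataProvider` instead of the bundled MNIST provider. A minimal sketch of feeding in-memory arrays, where the random data, label key, and batch size are assumptions for illustration:

```julia
using MXNet

# Fake stand-ins for MNIST: 1000 28x28 single-channel "images" plus labels 0-9.
X = rand(Float32, 28, 28, 1, 1000)
y = rand(0:9, 1000)

# :softmax_label matches the default label name for a SoftmaxOutput named :softmax.
train_provider = mx.ArrayDataProvider(:data => X, :softmax_label => y,
                                      batch_size = 100)
```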
9 changes: 7 additions & 2 deletions docs/src/user-guide/install.md
@@ -21,9 +21,14 @@ MXNet.jl is built on top of [libmxnet](https://github.com/dmlc/mxnet).
Upon installation, Julia will try to automatically download and build
libmxnet.

There are two environment variables that change this behaviour. If you
already have a pre-installed version of MXNet, you can set `MXNET_HOME`
to point the build process at it. If the automatic CUDA detection fails,
you can also set `CUDA_HOME` to override the detected path.

The libmxnet source is downloaded to `Pkg.dir("MXNet")/deps/src/mxnet`.
The automatic build is using default configurations, with OpenCV, CUDA
disabled. If the compilation failed due to unresolved dependency, or if
The automatic build uses the default configuration, with OpenCV disabled.
If the compilation fails due to an unresolved dependency, or if
you want to customize the build, you can compile and
install libmxnet manually. Please see below for more details.

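A minimal sketch of the two overrides described above, assuming a Julia 0.5-era workflow and placeholder paths:

```julia
# Both variables are optional; adjust the paths to your installation.
ENV["MXNET_HOME"] = "/opt/mxnet"       # reuse an existing libmxnet build
ENV["CUDA_HOME"]  = "/usr/local/cuda"  # point the build at CUDA if detection fails
Pkg.build("MXNet")                     # re-runs deps/build.jl with these settings
```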
40 changes: 20 additions & 20 deletions examples/char-lstm/lstm.jl
@@ -26,11 +26,11 @@ function lstm_cell(data::mx.SymbolicNode, prev_state::LSTMState, param::LSTMPara
end

i2h = mx.FullyConnected(data, weight=param.i2h_W, bias=param.i2h_b,
num_hidden=4num_hidden, name=symbol(name, "_i2h"))
num_hidden=4num_hidden, name=Symbol(name, "_i2h"))
h2h = mx.FullyConnected(prev_state.h, weight=param.h2h_W, bias=param.h2h_b,
num_hidden=4num_hidden, name=symbol(name, "_h2h"))
num_hidden=4num_hidden, name=Symbol(name, "_h2h"))

gates = mx.SliceChannel(i2h + h2h, num_outputs=4, name=symbol(name, "_gates"))
gates = mx.SliceChannel(i2h + h2h, num_outputs=4, name=Symbol(name, "_gates"))

in_gate = mx.Activation(gates[1], act_type=:sigmoid)
in_trans = mx.Activation(gates[2], act_type=:tanh)
@@ -49,17 +49,17 @@ function LSTM(n_layer::Int, seq_len::Int, dim_hidden::Int, dim_embed::Int, n_cla
dropout::Real=0, name::Symbol=gensym(), output_states::Bool=false)

# placeholder nodes for all parameters
embed_W = mx.Variable(symbol(name, "_embed_weight"))
pred_W = mx.Variable(symbol(name, "_pred_weight"))
pred_b = mx.Variable(symbol(name, "_pred_bias"))
embed_W = mx.Variable(Symbol(name, "_embed_weight"))
pred_W = mx.Variable(Symbol(name, "_pred_weight"))
pred_b = mx.Variable(Symbol(name, "_pred_bias"))

layer_param_states = map(1:n_layer) do i
param = LSTMParam(mx.Variable(symbol(name, "_l$(i)_i2h_weight")),
mx.Variable(symbol(name, "_l$(i)_h2h_weight")),
mx.Variable(symbol(name, "_l$(i)_i2h_bias")),
mx.Variable(symbol(name, "_l$(i)_h2h_bias")))
state = LSTMState(mx.Variable(symbol(name, "_l$(i)_init_c")),
mx.Variable(symbol(name, "_l$(i)_init_h")))
param = LSTMParam(mx.Variable(Symbol(name, "_l$(i)_i2h_weight")),
mx.Variable(Symbol(name, "_l$(i)_h2h_weight")),
mx.Variable(Symbol(name, "_l$(i)_i2h_bias")),
mx.Variable(Symbol(name, "_l$(i)_h2h_bias")))
state = LSTMState(mx.Variable(Symbol(name, "_l$(i)_init_c")),
mx.Variable(Symbol(name, "_l$(i)_init_h")))
(param, state)
end
#...
@@ -69,17 +69,17 @@ function LSTM(n_layer::Int, seq_len::Int, dim_hidden::Int, dim_embed::Int, n_cla
# now unroll over time
outputs = mx.SymbolicNode[]
for t = 1:seq_len
data = mx.Variable(symbol(name, "_data_$t"))
label = mx.Variable(symbol(name, "_label_$t"))
data = mx.Variable(Symbol(name, "_data_$t"))
label = mx.Variable(Symbol(name, "_label_$t"))
hidden = mx.FullyConnected(data, weight=embed_W, num_hidden=dim_embed,
no_bias=true, name=symbol(name, "_embed_$t"))
no_bias=true, name=Symbol(name, "_embed_$t"))

# stack LSTM cells
for i = 1:n_layer
l_param, l_state = layer_param_states[i]
dp = i == 1 ? 0 : dropout # don't do dropout for data
next_state = lstm_cell(hidden, l_state, l_param, num_hidden=dim_hidden, dropout=dp,
name=symbol(name, "_lstm_$t"))
name=Symbol(name, "_lstm_$t"))
hidden = next_state.h
layer_param_states[i] = (l_param, next_state)
end
@@ -89,8 +89,8 @@ function LSTM(n_layer::Int, seq_len::Int, dim_hidden::Int, dim_embed::Int, n_cla
hidden = mx.Dropout(hidden, p=dropout)
end
pred = mx.FullyConnected(hidden, weight=pred_W, bias=pred_b, num_hidden=n_class,
name=symbol(name, "_pred_$t"))
smax = mx.SoftmaxOutput(pred, label, name=symbol(name, "_softmax_$t"))
name=Symbol(name, "_pred_$t"))
smax = mx.SoftmaxOutput(pred, label, name=Symbol(name, "_softmax_$t"))
push!(outputs, smax)
end
#...
@@ -100,8 +100,8 @@ function LSTM(n_layer::Int, seq_len::Int, dim_hidden::Int, dim_embed::Int, n_cla
# append block-gradient nodes to the final states
for i = 1:n_layer
l_param, l_state = layer_param_states[i]
final_state = LSTMState(mx.BlockGrad(l_state.c, name=symbol(name, "_l$(i)_last_c")),
mx.BlockGrad(l_state.h, name=symbol(name, "_l$(i)_last_h")))
final_state = LSTMState(mx.BlockGrad(l_state.c, name=Symbol(name, "_l$(i)_last_c")),
mx.BlockGrad(l_state.h, name=Symbol(name, "_l$(i)_last_h")))
layer_param_states[i] = (l_param, final_state)
end

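The only substantive change in this file is replacing the lowercase `symbol(...)` constructor, deprecated around Julia 0.5, with `Symbol(...)`. Both stringify and concatenate their arguments into a single symbol; a minimal sketch:

```julia
name = :lstm
Symbol(name, "_l", 1, "_i2h_weight")  # => :lstm_l1_i2h_weight
```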