Skip to content
This repository was archived by the owner on Nov 17, 2023. It is now read-only.

Commit 69cf9f3

Browse files
author
Jacob Schreiber
committed
ENH python metrics added, fixes
1 parent 0b66c47 commit 69cf9f3

File tree

16 files changed

+409
-96
lines changed

16 files changed

+409
-96
lines changed

.gitignore

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -82,12 +82,12 @@ R-package/inst/*
8282
*.bin
8383

8484
# ipython notebook
85-
example/notebooks/.ipynb_checkpoints/*
8685
*_pb2.py
86+
*.ipynb_checkpoints*
87+
input.txt*
8788

8889
# Jetbrain
8990
.idea
9091

9192
# ctags
92-
tags
93-
93+
tags

Makefile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,8 @@ $(EXTRA_OPERATORS)/build/%_gpu.o: $(EXTRA_OPERATORS)/%.cu
145145
$(NVCC) $(NVCCFLAGS) -Xcompiler "$(CFLAGS) -Isrc/operator" -M -MT $(EXTRA_OPERATORS)/build/$*_gpu.o $< >$(EXTRA_OPERATORS)/build/$*_gpu.d
146146
$(NVCC) -c -o $@ $(NVCCFLAGS) -Xcompiler "$(CFLAGS) -Isrc/operator" $<
147147

148+
# NOTE: to statically link libmxnet.a we need the option
149+
# --Wl,--whole-archive -lmxnet --Wl,--no-whole-archive
148150
lib/libmxnet.a: $(ALL_DEP)
149151
@mkdir -p $(@D)
150152
ar crv $@ $(filter %.o, $?)
@@ -153,7 +155,6 @@ lib/libmxnet.so: $(ALL_DEP)
153155
@mkdir -p $(@D)
154156
$(CXX) $(CFLAGS) -shared -o $@ $(filter %.o %.a, $^) $(LDFLAGS)
155157

156-
# ps-lite
157158
$(PS_PATH)/build/libps.a:
158159
$(MAKE) CXX=$(CXX) DEPS_PATH=$(DEPS_PATH) -C $(PS_PATH) ps
159160
ln -fs $(PS_PATH)/tracker .

cmake/Utils.cmake

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
# For cmake_parse_arguments
2+
include(CMakeParseArguments)
3+
14
################################################################################################
25
# Command alias for debugging messages
36
# Usage:
@@ -395,4 +398,5 @@ function(mxnet_source_group group)
395398
file(GLOB_RECURSE srcs2 ${CAFFE_SOURCE_GROUP_GLOB_RECURSE})
396399
source_group(${group} FILES ${srcs2})
397400
endif()
398-
endfunction()
401+
endfunction()
402+

doc/build.md

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ Our goal is to build the shared library:
3535
The minimal building requirement is
3636

3737
- A recent c++ compiler supporting C++ 11 such as `g++ >= 4.8` or `clang`
38-
- A BLAS library, such as `libblas`, `libblas`, `openblas` `intel mkl`
38+
- A BLAS library, such as `libblas`, `atlas`, `openblas` or `intel mkl`
3939

4040
Optional libraries
4141

@@ -239,6 +239,31 @@ Now you should have the R package as a tar.gz file and you can install it as a n
239239
R CMD INSTALL mxnet_0.5.tar.gz
240240
```
241241
242+
243+
To install the package using GPU on Windows without building the package from scratch. Note that you need a couple of programs installed already:
244+
- You'll need the [CUDA Toolkit](https://developer.nvidia.com/cuda-toolkit). This depends on Visual Studio, and a free compatible version would be [Visual Studio Community 2013](https://www.visualstudio.com/en-us/news/vs2013-community-vs.aspx). For instructions and compatibility checks, read http://docs.nvidia.com/cuda/cuda-getting-started-guide-for-microsoft-windows/ .
245+
246+
- You will also need to register as a developer at nvidia and download CUDNN V3, https://developer.nvidia.com/cudnn .
247+
248+
249+
1. Download the mxnet package as a ZIP from the Github repository https://github.com/dmlc/mxnet and unpack it. You will be editing the `/mxnet/R-package` folder.
250+
251+
2. Download the most recent GPU-enabled package from the [Releases tab](https://github.com/dmlc/mxnet/releases). Unzip this file so you have a folder `/nocudnn`. Note that this file and the folder you'll save it in will be used for future reference and not directly for installing the package. Only some files will be copied from it into the `R-package` folder.
252+
253+
(Note: you now have 2 folders we're working with, possibly in different locations, that we'll reference with `R-package/` and `nocudnn/`.)
254+
255+
3. Download CUDNN V3 from https://developer.nvidia.com/cudnn. Unpack the .zip file and you'll see 3 folders, `/bin`, `/include`, `/lib`. Copy and replace these 3 folders into `nocudnn/3rdparty/cudnn/`, or unpack the .zip file there directly.
256+
257+
4. Create the folder `R-package/inst/libs/x64`. We only support 64-bit operating system now, so you need the x64 folder;
258+
259+
5. Put dll files in `R-package/inst/libs/x64`.
260+
261+
The first dll file you need is `nocudnn/lib/libmxnet.dll`. The other dll files you need are the ones in all 4 subfolders of `nocudnn/3rdparty/`, for the `cudnn` and `openblas` you'll need to look in the `/bin` folders. There should be 11 dll files now in `R-package/inst/libs/x64`.
262+
263+
6. Copy the folder `nocudnn/include/` to `R-package/inst/`. So now you should have a folder `R-package/inst/include/` with 3 subfolders.
264+
265+
7. Run `R CMD INSTALL --no-multiarch R-package`. Make sure that R is added to your PATH in Environment Variables. Running the command `where R` in Command Prompt should return the location.
266+
242267
Note on Library Build:
243268

244269
We isolate the library build with Rcpp end to maximize the portability

example/cpp/Makefile

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,16 @@
11
CFLAGS=-I ../../include -Wall -O3 -msse3 -funroll-loops -Wno-unused-parameter -Wno-unknown-pragmas -fopenmp -I ../../mshadow -I ../../dmlc-core/include
22
LDFLAGS=-L ../../lib -lmxnet -lopenblas -DMSHADOW_USE_CBLAS=1 -DMSHADOW_USE_MKL=0 -DMSHADOW_USE_CUDA=1
33

4+
CXX=g++
5+
46
mlp: ./mlp.cpp
5-
g++ -std=c++0x $(CFLAGS) $(LDFLAGS) -o $@ $^
7+
$(CXX) -std=c++0x $(CFLAGS) -o $@ $^ $(LDFLAGS)
68

79
use_ndarray: ./use_ndarray.cpp
8-
g++ -std=c++0x $(CFLAGS) $(LDFLAGS) -o $@ $^
10+
$(CXX) -std=c++0x $(CFLAGS) -o $@ $^ $(LDFLAGS)
911

1012
lint:
1113
python2 ../../dmlc-core/scripts/lint.py mxnet "cpp" ./
14+
15+
clean:
16+
rm -f mlp use_ndarray

example/kaggle-ndsb1/README.md

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
Tutorial for Kaggle NDSB-1
2+
-----
3+
4+
This is an MXNet example for Kaggle National Data Science Bowl 1.
5+
6+
In this example we ignore the submission part and only show the local validation result.
7+
8+
#### Step 1: Generate image list
9+
- Prepare original data, in layout like
10+
```
11+
--gen_img_list.py
12+
--data/
13+
|
14+
|--train/
15+
| |
16+
| |--acantharia_protist/...
17+
| |--.../
18+
|--sampleSubmission.csv
19+
```
20+
- Run command ``` python gen_img_list.py train data/sampleSubmission.csv data/train/ train.lst``` to generate a full image list
21+
- Run command ```sed -n '1, 20000p' train.lst > tr.lst``` to generate local train list
22+
- Run command ```sed -n '20001,30337p' train.lst > va.lst``` to generate local validation list
23+
24+
25+
#### Step 2: Generate Image Record (new shape with short edge = 48)
26+
- Run command ```../../bin/im2rec tr.lst ./ tr.rec resize=48``` to generate training data record file
27+
- Run command ```../../bin/im2rec va.lst ./ va.rec resize=48``` to generate validation data record file
28+
29+
#### Step 3: Train Model
30+
- Feel free to change hyper parameter in ```run_local.py```
31+
- Run ```python run_local.py``` to train the model
32+
- Sample code result: Train-accuracy=60.1%, Validation-accuracy=62.1%
33+
34+
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
import csv
2+
import os
3+
import sys
4+
import random
5+
6+
if len(sys.argv) < 4:
7+
print "Usage: gen_img_list.py train/test sample_submission.csv train_folder img.lst"
8+
exit(1)
9+
10+
random.seed(888)
11+
12+
task = sys.argv[1]
13+
fc = csv.reader(file(sys.argv[2]))
14+
fi = sys.argv[3]
15+
fo = csv.writer(open(sys.argv[4], "w"), delimiter='\t', lineterminator='\n')
16+
17+
# make class map
18+
head = fc.next()
19+
head = head[1:]
20+
21+
# make image list
22+
img_lst = []
23+
cnt = 0
24+
if task == "train":
25+
for i in xrange(len(head)):
26+
path = fi + head[i]
27+
lst = os.listdir(fi + head[i])
28+
for img in lst:
29+
img_lst.append((cnt, i, path + '/' + img))
30+
cnt += 1
31+
else:
32+
lst = os.listdir(fi)
33+
for img in lst:
34+
img_lst.append((cnt, 0, fi + img))
35+
cnt += 1
36+
37+
# shuffle
38+
random.shuffle(img_lst)
39+
40+
#wirte
41+
for item in img_lst:
42+
fo.writerow(item)
43+

example/kaggle-ndsb1/run_local.py

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
import mxnet as mx
2+
import numpy as np
3+
import logging
4+
5+
# Example performance:
6+
# INFO:root:Epoch[34] Train-accuracy=0.601388
7+
# INFO:root:Epoch[34] Validation-accuracy=0.620949
8+
9+
logger = logging.getLogger()
10+
logger.setLevel(logging.DEBUG)
11+
12+
# running device
13+
dev = mx.gpu()
14+
# batch size and input shape
15+
batch_size = 64
16+
data_shape = (3, 36, 36)
17+
# training data info for learning rate reduction
18+
num_examples = 20000
19+
epoch_size = num_examples / batch_size
20+
lr_factor_epoch = 15
21+
# model saving parameter
22+
model_prefix = "./models/sample_net"
23+
24+
# train data iterator
25+
train = mx.io.ImageRecordIter(
26+
path_imgrec = "tr.rec",
27+
mean_r = 128,
28+
mean_g = 128,
29+
mean_b = 128,
30+
scale = 0.0078125,
31+
max_aspect_ratio = 0.35,
32+
data_shape = data_shape,
33+
batch_size = batch_size,
34+
rand_crop = True,
35+
rand_mirror = True)
36+
37+
# validate data iterator
38+
val = mx.io.ImageRecordIter(
39+
path_imgrec = "va.rec",
40+
mean_r = 128,
41+
mean_b = 128,
42+
mean_g = 128,
43+
scale = 0.0078125,
44+
rand_crop = False,
45+
rand_mirror = False,
46+
data_shape = data_shape,
47+
batch_size = batch_size)
48+
49+
# network definition
50+
# stage 1
51+
net = mx.sym.Variable("data")
52+
net = mx.sym.Convolution(data=net, kernel=(5, 5), num_filter=32, pad=(2, 2))
53+
net = mx.sym.Activation(data=net, act_type="relu")
54+
net = mx.sym.Convolution(data=net, kernel=(5, 5), num_filter=64, pad=(2, 2))
55+
net = mx.sym.Activation(data=net, act_type="relu")
56+
net = mx.sym.Pooling(data=net, pool_type="max", kernel=(3, 3), stride=(2, 2))
57+
# stage 2
58+
net = mx.sym.Convolution(data=net, kernel=(3, 3), num_filter=64, pad=(1, 1))
59+
net = mx.sym.Activation(data=net, act_type="relu")
60+
net = mx.sym.Convolution(data=net, kernel=(3, 3), num_filter=64, pad=(1, 1))
61+
net = mx.sym.Activation(data=net, act_type="relu")
62+
net = mx.sym.Convolution(data=net, kernel=(3, 3), num_filter=128, pad=(1, 1))
63+
net = mx.sym.Activation(data=net, act_type="relu")
64+
net = mx.sym.Pooling(data=net, pool_type="max", kernel=(3, 3), stride=(2, 2))
65+
# stage 3
66+
net = mx.sym.Convolution(data=net, kernel=(3, 3), num_filter=256, pad=(1, 1))
67+
net = mx.sym.Activation(data=net, act_type="relu")
68+
net = mx.sym.Convolution(data=net, kernel=(3, 3), num_filter=256, pad=(1, 1))
69+
net = mx.sym.Activation(data=net, act_type="relu")
70+
net = mx.sym.Pooling(data=net, pool_type="avg", kernel=(9, 9), stride=(1, 1))
71+
# stage 4
72+
net = mx.sym.Flatten(data=net)
73+
net = mx.sym.Dropout(data=net, p=0.25)
74+
net = mx.sym.FullyConnected(data=net, num_hidden=121)
75+
net = mx.symbol.SoftmaxOutput(data=net, name='softmax')
76+
77+
# Model parameter
78+
# This model will reduce learning rate by factor 0.1 for every 15 epoch
79+
model = mx.model.FeedForward(
80+
ctx = dev,
81+
symbol = net,
82+
num_epoch = 35,
83+
learning_rate = 0.01,
84+
momentum = 0.9,
85+
wd = 0.0001,
86+
clip_gradient = 5,
87+
lr_scheduler = mx.lr_scheduler.FactorScheduler(step=epoch_size * lr_factor_epoch, factor = 0.1),
88+
initializer = mx.init.Xavier(factor_type="in", magnitude=2.34))
89+
90+
# fit the model
91+
model.fit(
92+
X = train,
93+
eval_data = val,
94+
batch_end_callback = mx.callback.Speedometer(batch_size, 50),
95+
epoch_end_callback = mx.callback.do_checkpoint(model_prefix))
96+

example/kaggle-ndsb2/Preprocessing.py

Lines changed: 25 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
import numpy as np
1111
import dicom
1212
from skimage import io, transform
13+
from joblib import Parallel, delayed
14+
import dill
1315

1416
def mkdir(fname):
1517
try:
@@ -53,29 +55,33 @@ def write_label_csv(fname, frames, label_map):
5355
fo.close()
5456

5557

58+
def get_data(lst,preproc):
59+
data = []
60+
result = []
61+
for path in lst:
62+
f = dicom.read_file(path)
63+
img = preproc(f.pixel_array.astype(float) / np.max(f.pixel_array))
64+
dst_path = path.rsplit(".", 1)[0] + ".64x64.jpg"
65+
scipy.misc.imsave(dst_path, img)
66+
result.append(dst_path)
67+
data.append(img)
68+
data = np.array(data, dtype=np.uint8)
69+
data = data.reshape(data.size)
70+
data = np.array(data,dtype=np.str_)
71+
data = data.reshape(data.size)
72+
return [data,result]
73+
74+
5675
def write_data_csv(fname, frames, preproc):
5776
"""Write data to csv file"""
5877
fdata = open(fname, "w")
59-
dwriter = csv.writer(fdata)
60-
counter = 0
61-
result = []
62-
for lst in frames:
63-
data = []
64-
for path in lst:
65-
f = dicom.read_file(path)
66-
img = preproc(f.pixel_array.astype(float) / np.max(f.pixel_array))
67-
dst_path = path.rsplit(".", 1)[0] + ".64x64.jpg"
68-
scipy.misc.imsave(dst_path, img)
69-
result.append(dst_path)
70-
data.append(img)
71-
data = np.array(data, dtype=np.uint8)
72-
data = data.reshape(data.size)
73-
dwriter.writerow(data)
74-
counter += 1
75-
if counter % 100 == 0:
76-
print("%d slices processed" % counter)
77-
print("All finished, %d slices in total" % counter)
78+
dr = Parallel()(delayed(get_data)(lst,preproc) for lst in frames)
79+
data,result = zip(*dr)
80+
for entry in data:
81+
fdata.write(','.join(entry)+'\r\n')
82+
print("All finished, %d slices in total" % len(data))
7883
fdata.close()
84+
result = np.ravel(result)
7985
return result
8086

8187

0 commit comments

Comments
 (0)