
Commit 634face

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into develop
zchen0211 committed Nov 1, 2017
2 parents b720f28 + a343504 commit 634face
Showing 13 changed files with 268 additions and 402 deletions.
48 changes: 48 additions & 0 deletions benchmark/IntelOptimizedPaddle.md
@@ -0,0 +1,48 @@
# Benchmark

Machine:

- Server
- Intel(R) Xeon(R) Gold 6148 CPU @ 2.40GHz, 2 Sockets, 20 Cores per socket
- Laptop
- Dell XPS 15 9560 (R1745): i7-7700HQ, 8GB RAM, 256GB SSD
- i5 MacBook Pro (Retina, 13-inch, Early 2015)
- Desktop
- i7-6700k

System: CentOS release 6.3 (Final), Docker 1.12.1.

PaddlePaddle: paddlepaddle/paddle:latest (TODO: rerun after the 0.11.0 release)

- MKL-DNN tag v0.10
- MKLML 2018.0.20170720
- OpenBLAS v0.2.20

On each machine, we test and compare single-node training performance using MKL-DNN, MKLML, and OpenBLAS, respectively.

## Benchmark Model

### Server
Tested with batch sizes 64, 128, and 256 on an Intel(R) Xeon(R) Gold 6148 CPU @ 2.40GHz.

Input image size: 3 * 224 * 224; throughput is measured in images/second.
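
To make the throughput metric concrete, here is a minimal sketch of how images/second can be derived from wall-clock time per batch; `trainOneBatch` is a placeholder, not a PaddlePaddle API.

```cpp
#include <chrono>
#include <cstdio>
#include <thread>

// Placeholder for one training step; a real benchmark would run the
// model's forward/backward pass here instead of sleeping.
static void trainOneBatch(int /*batchSize*/) {
  std::this_thread::sleep_for(std::chrono::milliseconds(5));
}

int main() {
  const int batchSize = 128;
  const int numBatches = 20;
  auto start = std::chrono::steady_clock::now();
  for (int i = 0; i < numBatches; ++i) trainOneBatch(batchSize);
  auto end = std::chrono::steady_clock::now();
  double seconds = std::chrono::duration<double>(end - start).count();
  // Throughput is simply total images processed divided by elapsed time.
  std::printf("throughput: %.2f images/second\n",
              batchSize * numBatches / seconds);
  return 0;
}
```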

- VGG-19

| BatchSize | 64    | 128   | 256   |
|-----------|-------|-------|-------|
| OpenBLAS  | 7.82  | 8.62  | 10.34 |
| MKLML     | 11.02 | 12.86 | 15.33 |
| MKL-DNN   | 27.69 | 28.8  | 29.27 |


Chart on batch size 128: TBD

- ResNet
- GoogLeNet

### Laptop
TBD
### Desktop
TBD
50 changes: 30 additions & 20 deletions paddle/gserver/tests/MKLDNNTester.cpp
@@ -273,31 +273,37 @@ void MKLDNNTester::printVector(const VectorPtr& v) {
VLOG(MKLDNN_ALL) << std::endl << ostr.str();
}

double MKLDNNTester::getDelta(const real* d1,
const real* d2,
double MKLDNNTester::getDelta(const real* refer,
const real* value,
size_t len,
const float failRate,
const float thres) {
double delta = 0, sum = 0;
int failCnt = 0;
const double eps = 1e-5;
double maxOut = 0;
double maxRatio = 0;
for (size_t i = 0; i < len; ++i) {
double ref = fabs(d2[i]);
double diff = fabs(d1[i] - d2[i]);
double ref = fabs(refer[i]);
double val = fabs(value[i]);
double diff = fabs(refer[i] - value[i]);
delta += diff;
sum += ref;
if (ref > eps && fabs(d1[i]) > eps && diff / ref > thres) {
maxOut = std::max(maxOut, diff / ref);
if (ref < eps && val < eps) { // both values are very small
continue;
}
double ratio = diff / ref;
if (ratio > thres) {
maxRatio = std::max(maxRatio, ratio);
failCnt++;
}
}
EXPECT_TRUE(std::isnormal(sum));
EXPECT_FALSE(std::isinf(sum));
EXPECT_FALSE(std::isnan(sum));
EXPECT_FALSE(std::isnan(delta));
VLOG(MKLDNN_ALL) << "reference avg data: " << sum / len
<< ", delta: " << delta / sum << ", failCnt:" << failCnt;
return (failCnt / (float)len) > failRate ? maxOut : delta / sum;
double res = sum > eps ? delta / sum : eps;
return (failCnt / (float)len) > failRate ? maxRatio : res;
}
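
To make the tolerance rule concrete, a standalone sketch with hypothetical data (not from the test suite): one point differs by 5e-4, no per-point ratio exceeds thres, so the relative L1 error is returned.

```cpp
#include <cmath>
#include <cstdio>

int main() {
  // Hypothetical reference and test values; one point is off by 5e-4.
  const float refer[] = {1.0f, 2.0f, 4.0f, 8.0f};
  const float value[] = {1.0005f, 2.0f, 4.0f, 8.0f};
  double delta = 0, sum = 0;
  for (int i = 0; i < 4; ++i) {
    delta += std::fabs(refer[i] - value[i]);
    sum += std::fabs(refer[i]);
  }
  // No per-point ratio exceeds thres = 0.1, so getDelta would return the
  // relative L1 error: 0.0005 / 15 ~= 3.3e-5, below the default eps of 1e-4.
  std::printf("relative L1 error = %g\n", delta / sum);
  return 0;
}
```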

double MKLDNNTester::compareMatrix(const MatrixPtr& m1, const MatrixPtr& m2) {
@@ -515,12 +521,16 @@ void MKLDNNTester::getOutResult(const std::string& configPath,
gradientMachine->forward(in.inArgs[i], &outArgs, PASS_TRAIN);
// save forward result
for (size_t k = 0; k < outArgs.size(); k++) {
MatrixPtr value = Matrix::create(outArgs[k].value->getHeight(),
outArgs[k].value->getWidth(),
false,
false);
value->copyFrom(*outArgs[k].value);
out.outValues.push_back(value);
const MatrixPtr& src = outArgs[k].value;
MatrixPtr dst =
Matrix::create(src->getHeight(), src->getWidth(), false, false);
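// Note: an MKLDNNMatrix may keep its data in MKL-DNN's internal memory
// layout, so it is copied out with copyTo(), which can reorder the data
// into the plain CPU format before comparison.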
if (typeid(*src) == typeid(MKLDNNMatrix)) {
MKLDNNMatrixPtr dnnSrc = std::dynamic_pointer_cast<MKLDNNMatrix>(src);
dnnSrc->copyTo(*dst);
} else {
dst->copyFrom(*src);
}
out.outValues.push_back(dst);
}

// random backward input
@@ -543,19 +553,19 @@ void MKLDNNTester::getOutResult(const std::string& configPath,
void MKLDNNTester::compareResult(DataOut& ref, DataOut& dnn, float eps) {
CHECK_EQ(ref.outValues.size(), dnn.outValues.size());
CHECK_EQ(ref.paraValues.size(), dnn.paraValues.size());
VLOG(MKLDNN_TESTS) << "compare value size: " << ref.outValues.size();
for (size_t i = 0; i < ref.outValues.size(); i++) {
VLOG(MKLDNN_TESTS) << "compare value index: " << i;
EXPECT_LE(fabs(compareMatrix(ref.outValues[i], dnn.outValues[i])), eps);
}
VLOG(MKLDNN_TESTS) << "compare param size: " << ref.outValues.size();
for (size_t i = 0; i < ref.paraValues.size(); i++) {
VLOG(MKLDNN_TESTS) << "compare param index: " << i;
EXPECT_LE(fabs(compareVector(ref.paraValues[i], dnn.paraValues[i])), eps);
}
}

void MKLDNNTester::runBranchesTest(const std::string& configPath,
size_t iter,
float eps) {
void MKLDNNTester::runNetTest(const std::string& configPath,
size_t iter,
float eps) {
DataIn in;
initArgument(in, configPath, iter);
DataOut outCpu, outDnn;
20 changes: 10 additions & 10 deletions paddle/gserver/tests/MKLDNNTester.h
@@ -85,17 +85,17 @@ class MKLDNNTester {
bool printDetails = false,
size_t iter = 3,
float epsilon = 1e-4);
static void runBranchesTest(const std::string& configPath,
size_t iter = 3,
float eps = 1e-4);
static void runNetTest(const std::string& configPath,
size_t iter = 2,
float eps = 1e-4);
static void initArgument(DataIn& data,
const std::string& configPath,
size_t iter = 3);
size_t iter = 2);
static void getOutResult(const std::string& configPath,
DataIn& in,
DataOut& out,
bool use_mkldnn,
size_t iter = 3);
size_t iter = 2);

private:
void reset(const TestConfig& dnn, const TestConfig& ref, size_t batchSize);
@@ -128,13 +128,13 @@ class MKLDNNTester {

/**
* Get delta percent
* if many(>failRate) wrong(abs(dnn-ref)/abs(ref)>thres) points return the
* max(diff/ref)
* else return sum(abs(a-b)) / sum(abs(b))
* If too many (> failRate) points are wrong (abs(val-ref)/abs(ref) > thres),
* return max(diff/ref);
* else return sum(abs(diff)) / sum(abs(ref)).
* The return value should be smaller than eps when passing.
*/
static double getDelta(const real* d1,
const real* d2,
static double getDelta(const real* refer,
const real* value,
size_t len,
const float failRate = 1e-3,
const float thres = 0.1);
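
For context, a hedged sketch of how a test might drive runNetTest from gtest; the test name is illustrative and not part of this commit, though the config path matches the file added below.

```cpp
#include <gtest/gtest.h>
#include "MKLDNNTester.h"

using paddle::MKLDNNTester;

// Illustrative only: compare CPU and MKL-DNN outputs on the branch-net
// config for 2 iterations, requiring agreement within eps = 1e-4.
TEST(MKLDNNNet, branch_net) {
  MKLDNNTester::runNetTest("paddle/gserver/tests/mkldnn_branch_net.conf",
                           /*iter=*/2,
                           /*eps=*/1e-4);
}
```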
142 changes: 142 additions & 0 deletions paddle/gserver/tests/mkldnn_branch_net.conf
@@ -0,0 +1,142 @@
# Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from paddle.trainer_config_helpers import *

settings(batch_size=16)
channels = get_config_arg("channels", int, 2)
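
# The network below builds pairs of parallel branches (conv, conv+bn,
# conv+pool, fc) and merges each pair with addto_layer or concat_layer,
# so the MKL-DNN layers are exercised in a branched topology.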

def two_conv(input, group_name):
    out1 = img_conv_layer(input=input,
                          name=group_name+'_conv1_',
                          filter_size=1,
                          num_filters=channels,
                          padding=0,
                          shared_biases=True,
                          act=ReluActivation())

    out2 = img_conv_layer(input=input,
                          name=group_name+'_conv2_',
                          filter_size=3,
                          num_filters=channels,
                          padding=1,
                          shared_biases=True,
                          act=ReluActivation())
    return out1, out2

def two_conv_bn(input, group_name):
    out1, out2 = two_conv(input, group_name)
    out1 = batch_norm_layer(input=out1,
                            name=group_name+'_bn1_',
                            use_global_stats=False,
                            act=ReluActivation())

    out2 = batch_norm_layer(input=out2,
                            name=group_name+'_bn2_',
                            use_global_stats=False,
                            act=ReluActivation())
    return out1, out2

def two_conv_pool(input, group_name):
    out1, out2 = two_conv(input, group_name)
    out1 = img_pool_layer(input=out1,
                          name=group_name+'_pool1_',
                          pool_size=3,
                          stride=2,
                          padding=0,
                          pool_type=MaxPooling())

    out2 = img_pool_layer(input=out2,
                          name=group_name+'_pool2_',
                          pool_size=5,
                          stride=2,
                          padding=1,
                          pool_type=MaxPooling())
    return out1, out2

def two_fc(input, group_name):
    out1 = fc_layer(input=input,
                    name=group_name+'_fc1_',
                    size=channels,
                    bias_attr=False,
                    act=LinearActivation())

    out2 = fc_layer(input=input,
                    name=group_name+'_fc2_',
                    size=channels,
                    bias_attr=False,
                    act=LinearActivation())
    return out1, out2

data = data_layer(name="input", size=channels*16*16)

tmp = img_conv_layer(input=data,
                     num_channels=channels,
                     filter_size=3,
                     num_filters=channels,
                     padding=1,
                     shared_biases=True,
                     act=ReluActivation())

a1, a2 = two_conv(tmp, 'conv_branch')
tmp = addto_layer(input=[a1, a2],
                  act=ReluActivation(),
                  bias_attr=False)

tmp = img_pool_layer(input=tmp,
                     pool_size=3,
                     stride=2,
                     padding=1,
                     pool_type=AvgPooling())

b1, b2 = two_conv_pool(tmp, 'pool_branch')
tmp = concat_layer(input=[b1, b2])

tmp = img_pool_layer(input=tmp,
                     num_channels=channels*2,
                     pool_size=3,
                     stride=2,
                     padding=1,
                     pool_type=MaxPooling())

tmp = img_conv_layer(input=tmp,
                     filter_size=3,
                     num_filters=channels,
                     padding=1,
                     stride=2,
                     shared_biases=True,
                     act=LinearActivation(),
                     bias_attr=False)

tmp = batch_norm_layer(input=tmp,
                       use_global_stats=False,
                       act=ReluActivation())

c1, c2 = two_conv_bn(tmp, 'bn_branch')
tmp = addto_layer(input=[c1, c2],
                  act=ReluActivation(),
                  bias_attr=False)

tmp = fc_layer(input=tmp, size=channels,
               bias_attr=True,
               act=ReluActivation())

d1, d2 = two_fc(tmp, 'fc_branch')
tmp = addto_layer(input=[d1, d2])

out = fc_layer(input=tmp, size=10,
               bias_attr=True,
               act=SoftmaxActivation())

outputs(out)
58 changes: 0 additions & 58 deletions paddle/gserver/tests/mkldnn_branches_fc.conf

This file was deleted.

