Skip to content

Commit

Permalink
Merge pull request PaddlePaddle#13 from reyoung/final_fixes
Browse files Browse the repository at this point in the history
Update Develop Branch And Fix All unit tests.
  • Loading branch information
qingqing01 authored Aug 14, 2017
2 parents 96fc9e7 + 0b1052f commit 88a3d8d
Show file tree
Hide file tree
Showing 96 changed files with 2,410 additions and 861 deletions.
12 changes: 7 additions & 5 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@

cmake_minimum_required(VERSION 3.0)
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
set(PROJ_ROOT ${CMAKE_CURRENT_SOURCE_DIR})
set(PROJ_BINARY_ROOT ${CMAKE_CURRENT_BINARY_DIR})
set(PADDLE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
set(PADDLE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})

include(system)

Expand Down Expand Up @@ -121,8 +121,8 @@ include(version) # set PADDLE_VERSION
include(coveralls) # set code coverage


include_directories("${PROJ_ROOT}")
include_directories("${PROJ_ROOT}/paddle/cuda/include")
include_directories("${PADDLE_SOURCE_DIR}")
include_directories("${PADDLE_SOURCE_DIR}/paddle/cuda/include")
include_directories("${CMAKE_CURRENT_BINARY_DIR}/proto")
include_directories("${CMAKE_CURRENT_BINARY_DIR}/go/pserver/client/c")
include_directories(${Boost_INCLUDE_DIRS})
Expand All @@ -144,7 +144,7 @@ if(WITH_GPU)
endif(WITH_GPU)

if(WITH_MKLDNN)
list(APPEND EXTERNAL_LIBS ${MKLDNN_LIBRARY} ${MKLDNN_IOMP_LIB})
list(APPEND EXTERNAL_LIBS ${MKLDNN_LIB} ${MKLDNN_IOMP_LIB})
endif()

if(USE_NNPACK)
Expand All @@ -164,10 +164,12 @@ if(WITH_GOLANG)
add_subdirectory(go)
endif(WITH_GOLANG)

set(PADDLE_PYTHON_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/python/build")
add_subdirectory(paddle)
if(WITH_PYTHON)
add_subdirectory(python)
endif()

if(WITH_DOC)
add_subdirectory(doc)
endif()
17 changes: 16 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -64,13 +64,28 @@ RUN pip install --upgrade pip && \
pip install -U sphinx-rtd-theme==0.1.9 recommonmark && \
pip install pre-commit 'requests==2.9.2' 'ipython==5.3.0' && \
pip install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
pip install rarfile
pip install opencv-python rarfile 'scipy>=0.19.0' 'nltk>=3.2.2'

# To fix https://github.com/PaddlePaddle/Paddle/issues/1954, we use
# the solution in https://urllib3.readthedocs.io/en/latest/user-guide.html#ssl-py2
RUN apt-get install -y libssl-dev libffi-dev
RUN pip install certifi urllib3[secure]

# TODO(qijun) The template library Eigen doesn't work well with GCC 5
# coming with the default Docker image, so we switch to use GCC 4.8
# by default. And I will check Eigen library later.

RUN ln -sf gcc-4.8 /usr/bin/gcc && \
ln -sf gcc-ar-4.8 /usr/bin/gcc-ar && \
ln -sf gcc-nm-4.8 /usr/bin/gcc-nm && \
ln -sf gcc-ranlib-4.8 /usr/bin/gcc-ranlib && \
ln -sf gcc-4.8 /usr/bin/x86_64-linux-gnu-gcc && \
ln -sf gcc-ar-4.8 /usr/bin/x86_64-linux-gnu-gcc-ar && \
ln -sf gcc-nm-4.8 /usr/bin/x86_64-linux-gnu-gcc-nm && \
ln -sf gcc-ranlib-4.8 /usr/bin/x86_64-linux-gnu-gcc-ranlib && \
ln -sf g++-4.8 /usr/bin/g++ && \
ln -sf g++-4.8 /usr/bin/x86_64-linux-gnu-g++

# Install woboq_codebrowser to /woboq
RUN git clone https://github.com/woboq/woboq_codebrowser /woboq && \
(cd /woboq \
Expand Down
2 changes: 1 addition & 1 deletion cmake/configure.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ if(WITH_GOLANG)
add_custom_command(OUTPUT ${CMAKE_BINARY_DIR}/glide
COMMAND env GOPATH=${GOPATH} ${GLIDE} install
COMMAND touch ${CMAKE_BINARY_DIR}/glide
DEPENDS ${PROJ_ROOT}/go/glide.lock
DEPENDS ${PADDLE_SOURCE_DIR}/go/glide.lock
WORKING_DIRECTORY "${PADDLE_IN_GOPATH}/go"
)

Expand Down
2 changes: 1 addition & 1 deletion cmake/cpplint.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ macro(add_style_check_target TARGET_NAME)

if(SOURCES_LIST)
add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
COMMAND "${PYTHON_EXECUTABLE}" "${PROJ_ROOT}/paddle/scripts/cpplint.py"
COMMAND "${PYTHON_EXECUTABLE}" "${PADDLE_SOURCE_DIR}/paddle/scripts/cpplint.py"
"--filter=${STYLE_FILTER}"
${SOURCES_LIST}
COMMENT "cpplint: Checking source code style"
Expand Down
10 changes: 6 additions & 4 deletions cmake/flags.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,12 @@ function(CheckCompilerCXX11Flag)
if(${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 4.8)
message(FATAL_ERROR "Unsupported GCC version. GCC >= 4.8 required.")
endif()
# TODO(qijun) gcc 4.9 or later versions raise SEGV due to the optimization problem.
# Use Debug mode instead for now.
if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 4.9 OR CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 4.9)
set(CMAKE_BUILD_TYPE "Debug" CACHE STRING "" FORCE)
if(NOT ANDROID)
# TODO(qijun) gcc 4.9 or later versions raise SEGV due to the optimization problem.
# Use Debug mode instead for now.
if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 4.9 OR CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 4.9)
set(CMAKE_BUILD_TYPE "Debug" CACHE STRING "" FORCE)
endif()
endif()
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
# cmake >= 3.0 compiler id "AppleClang" on Mac OS X, otherwise "Clang"
Expand Down
2 changes: 1 addition & 1 deletion cmake/generic.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -411,7 +411,7 @@ function(py_test TARGET_NAME)
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(py_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
add_test(NAME ${TARGET_NAME}
COMMAND env PYTHONPATH=${PADDLE_PYTHON_PACKAGE_DIR}
COMMAND env PYTHONPATH=${PADDLE_PYTHON_BUILD_DIR}/lib-python
python2 ${py_test_SRCS}
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
endif()
Expand Down
2 changes: 1 addition & 1 deletion cmake/package.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ set(CPACK_PACKAGE_DESCRIPTION "")
set(CPACK_DEBIAN_PACKAGE_DEPENDS "libpython2.7-dev, libstdc++6, python-pip, curl, libgfortran3, python-pip-whl")
set(CPACK_DEBIAN_PACKAGE_SECTION Devel)
set(CPACK_DEBIAN_PACKAGE_VERSION ${PADDLE_VERSION})
set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${PROJ_ROOT}/paddle/scripts/deb/postinst")
set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${PADDLE_SOURCE_DIR}/paddle/scripts/deb/postinst")
#set(CPACK_GENERATOR "DEB")
# Start cpack
include (CMakePackageConfigHelpers)
Expand Down
4 changes: 2 additions & 2 deletions cmake/util.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -141,8 +141,8 @@ endmacro()
function(create_resources res_file output_file)
add_custom_command(
OUTPUT ${output_file}
COMMAND python ARGS ${PROJ_ROOT}/cmake/make_resource.py ${res_file} ${output_file}
DEPENDS ${res_file} ${PROJ_ROOT}/cmake/make_resource.py)
COMMAND python ARGS ${PADDLE_SOURCE_DIR}/cmake/make_resource.py ${res_file} ${output_file}
DEPENDS ${res_file} ${PADDLE_SOURCE_DIR}/cmake/make_resource.py)
endfunction()


Expand Down
2 changes: 1 addition & 1 deletion cmake/version.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ set(tmp_version "HEAD")
while ("${PADDLE_VERSION}" STREQUAL "")
execute_process(
COMMAND ${GIT_EXECUTABLE} describe --tags --abbrev=0 ${tmp_version}
WORKING_DIRECTORY ${PROJ_ROOT}
WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}
OUTPUT_VARIABLE GIT_TAG_NAME
RESULT_VARIABLE GIT_RESULT
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
Expand Down
146 changes: 146 additions & 0 deletions doc/design/auto_gradient_check.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
## Auto Gradient Checker Design

## Backgraound:
- Operator forward computing is easy to check if the result is right because it has a clear definition. **But** backpropagation is a notoriously difficult algorithm to debug and get right:
- 1. you should get the right backpropagation formula according to the forward computation.
- 2. you should implement it right in CPP.
- 3. it's difficult to prepare test data.

- Auto gradient check gets a numeric gradient by forward Operator and use it as a reference of the backward Operator's result. It has several advantages:
- 1. numeric gradient checker only need forward operator.
- 2. user only need to prepare the input data for forward Operator.

## Mathematical Theory
The following two document from stanford has a detailed explanation of how to get numeric gradient and why it's useful.

- [Gradient checking and advanced optimization(en)](http://deeplearning.stanford.edu/wiki/index.php/Gradient_checking_and_advanced_optimization)
- [Gradient checking and advanced optimization(cn)](http://ufldl.stanford.edu/wiki/index.php/%E6%A2%AF%E5%BA%A6%E6%A3%80%E9%AA%8C%E4%B8%8E%E9%AB%98%E7%BA%A7%E4%BC%98%E5%8C%96)


## Numeric Gradient Implementation
### Python Interface
```python
def get_numeric_gradient(op,
input_values,
output_name,
input_to_check,
delta=0.005,
local_scope=None):
"""
Get Numeric Gradient for an operator's input.
:param op: C++ operator instance, could be an network
:param input_values: The input variables. Should be an dictionary, key is
variable name. Value is numpy array.
:param output_name: The final output variable name.
:param input_to_check: The input variable need to get gradient.
:param delta: The perturbation value for numeric gradient method. The
smaller delta is, the more accurate result will get. But if that delta is
too small, it could occur numerical stability problem.
:param local_scope: The local scope used for get_numeric_gradient.
:return: The gradient array in numpy format.
"""
```

### Explaination:

- Why need `output_name`
- One Operator may have multiple Output, you can get independent gradient from each Output. So user should set one output to calculate.

- Why need `input_to_check`
- One operator may have multiple inputs. Gradient Op can calculate the gradient of these Inputs at the same time. But Numeric Gradient needs to calculate them one by one. So `get_numeric_gradient` is designed to calculate the gradient for one input. If you need to compute multiple inputs, you can call `get_numeric_gradient` multiple times.


### Core Algorithm Implementation


```python
# we only compute gradient of one element each time.
# we use a for loop to compute the gradient of every element.
for i in xrange(tensor_size):
# get one input element throw it's index i.
origin = tensor_to_check.get_float_element(i)

# add delta to it, run op and then get the sum of the result tensor.
x_pos = origin + delta
tensor_to_check.set_float_element(i, x_pos)
y_pos = get_output()

# plus delta to this element, run op and get the sum of the result tensor.
x_neg = origin - delta
tensor_to_check.set_float_element(i, x_neg)
y_neg = get_output()

# restore old value
tensor_to_check.set_float_element(i, origin)

# compute the gradient of this element and store it into a numpy array.
gradient_flat[i] = (y_pos - y_neg) / delta / 2

# reshape the gradient result to the shape of the source tensor.
return gradient_flat.reshape(tensor_to_check.get_dims())
```

## Auto Graident Checker Framework

Each Operator Kernel has three kinds of Gradient:

- 1. Numeric Gradient
- 2. CPU Operator Gradient
- 3. GPU Operator Gradient(if supported)

Numeric Gradient Only relies on forward Operator. So we use Numeric Gradient as the reference value.

- 1. calculate the numeric gradient.
- 2. calculate CPU kernel Gradient with the backward Operator and compare it with the numeric gradient.
- 3. calculate GPU kernel Gradient with the backward Operator and compare it with the numeric gradient.(if support GPU)

#### Python Interface

```python
def check_grad(self,
forward_op,
input_vars,
inputs_to_check,
output_name,
no_grad_set=None,
only_cpu=False,
max_relative_error=0.005):
"""
:param forward_op: used to create backward_op
:param input_vars: numpy value of input variable. The following
computation will use these variables.
:param inputs_to_check: inputs var names that should check gradient.
:param output_name: output name that used to
:param max_relative_error: The relative tolerance parameter.
:param no_grad_set: used when create backward ops
:param only_cpu: only compute and check gradient on cpu kernel.
:return:
"""
```

### How to check if two numpy array is close enough?
if `abs_numeric_grad` is nearly zero, then use abs error for numeric_grad, not relative

```python
numeric_grad = ...
operator_grad = numpy.array(scope.find_var(grad_var_name(name)).get_tensor())

abs_numeric_grad = numpy.abs(numeric_grad)
# if abs_numeric_grad is nearly zero, then use abs error for numeric_grad, not relative
# error.
abs_numeric_grad[abs_numeric_grad < 1e-3] = 1

diff_mat = numpy.abs(abs_numeric_grad - operator_grad) / abs_numeric_grad
max_diff = numpy.max(diff_mat)
```


#### Notes:
1,The Input data for auto gradient checker should be reasonable to avoid numeric problem.


#### Refs:

- [Gradient checking and advanced optimization(en)](http://deeplearning.stanford.edu/wiki/index.php/Gradient_checking_and_advanced_optimization)
- [Gradient checking and advanced optimization(cn)](http://ufldl.stanford.edu/wiki/index.php/%E6%A2%AF%E5%BA%A6%E6%A3%80%E9%AA%8C%E4%B8%8E%E9%AB%98%E7%BA%A7%E4%BC%98%E5%8C%96)
6 changes: 3 additions & 3 deletions doc/templates/conf.py.cn.in
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# serve to show the default.
import sys
import os, subprocess
sys.path.insert(0, os.path.abspath('@PROJ_ROOT@/python'))
sys.path.insert(0, os.path.abspath('@PADDLE_SOURCE_DIR@/python'))
import shlex
from recommonmark import parser, transform
import paddle
Expand All @@ -24,7 +24,7 @@ AutoStructify = transform.AutoStructify
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
templates_path = ["@PROJ_ROOT@/doc_theme/templates"]
templates_path = ["@PADDLE_SOURCE_DIR@/doc_theme/templates"]

# -- General configuration ------------------------------------------------

Expand Down Expand Up @@ -120,7 +120,7 @@ html_theme = 'sphinx_rtd_theme'
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['@PROJ_ROOT@/doc_theme/static']
html_static_path = ['@PADDLE_SOURCE_DIR@/doc_theme/static']

# Output file base name for HTML help builder.
htmlhelp_basename = project + 'doc'
Expand Down
6 changes: 3 additions & 3 deletions doc/templates/conf.py.en.in
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# serve to show the default.
import sys
import os, subprocess
sys.path.insert(0, os.path.abspath('@PROJ_ROOT@/python'))
sys.path.insert(0, os.path.abspath('@PADDLE_SOURCE_DIR@/python'))
import shlex
from recommonmark import parser, transform
import paddle
Expand All @@ -25,7 +25,7 @@ AutoStructify = transform.AutoStructify
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
templates_path = ["@PROJ_ROOT@/doc_theme/templates"]
templates_path = ["@PADDLE_SOURCE_DIR@/doc_theme/templates"]

# -- General configuration ------------------------------------------------

Expand Down Expand Up @@ -120,7 +120,7 @@ html_theme = 'sphinx_rtd_theme'
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['@PROJ_ROOT@/doc_theme/static']
html_static_path = ['@PADDLE_SOURCE_DIR@/doc_theme/static']

# Output file base name for HTML help builder.
htmlhelp_basename = project + 'doc'
Expand Down
6 changes: 2 additions & 4 deletions go/pserver/client/c/test/test_train.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,10 @@ def main():
# network config
x = paddle.layer.data(name='x', type=paddle.data_type.dense_vector(13))
y_predict = paddle.layer.fc(input=x,
param_attr=paddle.attr.Param(
name='w', learning_rate=1e-3),
param_attr=paddle.attr.Param(name='w'),
size=1,
act=paddle.activation.Linear(),
bias_attr=paddle.attr.Param(
name='b', learning_rate=1e-3))
bias_attr=paddle.attr.Param(name='b'))
y = paddle.layer.data(name='y', type=paddle.data_type.dense_vector(1))
cost = paddle.layer.mse_cost(input=y_predict, label=y)

Expand Down
14 changes: 7 additions & 7 deletions paddle/api/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@ add_library(paddle_api STATIC ${API_SOURCES})
add_dependencies(paddle_api paddle_proto paddle_trainer_lib)

INCLUDE(${SWIG_USE_FILE})
INCLUDE_DIRECTORIES(${PROJ_ROOT}/paddle)
INCLUDE_DIRECTORIES(${PADDLE_SOURCE_DIR}/paddle)

FILE(GLOB PY_PADDLE_PYTHON_FILES ${PROJ_ROOT}/paddle/py_paddle/*.py)
FILE(GLOB PY_PADDLE_PYTHON_FILES ${PADDLE_SOURCE_DIR}/paddle/py_paddle/*.py)

SET_SOURCE_FILES_PROPERTIES(Paddle.i PROPERTIES CPLUSPLUS ON)

Expand Down Expand Up @@ -79,16 +79,16 @@ SWIG_LINK_LIBRARIES(swig_paddle
${START_END}
)

add_custom_command(OUTPUT ${PROJ_ROOT}/paddle/py_paddle/_swig_paddle.so
COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/swig_paddle.py ${PROJ_ROOT}/paddle/py_paddle
COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/_swig_paddle.so ${PROJ_ROOT}/paddle/py_paddle
add_custom_command(OUTPUT ${PADDLE_SOURCE_DIR}/paddle/py_paddle/_swig_paddle.so
COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/swig_paddle.py ${PADDLE_SOURCE_DIR}/paddle/py_paddle
COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/_swig_paddle.so ${PADDLE_SOURCE_DIR}/paddle/py_paddle
COMMAND ${CMAKE_COMMAND} -E touch .timestamp
WORKING_DIRECTORY ${PROJ_ROOT}/paddle
WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle
DEPENDS _swig_paddle
)

# TODO(yuyang18) : make wheel name calculated by cmake
add_custom_target(python_api_wheel ALL DEPENDS ${PROJ_ROOT}/paddle/py_paddle/_swig_paddle.so)
add_custom_target(python_api_wheel ALL DEPENDS ${PADDLE_SOURCE_DIR}/paddle/py_paddle/_swig_paddle.so)

if(WITH_TESTING)
IF(NOT PY_PIP_FOUND)
Expand Down
Loading

0 comments on commit 88a3d8d

Please sign in to comment.