Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Jenkins notebook test #4799

Merged
merged 16 commits into from
Jan 25, 2017
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -129,3 +129,7 @@ lib

# Mac OS X
.DS_Store

#Notebook Automated Test
!tests/nightly/test_config.txt
!tests/nightly/TestNotebook
2 changes: 1 addition & 1 deletion mshadow
9 changes: 2 additions & 7 deletions tests/ci_build/Dockerfile.amzn_linux_cpu
Original file line number Diff line number Diff line change
Expand Up @@ -15,21 +15,16 @@ RUN sed 's@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so
ENV NOTVISIBLE "in users profile"
RUN echo "export VISIBLE=now" >> /etc/profile

RUN chmod -R 755 install
RUN /install/install_opencv.sh
RUN /install/install_openblas.sh
RUN /install/install_python2.sh
RUN /install/install_python3.sh
RUN /install/install_testdeps.sh
RUN /install/install_julia.sh
RUN /install/install_maven.sh
RUN /install/install_library.sh
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

where are these?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They are in tests/ci_build/install.


RUN mkdir -p /home/jenkins/workspace/mxnet-main
RUN mkdir /home/jenkins/.ssh
COPY /files/authorized_keys /home/jenkins/.ssh/authorized_keys
RUN chmod 600 /home/jenkins/.ssh/authorized_keys
RUN chmod 700 /home/jenkins/.ssh
RUN chown -R jenkins /home/jenkins
RUN chgrp -R jenkins /home/jenkins

EXPOSE 22
CMD ["/usr/sbin/sshd", "-D"]
5 changes: 5 additions & 0 deletions tests/ci_build/install/install_library.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/usr/bin/env bash

# Install the extra libraries used by the notebook tests: the graphviz system
# package, its Python bindings, and the OpenCV Python bindings.

# -y answers yum's confirmation prompt; without it the install aborts when the
# script runs non-interactively (e.g. from a Dockerfile RUN step).
yum install -y graphviz
pip install graphviz
pip install opencv-python
1 change: 1 addition & 0 deletions tests/ci_build/install/install_python2.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@ fi

ln -s -f /usr/local/bin/pip /usr/bin/pip
for i in ipython[all] jupyter pandas scikit-image h5py pandas sklearn sympy; do echo "${i}..."; pip install -U $i >/dev/null; done

6 changes: 6 additions & 0 deletions tests/nightly/test_config.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#Testing folders, separated by commas
#If test_path is empty, by default all the notebooks under root directory will be tested.
#If test_ignored is set to "@@@ IGNORE_ALL", no notebook will be tested.
[Folder Path]
test_path = mxnet-notebooks/python/basic
test_ignored = mxnet-notebooks/python/basic/advanced_img_io.ipynb, mxnet-notebooks/python/basic/image_io.ipynb, mxnet-notebooks/python/basic/mixed.ipynb, mxnet-notebooks/python/basic/module.ipynb, mxnet-notebooks/python/basic/ndarray.ipynb, mxnet-notebooks/python/basic/record_io.ipynb, mxnet-notebooks/python/basic/symbol.ipynb
265 changes: 265 additions & 0 deletions tests/nightly/test_ipynb.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,265 @@
#pylint: disable=no-member, too-many-locals, too-many-branches, no-self-use, broad-except, lost-exception, too-many-nested-blocks, too-few-public-methods
"""
This script runs the notebooks in the selected directories and reports
errors for each notebook.

Traceback information can be found in the output notebooks
generated in the corresponding output directories.

Before running this script, make sure all the notebooks have
been run at least once and their outputs are generated.
"""

import os
import json
import ConfigParser
import re
import sys
from textwrap import dedent
reload(sys)
sys.setdefaultencoding('utf-8')
#pylint: enable=no-member

import nbformat
import nbconvert.preprocessors.execute as execute

# Marker a notebook cell puts in its source, immediately followed by digits, to
# request its own execution timeout; CustomizedPreprocessor.preprocess_cell
# searches the cell source for it. The spelling "LIMT" is part of the text that
# is matched at runtime, so it must not be "corrected" here.
TIME_LIMIT_FLAG = '# @@@ AUTOTEST_TIME_LIMT_SECONDS='
# Marker for cells excluded from output comparison: NotebookTester skips a cell
# when any line of its source starts with this text.
IGNORED_CELL_FLAG = '# @@@ AUTOTEST_OUTPUT_IGNORED_CELL'

class CustomizedPreprocessor(execute.ExecutePreprocessor):
    """An ExecutePreprocessor that lets a cell declare its own timeout.

    A code cell whose source contains TIME_LIMIT_FLAG immediately followed by
    digits is executed with that number as ``self.timeout``; every other cell
    runs with the timeout the preprocessor was constructed with.
    """
    def preprocess_cell(self, cell, resources, cell_index):
        """Execute one code cell, honouring a timeout flag in its source.

        Parameters
        ----------
        cell : notebook cell
            Cell to execute. Non-code cells are returned untouched.
        resources : dict
            Passed through unchanged.
        cell_index : int
            Not used by this implementation.

        Returns
        -------
        (cell, resources)
            The cell, with ``cell.outputs`` replaced by the outputs of the run.

        Raises
        ------
        execute.CellExecutionError
            If ``allow_errors`` is false and the cell produced an output of
            type 'error'.
        """
        if cell.cell_type != 'code':
            return cell, resources

        # The digits are captured directly from the match. (The previous code
        # passed the match object itself to re.search, which raises TypeError.)
        time_flag = re.search(re.escape(TIME_LIMIT_FLAG) + r'([0-9]+)', cell.source)

        # Remember the configured timeout so that a flag in one cell does not
        # leak into the cells executed after it.
        default_timeout = self.timeout
        if time_flag is not None:
            self.timeout = int(time_flag.group(1))
        try:
            outputs = self.run_cell(cell)
        finally:
            self.timeout = default_timeout
        cell.outputs = outputs

        if not self.allow_errors:
            for out in outputs:
                if out.output_type == 'error':
                    pattern = u"""\
                        An error occurred while executing cell No.{cell.execution_count}:
                        ------------------
                        {cell.source}
                        ------------------
                        {out.ename}: {out.evalue}
                        """
                    msg = dedent(pattern).format(out=out, cell=cell)
                    raise execute.CellExecutionError(msg)
        return cell, resources


class NotebookTester(object):
    """Run the notebooks selected by a test config file and report failures.

    A NotebookTester reads the ``[Folder Path]`` section of the config file,
    executes every selected notebook, compares the text output of each cell
    with the output stored in the original notebook, and writes a summary to
    ``test_summary.txt`` in the current working directory.
    """
    # Error texts that get special treatment (retry / "debug manually" hint).
    _KERNEL_DIED = 'Kernel died before replying to kernel_info'
    _CELL_TIMED_OUT = 'Cell execution timed out, see log for details.'

    def __init__(self, test_config):
        self.test_config = test_config

    @staticmethod
    def _split_items(raw_value):
        """Split a comma separated config value into stripped, non-empty items."""
        return [item.strip() for item in raw_value.split(',') if item.strip()]

    def __read_config(self, test_config):
        """Read notebooks to be tested from test config file.

        Parameters
        ----------
        test_config : str
            test configuration file

        Returns
        -------
        nb_list : list
            Notebook list to be tested
        """
        nb_list = []
        config_parser = ConfigParser.RawConfigParser()
        config_parser.read(test_config)

        # Items may be separated by "," with or without surrounding spaces.
        test_dirs = self._split_items(config_parser.get('Folder Path', 'test_path'))
        if not test_dirs:
            # An empty test_path means: test everything below the current directory.
            test_dirs = ['.']
        ignored_items = self._split_items(config_parser.get('Folder Path', 'test_ignored'))

        ignored_dir = set()
        ignored_nb = set()
        for item in ignored_items:
            if item == '@@@ IGNORE_ALL':
                return nb_list
            if item.endswith('.ipynb'):
                ignored_nb.add(os.path.abspath(item))
            else:
                # Ignoring a folder ignores every folder below it as well.
                for root, _, _ in os.walk(item):
                    ignored_dir.add(os.path.abspath(root))

        for test_dir in test_dirs:
            for root, _, files in os.walk(test_dir):
                if os.path.abspath(root) in ignored_dir:
                    continue
                for test_file in files:
                    if not test_file.endswith('.ipynb') or \
                       test_file.endswith('-checkpoint.ipynb'):
                        continue
                    notebook = os.path.join(root, test_file)
                    if os.path.abspath(notebook) in ignored_nb:
                        continue
                    if notebook.startswith('./'):
                        notebook = notebook[2:]
                    nb_list.append(notebook)
        return nb_list

    def __notebook_run(self, path):
        """Execute a notebook via nbconvert and collect output.

        The executed notebook is written to ``<name>_output.ipynb`` in the
        current working directory, compared with the original when execution
        raised no error, and removed again.

        Parameters
        ----------
        path : str
            notebook file path.

        Returns
        -------
        error : str
            Text of the first execution error, a message naming the first cell
            whose output changed, or an empty string on success.
        """
        error = ""
        parent_dir, nb_name = os.path.split(path)
        output_nb = os.path.splitext(nb_name)[0] + "_output.ipynb"
        with open(path) as nb_file:
            notebook = nbformat.read(nb_file, as_version=4)
        eprocessor = CustomizedPreprocessor(timeout=900)
        # Use a loop to avoid "Kernel died before replying to kernel_info"
        # error, repeat 5 times.
        for _ in range(0, 5):
            error = ""
            try:
                eprocessor.preprocess(notebook, {'metadata': {'path': parent_dir}})
            except Exception as ex_error:
                error = str(ex_error)
            # No return from a ``finally`` clause here: that would silently
            # discard KeyboardInterrupt / SystemExit raised during execution.
            if error == self._KERNEL_DIED:
                continue
            with open(output_nb, mode='w') as output_file:
                nbformat.write(notebook, output_file)
            try:
                if not error:
                    cell_num = self.__verify_output(path, output_nb)
                    if cell_num > 0:
                        error = "Output in cell No.%d has changed." % cell_num
            finally:
                # Always clean up the temporary output notebook.
                os.remove(output_nb)
            return error
        return error

    def __verify_output(self, origin_nb, output_nb):
        """Compare the output cells of testing output notebook with original notebook.

        Cells of the original notebook that have an empty source, have no
        "outputs" entry, or contain a source line starting with
        IGNORED_CELL_FLAG are not compared.

        Parameters
        ----------
        origin_nb : str
            original notebook file path.

        output_nb : str
            output notebook file path.

        Returns
        -------
        cell_num : int
            Execution count of the first cell whose text output differs (its
            1-based position in the notebook when the original stores no
            execution count), or 0 when all compared outputs match.
        """
        with open(origin_nb) as origin_nb_file:
            origin_cells = json.load(origin_nb_file)["cells"]
        with open(output_nb) as output_nb_file:
            output_cells = json.load(output_nb_file)["cells"]

        for position, (origin_cell, output_cell) in \
                enumerate(zip(origin_cells, output_cells), 1):
            source = origin_cell["source"]
            if not source or "outputs" not in origin_cell:
                continue
            if any(line.startswith(IGNORED_CELL_FLAG) for line in source):
                continue
            if self.__extract_output(origin_cell["outputs"]) != \
               self.__extract_output(output_cell.get("outputs", [])):
                # A null execution count must not make the mismatch look like
                # "no difference" (0 / None), so fall back to the position.
                return origin_cell.get("execution_count") or position
        return 0

    def __extract_output(self, outputs):
        """Extract text part of output of a notebook cell.

        Parameters
        ----------
        outputs : list
            list of output dictionaries of one cell

        Returns
        -------
        ret : str
            Concatenation of all text output contents. Entries under "data"
            are used only for keys starting with "text" other than
            "text/html", and items starting with "<matplotlib" or "<graphviz"
            are left out.
        """
        pieces = []
        for out_dict in outputs:
            for key, val in out_dict.items():
                if str(key).startswith('text'):
                    pieces.extend(str(content) for content in val)
                elif key == 'data':
                    for dt_key, dt_val in val.items():
                        dt_key = str(dt_key)
                        if not dt_key.startswith('text') or \
                           dt_key.startswith('text/html'):
                            continue
                        for dt_content in dt_val:
                            text = str(dt_content)
                            if not text.startswith(('<matplotlib', '<graphviz')):
                                pieces.append(text)
        return ''.join(pieces)

    def run_test(self):
        """Run test using config file.

        Progress is printed to stdout and a summary is written to
        ``test_summary.txt``.
        """
        nb_to_test = self.__read_config(self.test_config)
        fail_nb_dict = {}
        # The ``with`` block closes the summary even if a notebook run raises.
        with open('test_summary.txt', mode='w') as test_summary:
            test_summary.write("%d notebooks were tested:\n" % len(nb_to_test))
            for test_nb in nb_to_test:
                test_summary.write("%s\n" % test_nb)
                print("Start to test %s.\n" % test_nb)
                error = self.__notebook_run(test_nb)
                if not error:
                    print("Tests for %s all passed!\n" % test_nb)
                    continue
                fail_nb_dict[test_nb] = error
                print("Tests for %s failed:\n" % test_nb)
                print(error + '\n')
                if error in (self._CELL_TIMED_OUT, self._KERNEL_DIED):
                    print("Please manually run this notebook to debug.\n")
            print("%d notebooks tested, %d succeeded, %d failed" % (
                len(nb_to_test),
                len(nb_to_test) - len(fail_nb_dict),
                len(fail_nb_dict)))
            if fail_nb_dict:
                test_summary.write("\n%d notebook tests failed:\n" % len(fail_nb_dict))
                print("Following are failed notebooks:")
                for fail_nb, error in fail_nb_dict.items():
                    test_summary.write("\n%s:\n" % fail_nb)
                    test_summary.write("%s\n" % error)
                    print(fail_nb)
            else:
                test_summary.write("\nAll notebook tests passed!\n")
        print("Test summarys are stored in test_summary.txt")

if __name__ == "__main__":
    # Script entry point: test the notebooks described by test_config.txt,
    # looked up relative to the current working directory.
    NotebookTester('test_config.txt').run_test()
35 changes: 35 additions & 0 deletions tests/nightly/test_notebook.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#!/bin/sh
# Build MXNet for CPU, install its Python packages, fetch the mxnet-notebooks
# repository and run the notebook tests, printing the resulting summary.

echo "BUILD make"
cp ./make/config.mk .
echo "USE_CUDA=0" >> ./config.mk
echo "USE_CUDNN=0" >> ./config.mk
echo "USE_BLAS=openblas" >> ./config.mk
echo "ADD_CFLAGS += -I/usr/include/openblas" >> ./config.mk
echo "GTEST_PATH=/usr/local/gtest" >> ./config.mk
echo 'export PKG_CONFIG_PATH=/usr/local/lib/pkgconfig:$PKG_CONFIG_PATH' >> ~/.profile
echo 'export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH' >> ~/.profile
echo 'export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.111-1.b15.25.amzn1.x86_64' >> ~/.profile
echo 'export JRE_HOME=/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.111-1.b15.25.amzn1.x86_64/jre' >> ~/.profile
echo 'export PATH=$PATH:/apache-maven-3.3.9/bin/:/usr/bin:/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.111-1.b15.25.amzn1.x86_64/bin:/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.111-1.b15.25.amzn1.x86_64/jre/bin' >> ~/.profile
# "." is the POSIX spelling of "source"; plain sh implementations such as dash
# do not provide "source".
. ~/.profile
make clean
# Exit statuses must be in 0-255; "exit -1" is rejected by strict sh.
make -j 4 || exit 1

echo "BUILD python2 mxnet"
# Stop here if the directory is missing instead of running setup.py elsewhere.
cd ./python || exit 1
python setup.py install || exit 1

echo "BUILD python3 mxnet"
python3 setup.py install || exit 1
echo "~/.local"
cd ../tests/nightly || exit 1

echo "Pull mxnet-notebook"
git clone https://github.com/dmlc/mxnet-notebooks.git

echo "Test Jupyter notebook"
python test_ipynb.py

echo "Test Summary Start"
cat test_summary.txt
echo "Test Summary End"