55# This script is intended for both the CI and to check locally that code standards are
66# respected. We are currently linting (PEP-8 and similar), looking for patterns of
77# common mistakes (sphinx directives with missing blank lines, old style classes,
8- # unwanted imports...), and we also run doctests here (currently some files only).
9- # In the future we may want to add the validation of docstrings and other checks here .
8+ # unwanted imports...), we run doctests here (currently some files only), and we
9+ # validate formatting errors in docstrings.
1010#
1111# Usage:
1212# $ ./ci/code_checks.sh # run all checks
1313# $ ./ci/code_checks.sh lint # run linting only
1414# $ ./ci/code_checks.sh patterns # check for patterns that should not exist
15+ # $ ./ci/code_checks.sh code # checks on imported code
1516# $ ./ci/code_checks.sh doctests # run doctests
17+ # $ ./ci/code_checks.sh docstrings # validate docstring errors
1618# $ ./ci/code_checks.sh dependencies # check that dependencies are consistent
1719
18- echo " inside $0 "
19- [[ $LINT ]] || { echo " NOT Linting. To lint use: LINT=true $0 $1 " ; exit 0; }
20- [[ -z " $1 " || " $1 " == " lint" || " $1 " == " patterns" || " $1 " == " doctests" || " $1 " == " dependencies" ]] \
21- || { echo " Unknown command $1 . Usage: $0 [lint|patterns|doctests|dependencies]" ; exit 9999; }
# Validate the optional first argument: either empty (run every check) or
# the name of a single check group.
case "$1" in
    ""|lint|patterns|code|doctests|docstrings|dependencies)
        ;;
    *)
        echo "Unknown command $1. Usage: $0 [lint|patterns|code|doctests|docstrings|dependencies]"
        # NOTE(review): exit statuses are 8 bits wide, so callers observe
        # 9999 modulo 256 (15), not 9999 - kept as is for compatibility.
        exit 9999
        ;;
esac

# BASE_DIR: parent of the directory holding this script ($0 is quoted so
#           paths containing spaces survive).
# RET:      running sum of the exit statuses of all checks; used as the
#           exit status of the script.
# CHECK:    check group requested on the command line (empty means all).
BASE_DIR="$(dirname "$0")/.."
RET=0
CHECK=$1
2626
function invgrep {
    # grep with inverse exit status and formatting for azure-pipelines
    #
    # Arguments: forwarded unchanged to grep.
    #
    # This function works exactly as grep, but with opposite exit status:
    # - 0 (success) when no patterns are found
    # - 1 (fail) when the patterns are found
    # - grep's own status (2 or higher) when grep itself failed, e.g. on a
    #   missing path, so that a broken check is never reported as success
    #
    # This is useful for the CI, as we want to fail if one of the patterns
    # that we want to avoid is found by grep.
    local grep_status
    if [[ "$AZURE" == "true" ]]; then
        # -H forces the "file:line:text" layout the awk program relies on.
        # Everything after the second colon is reported, so matched lines
        # that contain colons themselves are not truncated.
        grep -n -H "$@" | awk -F ":" '{
            text = $0
            sub(/^[^:]*:[^:]*:/, "", text)
            print "##vso[task.logissue type=error;sourcepath=" $1 ";linenumber=" $2 ";] Found unwanted pattern: " text
        }'
        # Read grep's status straight from the pipeline instead of enabling
        # pipefail, which would stay switched on for the rest of the script.
        grep_status=${PIPESTATUS[0]}
    else
        grep "$@"
        grep_status=$?
    fi

    case "$grep_status" in
        0) return 1 ;;                 # pattern found: the check fails
        1) return 0 ;;                 # nothing found: the check passes
        *) return "$grep_status" ;;    # grep error: propagate as a failure
    esac
}
44+
# Output format passed to flake8 through --format: the built-in "default"
# layout, or azure-pipelines "logissue" logging commands when AZURE is "true".
FLAKE8_FORMAT="default"
if [[ "$AZURE" == "true" ]]; then
    FLAKE8_FORMAT="##vso[task.logissue type=error;sourcepath=%(path)s;linenumber=%(row)s;columnnumber=%(col)s;code=%(code)s;]%(text)s"
fi
2750
2851# ## LINTING ###
2952if [[ -z " $CHECK " || " $CHECK " == " lint" ]]; then
@@ -35,30 +58,30 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
3558
3659 # pandas/_libs/src is C code, so no need to search there.
3760 MSG=' Linting .py code' ; echo $MSG
38- flake8 .
61+ flake8 --format= " $FLAKE8_FORMAT " .
3962 RET=$(( $RET + $? )) ; echo $MSG " DONE"
4063
4164 MSG=' Linting .pyx code' ; echo $MSG
42- flake8 pandas --filename=* .pyx --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126,E265,E305,E301,E127,E261,E271,E129,W291,E222,E241,E123,F403,C400,C401,C402,C403,C404,C405,C406,C407,C408,C409,C410,C411
65+ flake8 --format= " $FLAKE8_FORMAT " pandas --filename=* .pyx --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126,E265,E305,E301,E127,E261,E271,E129,W291,E222,E241,E123,F403,C400,C401,C402,C403,C404,C405,C406,C407,C408,C409,C410,C411
4366 RET=$(( $RET + $? )) ; echo $MSG " DONE"
4467
4568 MSG=' Linting .pxd and .pxi.in' ; echo $MSG
46- flake8 pandas/_libs --filename=* .pxi.in,* .pxd --select=E501,E302,E203,E111,E114,E221,E303,E231,E126,F403
69+ flake8 --format= " $FLAKE8_FORMAT " pandas/_libs --filename=* .pxi.in,* .pxd --select=E501,E302,E203,E111,E114,E221,E303,E231,E126,F403
4770 RET=$(( $RET + $? )) ; echo $MSG " DONE"
4871
4972 echo " flake8-rst --version"
5073 flake8-rst --version
5174
5275 MSG=' Linting code-blocks in .rst documentation' ; echo $MSG
53- flake8-rst doc/source --filename=* .rst
76+ flake8-rst doc/source --filename=* .rst --format= " $FLAKE8_FORMAT "
5477 RET=$(( $RET + $? )) ; echo $MSG " DONE"
5578
5679 # Check that cython casting is of the form `<type>obj` as opposed to `<type> obj`;
5780 # it doesn't make a difference, but we want to be internally consistent.
5881 # Note: this grep pattern is (intended to be) equivalent to the python
5982 # regex r'(?<![ ->])> '
6083 MSG=' Linting .pyx code for spacing conventions in casting' ; echo $MSG
61- ! grep -r -E --include ' *.pyx' --include ' *.pxi.in' ' [a-zA-Z0-9*]> ' pandas/_libs
84+ invgrep -r -E --include ' *.pyx' --include ' *.pxi.in' ' [a-zA-Z0-9*]> ' pandas/_libs
6285 RET=$(( $RET + $? )) ; echo $MSG " DONE"
6386
6487 # readability/casting: Warnings about C casting instead of C++ casting
@@ -88,43 +111,48 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
88111
89112 # Check for imports from pandas.core.common instead of `import pandas.core.common as com`
90113 MSG=' Check for non-standard imports' ; echo $MSG
91- ! grep -R --include=" *.py*" -E " from pandas.core.common import " pandas
114+ invgrep -R --include=" *.py*" -E " from pandas.core.common import " pandas
92115 RET=$(( $RET + $? )) ; echo $MSG " DONE"
93116
94117 MSG=' Check for pytest warns' ; echo $MSG
95- ! grep -r -E --include ' *.py' ' pytest\.warns' pandas/tests/
118+ invgrep -r -E --include ' *.py' ' pytest\.warns' pandas/tests/
96119 RET=$(( $RET + $? )) ; echo $MSG " DONE"
97120
98121 # Check for the following code in testing: `np.testing` and `np.array_equal`
99122 MSG=' Check for invalid testing' ; echo $MSG
100- ! grep -r -E --include ' *.py' --exclude testing.py ' (numpy|np)(\.testing|\.array_equal)' pandas/tests/
123+ invgrep -r -E --include ' *.py' --exclude testing.py ' (numpy|np)(\.testing|\.array_equal)' pandas/tests/
101124 RET=$(( $RET + $? )) ; echo $MSG " DONE"
102125
103126 # Check for the following code in the extension array base tests: `tm.assert_frame_equal` and `tm.assert_series_equal`
104127 MSG=' Check for invalid EA testing' ; echo $MSG
105- ! grep -r -E --include ' *.py' --exclude base.py ' tm.assert_(series|frame)_equal' pandas/tests/extension/base
128+ invgrep -r -E --include ' *.py' --exclude base.py ' tm.assert_(series|frame)_equal' pandas/tests/extension/base
106129 RET=$(( $RET + $? )) ; echo $MSG " DONE"
107130
108131 MSG=' Check for deprecated messages without sphinx directive' ; echo $MSG
109- ! grep -R --include=" *.py" --include=" *.pyx" -E " (DEPRECATED|DEPRECATE|Deprecated)(:|,|\.)" pandas
132+ invgrep -R --include=" *.py" --include=" *.pyx" -E " (DEPRECATED|DEPRECATE|Deprecated)(:|,|\.)" pandas
110133 RET=$(( $RET + $? )) ; echo $MSG " DONE"
111134
112135 MSG=' Check for old-style classes' ; echo $MSG
113- ! grep -R --include=" *.py" -E " class\s\S*[^)]:" pandas scripts
136+ invgrep -R --include=" *.py" -E " class\s\S*[^)]:" pandas scripts
114137 RET=$(( $RET + $? )) ; echo $MSG " DONE"
115138
116139 MSG=' Check for backticks incorrectly rendering because of missing spaces' ; echo $MSG
117- ! grep -R --include=" *.rst" -E " [a-zA-Z0-9]\`\` ?[a-zA-Z0-9]" doc/source/
140+ invgrep -R --include=" *.rst" -E " [a-zA-Z0-9]\`\` ?[a-zA-Z0-9]" doc/source/
118141 RET=$(( $RET + $? )) ; echo $MSG " DONE"
119142
120143 MSG=' Check for incorrect sphinx directives' ; echo $MSG
121- ! grep -R --include=" *.py" --include=" *.pyx" --include=" *.rst" -E " \.\. (autosummary|contents|currentmodule|deprecated|function|image|important|include|ipython|literalinclude|math|module|note|raw|seealso|toctree|versionadded|versionchanged|warning):[^:]" ./pandas ./doc/source
144+ invgrep -R --include=" *.py" --include=" *.pyx" --include=" *.rst" -E " \.\. (autosummary|contents|currentmodule|deprecated|function|image|important|include|ipython|literalinclude|math|module|note|raw|seealso|toctree|versionadded|versionchanged|warning):[^:]" ./pandas ./doc/source
122145 RET=$(( $RET + $? )) ; echo $MSG " DONE"
123146
124147 MSG=' Check that the deprecated `assert_raises_regex` is not used (`pytest.raises(match=pattern)` should be used instead)' ; echo $MSG
125- ! grep -R --exclude=* .pyc --exclude=testing.py --exclude=test_testing.py assert_raises_regex pandas
148+ invgrep -R --exclude=* .pyc --exclude=testing.py --exclude=test_testing.py assert_raises_regex pandas
126149 RET=$(( $RET + $? )) ; echo $MSG " DONE"
127150
151+ fi
152+
153+ # ## CODE ###
154+ if [[ -z " $CHECK " || " $CHECK " == " code" ]]; then
155+
128156 MSG=' Check for modules that pandas should not import' ; echo $MSG
129157 python -c "
130158import sys
@@ -135,7 +163,7 @@ blacklist = {'bs4', 'gcsfs', 'html5lib', 'ipython', 'jinja2' 'hypothesis',
135163 'tables', 'xlrd', 'xlsxwriter', 'xlwt'}
136164mods = blacklist & set(m.split('.')[0] for m in sys.modules)
137165if mods:
138- sys.stderr.write('pandas should not import: {}\n'.format(', '.join(mods)))
166+ sys.stderr.write('err: pandas should not import: {}\n'.format(', '.join(mods)))
139167 sys.exit(len(mods))
140168 "
141169 RET=$(( $RET + $? )) ; echo $MSG " DONE"
@@ -157,7 +185,7 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
157185
158186 MSG=' Doctests generic.py' ; echo $MSG
159187 pytest -q --doctest-modules pandas/core/generic.py \
160- -k" -_set_axis_name -_xs -describe -droplevel -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -to_json -transpose -values -xs"
188+ -k" -_set_axis_name -_xs -describe -droplevel -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -to_json -transpose -values -xs -to_clipboard "
161189 RET=$(( $RET + $? )) ; echo $MSG " DONE"
162190
163191 MSG=' Doctests top-level reshaping functions' ; echo $MSG
@@ -178,11 +206,22 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
178206
179207fi
180208
### DOCSTRINGS ###
if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then

    # Run the docstring validator restricted to the listed error codes and
    # add its exit status to the running total in RET.
    MSG='Validate docstrings (GL06, SS04, PR03, PR05, EX04)' ; echo "$MSG"
    "$BASE_DIR"/scripts/validate_docstrings.py --format=azure --errors=GL06,SS04,PR03,PR05,EX04
    RET=$(($RET + $?)) ; echo "$MSG" "DONE"

fi
217+
### DEPENDENCIES ###
if [[ -z "$CHECK" || "$CHECK" == "dependencies" ]]; then

    # Compare requirements-dev.txt against environment.yml and add the exit
    # status of the comparison to the running total in RET.
    MSG='Check that requirements-dev.txt has been generated from environment.yml' ; echo "$MSG"
    "$BASE_DIR"/scripts/generate_pip_deps_from_conda.py --compare --azure
    RET=$(($RET + $?)) ; echo "$MSG" "DONE"

fi
187226
188227exit $RET
0 commit comments