Skip to content

Commit 57e0b91

Browse files
committed
Merge branch 'master' of https://github.com/pandas-dev/pandas into cln-gb
2 parents 2c67e86 + 054936f commit 57e0b91

File tree

23 files changed

+337
-122
lines changed

23 files changed

+337
-122
lines changed

.github/workflows/assign.yml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
name: Assign
2+
on:
3+
issue_comment:
4+
types: created
5+
6+
jobs:
7+
one:
8+
runs-on: ubuntu-latest
9+
steps:
10+
- name:
11+
run: |
12+
if [[ "${{ github.event.comment.body }}" == "take" ]]; then
13+
echo "Assigning issue ${{ github.event.issue.number }} to ${{ github.event.comment.user.login }}"
14+
curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -d '{"assignees": ["${{ github.event.comment.user.login }}"]}' https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/assignees
15+
fi

.travis.yml

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -85,15 +85,6 @@ install:
8585
- ci/submit_cython_cache.sh
8686
- echo "install done"
8787

88-
89-
before_script:
90-
# display server (for clipboard functionality) needs to be started here,
91-
# does not work if done in install:setup_env.sh (GH-26103)
92-
- export DISPLAY=":99.0"
93-
- echo "sh -e /etc/init.d/xvfb start"
94-
- if [ "$JOB" != "3.8-dev" ]; then sh -e /etc/init.d/xvfb start; fi
95-
- sleep 3
96-
9788
script:
9889
- echo "script start"
9990
- echo "$JOB"

ci/azure/posix.yml

Lines changed: 6 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -73,33 +73,16 @@ jobs:
7373

7474
- task: PublishTestResults@2
7575
inputs:
76-
testResultsFiles: 'test-data-*.xml'
76+
testResultsFiles: 'test-data.xml'
7777
testRunTitle: ${{ format('{0}-$(CONDA_PY)', parameters.name) }}
7878
displayName: 'Publish test results'
7979

8080
- powershell: |
81-
$junitXml = "test-data-single.xml"
82-
$(Get-Content $junitXml | Out-String) -match 'failures="(.*?)"'
83-
if ($matches[1] -eq 0)
84-
{
85-
Write-Host "No test failures in test-data-single"
86-
}
87-
else
88-
{
89-
# note that this will produce $LASTEXITCODE=1
90-
Write-Error "$($matches[1]) tests failed"
91-
}
92-
93-
$junitXmlMulti = "test-data-multiple.xml"
94-
$(Get-Content $junitXmlMulti | Out-String) -match 'failures="(.*?)"'
95-
if ($matches[1] -eq 0)
96-
{
97-
Write-Host "No test failures in test-data-multi"
98-
}
99-
else
100-
{
101-
# note that this will produce $LASTEXITCODE=1
102-
Write-Error "$($matches[1]) tests failed"
81+
$(Get-Content "test-data.xml" | Out-String) -match 'failures="(.*?)"'
82+
if ($matches[1] -eq 0) {
83+
Write-Host "No test failures in test-data"
84+
} else {
85+
Write-Error "$($matches[1]) tests failed" # will produce $LASTEXITCODE=1
10386
}
10487
displayName: 'Check for test failures'
10588

ci/print_skipped.py

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -27,14 +27,13 @@ def main(filename):
2727
if __name__ == "__main__":
2828
print("SKIPPED TESTS:")
2929
i = 1
30-
for file_type in ("-single", "-multiple", ""):
31-
for test_data in main("test-data{}.xml".format(file_type)):
32-
if test_data is None:
33-
print("-" * 80)
34-
else:
35-
print(
36-
"#{i} {class_name}.{test_name}: {message}".format(
37-
**dict(test_data, i=i)
38-
)
30+
for test_data in main("test-data.xml"):
31+
if test_data is None:
32+
print("-" * 80)
33+
else:
34+
print(
35+
"#{i} {class_name}.{test_name}: {message}".format(
36+
**dict(test_data, i=i)
3937
)
40-
i += 1
38+
)
39+
i += 1

ci/run_tests.sh

Lines changed: 17 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -15,37 +15,29 @@ if [ -n "$LOCALE_OVERRIDE" ]; then
1515
# exit 1
1616
fi
1717
fi
18+
1819
if [[ "not network" == *"$PATTERN"* ]]; then
1920
export http_proxy=http://1.2.3.4 https_proxy=http://1.2.3.4;
2021
fi
2122

22-
23-
if [ -n "$PATTERN" ]; then
24-
PATTERN=" and $PATTERN"
23+
if [ "$COVERAGE" ]; then
24+
COVERAGE_FNAME="/tmp/test_coverage.xml"
25+
COVERAGE="-s --cov=pandas --cov-report=xml:$COVERAGE_FNAME"
2526
fi
2627

27-
for TYPE in single multiple
28-
do
29-
if [ "$COVERAGE" ]; then
30-
COVERAGE_FNAME="/tmp/coc-$TYPE.xml"
31-
COVERAGE="-s --cov=pandas --cov-report=xml:$COVERAGE_FNAME"
32-
fi
28+
PYTEST_CMD="pytest -m \"$PATTERN\" -n auto --dist=loadfile -s --strict --durations=10 --junitxml=test-data.xml $TEST_ARGS $COVERAGE pandas"
3329

34-
TYPE_PATTERN=$TYPE
35-
NUM_JOBS=1
36-
if [[ "$TYPE_PATTERN" == "multiple" ]]; then
37-
TYPE_PATTERN="not single"
38-
NUM_JOBS=2
39-
fi
30+
# Travis does not have an X server
31+
if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then
32+
DISPLAY=DISPLAY=:99.0
33+
PYTEST_CMD="xvfb-run -e /dev/stdout $PYTEST_CMD"
34+
fi
4035

41-
PYTEST_CMD="pytest -m \"$TYPE_PATTERN$PATTERN\" -n $NUM_JOBS -s --strict --durations=10 --junitxml=test-data-$TYPE.xml $TEST_ARGS $COVERAGE pandas"
42-
echo $PYTEST_CMD
43-
# if no tests are found (the case of "single and slow"), pytest exits with code 5, and would make the script fail, if not for the below code
44-
sh -c "$PYTEST_CMD; ret=\$?; [ \$ret = 5 ] && exit 0 || exit \$ret"
36+
echo $PYTEST_CMD
37+
sh -c "$PYTEST_CMD"
4538

46-
if [[ "$COVERAGE" && $? == 0 && "$TRAVIS_BRANCH" == "master" ]]; then
47-
echo "uploading coverage for $TYPE tests"
48-
echo "bash <(curl -s https://codecov.io/bash) -Z -c -F $TYPE -f $COVERAGE_FNAME"
49-
bash <(curl -s https://codecov.io/bash) -Z -c -F $TYPE -f $COVERAGE_FNAME
50-
fi
51-
done
39+
if [[ "$COVERAGE" && $? == 0 && "$TRAVIS_BRANCH" == "master" ]]; then
40+
echo "uploading coverage"
41+
echo "bash <(curl -s https://codecov.io/bash) -Z -c -F $TYPE -f $COVERAGE_FNAME"
42+
bash <(curl -s https://codecov.io/bash) -Z -c -F $TYPE -f $COVERAGE_FNAME
43+
fi

doc/source/development/contributing.rst

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,27 @@ and `good first issue
2424
where you could start out. Once you've found an interesting issue, you can
2525
return here to get your development environment setup.
2626

27+
When you start working on an issue, it's a good idea to assign the issue to yourself,
28+
so nobody else duplicates the work on it. GitHub restricts assigning issues to maintainers
29+
of the project only. In most projects, and until recently in pandas, contributors added a
30+
comment letting others know they are working on an issue. While this is ok, you need to
31+
check each issue individually, and it's not possible to find the unassigned ones.
32+
33+
For this reason, we implemented a workaround consisting of adding a comment with the exact
34+
text ``take``. When you do it, a GitHub action will automatically assign you the issue
35+
(this will take seconds, and may require refreshing the page to see it).
36+
By doing this, it's possible to filter the list of issues and find only the unassigned ones.
37+
38+
So, a good way to find an issue to start contributing to pandas is to check the list of
39+
`unassigned good first issues <https://github.com/pandas-dev/pandas/issues?q=is%3Aopen+is%3Aissue+label%3A%22good+first+issue%22+no%3Aassignee>`_
40+
and assign yourself one you like by writing a comment with the exact text ``take``.
41+
42+
If for whatever reason you are not able to continue working on the issue, please try to
43+
unassign it, so other people know it's available again. You can check the list of
44+
assigned issues, since people may not be working on them anymore. If you want to work on one
45+
that is assigned, feel free to kindly ask the current assignee if you can take it
46+
(please allow at least a week of inactivity before considering work on the issue discontinued).
47+
2748
Feel free to ask questions on the `mailing list
2849
<https://groups.google.com/forum/?fromgroups#!forum/pydata>`_ or on `Gitter`_.
2950

doc/source/development/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ Development
1313
:maxdepth: 2
1414

1515
contributing
16+
maintaining
1617
internals
1718
extending
1819
developer
Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,193 @@
1+
.. _maintaining:
2+
3+
******************
4+
Pandas Maintenance
5+
******************
6+
7+
This guide is for pandas' maintainers. It may also be interesting to contributors
8+
looking to understand the pandas development process and what steps are necessary
9+
to become a maintainer.
10+
11+
The main contributing guide is available at :ref:`contributing`.
12+
13+
Roles
14+
-----
15+
16+
Pandas uses two levels of permissions: **triage** and **core** team members.
17+
18+
Triage members can label and close issues and pull requests.
19+
20+
Core team members can label and close issues and pull requests, and can merge
21+
pull requests.
22+
23+
GitHub publishes the full `list of permissions`_.
24+
25+
Tasks
26+
-----
27+
28+
Pandas is largely a volunteer project, so these tasks shouldn't be read as
29+
"expectations" of triagers and maintainers. Rather, they're general descriptions
30+
of what it means to be a maintainer.
31+
32+
* Triage newly filed issues (see :ref:`maintaining.triage`)
33+
* Review newly opened pull requests
34+
* Respond to updates on existing issues and pull requests
35+
* Drive discussion and decisions on stalled issues and pull requests
36+
* Provide experience / wisdom on API design questions to ensure consistency and maintainability
37+
* Project organization (run / attend developer meetings, represent pandas)
38+
39+
http://matthewrocklin.com/blog/2019/05/18/maintainer may be interesting background
40+
reading.
41+
42+
.. _maintaining.triage:
43+
44+
Issue Triage
45+
------------
46+
47+
48+
Here's a typical workflow for triaging a newly opened issue.
49+
50+
1. **Thank the reporter for opening an issue**
51+
52+
The issue tracker is many people's first interaction with the pandas project itself,
53+
beyond just using the library. As such, we want it to be a welcoming, pleasant
54+
experience.
55+
56+
2. **Is the necessary information provided?**
57+
58+
Ideally reporters would fill out the issue template, but many don't.
59+
If crucial information (like the version of pandas they used) is missing,
60+
feel free to ask for that and label the issue with "Needs info". The
61+
report should follow the guidelines in :ref:`contributing.bug_reports`.
62+
You may want to link to that if they didn't follow the template.
63+
64+
Make sure that the title accurately reflects the issue. Edit it yourself
65+
if it's not clear.
66+
67+
3. **Is this a duplicate issue?**
68+
69+
We have many open issues. If a new issue is clearly a duplicate, label the
70+
new issue as "Duplicate", assign the milestone "No Action", and close the issue
71+
with a link to the original issue. Make sure to still thank the reporter, and
72+
encourage them to chime in on the original issue, and perhaps try to fix it.
73+
74+
If the new issue provides relevant information, such as a better or slightly
75+
different example, add it to the original issue as a comment or an edit to
76+
the original post.
77+
78+
4. **Is the issue minimal and reproducible?**
79+
80+
For bug reports, we ask that the reporter provide a minimal reproducible
81+
example. See http://matthewrocklin.com/blog/work/2018/02/28/minimal-bug-reports
82+
for a good explanation. If the example is not reproducible, or if it's
83+
*clearly* not minimal, feel free to ask the reporter if they can provide
84+
an example or simplify the provided one. Do acknowledge that writing
85+
minimal reproducible examples is hard work. If the reporter is struggling,
86+
you can try to write one yourself and we'll edit the original post to include it.
87+
88+
If a reproducible example can't be provided, add the "Needs info" label.
89+
90+
If a reproducible example is provided, but you see a simplification,
91+
edit the original post with your simpler reproducible example.
92+
93+
5. **Is this a clearly defined feature request?**
94+
95+
Generally, pandas prefers to discuss and design new features in issues, before
96+
a pull request is made. Encourage the submitter to include a proposed API
97+
for the new feature. Having them write a full docstring is a good way to
98+
pin down specifics.
99+
100+
We'll need a discussion from several pandas maintainers before deciding whether
101+
the proposal is in scope for pandas.
102+
103+
6. **Is this a usage question?**
104+
105+
We prefer that usage questions are asked on StackOverflow with the pandas
106+
tag. https://stackoverflow.com/questions/tagged/pandas
107+
108+
If it's easy to answer, feel free to link to the relevant documentation section,
109+
let them know that in the future this kind of question should be on
110+
StackOverflow, and close the issue.
111+
112+
7. **What labels and milestones should I add?**
113+
114+
Apply the relevant labels. This is a bit of an art, and comes with experience.
115+
Look at similar issues to get a feel for how things are labeled.
116+
117+
If the issue is clearly defined and the fix seems relatively straightforward,
118+
label the issue as "Good first issue".
119+
120+
Typically, new issues will be assigned the "Contributions welcome" milestone,
121+
unless it's known that this issue should be addressed in a specific release (say
122+
because it's a large regression).
123+
124+
.. _maintaining.closing:
125+
126+
Closing Issues
127+
--------------
128+
129+
Be delicate here: many people interpret closing an issue as us saying that the
130+
conversation is over. It's typically best to give the reporter some time to
131+
respond or self-close their issue if it's determined that the behavior is not a bug,
132+
or the feature is out of scope. Sometimes reporters just go away though, and
133+
we'll close the issue after the conversation has died.
134+
135+
Reviewing Pull Requests
136+
-----------------------
137+
138+
Anybody can review a pull request: regular contributors, triagers, or core-team
139+
members. Here are some guidelines to check.
140+
141+
* Tests should be in a sensible location.
142+
* New public APIs should be included somewhere in ``doc/source/reference/``.
143+
* New / changed API should use the ``versionadded`` or ``versionchanged`` directives in the docstring.
144+
* User-facing changes should have a whatsnew in the appropriate file.
145+
* Regression tests should reference the original GitHub issue number like ``# GH-1234``.
146+
147+
Cleaning up old Issues
148+
----------------------
149+
150+
Every open issue in pandas has a cost. Open issues make finding duplicates harder,
151+
and can make it harder to know what needs to be done in pandas. That said, closing
152+
issues isn't a goal on its own. Our goal is to make pandas the best it can be,
153+
and that's best done by ensuring that the quality of our open issues is high.
154+
155+
Occasionally, bugs are fixed but the issue isn't linked to in the Pull Request.
156+
In these cases, comment that "This has been fixed, but could use a test." and
157+
label the issue as "Good First Issue" and "Needs Test".
158+
159+
If an older issue doesn't follow our issue template, edit the original post to
160+
include a minimal example, the actual output, and the expected output. Uniformity
161+
in issue reports is valuable.
162+
163+
If an older issue lacks a reproducible example, label it as "Needs Info" and
164+
ask them to provide one (or write one yourself if possible). If one isn't
165+
provided reasonably soon, close it according to the policies in :ref:`maintaining.closing`.
166+
167+
Cleaning up old Pull Requests
168+
-----------------------------
169+
170+
Occasionally, contributors are unable to finish off a pull request.
171+
If some time has passed (two weeks, say) since the last review requesting changes,
172+
gently ask if they're still interested in working on this. If another two weeks or
173+
so passes with no response, thank them for their work and close the pull request.
174+
Comment on the original issue that "There's a stalled PR at #1234 that may be
175+
helpful.", and perhaps label the issue as "Good first issue" if the PR was relatively
176+
close to being accepted.
177+
178+
Additionally, core-team members can push to contributors' branches. This can be
179+
helpful for pushing an important PR across the line, or for fixing a small
180+
merge conflict.
181+
182+
Becoming a pandas maintainer
183+
----------------------------
184+
185+
The full process is outlined in our `governance documents`_. In summary,
186+
we're happy to give triage permissions to anyone who shows interest by
187+
being helpful on the issue tracker.
188+
189+
The current list of core-team members is at
190+
https://github.com/pandas-dev/pandas-governance/blob/master/people.md
191+
192+
.. _governance documents: https://github.com/pandas-dev/pandas-governance
193+
.. _list of permissions: https://help.github.com/en/github/setting-up-and-managing-organizations-and-teams/repository-permission-levels-for-an-organization

environment.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ dependencies:
2121
- flake8-comprehensions>=3.1.0 # used by flake8, linting of unnecessary comprehensions
2222
- flake8-rst>=0.6.0,<=0.7.0 # linting of code blocks in rst files
2323
- isort # check that imports are in the right order
24-
- mypy=0.720
24+
- mypy=0.730
2525
- pycodestyle # used by flake8
2626

2727
# documentation
@@ -53,7 +53,7 @@ dependencies:
5353
- moto # mock S3
5454
- pytest>=4.0.2
5555
- pytest-cov
56-
- pytest-xdist
56+
- pytest-xdist>=1.21
5757
- seaborn
5858
- statsmodels
5959

0 commit comments

Comments
 (0)