Skip to content

Commit 014eff3

Browse files
committed
Skip checkpoint-equal incremental items
1 parent 9d0cfdb commit 014eff3

4 files changed

Lines changed: 90 additions & 7 deletions

File tree

CHANGES.rst

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,9 @@ Unreleased
1313
legacy file is removed once existing issue/pull backups have resource
1414
checkpoints (#62).
1515
- Stop paginating pull requests during incremental backups once the sorted
16-
results are older than the active checkpoint.
16+
results are at or older than the active checkpoint.
17+
- Avoid re-fetching discussions and pull requests whose ``updated_at`` exactly
18+
matches the active incremental checkpoint.
1719
- Avoid extra release asset list requests by using asset metadata already
1820
included in GitHub's releases response.
1921
- Add ``--token-from-gh`` to read authentication from ``gh auth token``.

github_backup/github_backup.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -2233,7 +2233,7 @@ def retrieve_discussion_summaries(args, repository, since=None):
22332233
if updated_at and (newest_seen is None or updated_at > newest_seen):
22342234
newest_seen = updated_at
22352235

2236-
if since and updated_at and updated_at < since:
2236+
if since and updated_at and updated_at <= since:
22372237
stop = True
22382238
break
22392239

@@ -2654,7 +2654,7 @@ def track_newest_pull_update(pull):
26542654
newest_pull_update = updated_at
26552655

26562656
def pull_is_due_for_repository_checkpoint(pull):
2657-
return not repository_since or pull["updated_at"] >= repository_since
2657+
return not repository_since or pull["updated_at"] > repository_since
26582658

26592659
if not args.include_pull_details:
26602660
pull_states = ["open", "closed"]
@@ -2664,18 +2664,18 @@ def pull_is_due_for_repository_checkpoint(pull):
26642664
args, _pulls_template, query_args=query_args, lazy=True
26652665
):
26662666
track_newest_pull_update(pull)
2667-
if pulls_since and pull["updated_at"] < pulls_since:
2667+
if pulls_since and pull["updated_at"] <= pulls_since:
26682668
break
2669-
if not pulls_since or pull["updated_at"] >= pulls_since:
2669+
if not pulls_since or pull["updated_at"] > pulls_since:
26702670
pulls[pull["number"]] = pull
26712671
else:
26722672
for pull in retrieve_data(
26732673
args, _pulls_template, query_args=query_args, lazy=True
26742674
):
26752675
track_newest_pull_update(pull)
2676-
if pulls_since and pull["updated_at"] < pulls_since:
2676+
if pulls_since and pull["updated_at"] <= pulls_since:
26772677
break
2678-
if not pulls_since or pull["updated_at"] >= pulls_since:
2678+
if not pulls_since or pull["updated_at"] > pulls_since:
26792679
if pull_is_due_for_repository_checkpoint(pull):
26802680
pulls[pull["number"]] = retrieve_data(
26812681
args,

tests/test_discussions.py

Lines changed: 35 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -50,6 +50,41 @@ def test_retrieve_discussion_summaries_stops_at_incremental_since(create_args):
5050
)
5151

5252

53+
def test_retrieve_discussion_summaries_excludes_checkpoint_timestamp(create_args):
54+
args = create_args()
55+
repository = {"full_name": "owner/repo"}
56+
57+
page = {
58+
"repository": {
59+
"hasDiscussionsEnabled": True,
60+
"discussions": {
61+
"totalCount": 1,
62+
"nodes": [
63+
{
64+
"number": 1,
65+
"title": "already backed up",
66+
"updatedAt": "2026-01-01T00:00:00Z",
67+
},
68+
],
69+
"pageInfo": {"hasNextPage": True, "endCursor": "NEXT"},
70+
},
71+
}
72+
}
73+
74+
with patch(
75+
"github_backup.github_backup.retrieve_graphql_data", return_value=page
76+
) as mock_retrieve:
77+
summaries, newest, enabled, total = github_backup.retrieve_discussion_summaries(
78+
args, repository, since="2026-01-01T00:00:00Z"
79+
)
80+
81+
assert enabled is True
82+
assert total == 1
83+
assert newest == "2026-01-01T00:00:00Z"
84+
assert summaries == []
85+
assert mock_retrieve.call_count == 1
86+
87+
5388
def test_retrieve_discussion_summaries_disabled_discussions(create_args):
5489
args = create_args()
5590
repository = {"full_name": "owner/repo"}

tests/test_pull_incremental_pagination.py

Lines changed: 46 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,52 @@ def headers(self):
3131
return headers
3232

3333

34+
def test_backup_pulls_incremental_excludes_checkpoint_timestamp(create_args, tmp_path):
35+
args = create_args(include_pulls=True, incremental=True)
36+
args.since = "2026-04-26T08:13:46Z"
37+
repository = {"full_name": "owner/repo"}
38+
39+
responses = [
40+
MockHTTPResponse([]),
41+
MockHTTPResponse(
42+
[
43+
{
44+
"number": 1,
45+
"title": "already backed up",
46+
"updated_at": "2026-04-26T08:13:46Z",
47+
},
48+
],
49+
link_header='<https://api.github.com/repos/owner/repo/pulls?per_page=100&state=closed&page=2>; rel="next"',
50+
),
51+
MockHTTPResponse(
52+
[
53+
{
54+
"number": 0,
55+
"title": "older pull on page 2",
56+
"updated_at": "2026-04-25T07:00:00Z",
57+
}
58+
]
59+
),
60+
]
61+
requests_made = []
62+
63+
def mock_urlopen(request, *args, **kwargs):
64+
requests_made.append(request.get_full_url())
65+
return responses[len(requests_made) - 1]
66+
67+
with patch("github_backup.github_backup.urlopen", side_effect=mock_urlopen):
68+
github_backup.backup_pulls(
69+
args, tmp_path, repository, "https://api.github.com/repos"
70+
)
71+
72+
assert len(requests_made) == 2
73+
assert "state=open" in requests_made[0]
74+
assert "state=closed" in requests_made[1]
75+
assert all("page=2" not in url for url in requests_made)
76+
assert not os.path.exists(tmp_path / "pulls" / "1.json")
77+
assert not os.path.exists(tmp_path / "pulls" / "0.json")
78+
79+
3480
def test_backup_pulls_incremental_stops_before_fetching_old_pages(
3581
create_args, tmp_path
3682
):

0 commit comments

Comments
 (0)