Skip to content

Commit 82d4c60

Browse files
fix: don't URL-encode V2 continuation tokens under encoding-type=url (#91)
1 parent 65c1dc5 commit 82d4c60

2 files changed

Lines changed: 76 additions & 4 deletions

File tree

s3proxy/xml_responses.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -70,14 +70,18 @@ def list_objects(
7070
<StorageClass>{obj.get("storage_class", "STANDARD")}</StorageClass>{owner_xml}
7171
</Contents>"""
7272

73+
# V2 continuation tokens are opaque cursors, not keys. Per the S3 spec only
74+
# Key/Prefix/Delimiter/StartAfter are URL-encoded under encoding-type=url, and
75+
# clients (botocore) never URL-decode the continuation token. URL-encoding it
76+
# corrupts the round-trip (e.g. '/' -> '%2F'), the backend can't advance, and
77+
# the same token repeats -> botocore aborts with "same next token received
78+
# twice". XML-escape only, regardless of encoding_type.
7379
next_token_xml = (
74-
f"<NextContinuationToken>{_encode_key(next_token, encoding_type)}</NextContinuationToken>"
75-
if next_token
76-
else ""
80+
f"<NextContinuationToken>{escape(next_token)}</NextContinuationToken>" if next_token else ""
7781
)
7882

7983
continuation_token_xml = (
80-
f"<ContinuationToken>{_encode_key(continuation_token, encoding_type)}</ContinuationToken>"
84+
f"<ContinuationToken>{escape(continuation_token)}</ContinuationToken>"
8185
if continuation_token is not None
8286
else ""
8387
)
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
"""Self-check: V2 continuation tokens must not be URL-encoded.
2+
3+
barman-cloud / botocore send encoding-type=url on ListObjectsV2. Continuation
4+
tokens are opaque cursors, not keys: per the S3 spec only Key/Prefix/Delimiter/
5+
StartAfter are URL-encoded, and botocore never URL-decodes the token. When the
6+
proxy URL-encoded a key-shaped token ('a/b/c.tar' -> 'a%2Fb%2Fc.tar') the token
7+
could not round-trip, the backend kept returning the first page, and botocore
8+
aborted the paginator with "The same next token was received twice".
9+
10+
This proves the V2 serializer emits the token verbatim (XML-escaped only) even
11+
under encoding-type=url, so it survives the round-trip and pagination advances.
12+
"""
13+
14+
from xml.etree.ElementTree import fromstring
15+
16+
from s3proxy.xml_responses import list_objects
17+
18+
# Key-shaped continuation token used as both the incoming and the next token in
# the test below. It contains '/', which URL-encoding would turn into '%2F'.
TOKEN = "production-v3/production/base/20260619T223000/data_0007.tar"
19+
# S3 XML namespace in the "{uri}" form it takes inside ElementTree element tags;
# _parse() strips it so fields can be looked up by their bare element name.
_NS = "{http://s3.amazonaws.com/doc/2006-03-01/}"
20+
21+
22+
def _parse(xml: str) -> dict:
    """Return a mapping of top-level element name -> element text.

    The S3 namespace prefix is removed from each tag so callers can index by
    the bare element name (e.g. ``"NextContinuationToken"``). Only the direct
    children of the root are inspected; if a tag repeats, the last one wins.
    """
    fields = {}
    for element in fromstring(xml):
        name = element.tag.replace(_NS, "")
        fields[name] = element.text
    return fields
25+
26+
27+
def test_v2_continuation_token_not_url_encoded():
    """Guard against URL-encoding V2 continuation tokens under encoding-type=url.

    Serializes a truncated listing whose tokens contain '/' and checks that both
    token elements are emitted verbatim, that tokens containing XML markup
    characters still survive a parse round-trip, and that object keys are still
    URL-encoded (so the token fix did not disable key encoding).
    """
    xml = list_objects(
        bucket="oceanio-dc2-postgresql-backups",
        prefix="production-v3/production/base/",
        max_keys=1000,
        is_truncated=True,
        next_token=TOKEN,
        objects=[],
        continuation_token=TOKEN,
        encoding_type="url",  # what barman/botocore sends
    )
    fields = _parse(xml)

    # Verbatim token -> '/' preserved, not '%2F'. This is the actual bug guard.
    assert fields["NextContinuationToken"] == TOKEN
    assert fields["ContinuationToken"] == TOKEN
    # Check the raw token elements rather than the whole document: the Prefix
    # in this request also contains '/', and Prefix may legitimately be
    # URL-encoded under encoding-type=url, so a document-wide "%2F" check could
    # fail for reasons unrelated to continuation tokens.
    assert f"<NextContinuationToken>{TOKEN}</NextContinuationToken>" in xml
    assert f"<ContinuationToken>{TOKEN}</ContinuationToken>" in xml

    # Tokens are XML-escaped only: markup characters must survive a parse
    # round-trip, and characters such as '+' and '=' must not be
    # percent-encoded.
    special_token = "a&b<c>/d+e="
    special_fields = _parse(
        list_objects(
            bucket="b",
            prefix="",
            max_keys=1000,
            is_truncated=True,
            next_token=special_token,
            objects=[],
            continuation_token=special_token,
            encoding_type="url",
        )
    )
    assert special_fields["NextContinuationToken"] == special_token
    assert special_fields["ContinuationToken"] == special_token

    # Keys are still URL-encoded under encoding-type=url (regression guard).
    key_xml = list_objects(
        bucket="b",
        prefix="",
        max_keys=1000,
        is_truncated=False,
        next_token=None,
        objects=[
            {
                "key": "a/b/c.tar",
                "last_modified": "2026-06-24T09:00:00",
                "etag": "x",
                "size": 1,
                "storage_class": "STANDARD",
            }
        ],
        encoding_type="url",
    )
    assert "a%2Fb%2Fc.tar" in key_xml
64+
65+
66+
if __name__ == "__main__":
    # Allow running this self-check directly as a script, without pytest:
    # an AssertionError aborts the run, otherwise "ok" is printed.
    test_v2_continuation_token_not_url_encoded()
    print("ok")

0 commit comments

Comments
 (0)