Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions s3proxy/xml_responses.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,14 +70,18 @@ def list_objects(
<StorageClass>{obj.get("storage_class", "STANDARD")}</StorageClass>{owner_xml}
</Contents>"""

# V2 continuation tokens are opaque cursors, not keys. Per the S3 spec only
# Key/Prefix/Delimiter/StartAfter are URL-encoded under encoding-type=url, and
# clients (botocore) never URL-decode the continuation token. URL-encoding it
# corrupts the round-trip (e.g. '/' -> '%2F'), the backend can't advance, and
# the same token repeats -> botocore aborts with "same next token received
# twice". XML-escape only, regardless of encoding_type.
next_token_xml = (
f"<NextContinuationToken>{_encode_key(next_token, encoding_type)}</NextContinuationToken>"
if next_token
else ""
f"<NextContinuationToken>{escape(next_token)}</NextContinuationToken>" if next_token else ""
)

continuation_token_xml = (
f"<ContinuationToken>{_encode_key(continuation_token, encoding_type)}</ContinuationToken>"
f"<ContinuationToken>{escape(continuation_token)}</ContinuationToken>"
if continuation_token is not None
else ""
)
Expand Down
68 changes: 68 additions & 0 deletions tests/unit/test_list_objects_continuation_token.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
"""Self-check: V2 continuation tokens must not be URL-encoded.

barman-cloud / botocore send encoding-type=url on ListObjectsV2. Continuation
tokens are opaque cursors, not keys: per the S3 spec only Key/Prefix/Delimiter/
StartAfter are URL-encoded, and botocore never URL-decodes the token. When the
proxy URL-encoded a key-shaped token ('a/b/c.tar' -> 'a%2Fb%2Fc.tar') the token
could not round-trip, the backend kept returning the first page, and botocore
aborted the paginator with "The same next token was received twice".

This proves the V2 serializer emits the token verbatim (XML-escaped only) even
under encoding-type=url, so it survives the round-trip and pagination advances.
"""

from xml.etree.ElementTree import fromstring

from s3proxy.xml_responses import list_objects

# Key-shaped cursor containing '/' characters; the test below passes it as both
# next_token and continuation_token and expects it back unchanged.
TOKEN = "production-v3/production/base/20260619T223000/data_0007.tar"
# Namespace prefix as it appears in parsed element tags; _parse removes it so
# fields can be looked up by their bare element name.
_NS = "{http://s3.amazonaws.com/doc/2006-03-01/}"


def _parse(xml: str) -> dict:
    """Return ``{tag: text}`` for the direct children of the XML root.

    The ``_NS`` namespace text is removed from each tag. When several
    children share a tag, the last one wins.
    """
    fields = {}
    for element in fromstring(xml):
        bare_tag = element.tag.replace(_NS, "")
        fields[bare_tag] = element.text
    return fields


def test_v2_continuation_token_not_url_encoded():
    """Tokens come back verbatim under encoding-type=url; keys are still encoded."""
    token_response = list_objects(
        bucket="oceanio-dc2-postgresql-backups",
        prefix="production-v3/production/base/",
        max_keys=1000,
        is_truncated=True,
        next_token=TOKEN,
        objects=[],
        continuation_token=TOKEN,
        encoding_type="url",  # what barman/botocore sends
    )
    parsed = _parse(token_response)

    # Bug guard: both token elements must carry the token untouched, i.e.
    # with '/' preserved rather than rewritten to '%2F'.
    for tag in ("NextContinuationToken", "ContinuationToken"):
        assert parsed[tag] == TOKEN
    assert "%2F" not in token_response

    # Regression guard: object keys must still be URL-encoded when
    # encoding-type=url is requested.
    sample_object = {
        "key": "a/b/c.tar",
        "last_modified": "2026-06-24T09:00:00",
        "etag": "x",
        "size": 1,
        "storage_class": "STANDARD",
    }
    key_response = list_objects(
        bucket="b",
        prefix="",
        max_keys=1000,
        is_truncated=False,
        next_token=None,
        objects=[sample_object],
        encoding_type="url",
    )
    assert "a%2Fb%2Fc.tar" in key_response


if __name__ == "__main__":
    # Lets the check run as a plain script, without a test runner.
    test_v2_continuation_token_not_url_encoded()
    print("ok")