Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 24 additions & 9 deletions s3proxy/handlers/buckets.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,17 @@ async def handle_list_objects_v1(self, request: Request, creds: S3Credentials) -
max_keys = int(query.get("max-keys", ["1000"])[0])
encoding_type = query.get("encoding-type", [""])[0] or None

# Serve the client's legacy V1 ListObjects via the backend's V2 API.
# Hetzner (and other modern S3 backends) only implement ListObjectsV2 and
# reject V1 with HTTP 400, which breaks any V1 client (scylla-manager's
# rclone 1.51.0, barman-cloud-backup-delete). V1 marker pagination is
# stateless (marker == last key returned), which maps exactly onto V2
# StartAfter, so the translation is lossless for the recursive (no
# delimiter) listings these clients use.
try:
resp = await client.list_objects_v1(bucket, prefix, marker, delimiter, max_keys)
resp = await client.list_objects_v2(
bucket, prefix, None, max_keys, delimiter, marker
)
except ClientError as e:
self._raise_bucket_error(e, bucket)

Expand All @@ -140,14 +149,20 @@ async def handle_list_objects_v1(self, request: Request, creds: S3Credentials) -
if stripped is not None:
common_prefixes.append(stripped)

# V1 uses NextMarker (or last key if truncated and no delimiter)
next_marker = _strip_minio_cache_suffix(resp.get("NextMarker"))
if resp.get("IsTruncated") and not next_marker:
# Fallback: use last object key or last common prefix
if objects:
next_marker = objects[-1]["key"]
elif common_prefixes:
next_marker = common_prefixes[-1]
# V1 resumes via marker == last key returned. Synthesize NextMarker from
# the largest RAW backend key/prefix on this page (not the filtered list:
# a page may be all-internal keys yet still truncated) so the next
# marker -> StartAfter resumes after everything returned.
# ponytail: O(page) max over the page. A single delimiter-level prefix
# with >max_keys *distinct* sub-prefixes can re-emit one boundary prefix
# across pages (harmless, idempotent for these read-only catalog walks);
# upgrade path is a redis-backed marker->continuation-token map.
next_marker = None
if resp.get("IsTruncated"):
raw = [o["Key"] for o in resp.get("Contents", [])]
raw += [cp["Prefix"] for cp in resp.get("CommonPrefixes", [])]
if raw:
next_marker = _strip_minio_cache_suffix(max(raw))

return Response(
content=xml_responses.list_objects_v1(
Expand Down
143 changes: 143 additions & 0 deletions tests/unit/test_list_objects_v1_via_v2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
"""Self-check: client V1 ListObjects is served via the backend's V2 API.

Hetzner Object Storage (and other modern S3 backends) only implement
ListObjectsV2 and reject legacy V1 ListObjects with HTTP 400 — which broke
every V1 client (scylla-manager's rclone 1.51.0, barman-cloud-backup-delete).

This proves handle_list_objects_v1:
- calls the backend's list_objects_v2 (never list_objects_v1),
- maps the client's V1 marker onto V2 StartAfter,
- filters internal keys, and
- synthesizes a V1 NextMarker from the largest backend key when truncated.
"""

import asyncio
import contextlib
import datetime as dt
from xml.etree.ElementTree import fromstring

from s3proxy.handlers.buckets import BucketHandlerMixin

# Key prefix that the _Handler stub in this module treats as proxy-internal
# (see _Handler._is_internal_key); keys starting with it are expected to be
# filtered out of the client-visible listing.
INTERNAL_PREFIX = "s3proxy-internal/"
# ElementTree-style "{namespace}" tag prefix used to look up and strip the S3
# XML namespace when parsing the handler's response body.
_NS = "{http://s3.amazonaws.com/doc/2006-03-01/}"


class _URL:
    """Minimal stand-in for a request URL exposing ``path`` and ``query``."""

    def __init__(self, path, query):
        # Both values are stored verbatim; this stub performs no parsing.
        self.path, self.query = path, query


class _Request:
    """Minimal stand-in for an incoming request; only ``.url`` is provided."""

    def __init__(self, path, query):
        # The stub carries nothing but the URL object built from its arguments.
        url = _URL(path, query)
        self.url = url


class _FakeClient:
    """Backend client double that replays one canned ListObjectsV2 response.

    Every ``list_objects_v2`` call is recorded in ``v2_calls`` (only the
    ``start_after``, ``delimiter`` and ``prefix`` arguments are kept) so a
    test can assert how the handler translated the client's request.
    """

    def __init__(self, resp):
        self.v2_calls = []
        self.resp = resp

    async def list_objects_v1(self, *a, **k):
        # Reaching the legacy API at all is the failure this double exists
        # to detect, so it raises unconditionally.
        raise AssertionError("backend V1 ListObjects must not be called")

    async def list_objects_v2(
        self,
        bucket,
        prefix=None,
        continuation_token=None,
        max_keys=1000,
        delimiter=None,
        start_after=None,
    ):
        # Record the arguments under test, then replay the canned response.
        recorded = dict(start_after=start_after, delimiter=delimiter, prefix=prefix)
        self.v2_calls.append(recorded)
        return self.resp

    async def head_object(self, bucket, key):
        # Always reports an object with empty metadata.
        empty_head = {"Metadata": {}}
        return empty_head


class _Handler(BucketHandlerMixin):
    """Test harness around ``BucketHandlerMixin`` with stubbed collaborators.

    The real ``handle_list_objects_v1`` and ``_process_list_objects`` are
    bound explicitly; the remaining methods are simple stand-ins
    (presumably for helpers the real handler normally gets from elsewhere —
    verify against the mixin) so that no real backend or crypto is touched.
    """

    handle_list_objects_v1 = BucketHandlerMixin.handle_list_objects_v1
    _process_list_objects = BucketHandlerMixin._process_list_objects

    def __init__(self, client):
        # The parent initializer is not invoked; only the fake backend
        # client is stored.
        self._fake = client

    def _parse_bucket(self, path):
        # The bucket is the first path segment once leading slashes are gone.
        bucket, _, _ = path.lstrip("/").partition("/")
        return bucket

    def _is_internal_key(self, key):
        # Internal keys are identified purely by the module-level prefix.
        return key.startswith(INTERNAL_PREFIX)

    def _get_plaintext_size(self, meta, fallback):
        # Metadata is ignored: the fallback size is reported unchanged.
        return fallback

    def _get_effective_etag(self, meta, fallback):
        # Metadata is ignored: the fallback ETag is returned without quotes.
        return fallback.strip('"')

    def _client(self, creds):
        # The fake is captured when _client() is called, not when the
        # context manager is entered.
        fake = self._fake

        @contextlib.asynccontextmanager
        async def _yield_fake():
            yield fake

        return _yield_fake()


def _obj(key):
    """Build a backend ``Contents`` entry for *key* with fixed dummy fields."""
    # Only the key varies between entries; everything else is constant filler.
    entry = {"Key": key}
    entry.update(
        {
            "Size": 9,
            "ETag": '"e"',
            "LastModified": dt.datetime(2026, 6, 30, 9, 0, 0),
            "StorageClass": "STANDARD",
        }
    )
    return entry


def _run(resp, query, marker_expected_start_after):
    """Drive ``handle_list_objects_v1`` against a fake backend and parse the XML.

    Args:
        resp: Canned ListObjectsV2 response the fake backend returns.
        query: Raw query string placed on the fake request URL.
        marker_expected_start_after: When not ``None``, the value the handler
            must have passed to the backend as ``start_after``. ``None``
            skips this check.

    Returns:
        A ``(handler, fields, keys)`` tuple: the handler instance (its
        ``_fake.v2_calls`` holds the recorded backend calls), a dict mapping
        each top-level response element's namespace-stripped tag to its text
        (repeated tags collapse to the last occurrence), and the list of
        ``Key`` texts from the ``Contents`` elements in document order.
    """
    handler = _Handler(_FakeClient(resp))
    req = _Request("/postgres-backups", query)
    out = asyncio.run(handler.handle_list_objects_v1(req, creds=None))

    # Previously this argument was accepted but ignored; honour it so a
    # caller's stated expectation is actually verified.
    if marker_expected_start_after is not None:
        seen = [call["start_after"] for call in handler._fake.v2_calls]
        assert seen == [marker_expected_start_after], (
            f"expected a single V2 call with start_after="
            f"{marker_expected_start_after!r}, got {seen!r}"
        )

    root = fromstring(out.body)
    fields = {child.tag.replace(_NS, ""): child.text for child in root}
    keys = [entry.find(f"{_NS}Key").text for entry in root.findall(f"{_NS}Contents")]
    return handler, fields, keys


def test_v1_served_via_v2_marker_maps_to_start_after():
    """A truncated V1 listing goes through V2 and gets a synthesized NextMarker."""
    internal_key = f"{INTERNAL_PREFIX}meta"
    backend_page = {
        "Contents": [_obj("a.txt"), _obj(internal_key), _obj("b.txt")],
        "CommonPrefixes": [],
        "IsTruncated": True,
    }
    handler, fields, keys = _run(
        backend_page, "prefix=&marker=last-seen.txt", "last-seen.txt"
    )

    # The backend must be reached through V2, with the client's marker
    # forwarded as StartAfter.
    expected_call = {"start_after": "last-seen.txt", "delimiter": None, "prefix": ""}
    assert handler._fake.v2_calls == [expected_call]
    # The internal key is hidden from the client; visible keys keep their order.
    assert keys == ["a.txt", "b.txt"]
    # On a truncated page NextMarker is the largest RAW backend key, which here
    # is the internal one ("s..." sorts after "a.txt" and "b.txt").
    assert fields["IsTruncated"] == "true"
    assert fields["NextMarker"] == internal_key
    # The request marker is echoed back in the response.
    assert fields["Marker"] == "last-seen.txt"


def test_v1_not_truncated_has_no_next_marker():
    """A complete (non-truncated) page must not advertise a NextMarker."""
    single_page = {
        "Contents": [_obj("only.txt")],
        "CommonPrefixes": [],
        "IsTruncated": False,
    }
    _handler, fields, keys = _run(single_page, "prefix=", None)

    assert keys == ["only.txt"]
    assert fields["IsTruncated"] == "false"
    # Either the element is absent or it carries no text.
    assert fields.get("NextMarker") is None


if __name__ == "__main__":
    # Allow running this file directly, without pytest: run each check in
    # order and report success only if none of them raised.
    for check in (
        test_v1_served_via_v2_marker_maps_to_start_after,
        test_v1_not_truncated_has_no_next_marker,
    ):
        check()
    print("ok")