From b204fa7edb1ffcf257731837f23c982d99d28e8e Mon Sep 17 00:00:00 2001 From: Nico Kemnitz Date: Sat, 29 Jul 2023 11:23:50 +0200 Subject: [PATCH] Verify Content-Encoding when querying Content-Length --- fsspec/implementations/http.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fsspec/implementations/http.py b/fsspec/implementations/http.py index afd0c2664..5d118dcbd 100644 --- a/fsspec/implementations/http.py +++ b/fsspec/implementations/http.py @@ -195,7 +195,6 @@ async def _ls_real(self, url, detail=True, **kwargs): return list(sorted(out)) async def _ls(self, url, detail=True, **kwargs): - if self.use_listings_cache and url in self.dircache: out = self.dircache[url] else: @@ -841,7 +840,10 @@ async def _file_info(url, session, size_policy="head", **kwargs): # or 'Accept-Ranges': 'none' (not 'bytes') # to mean streaming only, no random access => return None if "Content-Length" in r.headers: - info["size"] = int(r.headers["Content-Length"]) + # Some servers may choose to ignore Accept-Encoding and return + # compressed content, in which case the returned size is unreliable. + if r.headers.get("Content-Encoding", "identity") == "identity": + info["size"] = int(r.headers["Content-Length"]) elif "Content-Range" in r.headers: info["size"] = int(r.headers["Content-Range"].split("/")[1])