fsspec · martindurant · Jul 14, 2025 · Jul 10, 2025 · Jul 11, 2025 · martindurant
diff --git a/fsspec/implementations/http.py b/fsspec/implementations/http.py
@@ -158,14 +158,24 @@ async def _ls_real(self, url, detail=True, **kwargs):
         session = await self.set_session()
         async with session.get(self.encode_url(url), **self.kwargs) as r:
             self._raise_not_found_for_status(r, url)
-            try:
-                text = await r.text()
-                if self.simple_links:
-                    links = ex2.findall(text) + [u[2] for u in ex.findall(text)]
-                else:
-                    links = [u[2] for u in ex.findall(text)]
-            except UnicodeDecodeError:
-                links = []  # binary, not HTML
+
+            if "Content-Type" in r.headers:
+                mimetype = r.headers["Content-Type"].partition(";")[0]
+            else:
+                mimetype = None
+
+            if mimetype in ("text/html", None):
+                try:
+                    text = await r.text(errors="ignore")
+                    if self.simple_links:
+                        links = ex2.findall(text) + [u[2] for u in ex.findall(text)]
+                    else:
+                        links = [u[2] for u in ex.findall(text)]
+                except UnicodeDecodeError:
+                    links = []  # binary, not HTML
+            else:
+                links = []
+
         out = set()
         parts = urlparse(url)
         for l in links:

diff --git a/fsspec/implementations/tests/test_http.py b/fsspec/implementations/tests/test_http.py
@@ -139,6 +139,11 @@ def test_glob_return_subfolders(server):
 
 
 def test_isdir(server):
+    h = fsspec.filesystem("http", headers={"give_mimetype": "true"})
+    assert h.isdir(server.address + "/index/")
+    assert not h.isdir(server.realfile)
+    assert not h.isdir(server.address + "doesnotevenexist")
+
     h = fsspec.filesystem("http")
     assert h.isdir(server.address + "/index/")
     assert not h.isdir(server.realfile)