Skip to content

Prevent returning cached entry if the entry is degenerate #1873

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
Open
34 changes: 31 additions & 3 deletions fsspec/implementations/dbfs.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
from __future__ import annotations

import base64
import urllib

import requests
import requests.exceptions
from requests.adapters import HTTPAdapter, Retry
from typing_extensions import override

from fsspec import AbstractFileSystem
from fsspec.spec import AbstractBufferedFile
Expand Down Expand Up @@ -57,6 +59,24 @@ def __init__(self, instance, token, **kwargs):

super().__init__(**kwargs)

@override
def _ls_from_cache(self, path) -> list[dict[str, str | int]] | None:
"""Check cache for listing

Returns listing, if found (may be empty list for a directory that
exists but contains nothing), None if not in cache.
"""
self.dircache.pop(path.rstrip("/"), None)

parent = self._parent(path)
if parent in self.dircache:
for entry in self.dircache[parent]:
if entry["name"] == path.rstrip("/"):
if entry["type"] != "directory":
return [entry]
return []
raise FileNotFoundError(path)

def ls(self, path, detail=True, **kwargs):
"""
List the contents of the given path.
Expand All @@ -70,7 +90,15 @@ def ls(self, path, detail=True, **kwargs):
but also additional information on file sizes
and types.
"""
out = self._ls_from_cache(path)
try:
out = self._ls_from_cache(path)
except FileNotFoundError:
# This happens if the `path`'s parent was cached, but `path` is not
# there. This suggests that `path` is new since the parent was
# cached. Attempt to invalidate parent's cache before continuing.
self.dircache.pop(self._parent(path), None)
out = None

if not out:
try:
r = self._send_to_api(
Expand Down Expand Up @@ -460,7 +488,7 @@ def _fetch_range(self, start, end):
return return_buffer

def _to_sized_blocks(self, length, start=0):
"""Helper function to split a range from 0 to total_length into bloksizes"""
"""Helper function to split a range from 0 to total_length into blocksizes"""
end = start + length
for data_chunk in range(start, end, self.blocksize):
data_start = data_chunk
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ interactions:
Content-Type:
- application/json
User-Agent:
- python-requests/2.32.3
- python-requests/2.32.4
authorization:
- DUMMY
method: POST
Expand Down Expand Up @@ -44,7 +44,7 @@ interactions:
x-content-type-options:
- nosniff
x-request-id:
- e71bb75f-b05a-4523-b8bc-ce331812fbf0
- 8b8f96eb-6260-476f-9bee-5579fe95ce97
status:
code: 200
message: OK
Expand All @@ -62,17 +62,17 @@ interactions:
Content-Type:
- application/json
User-Agent:
- python-requests/2.32.3
- python-requests/2.32.4
authorization:
- DUMMY
method: GET
uri: https://my_instance.com/api/2.0/dbfs/list
response:
body:
string: !!binary |
H4sIAAAAAAAEAxzMMQ7CMAwF0Lv8OVKYcwAuwIiQ1SaOMKI4ss3SqncvZX3D29DlzY5y3zCmeKIg
X39yCzXO3X1wpTZ3z8P0xTXIVAMJ4tTEUMK+nP4JuayMcklYtEmXOoXoh0KWU/fHfgAAAP//AwCB
uORhbAAAAA==
H4sIAAAAAAAEAxzMOw7CMAwA0Lt4jhQX8VMOwAUYURWVxBFGFEe2Wah6dyrWN7wFGr/IIN0W6JM/
IEG8bHJ1UYrNrFPJ9d4sdpUnFc8q4hCALVdWSK4fCv8kG38JEgaYpXLjMjnLOzvPmw6nw2444x6P
iLiO6w8AAP//AwBhkoZWeAAAAA==
headers:
access-control-allow-headers:
- Authorization, X-Databricks-Azure-Workspace-Resource-Id, X-Databricks-Org-Id,
Expand Down Expand Up @@ -100,7 +100,7 @@ interactions:
x-content-type-options:
- nosniff
x-request-id:
- 6e66514c-c883-49c7-b215-2fecc5ef8222
- 4dab76b2-a193-45d7-8000-642f1c003aa6
status:
code: 200
message: OK
Expand All @@ -118,7 +118,7 @@ interactions:
Content-Type:
- application/json
User-Agent:
- python-requests/2.32.3
- python-requests/2.32.4
authorization:
- DUMMY
method: GET
Expand Down Expand Up @@ -149,7 +149,7 @@ interactions:
x-content-type-options:
- nosniff
x-request-id:
- 59b8c763-be19-4402-8d37-135a7d6c7aed
- 9a476bcf-3900-47ca-b21d-412419e70a11
status:
code: 200
message: OK
Expand All @@ -167,19 +167,19 @@ interactions:
Content-Type:
- application/json
User-Agent:
- python-requests/2.32.3
- python-requests/2.32.4
authorization:
- DUMMY
method: GET
uri: https://my_instance.com/api/2.0/dbfs/list
response:
body:
string: !!binary |
H4sIAAAAAAAEAzyOwWrDMBBEf0XoXNtx4hqSU6B1IZcY4hQKpQhFWrkqtlfdVUtDyL9XbqCnZdh5
M3ORQISkDFqQG3louvb58NCox7bp1L49quZl1x3lnRyBWfezZ4/C+QEEkrCewESks4Afz5EFTiLo
+C6Kp+To0gcKxxzAKHtyXATCjwQoQozF6Jn91Kv/kDzVWIjaDyw3rxe5jecwF84n7xH7AXTwnBsc
i5vMKZj8AJ9fwHE3OUwBdFPK20TWTjuoa8jK2q6yamGq7ATrZbYuYelW9aLU91ViGOj7b4mOOlHy
+nb9BQAA//8DADr+FHAYAQAA
H4sIAAAAAAAEAzyOUUvEMBCE/0rIs20VC7nek3BWuJcrtCcIIiFNtjXSduNuFI/j/rupBz4tw843
M2cJREjaogO5lW3dNc/trtaPTd3pQ3PU9cu+O8obOQOzGVfPAcXgJxBIwnkCG5FOAn48Rxa4iGDi
uyiekqNLHygG5gBWu37gIhB+JEATYixmz+yXUf+H5KnGQTR+Yrl9PcuHeApr4XryEXGcwATPucW5
uMqcgs1b+PwCjvtlwBRAV6W9S2R/V92qsiwzZXqTlRu1yYy5d1mvKuhLU1m1UYlhoO+/JSaaRMnL
2+UXAAD//wMA9nt9bhgBAAA=
headers:
access-control-allow-headers:
- Authorization, X-Databricks-Azure-Workspace-Resource-Id, X-Databricks-Org-Id,
Expand Down Expand Up @@ -207,7 +207,7 @@ interactions:
x-content-type-options:
- nosniff
x-request-id:
- 6fafe66e-16d3-40c4-be92-91e2f3601a54
- b1907444-7aba-4878-aa3d-b79eb4a9c787
status:
code: 404
message: Not Found
Expand Down
Loading