Skip to content

Commit

Permalink
rdir crawler: handle no entries on rdir fetch
Browse files Browse the repository at this point in the history
And monitor orphan check errors
  • Loading branch information
IrakozeFD committed Apr 4, 2024
1 parent f34e23e commit 048db1a
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 13 deletions.
33 changes: 25 additions & 8 deletions oio/crawler/rdir/crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,9 +90,11 @@ def __init__(

self.passes = 0
self.errors = 0
self.nb_entries = 0
self.service_unavailable = 0
self.orphans = 0
self.deleted_orphans = 0
self.orphans_check_errors = 0
self.repaired = 0
self.unrecoverable_content = 0
self.last_report_time = 0
Expand Down Expand Up @@ -132,18 +134,21 @@ def report(self, tag, force=False):
since_last_rprt = (now - self.last_report_time) or 0.00001

self.logger.info(
"%s volume_id=%s pass=%d repaired=%d errors=%d "
"no_entries_on_fetch=%d service_unavailable=%d "
"unrecoverable=%d orphans=%d deleted_orphans=%d chunks=%d "
"%s volume_id=%s nb_entries=%d pass=%d repaired=%d "
"errors=%d service_unavailable=%d "
"unrecoverable=%d orphans=%d orphans_check_errors=%d "
"deleted_orphans=%d chunks=%d "
"rate_since_last_report=%.2f/s",
tag,
self.volume_id,
self.nb_entries,
self.passes,
self.repaired,
self.errors,
self.service_unavailable,
self.unrecoverable_content,
self.orphans,
self.orphans_check_errors,
self.deleted_orphans,
self.scanned_since_last_report,
self.scanned_since_last_report / since_last_rprt,
Expand Down Expand Up @@ -196,7 +201,7 @@ def _check_orphan(self, container_id, chunk_id, value, reqid):
force_master=True,
)
chunkshelper = ChunksHelper(chunks).filter(id=chunk_id, host=self.volume_id)
if len(chunkshelper.chunks) > 0:
if len(chunkshelper) > 0:
return
except exc.NotFound as err:
self.logger.debug(
Expand All @@ -206,6 +211,7 @@ def _check_orphan(self, container_id, chunk_id, value, reqid):
container_id,
err,
)

# The chunk does not exist on the rawx
# and we just confirmed that it is not referenced in
# any meta2 database, we can deindex the chunk reference
Expand Down Expand Up @@ -249,9 +255,16 @@ def _rebuild_chunk(self, container_id, chunk_id, value, reqid):
self.error(container_id, chunk_id, error, reqid=reqid)
elif isinstance(err, exc.OrphanChunk):
self.orphans += 1
# Deindex the chunk if not referenced in any meta2 db
self._check_orphan(container_id, chunk_id, value, reqid)

try:
# Deindex the chunk if not referenced in any meta2 db
self._check_orphan(container_id, chunk_id, value, reqid)
except exc.OioException as oio_err:
self.orphans_check_errors += 1
error = (
f"{oio_err} "
+ "failed to verify orphan chunk is referenced in meta2"
)
self.error(container_id, chunk_id, error, reqid=reqid)
elif isinstance(err, exc.ContentDrained):
self.orphans += 1
error = f"{err}, chunk considered as orphan"
Expand Down Expand Up @@ -280,7 +293,9 @@ def crawl_volume(self):
self.report("starting", force=True)
# reset crawler stats
self.errors = 0
self.nb_entries = 0
self.orphans = 0
self.orphans_check_errors = 0
self.deleted_orphans = 0
self.repaired = 0
self.unrecoverable_content = 0
Expand All @@ -292,8 +307,8 @@ def crawl_volume(self):
if self.use_marker:
marker = self.current_marker
entries = self.index_client.chunk_fetch(self.volume_id, start_after=marker)

for container_id, chunk_id, value in entries:
self.nb_entries += 1
if self._stop_requested.is_set():
self.logger.info("Stop asked")
break
Expand Down Expand Up @@ -324,6 +339,8 @@ def crawl_volume(self):
f"Failed to write progress marker: {err}"
) from err
self.report("running")
if self.nb_entries == 0:
self.logger.debug("No entries found for volume: %s", self.volume_path)
except (exc.ServiceBusy, exc.VolumeException, exc.NotFound) as err:
self.logger.debug("Service busy or not available: %s", err)
self.service_unavailable += 1
Expand Down
26 changes: 21 additions & 5 deletions tests/functional/rdir/test_crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,7 +341,13 @@ def test_rdir_crawler_check_marker_work_as_expected(self):
)
rdir_crawler.crawl_volume()
# If there are no errors
if rdir_crawler.service_unavailable == 0 and rdir_crawler.errors == 0:
if not any(
(
rdir_crawler.service_unavailable,
rdir_crawler.errors,
not rdir_crawler.nb_entries,
)
):
# Check that one chunk is repaired
self.assertGreaterEqual(rdir_crawler.repaired, 1)
_, new_chunks_a = self.api.container.content_locate(
Expand Down Expand Up @@ -376,7 +382,13 @@ def test_rdir_crawler_check_marker_work_as_expected(self):
# The marker has been reset at the end of the last crawl
rdir_crawler.crawl_volume()
# If there are no errors
if rdir_crawler.service_unavailable == 0 and rdir_crawler.errors == 0:
if not any(
(
rdir_crawler.service_unavailable,
rdir_crawler.errors,
not rdir_crawler.nb_entries,
)
):
# Check that one chunk is repaired
self.assertGreaterEqual(rdir_crawler.repaired, 1)
# we should be able to find the chunk not selected for rebuild
Expand Down Expand Up @@ -441,7 +453,8 @@ def test_rdir_orphan_entry_deindexed_object_exists(self):
# Object creation
container = "rdir_crawler_m_chunks_" + random_str(6)
object_name = "m_chunk-" + random_str(8)
_ = self._create(container, object_name)
chunks = self._create(container, object_name)
chunk_ids = [chunk["url"].split("/", 3)[3] for chunk in chunks]
cid = cid_from_name(self.account, container)
# Retrieve version and content id in order
# to register a false entry (orphan chunk) into rdir repertory
Expand All @@ -450,12 +463,15 @@ def test_rdir_orphan_entry_deindexed_object_exists(self):
cid=cid,
force_master=True,
)
chunk_id = random_id(63)
while True:
chunk_id = random_id(63)
if chunk_id not in chunk_ids:
break
content_id = obj_meta["id"]
content_ver = obj_meta["version"]
self._test_orphan_entry(object_name, cid, chunk_id, content_id, content_ver)

def test_rdir_orphan_entry_deindexed_object_does_not_exists(self):
def test_rdir_orphan_entry_deindexed_object_does_not_exist(self):
"""
Test if an orphan chunk belonging to an object which does
not exist is deindexed
Expand Down

0 comments on commit 048db1a

Please sign in to comment.