From 9eb9146bc36c066ab3ba83fca4c947003dda16c0 Mon Sep 17 00:00:00 2001 From: Joan Antoni RE Date: Tue, 28 Jan 2025 15:40:49 +0100 Subject: [PATCH] [DO NOT MERGE] Remove vectorset storage key hack on KB creation (#2797) * Remove vectorset storage key hack on KB creation * Fix test * Add feature flag to ease rollout --- nucliadb/src/nucliadb/ingest/orm/knowledgebox.py | 13 ++++++++++--- .../ingest/integration/orm/test_orm_knowledgebox.py | 2 +- nucliadb_utils/src/nucliadb_utils/const.py | 1 + .../src/nucliadb_utils/featureflagging.py | 4 ++++ 4 files changed, 16 insertions(+), 4 deletions(-) diff --git a/nucliadb/src/nucliadb/ingest/orm/knowledgebox.py b/nucliadb/src/nucliadb/ingest/orm/knowledgebox.py index 5d79ea0152..c354c0eb36 100644 --- a/nucliadb/src/nucliadb/ingest/orm/knowledgebox.py +++ b/nucliadb/src/nucliadb/ingest/orm/knowledgebox.py @@ -60,11 +60,13 @@ VectorSetPurge, ) from nucliadb_protos.resources_pb2 import Basic +from nucliadb_utils import const from nucliadb_utils.settings import is_onprem_nucliadb from nucliadb_utils.storages.storage import Storage from nucliadb_utils.utilities import ( get_audit, get_storage, + has_feature, ) # XXX Eventually all these keys should be moved to datamanagers.kb @@ -163,10 +165,15 @@ async def create( # be able to force processing to always send vectorset ids and # remove that bw/c behavior # - if len(semantic_models) == 1: - storage_key_kind = knowledgebox_pb2.VectorSetConfig.StorageKeyKind.LEGACY - else: + if has_feature(const.Features.REMOVE_DEFAULT_VECTORSET): storage_key_kind = knowledgebox_pb2.VectorSetConfig.StorageKeyKind.VECTORSET_PREFIX + else: + if len(semantic_models) == 1: + storage_key_kind = knowledgebox_pb2.VectorSetConfig.StorageKeyKind.LEGACY + else: + storage_key_kind = ( + knowledgebox_pb2.VectorSetConfig.StorageKeyKind.VECTORSET_PREFIX + ) for vectorset_id, semantic_model in semantic_models.items(): # type: ignore # if this KB uses a matryoshka model, we can choose a different diff --git a/nucliadb/tests/ingest/integration/orm/test_orm_knowledgebox.py b/nucliadb/tests/ingest/integration/orm/test_orm_knowledgebox.py index 033629060c..c90dd825e4 100644 --- a/nucliadb/tests/ingest/integration/orm/test_orm_knowledgebox.py +++ b/nucliadb/tests/ingest/integration/orm/test_orm_knowledgebox.py @@ -83,7 +83,7 @@ async def test_create_knowledgebox( vs = await datamanagers.vectorsets.get(txn, kbid=kbid, vectorset_id="my-semantic-model") assert vs is not None - assert vs.storage_key_kind == knowledgebox_pb2.VectorSetConfig.StorageKeyKind.LEGACY + assert vs.storage_key_kind == knowledgebox_pb2.VectorSetConfig.StorageKeyKind.VECTORSET_PREFIX async def test_create_knowledgebox_with_multiple_vectorsets( diff --git a/nucliadb_utils/src/nucliadb_utils/const.py b/nucliadb_utils/src/nucliadb_utils/const.py index 64dbea2c31..9c04ff31e4 100644 --- a/nucliadb_utils/src/nucliadb_utils/const.py +++ b/nucliadb_utils/src/nucliadb_utils/const.py @@ -81,3 +81,4 @@ class Features: IGNORE_EXTRACTED_IN_SEARCH = "nucliadb_ignore_extracted_in_search" NIDX_READS = "nucliadb_nidx_reads" FIELD_STATUS = "nucliadb_field_status" + REMOVE_DEFAULT_VECTORSET = "nucliadb_removing_default_vectorset__ingest" diff --git a/nucliadb_utils/src/nucliadb_utils/featureflagging.py b/nucliadb_utils/src/nucliadb_utils/featureflagging.py index 9d941647cc..b538a2513c 100644 --- a/nucliadb_utils/src/nucliadb_utils/featureflagging.py +++ b/nucliadb_utils/src/nucliadb_utils/featureflagging.py @@ -73,6 +73,10 @@ class Settings(pydantic_settings.BaseSettings): "rollout": 0, "variants": {"environment": ["local"]}, }, + const.Features.REMOVE_DEFAULT_VECTORSET: { + "rollout": 0, + "variants": {"environment": ["local"]}, + }, }