Skip to content

Commit

Permalink
[DO NOT MERGE] Remove vectorset storage key hack on KB creation (#2797)
Browse files Browse the repository at this point in the history
* Remove vectorset storage key hack on KB creation

* Fix test

* Add feature flag to ease rollout
  • Loading branch information
jotare authored Jan 28, 2025
1 parent 124e985 commit 9eb9146
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 4 deletions.
13 changes: 10 additions & 3 deletions nucliadb/src/nucliadb/ingest/orm/knowledgebox.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,11 +60,13 @@
VectorSetPurge,
)
from nucliadb_protos.resources_pb2 import Basic
+ from nucliadb_utils import const
from nucliadb_utils.settings import is_onprem_nucliadb
from nucliadb_utils.storages.storage import Storage
from nucliadb_utils.utilities import (
get_audit,
get_storage,
+ has_feature,
)

# XXX Eventually all these keys should be moved to datamanagers.kb
Expand Down Expand Up @@ -163,10 +165,15 @@ async def create(
# be able to force processing to always send vectorset ids and
# remove that bw/c behavior
#
- if len(semantic_models) == 1:
-     storage_key_kind = knowledgebox_pb2.VectorSetConfig.StorageKeyKind.LEGACY
- else:
+ if has_feature(const.Features.REMOVE_DEFAULT_VECTORSET):
      storage_key_kind = knowledgebox_pb2.VectorSetConfig.StorageKeyKind.VECTORSET_PREFIX
+ else:
+     if len(semantic_models) == 1:
+         storage_key_kind = knowledgebox_pb2.VectorSetConfig.StorageKeyKind.LEGACY
+     else:
+         storage_key_kind = (
+             knowledgebox_pb2.VectorSetConfig.StorageKeyKind.VECTORSET_PREFIX
+         )

for vectorset_id, semantic_model in semantic_models.items(): # type: ignore
# if this KB uses a matryoshka model, we can choose a different
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ async def test_create_knowledgebox(

vs = await datamanagers.vectorsets.get(txn, kbid=kbid, vectorset_id="my-semantic-model")
assert vs is not None
- assert vs.storage_key_kind == knowledgebox_pb2.VectorSetConfig.StorageKeyKind.LEGACY
+ assert vs.storage_key_kind == knowledgebox_pb2.VectorSetConfig.StorageKeyKind.VECTORSET_PREFIX


async def test_create_knowledgebox_with_multiple_vectorsets(
Expand Down
1 change: 1 addition & 0 deletions nucliadb_utils/src/nucliadb_utils/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,3 +81,4 @@ class Features:
IGNORE_EXTRACTED_IN_SEARCH = "nucliadb_ignore_extracted_in_search"
NIDX_READS = "nucliadb_nidx_reads"
FIELD_STATUS = "nucliadb_field_status"
+ REMOVE_DEFAULT_VECTORSET = "nucliadb_removing_default_vectorset__ingest"
4 changes: 4 additions & 0 deletions nucliadb_utils/src/nucliadb_utils/featureflagging.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,10 @@ class Settings(pydantic_settings.BaseSettings):
"rollout": 0,
"variants": {"environment": ["local"]},
},
+ const.Features.REMOVE_DEFAULT_VECTORSET: {
+ "rollout": 0,
+ "variants": {"environment": ["local"]},
+ },
}


Expand Down

0 comments on commit 9eb9146

Please sign in to comment.