From 4bce3b2095ea21d07e676cd846bf22d9d127d572 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20FOUCRET?= Date: Wed, 13 Sep 2023 11:30:34 +0200 Subject: [PATCH] Cohere bench improvements. (#455) * Cohere bench improvements. * Add support for preloading of the vectors into the pagecache. * Add two different search operations (single client and multiple clients) * Fix operation naming. --- cohere_vector/challenges/default.json | 50 ++++++++++++++++--- cohere_vector/index-vectors-only-mapping.json | 24 +++++++++ ...n => index-vectors-with-text-mapping.json} | 9 ++-- cohere_vector/operations/default.json | 14 ++++-- cohere_vector/track.json | 12 +++-- 5 files changed, 91 insertions(+), 18 deletions(-) create mode 100644 cohere_vector/index-vectors-only-mapping.json rename cohere_vector/{index.json => index-vectors-with-text-mapping.json} (63%) diff --git a/cohere_vector/challenges/default.json b/cohere_vector/challenges/default.json index 2eade446..2276e377 100644 --- a/cohere_vector/challenges/default.json +++ b/cohere_vector/challenges/default.json @@ -17,10 +17,10 @@ "operation": "check-cluster-health" }, { - "name": "index-documents", - "operation": "index-documents", - "warmup-time-period": {{ bulk_warmup | default(40) | int }}, - "clients": {{bulk_indexing_clients | default(5)}} + "name": "initial-documents-indexing", + "operation": "initial-documents-indexing", + "warmup-time-period": {{ initial_indexing_bulk_warmup | default(40) | int }}, + "clients": {{ initial_indexing_bulk_indexing_clients | default(5) | int }} }, { "name": "refresh-after-index", @@ -44,16 +44,50 @@ } }, { - "name": "knn-search-10-100", + "name": "standalone-search-knn-10-100-single-client", "operation": "knn-search-10-100", "warmup-iterations": 100, - "iterations": 1000 + "iterations": {{ standalone_search_iterations | default(10000) | int }} }, { - "name": "knn-search-100-1000", + "name": "standalone-knn-search-100-1000-single-client", "operation": "knn-search-100-1000", 
"warmup-iterations": 100, - "iterations": 1000 + "iterations": {{ standalone_search_iterations | default(10000) | int }} + }, + { + "name": "standalone-search-knn-10-100-multiple-clients", + "operation": "knn-search-10-100", + "warmup-iterations": 100, + "clients": {{ standalone_search_clients | default(8) | int }}, + "iterations": {{ standalone_search_iterations | default(10000) | int }} + }, + { + "name": "standalone-search-knn-100-1000-multiple-clients", + "operation": "knn-search-100-1000", + "warmup-iterations": 100, + "clients": {{ standalone_search_clients | default(8) | int }}, + "iterations": {{ standalone_search_iterations | default(10000) | int }} + }, + { + "parallel": { + "tasks": [ + { + "name": "parallel-documents-indexing-bulk", + "operation": "parallel-documents-indexing", + "clients": {{ parallel_indexing_bulk_clients | default(1) | int }}, + "time-period": {{ parallel_indexing_time_period | default(1800) | int }}, + "target-throughput": {{ parallel_indexing_bulk_target_throughput | default(1) | int }} + }, + { + "name": "parallel-documents-indexing-search-knn-10-100", + "operation": "knn-search-10-100", + "clients": {{ parallel_indexing_search_clients | default(3) | int }}, + "time-period": {{ parallel_indexing_time_period | default(1800) | int }}, + "target-throughput": {{ parallel_indexing_search_target_throughput | default(100) | int }} + } + ] + } } ] } diff --git a/cohere_vector/index-vectors-only-mapping.json b/cohere_vector/index-vectors-only-mapping.json new file mode 100644 index 00000000..2913bbc9 --- /dev/null +++ b/cohere_vector/index-vectors-only-mapping.json @@ -0,0 +1,24 @@ +{ + "settings": { + {% if preload_pagecache %} + "index.store.preload": [ "vec", "vex", "vem"], + {% endif %} + "index.number_of_shards": {{number_of_shards | default(1)}}, + "index.number_of_replicas": {{number_of_replicas | default(0)}} + }, + "mappings": { + "dynamic": false, + "_source": { + "enabled": false + }, + "properties": { + "emb": { + "type": 
"dense_vector", + "element_type": "float", + "dims": 768, + "index": true, + "similarity": "dot_product" + } + } + } +} diff --git a/cohere_vector/index.json b/cohere_vector/index-vectors-with-text-mapping.json similarity index 63% rename from cohere_vector/index.json rename to cohere_vector/index-vectors-with-text-mapping.json index 5ced7039..b688157a 100644 --- a/cohere_vector/index.json +++ b/cohere_vector/index-vectors-with-text-mapping.json @@ -1,9 +1,10 @@ { "settings": { - "index": { - "number_of_shards": {{number_of_shards | default(1)}}, - "number_of_replicas": {{number_of_replicas | default(0)}} - } + {% if preload_pagecache %} + "index.store.preload": [ "vec", "vex", "vem"], + {% endif %} + "index.number_of_shards": {{number_of_shards | default(1)}}, + "index.number_of_replicas": {{number_of_replicas | default(0)}} }, "mappings": { "properties": { diff --git a/cohere_vector/operations/default.json b/cohere_vector/operations/default.json index 365eb3e7..ff52919a 100644 --- a/cohere_vector/operations/default.json +++ b/cohere_vector/operations/default.json @@ -12,10 +12,18 @@ "retry-until-success": true }, { - "name": "index-documents", + "name": "initial-documents-indexing", "operation-type": "bulk", - "bulk-size": {{bulk_size | default(500)}}, - "ingest-percentage": {{ingest_percentage | default(100)}} + "corpora": "cohere-initial-indexing", + "bulk-size": {{initial_indexing_bulk_size | default(500)}}, + "ingest-percentage": {{initial_indexing_ingest_percentage | default(100)}} +}, +{ + "name": "parallel-documents-indexing", + "operation-type": "bulk", + "corpora": "cohere-parallel-indexing", + "bulk-size": {{parallel_indexing_bulk_size | default(500)}}, + "ingest-percentage": {{parallel_indexing_ingest_percentage | default(100)}} }, { "name": "knn-search-10-100", diff --git a/cohere_vector/track.json b/cohere_vector/track.json index 6086d394..31b59f4e 100644 --- a/cohere_vector/track.json +++ b/cohere_vector/track.json @@ -6,12 +6,12 @@ "indices": [ { 
"name": "cohere", - "body": "index.json" + "body": "index-{{ mapping_type | default("vectors-only") }}-mapping.json" } ], "corpora": [ { - "name": "cohere", + "name": "cohere-initial-indexing", "base-url": "https://rally-tracks.elastic.co/cohere_vector", "documents": [ { @@ -73,7 +73,13 @@ "document-count": 3000000, "compressed-bytes": 19233451681, "uncompressed-bytes": 52331515902 - }, + } + ] + }, + { + "name": "cohere-parallel-indexing", + "base-url": "https://rally-tracks.elastic.co/cohere_vector", + "documents": [ { "source-file": "cohere-documents-11.json.bz2", "document-count": 2893221,