Skip to content

Commit b5d1de8

Browse files
authored
Add possibility to specify config_file instead of configuration name pattern, fix --describe (#44)
1 parent f92dd16 commit b5d1de8

File tree

2 files changed

+166
-25
lines changed

2 files changed

+166
-25
lines changed

README.md

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,15 @@ python run.py --engines redis-default-simple --datasets random-100
225225
python run.py --engines redis-default-simple --datasets glove-25-angular
226226
python run.py --engines "*-m-16-*" --datasets "glove-*"
227227

228+
# Using custom engine configurations from a JSON file
229+
python run.py --engines-file custom_engines.json --datasets glove-25-angular
230+
231+
# Get information about available engines (with pattern matching)
232+
python run.py --engines "*redis*" --describe engines --verbose
233+
234+
# Get information about engines from a custom file
235+
python run.py --engines-file custom_engines.json --describe engines --verbose
236+
228237
# Docker usage (recommended)
229238
docker run --rm -v $(pwd)/results:/app/results --network=host \
230239
redis/vector-db-benchmark:latest \
@@ -237,6 +246,62 @@ python run.py --help
237246
The command accepts wildcard patterns for both engines and datasets.
238247
Results of the benchmarks are stored in the `./results/` directory.
239248

249+
## Using Custom Engine Configurations
250+
251+
The benchmark tool supports two ways to specify which engine configurations to use:
252+
253+
### 1. Pattern Matching (Default)
254+
Use the `--engines` flag with wildcard patterns to select configurations from the `experiments/configurations/` directory:
255+
256+
```bash
257+
python run.py --engines "*redis*" --datasets glove-25-angular
258+
python run.py --engines "qdrant-m-*" --datasets random-100
259+
```
260+
261+
### 2. Custom Configuration File
262+
Use the `--engines-file` flag to specify a JSON file containing custom engine configurations:
263+
264+
```bash
265+
python run.py --engines-file my_engines.json --datasets glove-25-angular
266+
```
267+
268+
The JSON file should contain an array of engine configuration objects. Each configuration must have a `name` field and follow the same structure as configurations in `experiments/configurations/`:
269+
270+
```json
271+
[
272+
{
273+
"name": "my-custom-redis-config",
274+
"engine": "redis",
275+
"connection_params": {},
276+
"collection_params": {
277+
"algorithm": "hnsw",
278+
"data_type": "FLOAT32",
279+
"hnsw_config": {
280+
"M": 16,
281+
"DISTANCE_METRIC": "L2",
282+
"EF_CONSTRUCTION": 200
283+
}
284+
},
285+
"search_params": [
286+
{
287+
"parallel": 1,
288+
"top": 10,
289+
"search_params": {
290+
"ef": 100,
291+
"data_type": "FLOAT32"
292+
}
293+
}
294+
],
295+
"upload_params": {
296+
"parallel": 16,
297+
"data_type": "FLOAT32"
298+
}
299+
}
300+
]
301+
```
302+
303+
**Note:** You cannot use both `--engines` and `--engines-file` at the same time.
304+
240305
## How to update benchmark parameters?
241306

242307
Each engine has a configuration file, which is used to define the parameters for the benchmark.

run.py

Lines changed: 101 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
import fnmatch
2+
import json
3+
import os
24
import traceback
35
import warnings
46
from typing import List
@@ -17,9 +19,54 @@
1719
app = typer.Typer()
1820

1921

22+
def load_engines(engines: List[str], engines_file: str = None) -> dict:
    """Select engine configurations by name pattern or from a JSON file.

    Args:
        engines: ``fnmatch`` patterns matched against the configuration names
            returned by ``read_engine_configs()``. The value ``["*"]`` is
            treated as "not specified" when checking for conflicting options.
        engines_file: Optional path to a JSON file containing a list of engine
            configuration objects, each with a string ``name`` key. When it is
            given, the patterns in ``engines`` are not applied.

    Returns:
        A dict mapping configuration name to its configuration dict.

    Raises:
        typer.Exit: With exit code 1 when both selectors are supplied, or when
            the file is missing, unreadable, not valid JSON, or does not hold
            a list of objects that each carry a string ``name``.
    """
    if engines_file is None:
        # Pattern matching against the default configurations.
        all_engines = read_engine_configs()
        return {
            name: config
            for name, config in all_engines.items()
            if any(fnmatch.fnmatch(name, pattern) for pattern in engines)
        }

    if engines != ["*"]:
        typer.echo("Error: Cannot use both --engines and --engines-file at the same time.", err=True)
        raise typer.Exit(1)

    # Keep the try body limited to I/O and parsing. typer.Exit derives from
    # RuntimeError, so raising it under a broad `except Exception` would be
    # caught and reported a second time as a bogus "Error reading" message.
    try:
        with open(engines_file, 'r', encoding='utf-8') as f:
            engines_from_file = json.load(f)
    except json.JSONDecodeError as e:
        typer.echo(f"Error: Invalid JSON in engines file '{engines_file}': {e}", err=True)
        raise typer.Exit(1)
    except FileNotFoundError:
        typer.echo(f"Error: Engines file '{engines_file}' not found.", err=True)
        raise typer.Exit(1)
    except (OSError, UnicodeDecodeError) as e:
        typer.echo(f"Error reading engines file '{engines_file}': {e}", err=True)
        raise typer.Exit(1)

    # Iterating a JSON object would walk its keys (strings), so reject
    # anything that is not a list up front with a clear message.
    if not isinstance(engines_from_file, list):
        typer.echo(
            f"Error: Engines file '{engines_file}' must contain a JSON array of engine configurations.",
            err=True,
        )
        raise typer.Exit(1)

    # Convert the list of engine configs to a dictionary keyed by name.
    selected_engines = {}
    for config in engines_from_file:
        if not isinstance(config, dict) or 'name' not in config:
            typer.echo(f"Error: Engine configuration missing 'name' field in {engines_file}", err=True)
            raise typer.Exit(1)
        name = config['name']
        if not isinstance(name, str):
            # Names are used as dict keys and fnmatch subjects elsewhere.
            typer.echo(f"Error: Engine configuration 'name' must be a string in {engines_file}", err=True)
            raise typer.Exit(1)
        selected_engines[name] = config

    return selected_engines
64+
65+
2066
@app.command()
2167
def run(
2268
engines: List[str] = typer.Option(["*"]),
69+
engines_file: str = typer.Option(None, help="Path to JSON file containing engine configurations to use instead of searching by pattern"),
2370
datasets: List[str] = typer.Option(["*"]),
2471
parallels: List[int] = typer.Option([]),
2572
host: str = "localhost",
@@ -36,8 +83,14 @@ def run(
3683
verbose: bool = typer.Option(False, "--verbose", "-v", help="Show detailed information when using --describe"),
3784
):
3885
"""
39-
Example:
86+
Examples:
87+
# Use pattern matching to select engines (original behavior)
4088
python3 run.py --engines *-m-16-* --engines qdrant-* --datasets glove-*
89+
90+
# Use engines from a specific JSON file
91+
python3 run.py --engines-file my_engines.json --datasets glove-*
92+
93+
# Describe available options
4194
python3 run.py --describe datasets
4295
python3 run.py --describe engines --verbose
4396
"""
@@ -47,20 +100,22 @@ def run(
47100
describe_datasets(datasets[0] if datasets != ["*"] else "*", verbose)
48101
return
49102
elif describe.lower() == "engines":
50-
describe_engines(engines[0] if engines != ["*"] else "*", verbose)
103+
# Load engines using same logic as main function
104+
selected_engines = load_engines(engines, engines_file)
105+
# For describe engines, we'll pass all loaded engines or filter by pattern
106+
if engines_file is not None:
107+
# When using engines_file, show all engines from the file
108+
describe_engines_with_configs(selected_engines, "*", verbose)
109+
else:
110+
# When using pattern matching, use the pattern
111+
describe_engines_with_configs(selected_engines, engines[0] if engines != ["*"] else "*", verbose)
51112
return
52113
else:
53114
typer.echo(f"Error: Unknown describe target '{describe}'. Use 'datasets' or 'engines'.", err=True)
54115
raise typer.Exit(1)
55116

56-
all_engines = read_engine_configs()
57117
all_datasets = read_dataset_config()
58-
59-
selected_engines = {
60-
name: config
61-
for name, config in all_engines.items()
62-
if any(fnmatch.fnmatch(name, engine) for engine in engines)
63-
}
118+
selected_engines = load_engines(engines, engines_file)
64119

65120
selected_datasets = {
66121
name: config
@@ -263,18 +318,12 @@ def get_sort_key(item):
263318
typer.echo("\nUse --verbose for detailed information")
264319

265320

266-
def describe_engines(filter_pattern: str = "*", verbose: bool = False):
267-
"""Display information about available engines."""
268-
try:
269-
all_engines = read_engine_configs()
270-
except Exception as e:
271-
typer.echo(f"Error reading engine configuration: {e}", err=True)
272-
raise typer.Exit(1)
273-
321+
def describe_engines_with_configs(engines_dict: dict, filter_pattern: str = "*", verbose: bool = False):
322+
"""Display information about engines from provided configurations."""
274323
# Filter engines
275324
filtered_engines = {
276325
name: config
277-
for name, config in all_engines.items()
326+
for name, config in engines_dict.items()
278327
if fnmatch.fnmatch(name, filter_pattern)
279328
}
280329

@@ -296,11 +345,23 @@ def describe_engines(filter_pattern: str = "*", verbose: bool = False):
296345
if 'search_params' in config:
297346
search_params = config['search_params']
298347
typer.echo(f" Search Params:")
299-
for param, values in search_params.items():
300-
if isinstance(values, list):
301-
typer.echo(f" {param}: {values}")
302-
else:
303-
typer.echo(f" {param}: {values}")
348+
if isinstance(search_params, list):
349+
for i, param_config in enumerate(search_params):
350+
typer.echo(f" Config {i+1}:")
351+
for param, value in param_config.items():
352+
if isinstance(value, dict):
353+
typer.echo(f" {param}:")
354+
for subparam, subvalue in value.items():
355+
typer.echo(f" {subparam}: {subvalue}")
356+
else:
357+
typer.echo(f" {param}: {value}")
358+
else:
359+
# Legacy format - dict
360+
for param, values in search_params.items():
361+
if isinstance(values, list):
362+
typer.echo(f" {param}: {values}")
363+
else:
364+
typer.echo(f" {param}: {values}")
304365
if 'upload_params' in config:
305366
upload_params = config['upload_params']
306367
typer.echo(f" Upload Params:")
@@ -313,12 +374,27 @@ def describe_engines(filter_pattern: str = "*", verbose: bool = False):
313374
for name, config in sorted(filtered_engines.items()):
314375
engine_type = config.get('engine', 'N/A')
315376
module = config.get('module', 'N/A')
316-
typer.echo(f"{name:<40} {engine_type:<15} {module:<25}")
377+
display_name = name[:37] + "..." if len(name) > 40 else name
378+
display_engine = engine_type[:12] + "..." if len(engine_type) > 15 else engine_type
379+
display_module = module[:22] + "..." if len(module) > 25 else module
380+
typer.echo(f"{display_name:<40} {display_engine:<15} {display_module:<25}")
317381

318382
typer.echo(f"\nTotal: {len(filtered_engines)} engines")
319383
if filter_pattern != "*":
320384
typer.echo(f"Filter: '{filter_pattern}'")
321-
typer.echo("\nUse --verbose for detailed information")
385+
if not verbose:
386+
typer.echo("\nUse --verbose for detailed information")
387+
388+
389+
def describe_engines(filter_pattern: str = "*", verbose: bool = False):
    """Describe the engines returned by ``read_engine_configs()``.

    Reads the configurations and forwards them, together with
    ``filter_pattern`` and ``verbose``, to ``describe_engines_with_configs``.
    If reading fails, an error is echoed to stderr and the command exits
    with status 1.
    """
    try:
        configs = read_engine_configs()
    except Exception as e:
        typer.echo(f"Error reading engine configuration: {e}", err=True)
        raise typer.Exit(1)
    else:
        # Runs only after a successful load; errors raised while describing
        # are not intercepted by the handler above.
        describe_engines_with_configs(configs, filter_pattern, verbose)
322398

323399

324400
if __name__ == "__main__":

0 commit comments

Comments
 (0)