Add possibility to specify config_file instead of configuration name pattern, fix --describe (#44)

mpozniak95 · web-flow · commit b5d1de847bf2 · 2025-09-25T18:03:29.000+01:00
diff --git a/README.md b/README.md
@@ -225,6 +225,15 @@ python run.py --engines redis-default-simple --datasets random-100
 python run.py --engines redis-default-simple --datasets glove-25-angular
 python run.py --engines "*-m-16-*" --datasets "glove-*"
 
+# Using custom engine configurations from a JSON file
+python run.py --engines-file custom_engines.json --datasets glove-25-angular
+
+# Get information about available engines (with pattern matching)
+python run.py --engines "*redis*" --describe engines --verbose
+
+# Get information about engines from a custom file
+python run.py --engines-file custom_engines.json --describe engines --verbose
+
 # Docker usage (recommended)
 docker run --rm -v $(pwd)/results:/app/results --network=host \
   redis/vector-db-benchmark:latest \
@@ -237,6 +246,62 @@ python run.py --help
 Command allows you to specify wildcards for engines and datasets.
 Results of the benchmarks are stored in the `./results/` directory.
 
+## Using Custom Engine Configurations
+
+The benchmark tool supports two ways to specify which engine configurations to use:
+
+### 1. Pattern Matching (Default)
+Use the `--engines` flag with wildcard patterns to select configurations from the `experiments/configurations/` directory:
+
+```bash
+python run.py --engines "*redis*" --datasets glove-25-angular
+python run.py --engines "qdrant-m-*" --datasets random-100
+```
+
+### 2. Custom Configuration File
+Use the `--engines-file` flag to specify a JSON file containing custom engine configurations:
+
+```bash
+python run.py --engines-file my_engines.json --datasets glove-25-angular
+```
+
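+The JSON file should contain an array of engine configuration objects. Each configuration must have a `name` field and follow the same structure as configurations in `experiments/configurations/`. Names should be unique: if two configurations share a name, the later one replaces the earlier one: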
+
+```json
+[
+  {
+    "name": "my-custom-redis-config",
+    "engine": "redis",
+    "connection_params": {},
+    "collection_params": {
+      "algorithm": "hnsw",
+      "data_type": "FLOAT32",
+      "hnsw_config": {
+        "M": 16,
+        "DISTANCE_METRIC": "L2",
+        "EF_CONSTRUCTION": 200
+      }
+    },
+    "search_params": [
+      {
+        "parallel": 1,
+        "top": 10,
+        "search_params": {
+          "ef": 100,
+          "data_type": "FLOAT32"
+        }
+      }
+    ],
+    "upload_params": {
+      "parallel": 16,
+      "data_type": "FLOAT32"
+    }
+  }
+]
+```
+
+**Note:** You cannot use both `--engines` and `--engines-file` at the same time.
+
 ## How to update benchmark parameters?
 
 Each engine has a configuration file, which is used to define the parameters for the benchmark.
diff --git a/run.py b/run.py
@@ -1,4 +1,6 @@
 import fnmatch
+import json
+import os
 import traceback
 import warnings
 from typing import List
@@ -17,9 +19,54 @@
 app = typer.Typer()
 
 
+def load_engines(engines: List[str], engines_file: str = None) -> dict:
+    """Load engine configurations from file or pattern matching."""
+    # ["*"] is the CLI default for --engines, so any other value means patterns were given explicitly; reject mixing them with --engines-file
+    if engines != ["*"] and engines_file is not None:
+        typer.echo("Error: Cannot use both --engines and --engines-file at the same time.", err=True)
+        raise typer.Exit(1)
+
+    # Choose the source: a JSON file when engines_file is set, otherwise read_engine_configs() filtered by pattern
+    if engines_file is not None:
+        # File mode: the path must exist before we try to parse it
+        if not os.path.exists(engines_file):
+            typer.echo(f"Error: Engines file '{engines_file}' not found.", err=True)
+            raise typer.Exit(1)
+        
+        try:
+            with open(engines_file, 'r') as f:
+                engines_from_file = json.load(f)
+            
+            # Index the configs by their 'name'; a repeated name overwrites the earlier entry
+            selected_engines = {}
+            for config in engines_from_file:
+                if 'name' not in config:
+                    typer.echo(f"Error: Engine configuration missing 'name' field in {engines_file}", err=True)
+                    raise typer.Exit(1)
+                selected_engines[config['name']] = config
+                
+        except json.JSONDecodeError as e:
+            typer.echo(f"Error: Invalid JSON in engines file '{engines_file}': {e}", err=True)
+            raise typer.Exit(1)
+        except Exception as e:  # NOTE(review): if typer.Exit derives from Exception, the Exit raised above for a missing 'name' is caught here and a second message is printed - confirm
+            typer.echo(f"Error reading engines file '{engines_file}': {e}", err=True)
+            raise typer.Exit(1)
+    else:
+        # Pattern mode (original behavior): keep configs whose name matches any fnmatch pattern in engines
+        all_engines = read_engine_configs()
+        selected_engines = {
+            name: config
+            for name, config in all_engines.items()
+            if any(fnmatch.fnmatch(name, engine) for engine in engines)
+        }
+    
+    return selected_engines
+
+
 @app.command()
 def run(
     engines: List[str] = typer.Option(["*"]),
+    engines_file: str = typer.Option(None, help="Path to JSON file containing engine configurations to use instead of searching by pattern"),
     datasets: List[str] = typer.Option(["*"]),
     parallels: List[int] = typer.Option([]),
     host: str = "localhost",
@@ -36,8 +83,14 @@ def run(
     verbose: bool = typer.Option(False, "--verbose", "-v", help="Show detailed information when using --describe"),
 ):
     """
-    Example:
+    Examples:
+        # Use pattern matching to select engines (original behavior)
         python3 run.py --engines *-m-16-* --engines qdrant-* --datasets glove-*
+        
+        # Use engines from a specific JSON file
+        python3 run.py --engines-file my_engines.json --datasets glove-*
+        
+        # Describe available options
         python3 run.py --describe datasets
         python3 run.py --describe engines --verbose
     """
@@ -47,20 +100,22 @@ def run(
             describe_datasets(datasets[0] if datasets != ["*"] else "*", verbose)
             return
         elif describe.lower() == "engines":
-            describe_engines(engines[0] if engines != ["*"] else "*", verbose)
+            # Load engines using same logic as main function
+            selected_engines = load_engines(engines, engines_file)
+            # For describe engines, we'll pass all loaded engines or filter by pattern
+            if engines_file is not None:
+                # When using engines_file, show all engines from the file
+                describe_engines_with_configs(selected_engines, "*", verbose)
+            else:
+                # When using pattern matching, use the pattern
+                describe_engines_with_configs(selected_engines, engines[0] if engines != ["*"] else "*", verbose)
             return
         else:
             typer.echo(f"Error: Unknown describe target '{describe}'. Use 'datasets' or 'engines'.", err=True)
             raise typer.Exit(1)
 
-    all_engines = read_engine_configs()
     all_datasets = read_dataset_config()
-
-    selected_engines = {
-        name: config
-        for name, config in all_engines.items()
-        if any(fnmatch.fnmatch(name, engine) for engine in engines)
-    }
+    selected_engines = load_engines(engines, engines_file)
 
     selected_datasets = {
         name: config
@@ -263,18 +318,12 @@ def get_sort_key(item):
     typer.echo("\nUse --verbose for detailed information")
 
 
-def describe_engines(filter_pattern: str = "*", verbose: bool = False):
-    """Display information about available engines."""
-    try:
-        all_engines = read_engine_configs()
-    except Exception as e:
-        typer.echo(f"Error reading engine configuration: {e}", err=True)
-        raise typer.Exit(1)
-
+def describe_engines_with_configs(engines_dict: dict, filter_pattern: str = "*", verbose: bool = False):
+    """Display information about engines from provided configurations."""
     # Filter engines
     filtered_engines = {
         name: config
-        for name, config in all_engines.items()
+        for name, config in engines_dict.items()
         if fnmatch.fnmatch(name, filter_pattern)
     }
 
@@ -296,11 +345,23 @@ def describe_engines(filter_pattern: str = "*", verbose: bool = False):
             if 'search_params' in config:
                 search_params = config['search_params']
                 typer.echo(f"   Search Params:")
-                for param, values in search_params.items():
-                    if isinstance(values, list):
-                        typer.echo(f"     {param}: {values}")
-                    else:
-                        typer.echo(f"     {param}: {values}")
+                if isinstance(search_params, list):
+                    for i, param_config in enumerate(search_params):
+                        typer.echo(f"     Config {i+1}:")
+                        for param, value in param_config.items():
+                            if isinstance(value, dict):
+                                typer.echo(f"       {param}:")
+                                for subparam, subvalue in value.items():
+                                    typer.echo(f"         {subparam}: {subvalue}")
+                            else:
+                                typer.echo(f"       {param}: {value}")
+                else:
+                    # Legacy format - dict
+                    for param, values in search_params.items():
+                        if isinstance(values, list):
+                            typer.echo(f"     {param}: {values}")
+                        else:
+                            typer.echo(f"     {param}: {values}")
             if 'upload_params' in config:
                 upload_params = config['upload_params']
                 typer.echo(f"   Upload Params:")
@@ -313,12 +374,27 @@ def describe_engines(filter_pattern: str = "*", verbose: bool = False):
         for name, config in sorted(filtered_engines.items()):
             engine_type = config.get('engine', 'N/A')
             module = config.get('module', 'N/A')
-            typer.echo(f"{name:<40} {engine_type:<15} {module:<25}")
+            display_name = name[:37] + "..." if len(name) > 40 else name
+            display_engine = engine_type[:12] + "..." if len(engine_type) > 15 else engine_type
+            display_module = module[:22] + "..." if len(module) > 25 else module
+            typer.echo(f"{display_name:<40} {display_engine:<15} {display_module:<25}")
 
     typer.echo(f"\nTotal: {len(filtered_engines)} engines")
     if filter_pattern != "*":
         typer.echo(f"Filter: '{filter_pattern}'")
-    typer.echo("\nUse --verbose for detailed information")
+    if not verbose:
+        typer.echo("\nUse --verbose for detailed information")
+
+
+def describe_engines(filter_pattern: str = "*", verbose: bool = False):
+    """Display information about available engines using default configuration loading."""
+    try:
+        all_engines = read_engine_configs()  # default loader; no custom engines file is involved here
+    except Exception as e:
+        typer.echo(f"Error reading engine configuration: {e}", err=True)
+        raise typer.Exit(1)  # report the failure on stderr, then stop with exit code 1
+    
+    describe_engines_with_configs(all_engines, filter_pattern, verbose)  # filtering and printing are delegated to the shared helper
 
 
 if __name__ == "__main__":