feat: add text field weights support to TextQuery (#360)

bsbodden · bsbodden · commit 3d45afdd4b14 · 2025-09-25T14:57:11.000-07:00
Adds the ability to specify weights for text fields in RedisVL queries,
enabling users to prioritize certain fields over others in search results.

- Support dictionary of field:weight mappings in TextQuery constructor
- Maintain backward compatibility with single string field names
- Add set_field_weights() method for dynamic weight updates
- Generate proper Redis query syntax with weight modifiers
- Add comprehensive validation that weights are positive numbers

Example usage:
```python
# Single field with weight
query = TextQuery(text="search", text_field_name={"title": 5.0})

# Multiple fields with weights
query = TextQuery(
    text="search",
    text_field_name={"title": 3.0, "content": 1.5, "tags": 1.0}
)
```
diff --git a/redisvl/query/query.py b/redisvl/query/query.py
@@ -801,7 +801,7 @@ class TextQuery(BaseQuery):
     def __init__(
         self,
         text: str,
-        text_field_name: str,
+        text_field_name: Union[str, Dict[str, float]],
         text_scorer: str = "BM25STD",
         filter_expression: Optional[Union[str, FilterExpression]] = None,
         return_fields: Optional[List[str]] = None,
@@ -817,7 +817,8 @@ def __init__(
 
         Args:
             text (str): The text string to perform the text search with.
-            text_field_name (str): The name of the document field to perform text search on.
+            text_field_name (Union[str, Dict[str, float]]): The name of the document field to perform
+                text search on, or a dictionary mapping field names to their weights.
             text_scorer (str, optional): The text scoring algorithm to use.
                 Defaults to BM25STD. Options are {TFIDF, BM25STD, BM25, TFIDF.DOCNORM, DISMAX, DOCSCORE}.
                 See https://redis.io/docs/latest/develop/interact/search-and-query/advanced-concepts/scoring/
@@ -849,7 +850,7 @@ def __init__(
             TypeError: If stopwords is not a valid iterable set of strings.
         """
         self._text = text
-        self._text_field_name = text_field_name
+        self._field_weights = self._parse_field_weights(text_field_name)
         self._num_results = num_results
 
         self._set_stopwords(stopwords)
@@ -934,15 +935,97 @@ def _tokenize_and_escape_query(self, user_query: str) -> str:
             [token for token in tokens if token and token not in self._stopwords]
         )
 
+    def _parse_field_weights(
+        self, field_spec: Union[str, Dict[str, float]]
+    ) -> Dict[str, float]:
+        """Normalize a field specification into a validated weights dictionary.
+
+        Args:
+            field_spec: Either a single field name (given weight 1.0) or a
+                non-empty dictionary of field:weight mappings.
+
+        Returns:
+            A new dictionary mapping field names to their weights.
+
+        Raises:
+            TypeError: If field_spec is neither a string nor a dictionary, if a
+                field name is not a string, or if a weight is not an int/float
+                (booleans are rejected).
+            ValueError: If the dictionary is empty or a weight is not a
+                positive number (zero, negative, or NaN).
+        """
+        if isinstance(field_spec, str):
+            return {field_spec: 1.0}
+        if not isinstance(field_spec, dict):
+            raise TypeError(
+                "text_field_name must be a string or dictionary of field:weight mappings"
+            )
+        # An empty mapping would later produce an empty "()" query clause.
+        if not field_spec:
+            raise ValueError("text_field_name must contain at least one field")
+
+        for field, weight in field_spec.items():
+            if not isinstance(field, str):
+                raise TypeError(f"Field name must be a string, got {type(field)}")
+            # bool is a subclass of int, so it has to be excluded explicitly.
+            if isinstance(weight, bool) or not isinstance(weight, (int, float)):
+                raise TypeError(
+                    f"Weight for field '{field}' must be numeric, got {type(weight)}"
+                )
+            # `not weight > 0` also rejects NaN, which `weight <= 0` lets through.
+            if not weight > 0:
+                raise ValueError(
+                    f"Weight for field '{field}' must be positive, got {weight}"
+                )
+
+        # Return a copy so later mutation of the caller's dict cannot bypass
+        # validation or silently change an already-configured query.
+        return dict(field_spec)
+
+    def set_field_weights(self, field_weights: Union[str, Dict[str, float]]):
+        """Replace the field weights used by this query.
+
+        The new specification is validated first; if validation raises, the
+        current weights are left untouched.
+
+        Args:
+            field_weights: Either a single field name or dictionary of field:weight mappings
+        """
+        parsed_weights = self._parse_field_weights(field_weights)
+        self._field_weights = parsed_weights
+        # Reset the stored query string so it no longer reflects the old weights
+        self._built_query_string = None
+
+    @property
+    def field_weights(self) -> Dict[str, float]:
+        """Return the current field:weight mapping.
+
+        Returns:
+            A shallow copy of the mapping, so mutating the result does not
+            affect the query.
+        """
+        return dict(self._field_weights)
+
+    @property
+    def text_field_name(self) -> Union[str, Dict[str, float]]:
+        """Return the configured text field(s) in backward-compatible form.
+
+        Returns:
+            The bare field name when exactly one field is configured with
+            weight 1.0; otherwise a copy of the field:weight dictionary.
+        """
+        weights = self._field_weights
+        if len(weights) == 1:
+            # Exactly one entry, so it can be unpacked directly.
+            [(name, value)] = weights.items()
+            if value == 1.0:
+                return name
+        return weights.copy()
+
     def _build_query_string(self) -> str:
         """Build the full query string for text search with optional filtering."""
         filter_expression = self._filter_expression
         if isinstance(filter_expression, FilterExpression):
             filter_expression = str(filter_expression)
 
-        text = (
-            f"@{self._text_field_name}:({self._tokenize_and_escape_query(self._text)})"
-        )
+        escaped_query = self._tokenize_and_escape_query(self._text)
+
+        # One clause per configured field. A weight of 1.0 is emitted without
+        # any attribute suffix; every other weight is attached with the
+        # `=> { $weight: ... }` query-attribute syntax.
+        clauses = [
+            f"@{name}:({escaped_query})"
+            if boost == 1.0
+            else f"@{name}:({escaped_query}) => {{ $weight: {boost} }}"
+            for name, boost in self._field_weights.items()
+        ]
+
+        # A lone clause is used verbatim; otherwise the clauses are OR-ed
+        # together inside a single parenthesized group.
+        text = clauses[0] if len(clauses) == 1 else "(" + " | ".join(clauses) + ")"
+
         if filter_expression and filter_expression != "*":
             text += f" AND {filter_expression}"
         return text
diff --git a/tests/integration/test_text_query_weights_integration.py b/tests/integration/test_text_query_weights_integration.py
@@ -0,0 +1,187 @@
+"""Integration tests for TextQuery with field weights."""
+
+import uuid
+
+import pytest
+
+from redisvl.index import SearchIndex
+from redisvl.query import TextQuery
+from redisvl.query.filter import Tag
+
+
+@pytest.fixture
+def weighted_index(redis_url, worker_id):
+    """Yield a JSON-backed index with several text fields, loaded with sample docs.
+
+    The index name and key prefix embed the worker id plus a random suffix.
+    The index is deleted (with drop=True) once the test finishes.
+    """
+    unique_id = str(uuid.uuid4())[:8]
+    text_fields = [
+        {"name": field_name, "type": "text"}
+        for field_name in ("title", "content", "tags")
+    ]
+    schema_dict = {
+        "index": {
+            "name": f"weighted_test_idx_{worker_id}_{unique_id}",
+            "prefix": f"weighted_doc_{worker_id}_{unique_id}",
+            "storage_type": "json",
+        },
+        "fields": text_fields
+        + [
+            {"name": "category", "type": "tag"},
+            {"name": "score", "type": "numeric"},
+        ],
+    }
+
+    index = SearchIndex.from_dict(schema_dict, redis_url=redis_url)
+    index.create(overwrite=True)
+
+    # Sample documents, written as rows and expanded into dicts below
+    columns = ("id", "title", "content", "tags", "category", "score")
+    rows = [
+        (
+            "1",
+            "Redis database introduction",
+            "A comprehensive guide to getting started with Redis",
+            "tutorial beginner",
+            "database",
+            95,
+        ),
+        (
+            "2",
+            "Advanced caching strategies",
+            "Learn about Redis caching patterns and best practices",
+            "redis cache performance",
+            "optimization",
+            88,
+        ),
+        (
+            "3",
+            "Python programming basics",
+            "Introduction to Python with examples using Redis client",
+            "python redis programming",
+            "programming",
+            90,
+        ),
+        (
+            "4",
+            "Data structures overview",
+            "Understanding Redis data structures and their applications",
+            "redis structures",
+            "database",
+            85,
+        ),
+    ]
+
+    index.load([dict(zip(columns, row)) for row in rows])
+    yield index
+    index.delete(drop=True)
+
+
+def test_text_query_with_single_weighted_field(weighted_index):
+    """A single weighted field returns hits, and the top hit matches in the title."""
+    # Only the title field is searched, with a boosted weight
+    query = TextQuery(
+        text="redis",
+        text_field_name={"title": 5.0},
+        return_fields=["title", "content"],
+        num_results=4,
+    )
+
+    hits = weighted_index.query(query)
+    assert len(hits) > 0
+
+    # The best-ranked document must contain the search term in its title
+    best_hit = hits[0]
+    assert "redis" in best_hit["title"].lower()
+
+
+def test_text_query_with_multiple_weighted_fields(weighted_index):
+    """Every hit of a multi-field weighted query matches in at least one field."""
+    searched_fields = ("title", "content", "tags")
+
+    # Search all three text fields, each with its own weight
+    query = TextQuery(
+        text="redis",
+        text_field_name={"title": 3.0, "content": 2.0, "tags": 1.0},
+        return_fields=["title", "content", "tags"],
+        num_results=4,
+    )
+
+    hits = weighted_index.query(query)
+    assert len(hits) > 0
+
+    # Each returned document must contain the term in one of the searched fields
+    for hit in hits:
+        text_found = any(
+            "redis" in hit.get(field_name, "").lower()
+            for field_name in searched_fields
+        )
+        assert text_found
+
+
+def test_text_query_weights_with_filter(weighted_index):
+    """Test TextQuery with weights and filter expression.
+
+    The fixture data contains "database" documents that mention "redis", so
+    the filtered query must return at least one hit, and every hit must
+    belong to the filtered category.
+    """
+    text = "redis"
+
+    # Query with weights and filter
+    filter_expr = Tag("category") == "database"
+    query = TextQuery(
+        text=text,
+        text_field_name={"title": 5.0, "content": 1.0},
+        filter_expression=filter_expr,
+        return_fields=["title", "content", "category"],
+        num_results=4,
+    )
+
+    results = weighted_index.query(query)
+
+    # Without this check the loop below would pass vacuously on an empty result
+    assert len(results) > 0
+
+    # Should only get database category results
+    for result in results:
+        assert result["category"] == "database"
+
+
+def test_dynamic_weight_update(weighted_index):
+    """Re-weighting an existing query still yields results before and after."""
+    # Begin with both fields weighted equally
+    query = TextQuery(
+        text="redis",
+        text_field_name={"title": 1.0, "content": 1.0},
+        return_fields=["title", "content"],
+        num_results=4,
+    )
+    hits_before = weighted_index.query(query)
+
+    # Re-weight the same query object to favor the title field
+    query.set_field_weights({"title": 10.0, "content": 1.0})
+    hits_after = weighted_index.query(query)
+
+    # Ordering may differ between the two runs; only require that both
+    # executions return something
+    assert len(hits_before) > 0
+    assert len(hits_after) > 0
+
+
+def test_backward_compatibility_single_field(weighted_index):
+    """A plain string field name (the pre-weights API) still works."""
+    # Pass the field as a bare string rather than a weights dictionary
+    query = TextQuery(
+        text="redis",
+        text_field_name="content",
+        return_fields=["title", "content"],
+        num_results=4,
+    )
+
+    hits = weighted_index.query(query)
+    assert len(hits) > 0
+
+    # At least one hit must contain the term in the searched content field
+    assert any(
+        "redis" in hit.get("content", "").lower() for hit in hits
+    ), "No results with 'redis' in content field"
diff --git a/tests/unit/test_text_query_weights.py b/tests/unit/test_text_query_weights.py