Skip to content

Commit 103c221

Browse files
Add datetime-enabled filters (#295)
Goal:
1. Create a new DatetimeFilter or TimestampFilter
   - Allow querying for a specific date without time
   - Allow querying for a specific date with time
   - Allow querying for a date range
   - Allow querying for a time range
   - Allow querying with or without a timezone
   - Default to timezone-aware UTC datetimes
2. Alternatively, create a new Timestamp field type that allows specifying via YAML or dictionary that a numeric field is actually a timestamp, with or without a timezone.

---------

Co-authored-by: Tyler Hutcherson <tyler.hutcherson@redis.com>
1 parent 3ca4c97 commit 103c221

File tree

20 files changed

+1086
-168
lines changed

20 files changed

+1086
-168
lines changed

docs/user_guide/02_hybrid_queries.ipynb

Lines changed: 170 additions & 41 deletions
Large diffs are not rendered by default.
62 Bytes
Binary file not shown.

redisvl/extensions/llmcache/semantic.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -312,15 +312,17 @@ def _vectorize_prompt(self, prompt: Optional[str]) -> List[float]:
312312
if not isinstance(prompt, str):
313313
raise TypeError("Prompt must be a string.")
314314

315-
return self._vectorizer.embed(prompt)
315+
result = self._vectorizer.embed(prompt)
316+
return result # type: ignore
316317

317318
async def _avectorize_prompt(self, prompt: Optional[str]) -> List[float]:
318319
"""Converts a text prompt to its vector representation using the
319320
configured vectorizer."""
320321
if not isinstance(prompt, str):
321322
raise TypeError("Prompt must be a string.")
322323

323-
return await self._vectorizer.aembed(prompt)
324+
result = await self._vectorizer.aembed(prompt)
325+
return result # type: ignore
324326

325327
def _check_vector_dims(self, vector: List[float]):
326328
"""Checks the size of the provided vector and raises an error if it

redisvl/extensions/router/semantic.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -374,14 +374,14 @@ def __call__(
374374
if not vector:
375375
if not statement:
376376
raise ValueError("Must provide a vector or statement to the router")
377-
vector = self.vectorizer.embed(statement)
377+
vector = self.vectorizer.embed(statement) # type: ignore
378378

379379
aggregation_method = (
380380
aggregation_method or self.routing_config.aggregation_method
381381
)
382382

383383
# perform route classification
384-
top_route_match = self._classify_route(vector, aggregation_method)
384+
top_route_match = self._classify_route(vector, aggregation_method) # type: ignore
385385
return top_route_match
386386

387387
@deprecated_argument("distance_threshold")
@@ -408,7 +408,7 @@ def route_many(
408408
if not vector:
409409
if not statement:
410410
raise ValueError("Must provide a vector or statement to the router")
411-
vector = self.vectorizer.embed(statement)
411+
vector = self.vectorizer.embed(statement) # type: ignore
412412

413413
max_k = max_k or self.routing_config.max_k
414414
aggregation_method = (
@@ -417,7 +417,7 @@ def route_many(
417417

418418
# classify routes
419419
top_route_matches = self._classify_multi_route(
420-
vector, max_k, aggregation_method
420+
vector, max_k, aggregation_method # type: ignore
421421
)
422422

423423
return top_route_matches

redisvl/extensions/session_manager/semantic_session.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -349,7 +349,7 @@ def add_messages(
349349
role=message[ROLE_FIELD_NAME],
350350
content=message[CONTENT_FIELD_NAME],
351351
session_tag=session_tag,
352-
vector_field=content_vector,
352+
vector_field=content_vector, # type: ignore
353353
)
354354

355355
if TOOL_FIELD_NAME in message:

redisvl/query/filter.py

Lines changed: 271 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import datetime
2+
import re
13
from enum import Enum
24
from functools import wraps
35
from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union
@@ -8,6 +10,19 @@
810
# mypy: disable-error-code="override"
911

1012

13+
class Inclusive(str, Enum):
    """Which endpoints of a range filter are part of the match.

    Members are also ``str`` instances, so a member can be looked up from its
    plain string value (``Inclusive("both")``) and compares equal to it.
    """

    BOTH = "both"
    """Inclusive of both sides of range (default)"""
    NEITHER = "neither"
    """Inclusive of neither side of range"""
    LEFT = "left"
    """Inclusive of only left"""
    RIGHT = "right"
    """Inclusive of only right"""
25+
1126
class FilterOperator(Enum):
1227
EQ = 1
1328
NE = 2
@@ -19,6 +34,7 @@ class FilterOperator(Enum):
1934
AND = 8
2035
LIKE = 9
2136
IN = 10
37+
BETWEEN = 11
2238

2339

2440
class FilterField:
@@ -267,6 +283,7 @@ class Num(FilterField):
267283
FilterOperator.GT: ">",
268284
FilterOperator.LE: "<=",
269285
FilterOperator.GE: ">=",
286+
FilterOperator.BETWEEN: "between",
270287
}
271288
OPERATOR_MAP: Dict[FilterOperator, str] = {
272289
FilterOperator.EQ: "@%s:[%s %s]",
@@ -275,8 +292,10 @@ class Num(FilterField):
275292
FilterOperator.LT: "@%s:[-inf (%s]",
276293
FilterOperator.GE: "@%s:[%s +inf]",
277294
FilterOperator.LE: "@%s:[-inf %s]",
295+
FilterOperator.BETWEEN: "@%s:[%s %s]",
278296
}
279-
SUPPORTED_VAL_TYPES = (int, float, type(None))
297+
298+
SUPPORTED_VAL_TYPES = (int, float, tuple, type(None))
280299

281300
def __eq__(self, other: int) -> "FilterExpression":
282301
"""Create a Numeric equality filter expression.
@@ -373,10 +392,51 @@ def __le__(self, other: int) -> "FilterExpression":
373392
self._set_value(other, self.SUPPORTED_VAL_TYPES, FilterOperator.LE)
374393
return FilterExpression(str(self))
375394

395+
@staticmethod
def _validate_inclusive_string(inclusive: str) -> Inclusive:
    """Convert a user-supplied ``inclusive`` option into an ``Inclusive`` member.

    Args:
        inclusive: One of the ``Inclusive`` values ("both", "neither",
            "left", "right").

    Returns:
        The matching ``Inclusive`` member.

    Raises:
        ValueError: If ``inclusive`` is not a valid ``Inclusive`` value.
    """
    try:
        return Inclusive(inclusive)
    # Only lookup failures are translated; a bare ``except`` would also
    # swallow KeyboardInterrupt / SystemExit.
    except (ValueError, TypeError) as exc:
        raise ValueError(
            f"Invalid inclusive value must be: {[i.value for i in Inclusive]}"
        ) from exc
403+
404+
def _format_inclusive_between(
    self, inclusive: Inclusive, start: int, end: int
) -> str:
    """Render a range query on this field honoring the inclusivity mode.

    Args:
        inclusive: Which endpoints belong to the range.
        start: Lower bound of the range.
        end: Upper bound of the range.

    Returns:
        A query string of the form ``@field:[<low> <high>]``.

    Raises:
        ValueError: If ``inclusive`` is none of the known modes.
    """
    mode = inclusive.value

    # A bound is made exclusive by prefixing it with "(".
    if mode == Inclusive.BOTH.value:
        low, high = f"{start}", f"{end}"
    elif mode == Inclusive.NEITHER.value:
        low, high = f"({start}", f"({end}"
    elif mode == Inclusive.LEFT.value:
        low, high = f"{start}", f"({end}"
    elif mode == Inclusive.RIGHT.value:
        low, high = f"({start}", f"{end}"
    else:
        raise ValueError("Inclusive value not found")

    return f"@{self._field}:[{low} {high}]"
420+
421+
def between(
    self, start: int, end: int, inclusive: str = "both"
) -> "FilterExpression":
    """Create a filter matching values that lie between two numbers.

    Args:
        start: Lower bound of the range.
        end: Upper bound of the range.
        inclusive: Which bounds are part of the range: "both" (default),
            "neither", "left" or "right".

    Returns:
        A FilterExpression wrapping the range query for this field.

    Raises:
        ValueError: If ``inclusive`` is not a valid option.
    """
    mode = self._validate_inclusive_string(inclusive)
    return FilterExpression(self._format_inclusive_between(mode, start, end))
429+
376430
def __str__(self) -> str:
377431
"""Return the Redis Query string for the Numeric filter"""
378432
if self._value is None:
379433
return "*"
434+
if self._operator == FilterOperator.BETWEEN:
435+
return self.OPERATOR_MAP[self._operator] % (
436+
self._field,
437+
self._value[0],
438+
self._value[1],
439+
)
380440
if self._operator == FilterOperator.EQ or self._operator == FilterOperator.NE:
381441
return self.OPERATOR_MAP[self._operator] % (
382442
self._field,
@@ -562,3 +622,213 @@ def __str__(self) -> str:
562622
if not self._filter:
563623
raise ValueError("Improperly initialized FilterExpression")
564624
return self._filter
625+
626+
627+
class Timestamp(Num):
    """
    A timestamp filter for querying date/time fields in Redis.

    This filter can handle various date and time formats, including:
    - datetime objects (with or without timezone)
    - date objects
    - ISO-8601 formatted strings
    - Unix timestamps (as integers or floats)

    All inputs are converted to Unix timestamps (seconds since the epoch).
    Naive datetimes, date objects and date-only strings are interpreted as
    UTC, never as the machine's local time zone.
    """

    SUPPORTED_TYPES = (
        datetime.datetime,
        datetime.date,
        tuple,  # Date range
        str,  # ISO format
        int,  # Unix timestamp
        float,  # Unix timestamp with fractional seconds
        type(None),
    )

    @staticmethod
    def _is_date(value: Any) -> bool:
        """Return True for a value that names a whole day rather than an instant.

        That is either a ``datetime.date`` that is not a ``datetime.datetime``,
        or a string in exactly ``YYYY-MM-DD`` form.
        """
        if isinstance(value, str):
            return Timestamp._is_date_only(value)
        # datetime.datetime subclasses datetime.date, so exclude it explicitly.
        return isinstance(value, datetime.date) and not isinstance(
            value, datetime.datetime
        )

    @staticmethod
    def _is_date_only(iso_string: str) -> bool:
        """Return True if the string is exactly ``YYYY-MM-DD`` (no time part)."""
        # fullmatch rather than match + "$": "$" also matches just before a
        # trailing newline, which would let "2024-01-01\n" through.
        return bool(re.fullmatch(r"\d{4}-\d{2}-\d{2}", iso_string))

    def _convert_to_timestamp(self, value, end_date=False):
        """
        Convert various inputs to a Unix timestamp (seconds since epoch in UTC).

        Args:
            value: A datetime, date, ISO string, int, float, or None.
            end_date: Only relevant for whole-day inputs (date objects and
                date-only strings). When True the last instant of the day is
                used, otherwise the first.

        Returns:
            float: Unix timestamp, or None if ``value`` is None.

        Raises:
            ValueError: If a string is not in ISO format.
            TypeError: If ``value`` has an unsupported type.
        """
        if value is None:
            return None

        if isinstance(value, (int, float)):
            # Already a Unix timestamp
            return float(value)

        if isinstance(value, str):
            try:
                if self._is_date_only(value):
                    # Keep whole-day strings as dates so that ``end_date``
                    # applies to them just like it does to date objects.
                    value = datetime.date.fromisoformat(value)
                else:
                    value = datetime.datetime.fromisoformat(value)
            except ValueError as exc:
                raise ValueError(
                    f"String timestamp must be in ISO format: {value}"
                ) from exc

        if isinstance(value, datetime.date) and not isinstance(
            value, datetime.datetime
        ):
            # Expand a whole day to its last or first instant.
            boundary = datetime.time.max if end_date else datetime.time.min
            value = datetime.datetime.combine(value, boundary)

        if isinstance(value, datetime.datetime):
            if value.tzinfo is None:
                # Naive values are taken to be UTC already.
                value = value.replace(tzinfo=datetime.timezone.utc)
            else:
                value = value.astimezone(datetime.timezone.utc)

            # Convert to Unix timestamp
            return value.timestamp()

        raise TypeError(f"Unsupported type for timestamp conversion: {type(value)}")

    def __eq__(self, other) -> "FilterExpression":
        """
        Filter for timestamps equal to the specified value.
        For whole-day inputs (date objects or ``YYYY-MM-DD`` strings) this
        matches the entire UTC day.

        Args:
            other: A datetime, date, ISO string, or Unix timestamp

        Returns:
            FilterExpression: The filter expression for the query
        """
        if self._is_date(other):
            # between() expands the start to the first and the end to the last
            # instant of the day, both in UTC.
            return self.between(other, other)

        timestamp = self._convert_to_timestamp(other)
        self._set_value(timestamp, self.SUPPORTED_TYPES, FilterOperator.EQ)
        return FilterExpression(str(self))

    def __ne__(self, other) -> "FilterExpression":
        """
        Filter for timestamps not equal to the specified value.
        For whole-day inputs (date objects or ``YYYY-MM-DD`` strings) this
        excludes the entire UTC day.

        Args:
            other: A datetime, date, ISO string, or Unix timestamp

        Returns:
            FilterExpression: The filter expression for the query
        """
        if self._is_date(other):
            # Negate the whole-day range; returning the range itself would
            # select exactly the day that is supposed to be excluded.
            whole_day = self.between(other, other)
            return FilterExpression(f"(-{str(whole_day)})")

        timestamp = self._convert_to_timestamp(other)
        self._set_value(timestamp, self.SUPPORTED_TYPES, FilterOperator.NE)
        return FilterExpression(str(self))

    def __gt__(self, other) -> "FilterExpression":
        """
        Filter for timestamps greater than the specified value.

        Args:
            other: A datetime, date, ISO string, or Unix timestamp

        Returns:
            FilterExpression: The filter expression for the query
        """
        timestamp = self._convert_to_timestamp(other)
        self._set_value(timestamp, self.SUPPORTED_TYPES, FilterOperator.GT)
        return FilterExpression(str(self))

    def __lt__(self, other) -> "FilterExpression":
        """
        Filter for timestamps less than the specified value.

        Args:
            other: A datetime, date, ISO string, or Unix timestamp

        Returns:
            FilterExpression: The filter expression for the query
        """
        timestamp = self._convert_to_timestamp(other)
        self._set_value(timestamp, self.SUPPORTED_TYPES, FilterOperator.LT)
        return FilterExpression(str(self))

    def __ge__(self, other) -> "FilterExpression":
        """
        Filter for timestamps greater than or equal to the specified value.

        Args:
            other: A datetime, date, ISO string, or Unix timestamp

        Returns:
            FilterExpression: The filter expression for the query
        """
        timestamp = self._convert_to_timestamp(other)
        self._set_value(timestamp, self.SUPPORTED_TYPES, FilterOperator.GE)
        return FilterExpression(str(self))

    def __le__(self, other) -> "FilterExpression":
        """
        Filter for timestamps less than or equal to the specified value.

        Args:
            other: A datetime, date, ISO string, or Unix timestamp

        Returns:
            FilterExpression: The filter expression for the query
        """
        timestamp = self._convert_to_timestamp(other)
        self._set_value(timestamp, self.SUPPORTED_TYPES, FilterOperator.LE)
        return FilterExpression(str(self))

    def between(self, start, end, inclusive: str = "both") -> "FilterExpression":
        """
        Filter for timestamps between start and end.

        A whole-day ``start`` is taken at its first instant and a whole-day
        ``end`` at its last instant, so a date range covers its final day.

        Args:
            start: A datetime, date, ISO string, or Unix timestamp
            end: A datetime, date, ISO string, or Unix timestamp
            inclusive: Which bounds are part of the range: "both" (default),
                "neither", "left" or "right"

        Returns:
            FilterExpression: The filter expression for the query

        Raises:
            ValueError: If ``inclusive`` is not a valid option
        """
        mode = self._validate_inclusive_string(inclusive)

        start_ts = self._convert_to_timestamp(start)
        end_ts = self._convert_to_timestamp(end, end_date=True)

        expression = self._format_inclusive_between(mode, start_ts, end_ts)

        return FilterExpression(expression)

0 commit comments

Comments
 (0)