|
17 | 17 | from enum import Enum
|
18 | 18 | from typing import Any, Dict
|
19 | 19 |
|
# Tokens of a CQL2 LIKE pattern that need translating: a backslash escape
# (the backslash plus the character it escapes), a bare wildcard, or a
# dangling backslash at the very end of the input. DOTALL lets "." match a
# newline too, so a backslash followed by a newline is validated like any
# other escape instead of slipping through untranslated.
_cql2_like_patterns = re.compile(r"\\.|[%_]|\\$", re.DOTALL)

# CQL2 token -> Elasticsearch wildcard equivalent. Any matched token that is
# missing from this table is treated as an invalid escape sequence.
_valid_like_substitutions = {
    "\\\\": "\\",
    "\\%": "%",
    "\\_": "_",
    "%": "*",
    "_": "?",
}


def _replace_like_patterns(match: re.Match) -> str:
    """
    Return the Elasticsearch replacement for a single matched CQL2 token.

    Args:
        match (re.Match): A match produced by ``_cql2_like_patterns``.

    Returns:
        str: The substitution registered in ``_valid_like_substitutions``.

    Raises:
        ValueError: If the matched token is not a known substitution, i.e. a
            backslash followed by anything other than a backslash, '%' or '_',
            or a backslash with nothing after it.
    """
    pattern = match.group()
    try:
        return _valid_like_substitutions[pattern]
    except KeyError:
        # The failed dict lookup is an implementation detail; suppress it so
        # callers only see the ValueError in the traceback.
        raise ValueError(f"'{pattern}' is not a valid escape sequence") from None


def cql2_like_to_es(string: str) -> str:
    """
    Convert CQL2 "LIKE" characters to Elasticsearch "wildcard" characters.

    Unescaped '%' becomes '*' and unescaped '_' becomes '?'. The escape
    sequences backslash-'%', backslash-'_' and backslash-backslash are
    replaced by the bare escaped character. All other characters are copied
    through unchanged.

    NOTE(review): literal '*' and '?' in the input are not matched by the
    pattern and are therefore emitted as-is; confirm whether they should be
    escaped for the Elasticsearch query.

    Args:
        string (str): The string containing CQL2 wildcard characters.

    Returns:
        str: The converted string with Elasticsearch compatible wildcards.

    Raises:
        ValueError: If an invalid escape sequence is encountered.
    """
    return _cql2_like_patterns.sub(
        repl=_replace_like_patterns,
        string=string,
    )
45 | 55 |
|
46 | 56 |
|
47 | 57 | class LogicalOp(str, Enum):
|
|
0 commit comments