diff --git a/opteryx/__version__.py b/opteryx/__version__.py index c1afc940..0595e543 100644 --- a/opteryx/__version__.py +++ b/opteryx/__version__.py @@ -1,4 +1,4 @@ -__build__ = 913 +__build__ = 915 # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/opteryx/managers/expression/ops.py b/opteryx/managers/expression/ops.py index bec0a740..b07bc40a 100644 --- a/opteryx/managers/expression/ops.py +++ b/opteryx/managers/expression/ops.py @@ -180,71 +180,24 @@ def _inner_filter_operations(arr, operator, value): return list_ops.cython_allop_neq(arr[0], value) if operator == "AnyOpILike": - patterns = value[0] + from opteryx.utils.sql import regex_match_any - combined_regex_pattern = r"|".join(sql_like_to_regex(p) for p in patterns if p) - combined_regex = re.compile(combined_regex_pattern, re.IGNORECASE) - - out = numpy.zeros(arr.size, dtype=bool) - for i, row in enumerate(arr): - if row is None: - out[i] = None - continue - if row.size == 0: - continue - out[i] = any(combined_regex.search(elem) for elem in row) - - return out + return regex_match_any(arr, value[0], flags=re.IGNORECASE) if operator == "AnyOpLike": - patterns = value[0] + from opteryx.utils.sql import regex_match_any - combined_regex_pattern = r"|".join(sql_like_to_regex(p) for p in patterns if p) - combined_regex = re.compile(combined_regex_pattern) + return regex_match_any(arr, value[0]) - out = numpy.zeros(arr.size, dtype=bool) - for i, row in enumerate(arr): - if row is None: - out[i] = None - continue - if row.size == 0: - continue - out[i] = any(combined_regex.search(elem) for elem in row) - - return out if operator == "AnyOpNotLike": - patterns = value[0] - - combined_regex_pattern = r"|".join(sql_like_to_regex(p) for p in patterns if p) - combined_regex = re.compile(combined_regex_pattern) + from opteryx.utils.sql import regex_match_any - out = numpy.zeros(arr.size, dtype=bool) - for i, row in enumerate(arr): - if row is None: - out[i] = None - continue - if row.size == 0: - continue - out[i] = any(combined_regex.search(elem) for elem in row) - - return numpy.invert(out) + return regex_match_any(arr, value[0], invert=True) if operator == "AnyOpNotILike": - patterns = value[0] - - combined_regex_pattern = r"|".join(sql_like_to_regex(p) for p in patterns if p) - combined_regex = re.compile(combined_regex_pattern, re.IGNORECASE) - - out = numpy.zeros(arr.size, dtype=bool) - for i, row in enumerate(arr): - if row is None: - out[i] = None - continue - if row.size == 0: - continue - out[i] = any(combined_regex.search(elem) for elem in row) + from opteryx.utils.sql import regex_match_any - return numpy.invert(out) + return regex_match_any(arr, value[0], flags=re.IGNORECASE, invert=True) if operator == "AtQuestion": import simdjson diff --git a/opteryx/utils/sql.py b/opteryx/utils/sql.py index b5bc459c..125a8afe 100644 --- a/opteryx/utils/sql.py +++ b/opteryx/utils/sql.py @@ -1,6 +1,8 @@ import re from typing import List +import numpy + ESCAPE_SPECIAL_CHARS = re.compile(r"([.^$*+?{}[\]|()\\])") @@ -114,3 +116,84 @@ def split_sql_statements(sql: str) -> List[str]: statements.append("".join(buffer).strip()) return [s for s in statements if s != ""] + + +def regex_match_any( + arr: numpy.ndarray, + patterns: List[str], + flags: int = re.NOFLAG, + invert: bool = False, +) -> numpy.ndarray: + """ + Evaluates whether each row in `arr` matches ANY of the given LIKE patterns. + Patterns are converted to regexes, combined, and compiled once. + + Parameters: + arr: numpy.ndarray + 1D array of rows. Each element can be: + - None + - A single string/bytes + - A list/tuple/array of strings/bytes + (all non-None elements are assumed to be the same structure). + patterns: List[str] + A list of SQL LIKE patterns. These get combined into a single regex. + flags: int, optional + Flags to pass to `re.compile()`, e.g. re.IGNORECASE for ILIKE. + + Returns: + numpy.ndarray: + A 1D object array with True, False, or None, + indicating whether each row did (or did not) match the patterns. + """ + # 1) Combine the LIKE patterns into a single compiled regex + # (Empty patterns list => empty string => matches nothing) + combined_pattern_str = r"|".join(sql_like_to_regex(p) for p in patterns if p) + # If there are no valid patterns, we build a "never match" pattern + if not combined_pattern_str: + combined_pattern_str = r"(?!x)" # Negative lookahead to never match + + combined_regex = re.compile(combined_pattern_str, flags=flags) + + # 2) Create the output array (dtype=object so we can store None/bool) + out = numpy.empty(arr.size, dtype=object) + + # 3) Determine if the array consists of single strings or lists-of-strings + first_non_none = None + for x in arr: + if x is not None: + first_non_none = x + break + + # If the entire array is None, just return all None + if first_non_none is None: + out[:] = None + return out + + single_string_mode = isinstance(first_non_none, (str, bytes)) + + # 4) Main loop + if single_string_mode: + # Single-string mode + for i, row in enumerate(arr): + if row is None: + out[i] = None + else: + # Match or not? + is_match = combined_regex.search(row) is not None + out[i] = (not is_match) if invert else is_match + else: + # Lists-of-strings mode + for i, row in enumerate(arr): + if row is None: + out[i] = None + else: + # row is assumed to be an iterable of strings/bytes + if row.size == 0: + # Probably a numpy array with zero length + is_match = False + else: + # If anything in the row matches, it's True + is_match = any(combined_regex.search(elem) for elem in row) + out[i] = (not is_match) if invert else is_match + + return out diff --git a/tests/sql_battery/test_shapes_and_errors_battery.py b/tests/sql_battery/test_shapes_and_errors_battery.py index 05811599..468d6252 100644 --- a/tests/sql_battery/test_shapes_and_errors_battery.py +++ b/tests/sql_battery/test_shapes_and_errors_battery.py @@ -1857,29 +1857,60 @@ ("SELECT name, missions FROM $astronauts WHERE missions ILIKE ANY ('%Apoll%')", 34, 2, None), ("SELECT name, missions FROM $astronauts WHERE missions LIKE ANY ('%Apoll%', 'mission')", 34, 2, None), ("SELECT name, missions FROM $astronauts WHERE missions ILIKE ANY ('%Apoll%', 'mission')", 34, 2, None), - ("SELECT name, missions FROM $astronauts WHERE missions NOT LIKE ANY '%apoll%'", 357, 2, None), - ("SELECT name, missions FROM $astronauts WHERE missions NOT ILIKE ANY '%apoll%'", 323, 2, None), - ("SELECT name, missions FROM $astronauts WHERE missions NOT LIKE ANY ('%apoll%')", 357, 2, None), - ("SELECT name, missions FROM $astronauts WHERE missions NOT ILIKE ANY ('%apoll%')", 323, 2, None), - ("SELECT name, missions FROM $astronauts WHERE missions NOT LIKE ANY ('%Apoll%')", 323, 2, None), - ("SELECT name, missions FROM $astronauts WHERE missions NOT ILIKE ANY ('%Apoll%')", 323, 2, None), - ("SELECT name, missions FROM $astronauts WHERE missions NOT LIKE ANY ('%Apoll%', 'mission')", 323, 2, None), - ("SELECT name, missions FROM $astronauts WHERE missions NOT ILIKE ANY ('%Apoll%', 'mission')", 323, 2, None), + ("SELECT name, missions FROM $astronauts WHERE missions NOT LIKE ANY '%apoll%'", 334, 2, None), + ("SELECT name, missions FROM $astronauts WHERE missions NOT ILIKE ANY '%apoll%'", 300, 2, None), + ("SELECT name, missions FROM $astronauts WHERE missions NOT LIKE ANY ('%apoll%')", 334, 2, None), + ("SELECT name, missions FROM $astronauts WHERE missions NOT ILIKE ANY ('%apoll%')", 300, 2, None), + ("SELECT name, missions FROM $astronauts WHERE missions NOT LIKE ANY ('%Apoll%')", 300, 2, None), + ("SELECT name, missions FROM $astronauts WHERE missions NOT ILIKE ANY ('%Apoll%')", 300, 2, None), + ("SELECT name, missions FROM $astronauts WHERE missions NOT LIKE ANY ('%Apoll%', 'mission')", 300, 2, None), + ("SELECT name, missions FROM $astronauts WHERE missions NOT ILIKE ANY ('%Apoll%', 'mission')", 300, 2, None), ("SELECT name, missions FROM $astronauts WHERE missions LIKE ANY ('Apoll%', 'Gemini%', 'Mercury%')", 37, 2, None), - ("SELECT name, missions FROM $astronauts WHERE missions NOT LIKE ANY ('Apoll%', 'Gemini%', 'Mercury%')", 320, 2, None), + ("SELECT name, missions FROM $astronauts WHERE missions NOT LIKE ANY ('Apoll%', 'Gemini%', 'Mercury%')", 297, 2, None), ("SELECT name, missions FROM $astronauts WHERE missions LIKE ANY ()", 0, 2, SqlError), ("SELECT name, missions FROM $astronauts WHERE missions NOT LIKE ANY ()", 0, 2, SqlError), ("SELECT name, missions FROM $astronauts WHERE missions LIKE ANY ('%Apoll%', null)", 34, 2, None), - ("SELECT name, missions FROM $astronauts WHERE missions NOT LIKE ANY ('%Apoll%', null)", 323, 2, None), + ("SELECT name, missions FROM $astronauts WHERE missions NOT LIKE ANY ('%Apoll%', null)", 300, 2, None), ("SELECT name, missions FROM $astronauts WHERE missions LIKE ANY ('%aPoll%')", 0, 2, None), ("SELECT name, missions FROM $astronauts WHERE missions ILIKE ANY ('%aPoll%')", 34, 2, None), ("SELECT name, missions FROM $astronauts WHERE missions LIKE ANY ('Apollo 11')", 3, 2, None), - ("SELECT name, missions FROM $astronauts WHERE missions NOT LIKE ANY ('Apollo 11')", 354, 2, None), + ("SELECT name, missions FROM $astronauts WHERE missions NOT LIKE ANY ('Apollo 11')", 331, 2, None), ("SELECT name, missions FROM $astronauts WHERE missions LIKE ANY ('Apollo_%')", 34, 2, None), ("SELECT name, missions FROM $astronauts WHERE missions LIKE ANY ('Apo__o%')", 34, 2, None), ("SELECT name, missions FROM $astronauts WHERE missions LIKE ANY ('%Apoll%', 123)", 34, 2, None), ("SELECT name, missions FROM $astronauts WHERE missions LIKE ANY ('%pattern1%', '%pattern2%', '%pattern3%', '%pattern4%', '%pattern5%', '%pattern6%', '%pattern7%', '%pattern8%', '%pattern9%', '%pattern10%', '%pattern11%', '%pattern12%', '%pattern13%', '%pattern14%', '%pattern15%', '%pattern16%', '%pattern17%', '%pattern18%', '%pattern19%', '%pattern20%', '%pattern21%', '%pattern22%', '%pattern23%', '%pattern24%', '%pattern25%', '%pattern26%', '%pattern27%', '%pattern28%', '%pattern29%', '%pattern30%', '%pattern31%', '%pattern32%', '%pattern33%', '%pattern34%', '%pattern35%', '%pattern36%', '%pattern37%', '%pattern38%', '%pattern39%', '%pattern40%', '%pattern41%', '%pattern42%', '%pattern43%', '%pattern44%', '%pattern45%', '%pattern46%', '%pattern47%', '%pattern48%', '%pattern49%', '%pattern50%');", 0, 2, None), + ("SELECT name, missions FROM $astronauts WHERE name LIKE ANY '%armstrong%'", 0, 2, None), + ("SELECT name, missions FROM $astronauts WHERE name ILIKE ANY '%armstrong%'", 1, 2, None), + ("SELECT name, missions FROM $astronauts WHERE name LIKE ANY ('%arms%')", 0, 2, None), + ("SELECT name, missions FROM $astronauts WHERE name ILIKE ANY ('%arms%')", 1, 2, None), + ("SELECT name, missions FROM $astronauts WHERE name LIKE ANY ('%Armstrong%')", 1, 2, None), + ("SELECT name, missions FROM $astronauts WHERE name ILIKE ANY ('%Arms%')", 1, 2, None), + ("SELECT name, missions FROM $astronauts WHERE name LIKE ANY ('%Armstrong%', 'mission')", 1, 2, None), + ("SELECT name, missions FROM $astronauts WHERE name ILIKE ANY ('%Armstrong%', 'mission')", 1, 2, None), + ("SELECT name, missions FROM $astronauts WHERE name NOT LIKE ANY '%armstrong%'", 357, 2, None), + ("SELECT name, missions FROM $astronauts WHERE name NOT ILIKE ANY '%armstrong%'", 356, 2, None), + ("SELECT name, missions FROM $astronauts WHERE name NOT LIKE ANY ('%armstrong%')", 357, 2, None), + ("SELECT name, missions FROM $astronauts WHERE name NOT ILIKE ANY ('%armstrong%')", 356, 2, None), + ("SELECT name, missions FROM $astronauts WHERE name NOT LIKE ANY ('%Armstrong%')", 356, 2, None), + ("SELECT name, missions FROM $astronauts WHERE name NOT ILIKE ANY ('%Armstrong%')", 356, 2, None), + ("SELECT name, missions FROM $astronauts WHERE name NOT LIKE ANY ('%Armstrong%', 'mission')", 356, 2, None), + ("SELECT name, missions FROM $astronauts WHERE name NOT ILIKE ANY ('%Armstrong%', 'mission')", 356, 2, None), + ("SELECT name, missions FROM $astronauts WHERE name LIKE ANY ('%Armstrong%', '%Aldrin%', '%Collins%')", 4, 2, None), + ("SELECT name, missions FROM $astronauts WHERE name NOT LIKE ANY ('%Armstrong%', '%Aldrin%', '%Collins%')", 353, 2, None), + ("SELECT name, missions FROM $astronauts WHERE name LIKE ANY ()", 0, 2, SqlError), + ("SELECT name, missions FROM $astronauts WHERE name NOT LIKE ANY ()", 0, 2, SqlError), + ("SELECT name, missions FROM $astronauts WHERE name LIKE ANY ('%Armstrong%', null)", 1, 2, None), + ("SELECT name, missions FROM $astronauts WHERE name NOT LIKE ANY ('%Armstrong%', null)", 356, 2, None), + ("SELECT name, missions FROM $astronauts WHERE name LIKE ANY ('%aRmstrong%')", 0, 2, None), + ("SELECT name, missions FROM $astronauts WHERE name ILIKE ANY ('%aRmstrong%')", 1, 2, None), + ("SELECT name, missions FROM $astronauts WHERE name LIKE ANY ('Neil A. Armstrong')", 1, 2, None), + ("SELECT name, missions FROM $astronauts WHERE name NOT LIKE ANY ('Neil A. Armstrong')", 356, 2, None), + ("SELECT name, missions FROM $astronauts WHERE name LIKE ANY ('%__Armstrong%')", 1, 2, None), + ("SELECT name, missions FROM $astronauts WHERE name LIKE ANY ('%Arm__rong%')", 1, 2, None), + ("SELECT name, missions FROM $astronauts WHERE name LIKE ANY ('%Armstrong%', 123)", 1, 2, None), + ("SELECT name, missions FROM $astronauts WHERE name LIKE ANY ('%pattern1%', '%pattern2%', '%pattern3%', '%pattern4%', '%pattern5%', '%pattern6%', '%pattern7%', '%pattern8%', '%pattern9%', '%pattern10%', '%pattern11%', '%pattern12%', '%pattern13%', '%pattern14%', '%pattern15%', '%pattern16%', '%pattern17%', '%pattern18%', '%pattern19%', '%pattern20%', '%pattern21%', '%pattern22%', '%pattern23%', '%pattern24%', '%pattern25%', '%pattern26%', '%pattern27%', '%pattern28%', '%pattern29%', '%pattern30%', '%pattern31%', '%pattern32%', '%pattern33%', '%pattern34%', '%pattern35%', '%pattern36%', '%pattern37%', '%pattern38%', '%pattern39%', '%pattern40%', '%pattern41%', '%pattern42%', '%pattern43%', '%pattern44%', '%pattern45%', '%pattern46%', '%pattern47%', '%pattern48%', '%pattern49%', '%pattern50%');", 0, 2, None), + # **************************************************************************************** # These are queries which have been found to return the wrong result or not run correctly diff --git a/tests/storage/test_cache_memcached.py b/tests/storage/test_cache_memcached.py index 8f0e5240..93c1a53f 100644 --- a/tests/storage/test_cache_memcached.py +++ b/tests/storage/test_cache_memcached.py @@ -6,7 +6,6 @@ import os import sys -import pytest os.environ["OPTERYX_DEBUG"] = "1" @@ -62,6 +61,7 @@ def test_memcached_cache(): assert stats.get("remote_cache_hits", 0) >= stats["blobs_read"], str(stats) assert stats.get("cache_misses", 0) == 0, stats +@skip_if(is_arm() or is_windows() or is_mac()) def test_memcache_stand_alone(): os.environ["OPTERYX_DEBUG"] = "1" from opteryx.managers.cache import MemcachedCache @@ -110,6 +110,7 @@ def threaded_cache_operations(cache: MemcachedCache, payloads: list): for thread in threads: thread.join() +@skip_if(is_arm() or is_windows() or is_mac()) def test_memcache_threaded(): os.environ["OPTERYX_DEBUG"] = "1" @@ -128,7 +129,7 @@ def test_memcache_threaded(): if result: assert result == load, f"Post-thread check failed: {result} != {load}" - +@skip_if(is_arm() or is_windows() or is_mac()) def test_skip_on_error(): from opteryx.managers.cache import MemcachedCache cache = MemcachedCache()