Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
8e5bf2f
cancel processing if one of the columns in nano arrow couldn't be con…
sfc-gh-mkubik Mar 20, 2025
9f4e611
Merge branch 'main' into SNOW-1989239-invalid-date-segfault
sfc-gh-mkubik Mar 20, 2025
497e403
move fetchall test to cursor tests
sfc-gh-mkubik Mar 20, 2025
6ad5484
handle DictCArrowChunkIterator errors
sfc-gh-mkubik Mar 21, 2025
1896aba
fail fast only on CArrowChunkIterator/SnowflakeCursor
sfc-gh-mkubik Mar 21, 2025
9823333
update description, skip test on olddriver
sfc-gh-mkubik Mar 21, 2025
67e5e5c
Merge branch 'main' into SNOW-1989239-invalid-date-segfault
sfc-gh-mkubik Mar 26, 2025
b636230
add connection parameter that allows rolling back the fail-fast behav…
sfc-gh-mkubik Mar 26, 2025
b857dae
don't incorporate fail-fast mechanic in table iterator
sfc-gh-mkubik Mar 26, 2025
eca8e44
add new parameter to create_nanoarrow_pyarrow_iterator helper
sfc-gh-mkubik Mar 26, 2025
c6277bd
add check error on every column parameter to Table iterator to fulfil…
sfc-gh-mkubik Mar 27, 2025
0652512
SNOW-2007887: improve error message handling related to timeout (#2236)
sfc-gh-aling Mar 26, 2025
6f45713
Merge branch 'main' into SNOW-1989239-invalid-date-segfault
sfc-gh-mkubik Mar 27, 2025
0e99ba1
handle non-existent connection
sfc-gh-mkubik Mar 27, 2025
78e9d31
add parameter to TableIterator helper
sfc-gh-mkubik Mar 27, 2025
cc8f37f
Merge remote-tracking branch 'origin' into SNOW-1989239-invalid-date-…
sfc-gh-mkubik Mar 27, 2025
12be9e8
resolve conflicts, upgrade description, create getter and setter
sfc-gh-mkubik Mar 27, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions DESCRIPTION.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ Source code is also available at: https://github.com/snowflakedb/snowflake-conne
- Improved error message for client-side query cancellations due to timeouts.
- Added support of GCS regional endpoints.
- Added `gcs_use_virtual_endpoints` connection property that forces the usage of the virtual GCS usage. See more: https://cloud.google.com/storage/docs/request-endpoints#xml-api
- Fixed a bug that caused the driver to fail silently on `TO_DATE` arrow-to-python conversion when an invalid date was followed by a valid one.
- Added the `check_arrow_conversion_error_on_every_column` connection property, which can be set to `False` to restore the previous behaviour, in which the driver ignored conversion errors unless they occurred in the last column. This flag's purpose is to unblock workflows that may be impacted by the bugfix, and it will be removed in later releases.

- v3.14.0(March 03, 2025)
- Bumped pyOpenSSL dependency upper boundary from <25.0.0 to <26.0.0.
Expand Down
13 changes: 13 additions & 0 deletions src/snowflake/connector/connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,10 @@ def _get_private_bytes_from_file(
False,
bool,
), # use https://{bucket}.storage.googleapis.com instead of https://storage.googleapis.com/{bucket}
"check_arrow_conversion_error_on_every_column": (
True,
bool,
), # SNOW-XXXXX: remove the check_arrow_conversion_error_on_every_column flag
}

APPLICATION_RE = re.compile(r"[\w\d_]+")
Expand Down Expand Up @@ -397,6 +401,7 @@ class SnowflakeConnection:
token_file_path: The file path of the token file. If both token and token_file_path are provided, the token in token_file_path will be used.
unsafe_file_write: When true, files downloaded by GET will be saved with 644 permissions. Otherwise, files will be saved with safe - owner-only permissions: 600.
gcs_use_virtual_endpoints: When true, the virtual endpoint url is used, see: https://cloud.google.com/storage/docs/request-endpoints#xml-api
check_arrow_conversion_error_on_every_column: When true, the error check after conversion from arrow to python types happens for every column in the row. This new behaviour fixes a bug that caused type errors to be silently ignored when they occurred in any column other than the last one in a row. To restore the previous (faulty) behaviour, set this flag to false.
"""

OCSP_ENV_LOCK = Lock()
Expand Down Expand Up @@ -797,6 +802,14 @@ def gcs_use_virtual_endpoints(self) -> bool:
def gcs_use_virtual_endpoints(self, value: bool) -> None:
self._gcs_use_virtual_endpoints = value

@property
def check_arrow_conversion_error_on_every_column(self) -> bool:
    """Whether a Python-error check runs after converting each column of a row.

    When True (the default), an arrow-to-python conversion error surfaces
    immediately for the column that failed instead of being silently
    overwritten by a later, successful column conversion.
    """
    return self._check_arrow_conversion_error_on_every_column

@check_arrow_conversion_error_on_every_column.setter
def check_arrow_conversion_error_on_every_column(self, value: bool) -> None:
    # Fixed return annotation: a property setter returns None, not bool.
    self._check_arrow_conversion_error_on_every_column = value

def connect(self, **kwargs) -> None:
"""Establishes connection to Snowflake."""
logger.debug("connect")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ namespace sf {

CArrowChunkIterator::CArrowChunkIterator(PyObject* context, char* arrow_bytes,
int64_t arrow_bytes_size,
PyObject* use_numpy)
PyObject* use_numpy,
PyObject* check_error_on_every_column)
: CArrowIterator(arrow_bytes, arrow_bytes_size),
m_latestReturnedRow(nullptr),
m_context(context) {
Expand All @@ -39,6 +40,7 @@ CArrowChunkIterator::CArrowChunkIterator(PyObject* context, char* arrow_bytes,
m_rowCountInBatch = 0;
m_latestReturnedRow.reset();
m_useNumpy = PyObject_IsTrue(use_numpy);
m_checkErrorOnEveryColumn = PyObject_IsTrue(check_error_on_every_column);

m_batchCount = m_ipcArrowArrayVec.size();
m_columnCount = m_batchCount > 0 ? m_ipcArrowSchema->n_children : 0;
Expand Down Expand Up @@ -92,6 +94,9 @@ void CArrowChunkIterator::createRowPyObject() {
PyTuple_SET_ITEM(
m_latestReturnedRow.get(), i,
m_currentBatchConverters[i]->toPyObject(m_rowIndexInBatch));
if (m_checkErrorOnEveryColumn && py::checkPyError()) {
return;
}
}
return;
}
Expand Down Expand Up @@ -505,7 +510,8 @@ DictCArrowChunkIterator::DictCArrowChunkIterator(PyObject* context,
char* arrow_bytes,
int64_t arrow_bytes_size,
PyObject* use_numpy)
: CArrowChunkIterator(context, arrow_bytes, arrow_bytes_size, use_numpy) {}
: CArrowChunkIterator(context, arrow_bytes, arrow_bytes_size, use_numpy,
Py_False) {}

void DictCArrowChunkIterator::createRowPyObject() {
m_latestReturnedRow.reset(PyDict_New());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ class CArrowChunkIterator : public CArrowIterator {
* Constructor
*/
CArrowChunkIterator(PyObject* context, char* arrow_bytes,
int64_t arrow_bytes_size, PyObject* use_numpy);
int64_t arrow_bytes_size, PyObject* use_numpy,
PyObject* check_error_on_every_column);

/**
* Destructor
Expand Down Expand Up @@ -78,6 +79,10 @@ class CArrowChunkIterator : public CArrowIterator {
/** true if return numpy int64 float64 datetime*/
bool m_useNumpy;

/** a flag that ensures running py::checkPyError after each column processing
* in order to fail early on first python processing error */
bool m_checkErrorOnEveryColumn;

void initColumnConverters();
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ cdef extern from "CArrowChunkIterator.hpp" namespace "sf":
char* arrow_bytes,
int64_t arrow_bytes_size,
PyObject* use_numpy,
PyObject* check_error_on_every_column,
) except +

cdef cppclass DictCArrowChunkIterator(CArrowChunkIterator):
Expand Down Expand Up @@ -100,6 +101,7 @@ cdef class PyArrowIterator(EmptyPyArrowIterator):
# still be converted into native python types.
# https://docs.snowflake.com/en/user-guide/sqlalchemy.html#numpy-data-type-support
cdef object use_numpy
cdef object check_error_on_every_column
cdef object number_to_decimal
cdef object pyarrow_table

Expand All @@ -111,12 +113,14 @@ cdef class PyArrowIterator(EmptyPyArrowIterator):
object use_dict_result,
object numpy,
object number_to_decimal,
object check_error_on_every_column
):
self.context = arrow_context
self.cIterator = NULL
self.use_dict_result = use_dict_result
self.cursor = cursor
self.use_numpy = numpy
self.check_error_on_every_column = check_error_on_every_column
self.number_to_decimal = number_to_decimal
self.pyarrow_table = None
self.table_returned = False
Expand All @@ -139,8 +143,9 @@ cdef class PyArrowRowIterator(PyArrowIterator):
object use_dict_result,
object numpy,
object number_to_decimal,
object check_error_on_every_column,
):
super().__init__(cursor, py_inputstream, arrow_context, use_dict_result, numpy, number_to_decimal)
super().__init__(cursor, py_inputstream, arrow_context, use_dict_result, numpy, number_to_decimal, check_error_on_every_column)
if self.cIterator is not NULL:
return

Expand All @@ -155,7 +160,8 @@ cdef class PyArrowRowIterator(PyArrowIterator):
<PyObject *> self.context,
self.arrow_bytes,
self.arrow_bytes_size,
<PyObject *> self.use_numpy
<PyObject *> self.use_numpy,
<PyObject *> self.check_error_on_every_column
)
cdef ReturnVal cret = self.cIterator.checkInitializationStatus()
if cret.exception:
Expand Down Expand Up @@ -200,8 +206,9 @@ cdef class PyArrowTableIterator(PyArrowIterator):
object use_dict_result,
object numpy,
object number_to_decimal,
object check_error_on_every_column
):
super().__init__(cursor, py_inputstream, arrow_context, use_dict_result, numpy, number_to_decimal)
super().__init__(cursor, py_inputstream, arrow_context, use_dict_result, numpy, number_to_decimal, check_error_on_every_column)
if not INSTALLED_PYARROW:
raise Error.errorhandler_make_exception(
ProgrammingError,
Expand Down
16 changes: 14 additions & 2 deletions src/snowflake/connector/result_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ def _create_nanoarrow_iterator(
numpy: bool,
number_to_decimal: bool,
row_unit: IterUnit,
check_error_on_every_column: bool = True,
):
from .nanoarrow_arrow_iterator import PyArrowRowIterator, PyArrowTableIterator

Expand All @@ -74,6 +75,7 @@ def _create_nanoarrow_iterator(
use_dict_result,
numpy,
number_to_decimal,
check_error_on_every_column,
)
if row_unit == IterUnit.ROW_UNIT
else PyArrowTableIterator(
Expand All @@ -83,6 +85,7 @@ def _create_nanoarrow_iterator(
use_dict_result,
numpy,
number_to_decimal,
check_error_on_every_column,
)
)

Expand Down Expand Up @@ -614,7 +617,7 @@ def _load(
)

def _from_data(
self, data: str, iter_unit: IterUnit
self, data: str, iter_unit: IterUnit, check_error_on_every_column: bool = True
) -> Iterator[dict | Exception] | Iterator[tuple | Exception]:
"""Creates a ``PyArrowIterator`` files from a str.

Expand All @@ -631,6 +634,7 @@ def _from_data(
self._numpy,
self._number_to_decimal,
iter_unit,
check_error_on_every_column,
)

@classmethod
Expand Down Expand Up @@ -665,7 +669,15 @@ def _create_iter(
"""Create an iterator for the ResultBatch. Used by get_arrow_iter."""
if self._local:
try:
return self._from_data(self._data, iter_unit)
return self._from_data(
self._data,
iter_unit,
(
connection.check_arrow_conversion_error_on_every_column
if connection
else None
),
)
except Exception:
if connection and getattr(connection, "_debug_arrow_chunk", False):
logger.debug(f"arrow data can not be parsed: {self._data}")
Expand Down
2 changes: 2 additions & 0 deletions test/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ def create_nanoarrow_pyarrow_iterator(input_data, use_table_iterator):
False,
False,
False,
True,
)
if not use_table_iterator
else NanoarrowPyArrowTableIterator(
Expand All @@ -135,6 +136,7 @@ def create_nanoarrow_pyarrow_iterator(input_data, use_table_iterator):
False,
False,
False,
False,
)
)

Expand Down
4 changes: 3 additions & 1 deletion test/integ/pandas/test_unit_arrow_chunk_iterator.py
Original file line number Diff line number Diff line change
Expand Up @@ -430,7 +430,9 @@ def iterate_over_test_chunk(
stream.seek(0)
context = ArrowConverterContext()

it = NanoarrowPyArrowRowIterator(None, stream.read(), context, False, False, False)
it = NanoarrowPyArrowRowIterator(
None, stream.read(), context, False, False, False, True
)

count = 0
while True:
Expand Down
17 changes: 17 additions & 0 deletions test/integ/test_cursor.py
Original file line number Diff line number Diff line change
Expand Up @@ -1790,6 +1790,23 @@ def test_out_of_range_year(conn_cnx, result_format, cursor_type, fetch_method):
fetch_next_fn()


@pytest.mark.skipolddriver
@pytest.mark.parametrize("result_format", ("json", "arrow"))
def test_out_of_range_year_followed_by_correct_year(conn_cnx, result_format):
    """An out-of-range date error must surface even when a valid date follows it.

    Regression test for SNOW-1989239: the arrow converter used to swallow a
    "year out of range" conversion error silently whenever the failing value
    was not in the last column of the row, so the subsequent valid TO_DATE
    masked the failure. skipolddriver: old drivers lack the fail-fast check.
    """
    with conn_cnx(
        session_parameters={
            PARAMETER_PYTHON_CONNECTOR_QUERY_RESULT_FORMAT: result_format
        }
    ) as con:
        with con.cursor() as cur:
            # Invalid date (year 10000) deliberately placed BEFORE a valid one.
            cur.execute("select TO_DATE('10000-01-01'), TO_DATE('9999-01-01')")
            with pytest.raises(
                InterfaceError,
                match="out of range",
            ):
                cur.fetchall()


@pytest.mark.skipolddriver
def test_describe(conn_cnx):
with conn_cnx() as con:
Expand Down
Loading