Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
8e5bf2f
cancel processing if one of the columns in nano arrow couldn't be con…
sfc-gh-mkubik Mar 20, 2025
9f4e611
Merge branch 'main' into SNOW-1989239-invalid-date-segfault
sfc-gh-mkubik Mar 20, 2025
497e403
move fetchall test to cursor tests
sfc-gh-mkubik Mar 20, 2025
6ad5484
handle DictCArrowChunkIterator errors
sfc-gh-mkubik Mar 21, 2025
1896aba
fail fast only on CArrowChunkIterator/SnowflakeCursor
sfc-gh-mkubik Mar 21, 2025
9823333
update description, skip test on olddriver
sfc-gh-mkubik Mar 21, 2025
67e5e5c
Merge branch 'main' into SNOW-1989239-invalid-date-segfault
sfc-gh-mkubik Mar 26, 2025
b636230
add connection parameter that allows rolling back the fail-fast behav…
sfc-gh-mkubik Mar 26, 2025
b857dae
don't incorporate fail-fast mechanic in table iterator
sfc-gh-mkubik Mar 26, 2025
eca8e44
add new parameter to create_nanoarrow_pyarrow_iterator helper
sfc-gh-mkubik Mar 26, 2025
c6277bd
add check error on every column parameter to Table iterator to fulfil…
sfc-gh-mkubik Mar 27, 2025
0652512
SNOW-2007887: improve error message handling related to timeout (#2236)
sfc-gh-aling Mar 26, 2025
6f45713
Merge branch 'main' into SNOW-1989239-invalid-date-segfault
sfc-gh-mkubik Mar 27, 2025
0e99ba1
handle non-existent connection
sfc-gh-mkubik Mar 27, 2025
78e9d31
add parameter to TableIterator helper
sfc-gh-mkubik Mar 27, 2025
cc8f37f
Merge remote-tracking branch 'origin' into SNOW-1989239-invalid-date-…
sfc-gh-mkubik Mar 27, 2025
12be9e8
resolve conflicts, upgrade description, create getter and setter
sfc-gh-mkubik Mar 27, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions DESCRIPTION.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ Source code is also available at: https://github.com/snowflakedb/snowflake-conne
- Improved error message for client-side query cancellations due to timeouts.
- Added support of GCS regional endpoints.
- Added `gcs_use_virtual_endpoints` connection property that forces the usage of the virtual GCS usage. See more: https://cloud.google.com/storage/docs/request-endpoints#xml-api
- Fixed a bug that caused the driver to fail silently on `TO_DATE` arrow-to-python conversion when an invalid date was followed by a valid one.
- Added the `check_arrow_conversion_error_on_every_column` connection property, which can be set to `False` to restore the previous behaviour, in which the driver ignored conversion errors unless they occurred in the last column. This flag's purpose is to unblock workflows that may be impacted by the bugfix, and it will be removed in later releases.

- v3.14.0(March 03, 2025)
- Bumped pyOpenSSL dependency upper boundary from <25.0.0 to <26.0.0.
Expand Down
13 changes: 13 additions & 0 deletions src/snowflake/connector/connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,10 @@ def _get_private_bytes_from_file(
False,
bool,
), # use https://{bucket}.storage.googleapis.com instead of https://storage.googleapis.com/{bucket}
"check_arrow_conversion_error_on_every_column": (
True,
bool,
), # SNOW-XXXXX: remove the check_arrow_conversion_error_on_every_column flag
}

APPLICATION_RE = re.compile(r"[\w\d_]+")
Expand Down Expand Up @@ -397,6 +401,7 @@ class SnowflakeConnection:
token_file_path: The file path of the token file. If both token and token_file_path are provided, the token in token_file_path will be used.
unsafe_file_write: When true, files downloaded by GET will be saved with 644 permissions. Otherwise, files will be saved with safe - owner-only permissions: 600.
gcs_use_virtual_endpoints: When true, the virtual endpoint url is used, see: https://cloud.google.com/storage/docs/request-endpoints#xml-api
check_arrow_conversion_error_on_every_column: When true, the error check after conversion from arrow to python types happens for every column in the row. This new behaviour fixes a bug that caused type errors to be silently ignored when they occurred in any column other than the last one in a row. To restore the previous (faulty) behaviour, set this flag to false.
"""

OCSP_ENV_LOCK = Lock()
Expand Down Expand Up @@ -797,6 +802,14 @@ def gcs_use_virtual_endpoints(self) -> bool:
def gcs_use_virtual_endpoints(self, value: bool) -> None:
self._gcs_use_virtual_endpoints = value

@property
def check_arrow_conversion_error_on_every_column(self) -> bool:
    """Whether a Python-error check runs after converting each column of a row.

    When True (the default), an arrow-to-python conversion error surfaces
    immediately for the column that failed instead of being silently
    overwritten by a later, successful column conversion.
    """
    return self._check_arrow_conversion_error_on_every_column

@check_arrow_conversion_error_on_every_column.setter
def check_arrow_conversion_error_on_every_column(self, value: bool) -> None:
    # Fixed return annotation: a property setter returns None, not bool.
    self._check_arrow_conversion_error_on_every_column = value

def connect(self, **kwargs) -> None:
"""Establishes connection to Snowflake."""
logger.debug("connect")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ namespace sf {

CArrowChunkIterator::CArrowChunkIterator(PyObject* context, char* arrow_bytes,
int64_t arrow_bytes_size,
PyObject* use_numpy)
PyObject* use_numpy,
PyObject* check_error_on_every_column)
: CArrowIterator(arrow_bytes, arrow_bytes_size),
m_latestReturnedRow(nullptr),
m_context(context) {
Expand All @@ -39,6 +40,7 @@ CArrowChunkIterator::CArrowChunkIterator(PyObject* context, char* arrow_bytes,
m_rowCountInBatch = 0;
m_latestReturnedRow.reset();
m_useNumpy = PyObject_IsTrue(use_numpy);
m_checkErrorOnEveryColumn = PyObject_IsTrue(check_error_on_every_column);

m_batchCount = m_ipcArrowArrayVec.size();
m_columnCount = m_batchCount > 0 ? m_ipcArrowSchema->n_children : 0;
Expand Down Expand Up @@ -92,6 +94,9 @@ void CArrowChunkIterator::createRowPyObject() {
PyTuple_SET_ITEM(
m_latestReturnedRow.get(), i,
m_currentBatchConverters[i]->toPyObject(m_rowIndexInBatch));
if (m_checkErrorOnEveryColumn && py::checkPyError()) {
return;
}
}
return;
}
Expand Down Expand Up @@ -505,7 +510,8 @@ DictCArrowChunkIterator::DictCArrowChunkIterator(PyObject* context,
char* arrow_bytes,
int64_t arrow_bytes_size,
PyObject* use_numpy)
: CArrowChunkIterator(context, arrow_bytes, arrow_bytes_size, use_numpy) {}
: CArrowChunkIterator(context, arrow_bytes, arrow_bytes_size, use_numpy,
Py_False) {}

void DictCArrowChunkIterator::createRowPyObject() {
m_latestReturnedRow.reset(PyDict_New());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ class CArrowChunkIterator : public CArrowIterator {
* Constructor
*/
CArrowChunkIterator(PyObject* context, char* arrow_bytes,
int64_t arrow_bytes_size, PyObject* use_numpy);
int64_t arrow_bytes_size, PyObject* use_numpy,
PyObject* check_error_on_every_column);

/**
* Destructor
Expand Down Expand Up @@ -78,6 +79,10 @@ class CArrowChunkIterator : public CArrowIterator {
/** true if return numpy int64 float64 datetime*/
bool m_useNumpy;

/** a flag that ensures running py::checkPyError after each column processing
* in order to fail early on first python processing error */
bool m_checkErrorOnEveryColumn;

void initColumnConverters();
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ cdef extern from "CArrowChunkIterator.hpp" namespace "sf":
char* arrow_bytes,
int64_t arrow_bytes_size,
PyObject* use_numpy,
PyObject* check_error_on_every_column,
) except +

cdef cppclass DictCArrowChunkIterator(CArrowChunkIterator):
Expand Down Expand Up @@ -100,6 +101,7 @@ cdef class PyArrowIterator(EmptyPyArrowIterator):
# still be converted into native python types.
# https://docs.snowflake.com/en/user-guide/sqlalchemy.html#numpy-data-type-support
cdef object use_numpy
cdef object check_error_on_every_column
cdef object number_to_decimal
cdef object pyarrow_table

Expand All @@ -111,12 +113,14 @@ cdef class PyArrowIterator(EmptyPyArrowIterator):
object use_dict_result,
object numpy,
object number_to_decimal,
object check_error_on_every_column
):
self.context = arrow_context
self.cIterator = NULL
self.use_dict_result = use_dict_result
self.cursor = cursor
self.use_numpy = numpy
self.check_error_on_every_column = check_error_on_every_column
self.number_to_decimal = number_to_decimal
self.pyarrow_table = None
self.table_returned = False
Expand All @@ -139,8 +143,9 @@ cdef class PyArrowRowIterator(PyArrowIterator):
object use_dict_result,
object numpy,
object number_to_decimal,
object check_error_on_every_column,
):
super().__init__(cursor, py_inputstream, arrow_context, use_dict_result, numpy, number_to_decimal)
super().__init__(cursor, py_inputstream, arrow_context, use_dict_result, numpy, number_to_decimal, check_error_on_every_column)
if self.cIterator is not NULL:
return

Expand All @@ -155,7 +160,8 @@ cdef class PyArrowRowIterator(PyArrowIterator):
<PyObject *> self.context,
self.arrow_bytes,
self.arrow_bytes_size,
<PyObject *> self.use_numpy
<PyObject *> self.use_numpy,
<PyObject *> self.check_error_on_every_column
)
cdef ReturnVal cret = self.cIterator.checkInitializationStatus()
if cret.exception:
Expand Down Expand Up @@ -200,8 +206,9 @@ cdef class PyArrowTableIterator(PyArrowIterator):
object use_dict_result,
object numpy,
object number_to_decimal,
object check_error_on_every_column
):
super().__init__(cursor, py_inputstream, arrow_context, use_dict_result, numpy, number_to_decimal)
super().__init__(cursor, py_inputstream, arrow_context, use_dict_result, numpy, number_to_decimal, check_error_on_every_column)
if not INSTALLED_PYARROW:
raise Error.errorhandler_make_exception(
ProgrammingError,
Expand Down
16 changes: 14 additions & 2 deletions src/snowflake/connector/result_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ def _create_nanoarrow_iterator(
numpy: bool,
number_to_decimal: bool,
row_unit: IterUnit,
check_error_on_every_column: bool = True,
):
from .nanoarrow_arrow_iterator import PyArrowRowIterator, PyArrowTableIterator

Expand All @@ -74,6 +75,7 @@ def _create_nanoarrow_iterator(
use_dict_result,
numpy,
number_to_decimal,
check_error_on_every_column,
)
if row_unit == IterUnit.ROW_UNIT
else PyArrowTableIterator(
Expand All @@ -83,6 +85,7 @@ def _create_nanoarrow_iterator(
use_dict_result,
numpy,
number_to_decimal,
check_error_on_every_column,
)
)

Expand Down Expand Up @@ -614,7 +617,7 @@ def _load(
)

def _from_data(
self, data: str, iter_unit: IterUnit
self, data: str, iter_unit: IterUnit, check_error_on_every_column: bool = True
) -> Iterator[dict | Exception] | Iterator[tuple | Exception]:
"""Creates a ``PyArrowIterator`` files from a str.

Expand All @@ -631,6 +634,7 @@ def _from_data(
self._numpy,
self._number_to_decimal,
iter_unit,
check_error_on_every_column,
)

@classmethod
Expand Down Expand Up @@ -665,7 +669,15 @@ def _create_iter(
"""Create an iterator for the ResultBatch. Used by get_arrow_iter."""
if self._local:
try:
return self._from_data(self._data, iter_unit)
return self._from_data(
self._data,
iter_unit,
(
connection.check_arrow_conversion_error_on_every_column
if connection
else None
),
)
except Exception:
if connection and getattr(connection, "_debug_arrow_chunk", False):
logger.debug(f"arrow data can not be parsed: {self._data}")
Expand Down
2 changes: 2 additions & 0 deletions test/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ def create_nanoarrow_pyarrow_iterator(input_data, use_table_iterator):
False,
False,
False,
True,
)
if not use_table_iterator
else NanoarrowPyArrowTableIterator(
Expand All @@ -135,6 +136,7 @@ def create_nanoarrow_pyarrow_iterator(input_data, use_table_iterator):
False,
False,
False,
False,
)
)

Expand Down
4 changes: 3 additions & 1 deletion test/integ/pandas/test_unit_arrow_chunk_iterator.py
Original file line number Diff line number Diff line change
Expand Up @@ -430,7 +430,9 @@ def iterate_over_test_chunk(
stream.seek(0)
context = ArrowConverterContext()

it = NanoarrowPyArrowRowIterator(None, stream.read(), context, False, False, False)
it = NanoarrowPyArrowRowIterator(
None, stream.read(), context, False, False, False, True
)

count = 0
while True:
Expand Down
17 changes: 17 additions & 0 deletions test/integ/test_cursor.py
Original file line number Diff line number Diff line change
Expand Up @@ -1790,6 +1790,23 @@ def test_out_of_range_year(conn_cnx, result_format, cursor_type, fetch_method):
fetch_next_fn()


@pytest.mark.skipolddriver
@pytest.mark.parametrize("result_format", ("json", "arrow"))
def test_out_of_range_year_followed_by_correct_year(conn_cnx, result_format):
    """An out-of-range date error must surface even when a valid date follows it.

    Regression test for SNOW-1989239: the arrow converter used to swallow a
    "year out of range" conversion error silently whenever the failing value
    was not in the last column of the row, so the subsequent valid TO_DATE
    masked the failure. skipolddriver: old drivers lack the fail-fast check.
    """
    with conn_cnx(
        session_parameters={
            PARAMETER_PYTHON_CONNECTOR_QUERY_RESULT_FORMAT: result_format
        }
    ) as con:
        with con.cursor() as cur:
            # Invalid date (year 10000) deliberately placed BEFORE a valid one.
            cur.execute("select TO_DATE('10000-01-01'), TO_DATE('9999-01-01')")
            with pytest.raises(
                InterfaceError,
                match="out of range",
            ):
                cur.fetchall()


@pytest.mark.skipolddriver
def test_describe(conn_cnx):
with conn_cnx() as con:
Expand Down
Loading