3131#define ARCHITECTURE " win64" // Default to win64 if not defined during compilation
3232#endif
3333#define DAE_CHUNK_SIZE 8192
34+ #define SQL_MAX_LOB_SIZE 8000
3435// -------------------------------------------------------------------------------------------------
3536// Class definitions
3637// -------------------------------------------------------------------------------------------------
@@ -1747,8 +1748,13 @@ static py::object FetchLobColumnData(SQLHSTMT hStmt,
17471748 &actualRead);
17481749
17491750 if (ret == SQL_ERROR || !SQL_SUCCEEDED (ret) && ret != SQL_SUCCESS_WITH_INFO) {
1750- LOG (" Loop {}: Error fetching column {} with cType={}" , loopCount, colIndex, cType);
1751- ThrowStdException (" Error fetching column data" );
1751+ std::ostringstream oss;
1752+ oss << " Error fetching LOB for column " << colIndex
1753+ << " , cType=" << cType
1754+ << " , loop=" << loopCount
1755+ << " , SQLGetData return=" << ret;
1756+ LOG (oss.str ());
1757+ ThrowStdException (oss.str ());
17521758 }
17531759 if (actualRead == SQL_NULL_DATA) {
17541760 LOG (" Loop {}: Column {} is NULL" , loopCount, colIndex);
@@ -1862,7 +1868,7 @@ SQLRETURN SQLGetData_wrap(SqlHandlePtr StatementHandle, SQLUSMALLINT colCount, p
18621868 case SQL_CHAR:
18631869 case SQL_VARCHAR:
18641870 case SQL_LONGVARCHAR: {
1865- if (columnSize == SQL_NO_TOTAL || columnSize == 0 || columnSize > 8000 ) {
1871+ if (columnSize == SQL_NO_TOTAL || columnSize == 0 || columnSize > SQL_MAX_LOB_SIZE ) {
18661872 LOG (" Streaming LOB for column {}" , i);
18671873 row.append (FetchLobColumnData (hStmt, i, SQL_C_CHAR, false , false ));
18681874 } else {
@@ -1884,6 +1890,10 @@ SQLRETURN SQLGetData_wrap(SqlHandlePtr StatementHandle, SQLUSMALLINT colCount, p
18841890 #else
18851891 row.append (std::string (reinterpret_cast <char *>(dataBuffer.data ())));
18861892 #endif
1893+ } else {
1894+ // Buffer too small, fallback to streaming
1895+ LOG (" CHAR column {} data truncated, using streaming LOB" , i);
1896+ row.append (FetchLobColumnData (hStmt, i, SQL_C_CHAR, false , false ));
18871897 }
18881898 } else if (dataLen == SQL_NULL_DATA) {
18891899 LOG (" Column {} is NULL (CHAR)" , i);
@@ -1911,62 +1921,53 @@ SQLRETURN SQLGetData_wrap(SqlHandlePtr StatementHandle, SQLUSMALLINT colCount, p
19111921 }
19121922 case SQL_WCHAR:
19131923 case SQL_WVARCHAR:
1914- case SQL_WLONGVARCHAR: {
1915- // TODO: revisit
1916- HandleZeroColumnSizeAtFetch (columnSize);
1917- uint64_t fetchBufferSize = columnSize + 1 /* null-termination */ ;
1918- std::vector<SQLWCHAR> dataBuffer (fetchBufferSize);
1919- SQLLEN dataLen;
1920- ret = SQLGetData_ptr (hStmt, i, SQL_C_WCHAR, dataBuffer.data (),
1921- dataBuffer.size () * sizeof (SQLWCHAR), &dataLen);
1922-
1923- if (SQL_SUCCEEDED (ret)) {
1924- // TODO: Refactor these if's across other switches to avoid code duplication
1925- if (dataLen > 0 ) {
1926- uint64_t numCharsInData = dataLen / sizeof (SQLWCHAR);
1927- if (numCharsInData < dataBuffer.size ()) {
1928- // SQLGetData will null-terminate the data
1924+ case SQL_WLONGVARCHAR: {
1925+ if (columnSize == SQL_NO_TOTAL || columnSize == 0 || columnSize > 4000 ) {
1926+ LOG (" Streaming LOB for column {} (NVARCHAR)" , i);
1927+ row.append (FetchLobColumnData (hStmt, i, SQL_C_WCHAR, true , false ));
1928+ } else {
1929+ uint64_t fetchBufferSize = (columnSize + 1 ) * sizeof (SQLWCHAR); // +1 for null terminator
1930+ std::vector<SQLWCHAR> dataBuffer (columnSize + 1 );
1931+ SQLLEN dataLen;
1932+ ret = SQLGetData_ptr (hStmt, i, SQL_C_WCHAR, dataBuffer.data (), fetchBufferSize, &dataLen);
1933+ if (SQL_SUCCEEDED (ret)) {
1934+ if (dataLen > 0 ) {
1935+ uint64_t numCharsInData = dataLen / sizeof (SQLWCHAR);
1936+ if (numCharsInData < dataBuffer.size ()) {
19291937#if defined(__APPLE__) || defined(__linux__)
1930- auto raw_bytes = reinterpret_cast <const char *>(dataBuffer.data ());
1931- size_t actualBufferSize = dataBuffer.size () * sizeof (SQLWCHAR);
1932- if (dataLen < 0 || static_cast <size_t >(dataLen) > actualBufferSize) {
1933- LOG (" Error: py::bytes creation request exceeds buffer size. dataLen={} buffer={}" ,
1934- dataLen, actualBufferSize);
1935- ThrowStdException (" Invalid buffer length for py::bytes" );
1936- }
1937- py::bytes py_bytes (raw_bytes, dataLen);
1938- py::str decoded = py_bytes.attr (" decode" )(" utf-16-le" );
1939- row.append (decoded);
1938+ const SQLWCHAR* sqlwBuf = reinterpret_cast <const SQLWCHAR*>(dataBuffer.data ());
1939+ std::wstring wstr = SQLWCHARToWString (sqlwBuf, numCharsInData);
1940+ std::string utf8str = WideToUTF8 (wstr);
1941+ row.append (py::str (utf8str));
19401942#else
1941- row.append (std::wstring (dataBuffer.data ()));
1943+ std::wstring wstr (reinterpret_cast <wchar_t *>(dataBuffer.data ()));
1944+ row.append (py::cast (wstr));
19421945#endif
1943- } else {
1944- // In this case, buffer size is smaller, and data to be retrieved is longer
1945- // TODO: Revisit
1946- std::ostringstream oss;
1947- oss << " Buffer length for fetch (" << dataBuffer.size ()-1 << " ) is smaller, & data "
1948- << " to be retrieved is longer (" << numCharsInData << " ). ColumnID - "
1949- << i << " , datatype - " << dataType;
1950- ThrowStdException (oss.str ());
1946+ LOG (" Appended NVARCHAR string of length {} to result row" , numCharsInData);
1947+ } else {
1948+ // Buffer too small, fallback to streaming
1949+ LOG (" NVARCHAR column {} data truncated, using streaming LOB" , i);
1950+ row.append (FetchLobColumnData (hStmt, i, SQL_C_WCHAR, true , false ));
1951+ }
1952+ } else if (dataLen == SQL_NULL_DATA) {
1953+ LOG (" Column {} is NULL (CHAR)" , i);
1954+ row.append (py::none ());
1955+ } else if (dataLen == 0 ) {
1956+ row.append (py::str (" " ));
1957+ } else if (dataLen == SQL_NO_TOTAL) {
1958+ LOG (" SQLGetData couldn't determine the length of the NVARCHAR data. Returning NULL. Column ID - {}" , i);
1959+ row.append (py::none ());
1960+ } else if (dataLen < 0 ) {
1961+ LOG (" SQLGetData returned an unexpected negative data length. "
1962+ " Raising exception. Column ID - {}, Data Type - {}, Data Length - {}" ,
1963+ i, dataType, dataLen);
1964+ ThrowStdException (" SQLGetData returned an unexpected negative data length" );
19511965 }
1952- } else if (dataLen == SQL_NULL_DATA) {
1953- row.append (py::none ());
1954- } else if (dataLen == 0 ) {
1955- // Handle zero-length (non-NULL) data
1956- row.append (py::str (" " ));
1957- } else if (dataLen < 0 ) {
1958- // This is unexpected
1959- LOG (" SQLGetData returned an unexpected negative data length. "
1960- " Raising exception. Column ID - {}, Data Type - {}, Data Length - {}" ,
1961- i, dataType, dataLen);
1962- ThrowStdException (" SQLGetData returned an unexpected negative data length" );
1966+ } else {
1967+ LOG (" Error retrieving data for column {} (NVARCHAR), SQLGetData return code {}" , i, ret);
1968+ row.append (py::none ());
19631969 }
1964- } else {
1965- LOG (" Error retrieving data for column - {}, data type - {}, SQLGetData return "
1966- " code - {}. Returning NULL value instead" ,
1967- i, dataType, ret);
1968- row.append (py::none ());
1969- }
1970+ }
19701971 break ;
19711972 }
19721973 case SQL_INTEGER: {
@@ -2411,7 +2412,7 @@ SQLRETURN SQLBindColums(SQLHSTMT hStmt, ColumnBuffers& buffers, py::list& column
24112412// Fetch rows in batches
24122413// TODO: Move to anonymous namespace, since it is not used outside this file
24132414SQLRETURN FetchBatchData (SQLHSTMT hStmt, ColumnBuffers& buffers, py::list& columnNames,
2414- py::list& rows, SQLUSMALLINT numCols, SQLULEN& numRowsFetched) {
2415+ py::list& rows, SQLUSMALLINT numCols, SQLULEN& numRowsFetched, const std::vector<SQLUSMALLINT>& lobColumns ) {
24152416 LOG (" Fetching data in batches" );
24162417 SQLRETURN ret = SQLFetchScroll_ptr (hStmt, SQL_FETCH_NEXT, 0 );
24172418 if (ret == SQL_NO_DATA) {
@@ -2471,25 +2472,19 @@ SQLRETURN FetchBatchData(SQLHSTMT hStmt, ColumnBuffers& buffers, py::list& colum
24712472 case SQL_CHAR:
24722473 case SQL_VARCHAR:
24732474 case SQL_LONGVARCHAR: {
2474- // TODO: variable length data needs special handling, this logic wont suffice
24752475 SQLULEN columnSize = columnMeta[" ColumnSize" ].cast <SQLULEN>();
24762476 HandleZeroColumnSizeAtFetch (columnSize);
24772477 uint64_t fetchBufferSize = columnSize + 1 /* null-terminator*/ ;
24782478 uint64_t numCharsInData = dataLen / sizeof (SQLCHAR);
2479+ bool isLob = std::find (lobColumns.begin (), lobColumns.end (), col) != lobColumns.end ();
24792480 // fetchBufferSize includes null-terminator, numCharsInData doesn't. Hence '<'
2480- if (numCharsInData < fetchBufferSize) {
2481+ if (!isLob && numCharsInData < fetchBufferSize) {
24812482 // SQLFetch will nullterminate the data
24822483 row.append (std::string (
24832484 reinterpret_cast <char *>(&buffers.charBuffers [col - 1 ][i * fetchBufferSize]),
24842485 numCharsInData));
24852486 } else {
2486- // In this case, buffer size is smaller, and data to be retrieved is longer
2487- // TODO: Revisit
2488- std::ostringstream oss;
2489- oss << " Buffer length for fetch (" << columnSize << " ) is smaller, & data "
2490- << " to be retrieved is longer (" << numCharsInData << " ). ColumnID - "
2491- << col << " , datatype - " << dataType;
2492- ThrowStdException (oss.str ());
2487+ row.append (FetchLobColumnData (hStmt, col, SQL_C_CHAR, false , false ));
24932488 }
24942489 break ;
24952490 }
@@ -2501,8 +2496,9 @@ SQLRETURN FetchBatchData(SQLHSTMT hStmt, ColumnBuffers& buffers, py::list& colum
25012496 HandleZeroColumnSizeAtFetch (columnSize);
25022497 uint64_t fetchBufferSize = columnSize + 1 /* null-terminator*/ ;
25032498 uint64_t numCharsInData = dataLen / sizeof (SQLWCHAR);
2499+ bool isLob = std::find (lobColumns.begin (), lobColumns.end (), col) != lobColumns.end ();
25042500 // fetchBufferSize includes null-terminator, numCharsInData doesn't. Hence '<'
2505- if (numCharsInData < fetchBufferSize) {
2501+ if (!isLob && numCharsInData < fetchBufferSize) {
25062502 // SQLFetch will nullterminate the data
25072503#if defined(__APPLE__) || defined(__linux__)
25082504 // Use unix-specific conversion to handle the wchar_t/SQLWCHAR size difference
@@ -2516,13 +2512,7 @@ SQLRETURN FetchBatchData(SQLHSTMT hStmt, ColumnBuffers& buffers, py::list& colum
25162512 numCharsInData));
25172513#endif
25182514 } else {
2519- // In this case, buffer size is smaller, and data to be retrieved is longer
2520- // TODO: Revisit
2521- std::ostringstream oss;
2522- oss << " Buffer length for fetch (" << columnSize << " ) is smaller, & data "
2523- << " to be retrieved is longer (" << numCharsInData << " ). ColumnID - "
2524- << col << " , datatype - " << dataType;
2525- ThrowStdException (oss.str ());
2515+ row.append (FetchLobColumnData (hStmt, col, SQL_C_WCHAR, true , false ));
25262516 }
25272517 break ;
25282518 }
@@ -2608,21 +2598,15 @@ SQLRETURN FetchBatchData(SQLHSTMT hStmt, ColumnBuffers& buffers, py::list& colum
26082598 case SQL_BINARY:
26092599 case SQL_VARBINARY:
26102600 case SQL_LONGVARBINARY: {
2611- // TODO: variable length data needs special handling, this logic wont suffice
26122601 SQLULEN columnSize = columnMeta[" ColumnSize" ].cast <SQLULEN>();
26132602 HandleZeroColumnSizeAtFetch (columnSize);
2614- if (static_cast <size_t >(dataLen) <= columnSize) {
2603+ bool isLob = std::find (lobColumns.begin (), lobColumns.end (), col) != lobColumns.end ();
2604+ if (!isLob && static_cast <size_t >(dataLen) <= columnSize) {
26152605 row.append (py::bytes (reinterpret_cast <const char *>(
26162606 &buffers.charBuffers [col - 1 ][i * columnSize]),
26172607 dataLen));
26182608 } else {
2619- // In this case, buffer size is smaller, and data to be retrieved is longer
2620- // TODO: Revisit
2621- std::ostringstream oss;
2622- oss << " Buffer length for fetch (" << columnSize << " ) is smaller, & data "
2623- << " to be retrieved is longer (" << dataLen << " ). ColumnID - "
2624- << col << " , datatype - " << dataType;
2625- ThrowStdException (oss.str ());
2609+ row.append (FetchLobColumnData (hStmt, col, SQL_C_BINARY, false , true ));
26262610 }
26272611 break ;
26282612 }
@@ -2751,6 +2735,35 @@ SQLRETURN FetchMany_wrap(SqlHandlePtr StatementHandle, py::list& rows, int fetch
27512735 return ret;
27522736 }
27532737
2738+ std::vector<SQLUSMALLINT> lobColumns;
2739+ for (SQLSMALLINT i = 0 ; i < numCols; i++) {
2740+ auto colMeta = columnNames[i].cast <py::dict>();
2741+ SQLSMALLINT dataType = colMeta[" DataType" ].cast <SQLSMALLINT>();
2742+ SQLULEN columnSize = colMeta[" ColumnSize" ].cast <SQLULEN>();
2743+
2744+ if ((dataType == SQL_WVARCHAR || dataType == SQL_WLONGVARCHAR ||
2745+ dataType == SQL_VARCHAR || dataType == SQL_LONGVARCHAR ||
2746+ dataType == SQL_VARBINARY || dataType == SQL_LONGVARBINARY) &&
2747+ (columnSize == 0 || columnSize == SQL_NO_TOTAL || columnSize > SQL_MAX_LOB_SIZE)) {
2748+ lobColumns.push_back (i + 1 ); // 1-based
2749+ }
2750+ }
2751+
2752+ // If we have LOBs → fall back to row-by-row fetch + SQLGetData_wrap
2753+ if (!lobColumns.empty ()) {
2754+ LOG (" LOB columns detected → using per-row SQLGetData path" );
2755+ while (true ) {
2756+ ret = SQLFetch_ptr (hStmt);
2757+ if (ret == SQL_NO_DATA) break ;
2758+ if (!SQL_SUCCEEDED (ret)) return ret;
2759+
2760+ py::list row;
2761+ SQLGetData_wrap (StatementHandle, numCols, row); // <-- streams LOBs correctly
2762+ rows.append (row);
2763+ }
2764+ return SQL_SUCCESS;
2765+ }
2766+
27542767 // Initialize column buffers
27552768 ColumnBuffers buffers (numCols, fetchSize);
27562769
@@ -2765,7 +2778,7 @@ SQLRETURN FetchMany_wrap(SqlHandlePtr StatementHandle, py::list& rows, int fetch
27652778 SQLSetStmtAttr_ptr (hStmt, SQL_ATTR_ROW_ARRAY_SIZE, (SQLPOINTER)(intptr_t )fetchSize, 0 );
27662779 SQLSetStmtAttr_ptr (hStmt, SQL_ATTR_ROWS_FETCHED_PTR, &numRowsFetched, 0 );
27672780
2768- ret = FetchBatchData (hStmt, buffers, columnNames, rows, numCols, numRowsFetched);
2781+ ret = FetchBatchData (hStmt, buffers, columnNames, rows, numCols, numRowsFetched, lobColumns );
27692782 if (!SQL_SUCCEEDED (ret) && ret != SQL_NO_DATA) {
27702783 LOG (" Error when fetching data" );
27712784 return ret;
@@ -2844,6 +2857,35 @@ SQLRETURN FetchAll_wrap(SqlHandlePtr StatementHandle, py::list& rows) {
28442857 }
28452858 LOG (" Fetching data in batch sizes of {}" , fetchSize);
28462859
2860+ std::vector<SQLUSMALLINT> lobColumns;
2861+ for (SQLSMALLINT i = 0 ; i < numCols; i++) {
2862+ auto colMeta = columnNames[i].cast <py::dict>();
2863+ SQLSMALLINT dataType = colMeta[" DataType" ].cast <SQLSMALLINT>();
2864+ SQLULEN columnSize = colMeta[" ColumnSize" ].cast <SQLULEN>();
2865+
2866+ if ((dataType == SQL_WVARCHAR || dataType == SQL_WLONGVARCHAR ||
2867+ dataType == SQL_VARCHAR || dataType == SQL_LONGVARCHAR ||
2868+ dataType == SQL_VARBINARY || dataType == SQL_LONGVARBINARY) &&
2869+ (columnSize == 0 || columnSize == SQL_NO_TOTAL || columnSize > SQL_MAX_LOB_SIZE)) {
2870+ lobColumns.push_back (i + 1 ); // 1-based
2871+ }
2872+ }
2873+
2874+ // If we have LOBs → fall back to row-by-row fetch + SQLGetData_wrap
2875+ if (!lobColumns.empty ()) {
2876+ LOG (" LOB columns detected → using per-row SQLGetData path" );
2877+ while (true ) {
2878+ ret = SQLFetch_ptr (hStmt);
2879+ if (ret == SQL_NO_DATA) break ;
2880+ if (!SQL_SUCCEEDED (ret)) return ret;
2881+
2882+ py::list row;
2883+ SQLGetData_wrap (StatementHandle, numCols, row); // <-- streams LOBs correctly
2884+ rows.append (row);
2885+ }
2886+ return SQL_SUCCESS;
2887+ }
2888+
28472889 ColumnBuffers buffers (numCols, fetchSize);
28482890
28492891 // Bind columns
@@ -2858,7 +2900,7 @@ SQLRETURN FetchAll_wrap(SqlHandlePtr StatementHandle, py::list& rows) {
28582900 SQLSetStmtAttr_ptr (hStmt, SQL_ATTR_ROWS_FETCHED_PTR, &numRowsFetched, 0 );
28592901
28602902 while (ret != SQL_NO_DATA) {
2861- ret = FetchBatchData (hStmt, buffers, columnNames, rows, numCols, numRowsFetched);
2903+ ret = FetchBatchData (hStmt, buffers, columnNames, rows, numCols, numRowsFetched, lobColumns );
28622904 if (!SQL_SUCCEEDED (ret) && ret != SQL_NO_DATA) {
28632905 LOG (" Error when fetching data" );
28642906 return ret;
0 commit comments