@@ -2956,13 +2956,22 @@ SQLRETURN SQLGetData_wrap(SqlHandlePtr StatementHandle, SQLUSMALLINT colCount, p
29562956 row.append (
29572957 FetchLobColumnData (hStmt, i, SQL_C_CHAR, false , false , charEncoding));
29582958 } else {
2959- // Use columnSize * 4 + 1 to accommodate worst-case UTF-8 expansion.
2960- // columnSize is in characters, but the ODBC driver may return UTF-8
2961- // encoded bytes where each character can be up to 4 bytes. This
2962- // applies on Linux/macOS (driver always returns UTF-8 for SQL_C_CHAR)
2963- // and on Windows when the database uses a UTF-8 collation. Without
2964- // this, data at exact column boundary with multi-byte chars (e.g.,
2965- // CP1252 é in VARCHAR(10)) causes truncation and corruption.
2959+ // Allocate columnSize * 4 + 1 on ALL platforms (no #if guard).
2960+ //
2961+ // Why this differs from SQLBindColums / FetchBatchData:
2962+ // Those two functions use #if to apply *4 only on Linux/macOS,
2963+ // because on Windows with a non-UTF-8 collation (e.g. CP1252)
2964+ // each character occupies exactly 1 byte, so *1 suffices and
2965+ // saves memory across the entire batch (fetchSize × numCols
2966+ // buffers).
2967+ //
2968+ // SQLGetData_wrap allocates a single temporary buffer per
2969+ // column per row, so the over-allocation cost is negligible.
2970+ // Using *4 unconditionally here keeps the code simple and
2971+ // correct on every platform—including Windows with a UTF-8
2972+ // collation where multi-byte chars could otherwise cause
2973+ // truncation at the exact column boundary (e.g. é is 1 byte in
2974+ // CP1252 but 2 bytes in UTF-8, so a full VARCHAR(10) overflows).
29662975 uint64_t fetchBufferSize = columnSize * 4 + 1 /* null-termination */ ;
29672976 std::vector<SQLCHAR> dataBuffer (fetchBufferSize);
29682977 SQLLEN dataLen;
@@ -3697,6 +3706,7 @@ SQLRETURN FetchBatchData(SQLHSTMT hStmt, ColumnBuffers& buffers, py::list& colum
36973706 columnInfosExt[col].fetchBufferSize = columnInfos[col].fetchBufferSize ;
36983707 columnInfosExt[col].isLob = columnInfos[col].isLob ;
36993708 columnInfosExt[col].charEncoding = effectiveCharEnc;
3709+ columnInfosExt[col].isUtf8 = (effectiveCharEnc == " utf-8" );
37003710
37013711 // Map data type to processor function (switch executed once per column,
37023712 // not per cell)
0 commit comments