Skip to content

Commit 78717b2

Browse files
committed
OPTIMIZATION #3: Metadata prefetch caching to eliminate repeated lookups
- Added PERF_TIMER for metadata prefetch measurement.
- Pre-fetch column metadata (dataType, columnSize, processedColumnSize, fetchBufferSize, isLob) into local variables before the column loop.
- Eliminates repeated access to colInfo struct fields within the switch cases.
- Moves metadata extraction from O(rows × columns) to O(columns) per batch.
- Updated the SQL_CHAR, SQL_WCHAR, and SQL_BINARY cases to use the cached values.
- This was the biggest bottleneck: dictionary lookups in the hot path.
1 parent d72de69 commit 78717b2

File tree

1 file changed

+8
-13
lines changed

1 file changed

+8
-13
lines changed

mssql_python/pybind/ddbc_bindings.cpp

Lines changed: 8 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -3264,8 +3264,14 @@ SQLRETURN FetchBatchData(SQLHSTMT hStmt, ColumnBuffers& buffers, py::list& colum
32643264
{
32653265
PERF_TIMER("construct_rows::all_columns_processing");
32663266
for (SQLUSMALLINT col = 1; col <= numCols; col++) {
3267+
PERF_TIMER("construct_rows::prefetch_metadata");
32673268
const ColumnInfo& colInfo = columnInfos[col - 1];
32683269
SQLSMALLINT dataType = colInfo.dataType;
3270+
SQLULEN columnSize = colInfo.columnSize;
3271+
SQLULEN processedColumnSize = colInfo.processedColumnSize;
3272+
uint64_t fetchBufferSize = colInfo.fetchBufferSize;
3273+
bool isLob = colInfo.isLob;
3274+
32693275
SQLLEN dataLen = buffers.indicators[col - 1][i];
32703276
if (dataLen == SQL_NULL_DATA) {
32713277
row[col - 1] = py::none();
@@ -3301,11 +3307,7 @@ SQLRETURN FetchBatchData(SQLHSTMT hStmt, ColumnBuffers& buffers, py::list& colum
33013307
case SQL_CHAR:
33023308
case SQL_VARCHAR:
33033309
case SQL_LONGVARCHAR: {
3304-
SQLULEN columnSize = colInfo.columnSize;
3305-
HandleZeroColumnSizeAtFetch(columnSize);
3306-
uint64_t fetchBufferSize = columnSize + 1 /*null-terminator*/;
33073310
uint64_t numCharsInData = dataLen / sizeof(SQLCHAR);
3308-
bool isLob = colInfo.isLob;
33093311
// fetchBufferSize includes null-terminator, numCharsInData doesn't. Hence '<'
33103312
if (!isLob && numCharsInData < fetchBufferSize) {
33113313
row[col - 1] = py::str(
@@ -3321,11 +3323,7 @@ SQLRETURN FetchBatchData(SQLHSTMT hStmt, ColumnBuffers& buffers, py::list& colum
33213323
case SQL_WLONGVARCHAR: {
33223324
PERF_TIMER("construct_rows::wstring_conversion");
33233325
// TODO: variable length data needs special handling, this logic wont suffice
3324-
SQLULEN columnSize = colInfo.columnSize;
3325-
HandleZeroColumnSizeAtFetch(columnSize);
3326-
uint64_t fetchBufferSize = columnSize + 1 /*null-terminator*/;
33273326
uint64_t numCharsInData = dataLen / sizeof(SQLWCHAR);
3328-
bool isLob = colInfo.isLob;
33293327
// fetchBufferSize includes null-terminator, numCharsInData doesn't. Hence '<'
33303328
if (!isLob && numCharsInData < fetchBufferSize) {
33313329
#if defined(__APPLE__) || defined(__linux__)
@@ -3525,12 +3523,9 @@ SQLRETURN FetchBatchData(SQLHSTMT hStmt, ColumnBuffers& buffers, py::list& colum
35253523
case SQL_BINARY:
35263524
case SQL_VARBINARY:
35273525
case SQL_LONGVARBINARY: {
3528-
SQLULEN columnSize = colInfo.columnSize;
3529-
HandleZeroColumnSizeAtFetch(columnSize);
3530-
bool isLob = colInfo.isLob;
3531-
if (!isLob && static_cast<size_t>(dataLen) <= columnSize) {
3526+
if (!isLob && static_cast<size_t>(dataLen) <= processedColumnSize) {
35323527
row[col - 1] = py::bytes(reinterpret_cast<const char*>(
3533-
&buffers.charBuffers[col - 1][i * columnSize]),
3528+
&buffers.charBuffers[col - 1][i * processedColumnSize]),
35343529
dataLen);
35353530
} else {
35363531
row[col - 1] = FetchLobColumnData(hStmt, col, SQL_C_BINARY, false, true);

0 commit comments

Comments
 (0)