Skip to content

Commit 262fb3c

Browse files
committed
OPTIMIZATION #4: Batch row allocation with direct Python C API
- Replace placeholder allocation (appending py::none()) with direct PyList_New + PyList_Append calls
- Pre-allocate all row lists before populating them
- Use PyList_GET_ITEM to retrieve the pre-allocated rows (no bounds checking)
- Wrap the raw PyObject* in a py::list for pybind11 compatibility
- Eliminate the redundant row assignment at the end of the loop (the row is already in the list)
- Rename the PERF_TIMER label from allocate_placeholder_rows to rows_append
- Reduce memory allocator calls from roughly 2×rows to roughly rows (a constant-factor saving; the asymptotic complexity remains O(rows))
1 parent 78717b2 commit 262fb3c

File tree

1 file changed

+48
-42
lines changed

1 file changed

+48
-42
lines changed

mssql_python/pybind/ddbc_bindings.cpp

Lines changed: 48 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -3246,10 +3246,15 @@ SQLRETURN FetchBatchData(SQLHSTMT hStmt, ColumnBuffers& buffers, py::list& colum
32463246
std::string decimalSeparator = GetDecimalSeparator(); // Cache decimal separator
32473247

32483248
size_t initialSize = rows.size();
3249+
PyObject* rowsList = rows.ptr(); // Get raw Python list pointer
3250+
32493251
{
3250-
PERF_TIMER("FetchBatchData::allocate_placeholder_rows");
3252+
PERF_TIMER("FetchBatchData::rows_append");
3253+
// Pre-allocate all rows at once using direct Python C API
32513254
for (SQLULEN i = 0; i < numRowsFetched; i++) {
3252-
rows.append(py::none());
3255+
PyObject* row = PyList_New(numCols); // Allocate list with numCols elements
3256+
PyList_Append(rowsList, row);
3257+
Py_DECREF(row); // PyList_Append increments refcount
32533258
}
32543259
}
32553260

@@ -3258,8 +3263,9 @@ SQLRETURN FetchBatchData(SQLHSTMT hStmt, ColumnBuffers& buffers, py::list& colum
32583263
for (SQLULEN i = 0; i < numRowsFetched; i++) {
32593264
PERF_TIMER("construct_rows::per_row_total");
32603265

3261-
// Create row container pre-allocated with known column count
3262-
py::list row(numCols);
3266+
// Get pre-allocated row from list
3267+
PyObject* row = PyList_GET_ITEM(rowsList, initialSize + i);
3268+
py::list rowWrapper = py::reinterpret_borrow<py::list>(row);
32633269

32643270
{
32653271
PERF_TIMER("construct_rows::all_columns_processing");
@@ -3274,26 +3280,26 @@ SQLRETURN FetchBatchData(SQLHSTMT hStmt, ColumnBuffers& buffers, py::list& colum
32743280

32753281
SQLLEN dataLen = buffers.indicators[col - 1][i];
32763282
if (dataLen == SQL_NULL_DATA) {
3277-
row[col - 1] = py::none();
3283+
rowWrapper[col - 1] = py::none();
32783284
continue;
32793285
}
32803286
if (dataLen == SQL_NO_TOTAL) {
32813287
LOG("Cannot determine the length of the data. Returning NULL value instead."
32823288
"Column ID - {}", col);
3283-
row[col - 1] = py::none();
3289+
rowWrapper[col - 1] = py::none();
32843290
continue;
32853291
} else if (dataLen == 0) {
32863292
// Handle zero-length (non-NULL) data
32873293
if (dataType == SQL_CHAR || dataType == SQL_VARCHAR || dataType == SQL_LONGVARCHAR) {
3288-
row[col - 1] = std::string("");
3294+
rowWrapper[col - 1] = std::string("");
32893295
} else if (dataType == SQL_WCHAR || dataType == SQL_WVARCHAR || dataType == SQL_WLONGVARCHAR) {
3290-
row[col - 1] = std::wstring(L"");
3296+
rowWrapper[col - 1] = std::wstring(L"");
32913297
} else if (dataType == SQL_BINARY || dataType == SQL_VARBINARY || dataType == SQL_LONGVARBINARY) {
3292-
row[col - 1] = py::bytes("");
3298+
rowWrapper[col - 1] = py::bytes("");
32933299
} else {
32943300
// For other datatypes, 0 length is unexpected. Log & set None
32953301
LOG("Column data length is 0 for non-string/binary datatype. Setting None to the result row. Column ID - {}", col);
3296-
row[col - 1] = py::none();
3302+
rowWrapper[col - 1] = py::none();
32973303
}
32983304
continue;
32993305
} else if (dataLen < 0) {
@@ -3310,11 +3316,11 @@ SQLRETURN FetchBatchData(SQLHSTMT hStmt, ColumnBuffers& buffers, py::list& colum
33103316
uint64_t numCharsInData = dataLen / sizeof(SQLCHAR);
33113317
// fetchBufferSize includes null-terminator, numCharsInData doesn't. Hence '<'
33123318
if (!isLob && numCharsInData < fetchBufferSize) {
3313-
row[col - 1] = py::str(
3319+
rowWrapper[col - 1] = py::str(
33143320
reinterpret_cast<char*>(&buffers.charBuffers[col - 1][i * fetchBufferSize]),
33153321
numCharsInData);
33163322
} else {
3317-
row[col - 1] = FetchLobColumnData(hStmt, col, SQL_C_CHAR, false, false);
3323+
rowWrapper[col - 1] = FetchLobColumnData(hStmt, col, SQL_C_CHAR, false, false);
33183324
}
33193325
break;
33203326
}
@@ -3336,73 +3342,73 @@ SQLRETURN FetchBatchData(SQLHSTMT hStmt, ColumnBuffers& buffers, py::list& colum
33363342
NULL // byteorder - auto-detect
33373343
);
33383344
if (pyStr) {
3339-
row[col - 1] = py::reinterpret_steal<py::object>(pyStr);
3345+
rowWrapper[col - 1] = py::reinterpret_steal<py::object>(pyStr);
33403346
} else {
33413347
PyErr_Clear();
3342-
row[col - 1] = std::wstring(L"");
3348+
rowWrapper[col - 1] = std::wstring(L"");
33433349
}
33443350
#else
3345-
row[col - 1] = std::wstring(
3351+
rowWrapper[col - 1] = std::wstring(
33463352
reinterpret_cast<wchar_t*>(&buffers.wcharBuffers[col - 1][i * fetchBufferSize]),
33473353
numCharsInData);
33483354
#endif
33493355
} else {
3350-
row[col - 1] = FetchLobColumnData(hStmt, col, SQL_C_WCHAR, true, false);
3356+
rowWrapper[col - 1] = FetchLobColumnData(hStmt, col, SQL_C_WCHAR, true, false);
33513357
}
33523358
break;
33533359
}
33543360
case SQL_INTEGER: {
33553361
PERF_TIMER("construct_rows::int_c_api_assign");
33563362
if (buffers.indicators[col - 1][i] == SQL_NULL_DATA) {
33573363
Py_INCREF(Py_None);
3358-
PyList_SET_ITEM(row.ptr(), col - 1, Py_None);
3364+
PyList_SET_ITEM(row, col - 1, Py_None);
33593365
} else {
33603366
PyObject* pyInt = PyLong_FromLong(buffers.intBuffers[col - 1][i]);
3361-
PyList_SET_ITEM(row.ptr(), col - 1, pyInt);
3367+
PyList_SET_ITEM(row, col - 1, pyInt);
33623368
}
33633369
break;
33643370
}
33653371
case SQL_SMALLINT: {
33663372
PERF_TIMER("construct_rows::smallint_c_api_assign");
33673373
if (buffers.indicators[col - 1][i] == SQL_NULL_DATA) {
33683374
Py_INCREF(Py_None);
3369-
PyList_SET_ITEM(row.ptr(), col - 1, Py_None);
3375+
PyList_SET_ITEM(row, col - 1, Py_None);
33703376
} else {
33713377
PyObject* pyInt = PyLong_FromLong(buffers.smallIntBuffers[col - 1][i]);
3372-
PyList_SET_ITEM(row.ptr(), col - 1, pyInt);
3378+
PyList_SET_ITEM(row, col - 1, pyInt);
33733379
}
33743380
break;
33753381
}
33763382
case SQL_TINYINT: {
33773383
PERF_TIMER("construct_rows::tinyint_c_api_assign");
33783384
if (buffers.indicators[col - 1][i] == SQL_NULL_DATA) {
33793385
Py_INCREF(Py_None);
3380-
PyList_SET_ITEM(row.ptr(), col - 1, Py_None);
3386+
PyList_SET_ITEM(row, col - 1, Py_None);
33813387
} else {
33823388
PyObject* pyInt = PyLong_FromLong(buffers.charBuffers[col - 1][i]);
3383-
PyList_SET_ITEM(row.ptr(), col - 1, pyInt);
3389+
PyList_SET_ITEM(row, col - 1, pyInt);
33843390
}
33853391
break;
33863392
}
33873393
case SQL_BIT: {
33883394
PERF_TIMER("construct_rows::bit_c_api_assign");
33893395
if (buffers.indicators[col - 1][i] == SQL_NULL_DATA) {
33903396
Py_INCREF(Py_None);
3391-
PyList_SET_ITEM(row.ptr(), col - 1, Py_None);
3397+
PyList_SET_ITEM(row, col - 1, Py_None);
33923398
} else {
33933399
PyObject* pyBool = PyBool_FromLong(buffers.charBuffers[col - 1][i]);
3394-
PyList_SET_ITEM(row.ptr(), col - 1, pyBool);
3400+
PyList_SET_ITEM(row, col - 1, pyBool);
33953401
}
33963402
break;
33973403
}
33983404
case SQL_REAL: {
33993405
PERF_TIMER("construct_rows::real_c_api_assign");
34003406
if (buffers.indicators[col - 1][i] == SQL_NULL_DATA) {
34013407
Py_INCREF(Py_None);
3402-
PyList_SET_ITEM(row.ptr(), col - 1, Py_None);
3408+
PyList_SET_ITEM(row, col - 1, Py_None);
34033409
} else {
34043410
PyObject* pyFloat = PyFloat_FromDouble(buffers.realBuffers[col - 1][i]);
3405-
PyList_SET_ITEM(row.ptr(), col - 1, pyFloat);
3411+
PyList_SET_ITEM(row, col - 1, pyFloat);
34063412
}
34073413
break;
34083414
}
@@ -3415,11 +3421,11 @@ SQLRETURN FetchBatchData(SQLHSTMT hStmt, ColumnBuffers& buffers, py::list& colum
34153421

34163422
// Always use standard decimal point for Python Decimal parsing
34173423
// The decimal separator only affects display formatting, not parsing
3418-
row[col - 1] = PythonObjectCache::get_decimal_class()(py::str(rawData, decimalDataLen));
3424+
rowWrapper[col - 1] = PythonObjectCache::get_decimal_class()(py::str(rawData, decimalDataLen));
34193425
} catch (const py::error_already_set& e) {
34203426
// Handle the exception, e.g., log the error and set py::none()
34213427
LOG("Error converting to decimal: {}", e.what());
3422-
row[col - 1] = py::none();
3428+
rowWrapper[col - 1] = py::none();
34233429
}
34243430
break;
34253431
}
@@ -3428,18 +3434,18 @@ SQLRETURN FetchBatchData(SQLHSTMT hStmt, ColumnBuffers& buffers, py::list& colum
34283434
PERF_TIMER("construct_rows::double_c_api_assign");
34293435
if (buffers.indicators[col - 1][i] == SQL_NULL_DATA) {
34303436
Py_INCREF(Py_None);
3431-
PyList_SET_ITEM(row.ptr(), col - 1, Py_None);
3437+
PyList_SET_ITEM(row, col - 1, Py_None);
34323438
} else {
34333439
PyObject* pyFloat = PyFloat_FromDouble(buffers.doubleBuffers[col - 1][i]);
3434-
PyList_SET_ITEM(row.ptr(), col - 1, pyFloat);
3440+
PyList_SET_ITEM(row, col - 1, pyFloat);
34353441
}
34363442
break;
34373443
}
34383444
case SQL_TIMESTAMP:
34393445
case SQL_TYPE_TIMESTAMP:
34403446
case SQL_DATETIME: {
34413447
const SQL_TIMESTAMP_STRUCT& ts = buffers.timestampBuffers[col - 1][i];
3442-
row[col - 1] = PythonObjectCache::get_datetime_class()(ts.year, ts.month, ts.day,
3448+
rowWrapper[col - 1] = PythonObjectCache::get_datetime_class()(ts.year, ts.month, ts.day,
34433449
ts.hour, ts.minute, ts.second,
34443450
ts.fraction / 1000);
34453451
break;
@@ -3448,23 +3454,23 @@ SQLRETURN FetchBatchData(SQLHSTMT hStmt, ColumnBuffers& buffers, py::list& colum
34483454
PERF_TIMER("construct_rows::bigint_c_api_assign");
34493455
if (buffers.indicators[col - 1][i] == SQL_NULL_DATA) {
34503456
Py_INCREF(Py_None);
3451-
PyList_SET_ITEM(row.ptr(), col - 1, Py_None);
3457+
PyList_SET_ITEM(row, col - 1, Py_None);
34523458
} else {
34533459
PyObject* pyInt = PyLong_FromLongLong(buffers.bigIntBuffers[col - 1][i]);
3454-
PyList_SET_ITEM(row.ptr(), col - 1, pyInt);
3460+
PyList_SET_ITEM(row, col - 1, pyInt);
34553461
}
34563462
break;
34573463
}
34583464
case SQL_TYPE_DATE: {
3459-
row[col - 1] = PythonObjectCache::get_date_class()(buffers.dateBuffers[col - 1][i].year,
3465+
rowWrapper[col - 1] = PythonObjectCache::get_date_class()(buffers.dateBuffers[col - 1][i].year,
34603466
buffers.dateBuffers[col - 1][i].month,
34613467
buffers.dateBuffers[col - 1][i].day);
34623468
break;
34633469
}
34643470
case SQL_TIME:
34653471
case SQL_TYPE_TIME:
34663472
case SQL_SS_TIME2: {
3467-
row[col - 1] = PythonObjectCache::get_time_class()(buffers.timeBuffers[col - 1][i].hour,
3473+
rowWrapper[col - 1] = PythonObjectCache::get_time_class()(buffers.timeBuffers[col - 1][i].hour,
34683474
buffers.timeBuffers[col - 1][i].minute,
34693475
buffers.timeBuffers[col - 1][i].second);
34703476
break;
@@ -3489,16 +3495,16 @@ SQLRETURN FetchBatchData(SQLHSTMT hStmt, ColumnBuffers& buffers, py::list& colum
34893495
dtoValue.fraction / 1000, // ns → µs
34903496
tzinfo
34913497
);
3492-
row[col - 1] = py_dt;
3498+
rowWrapper[col - 1] = py_dt;
34933499
} else {
3494-
row[col - 1] = py::none();
3500+
rowWrapper[col - 1] = py::none();
34953501
}
34963502
break;
34973503
}
34983504
case SQL_GUID: {
34993505
SQLLEN indicator = buffers.indicators[col - 1][i];
35003506
if (indicator == SQL_NULL_DATA) {
3501-
row[col - 1] = py::none();
3507+
rowWrapper[col - 1] = py::none();
35023508
break;
35033509
}
35043510
SQLGUID* guidValue = &buffers.guidBuffers[col - 1][i];
@@ -3517,18 +3523,18 @@ SQLRETURN FetchBatchData(SQLHSTMT hStmt, ColumnBuffers& buffers, py::list& colum
35173523
py::dict kwargs;
35183524
kwargs["bytes"] = py_guid_bytes;
35193525
py::object uuid_obj = PythonObjectCache::get_uuid_class()(**kwargs);
3520-
row[col - 1] = uuid_obj;
3526+
rowWrapper[col - 1] = uuid_obj;
35213527
break;
35223528
}
35233529
case SQL_BINARY:
35243530
case SQL_VARBINARY:
35253531
case SQL_LONGVARBINARY: {
35263532
if (!isLob && static_cast<size_t>(dataLen) <= processedColumnSize) {
3527-
row[col - 1] = py::bytes(reinterpret_cast<const char*>(
3533+
rowWrapper[col - 1] = py::bytes(reinterpret_cast<const char*>(
35283534
&buffers.charBuffers[col - 1][i * processedColumnSize]),
35293535
dataLen);
35303536
} else {
3531-
row[col - 1] = FetchLobColumnData(hStmt, col, SQL_C_BINARY, false, true);
3537+
rowWrapper[col - 1] = FetchLobColumnData(hStmt, col, SQL_C_BINARY, false, true);
35323538
}
35333539
break;
35343540
}
@@ -3545,7 +3551,7 @@ SQLRETURN FetchBatchData(SQLHSTMT hStmt, ColumnBuffers& buffers, py::list& colum
35453551
}
35463552
}
35473553
} // End all_columns_processing timer
3548-
rows[initialSize + i] = row;
3554+
// Row is already in the list, no need to assign
35493555
}
35503556
} // End construct_rows timer
35513557
return ret;

0 commit comments

Comments
 (0)