Skip to content

Commit c9364e8

Browse files
committed
refactor: Move inline processor functions and required structs to header file
- Moved DateTimeOffset struct definition to header (required by ColumnBuffers)
- Moved ColumnBuffers struct definition to header (required by inline functions)
- Moved typedef ColumnProcessor, struct ColumnInfoExt, and all 10 inline processor functions to header
- Added new 'INTERNAL: Performance Optimization Helpers' section in header
- Added forward declaration for FetchLobColumnData function
- Enables true cross-compilation-unit inlining for performance optimization
- Follows C++ best practices for inline function placement

Addresses review comments #4, #5, #6 from subrata-ms.
Build verified successful (universal2 binary for macOS arm64 + x86_64).
1 parent 1276aa6 commit c9364e8

File tree

2 files changed

+257
-248
lines changed

2 files changed

+257
-248
lines changed

mssql_python/pybind/ddbc_bindings.cpp

Lines changed: 0 additions & 248 deletions
Original file line numberDiff line numberDiff line change
@@ -135,52 +135,6 @@ struct NumericData {
135135
}
136136
};
137137

138-
// Struct to hold the DateTimeOffset structure
139-
struct DateTimeOffset
140-
{
141-
SQLSMALLINT year;
142-
SQLUSMALLINT month;
143-
SQLUSMALLINT day;
144-
SQLUSMALLINT hour;
145-
SQLUSMALLINT minute;
146-
SQLUSMALLINT second;
147-
SQLUINTEGER fraction; // Nanoseconds
148-
SQLSMALLINT timezone_hour; // Offset hours from UTC
149-
SQLSMALLINT timezone_minute; // Offset minutes from UTC
150-
};
151-
152-
// Struct to hold data buffers and indicators for each column
153-
struct ColumnBuffers {
154-
std::vector<std::vector<SQLCHAR>> charBuffers;
155-
std::vector<std::vector<SQLWCHAR>> wcharBuffers;
156-
std::vector<std::vector<SQLINTEGER>> intBuffers;
157-
std::vector<std::vector<SQLSMALLINT>> smallIntBuffers;
158-
std::vector<std::vector<SQLREAL>> realBuffers;
159-
std::vector<std::vector<SQLDOUBLE>> doubleBuffers;
160-
std::vector<std::vector<SQL_TIMESTAMP_STRUCT>> timestampBuffers;
161-
std::vector<std::vector<SQLBIGINT>> bigIntBuffers;
162-
std::vector<std::vector<SQL_DATE_STRUCT>> dateBuffers;
163-
std::vector<std::vector<SQL_TIME_STRUCT>> timeBuffers;
164-
std::vector<std::vector<SQLGUID>> guidBuffers;
165-
std::vector<std::vector<SQLLEN>> indicators;
166-
std::vector<std::vector<DateTimeOffset>> datetimeoffsetBuffers;
167-
168-
ColumnBuffers(SQLSMALLINT numCols, int fetchSize)
169-
: charBuffers(numCols),
170-
wcharBuffers(numCols),
171-
intBuffers(numCols),
172-
smallIntBuffers(numCols),
173-
realBuffers(numCols),
174-
doubleBuffers(numCols),
175-
timestampBuffers(numCols),
176-
bigIntBuffers(numCols),
177-
dateBuffers(numCols),
178-
timeBuffers(numCols),
179-
guidBuffers(numCols),
180-
datetimeoffsetBuffers(numCols),
181-
indicators(numCols, std::vector<SQLLEN>(fetchSize)) {}
182-
};
183-
184138
//-------------------------------------------------------------------------------------------------
185139
// Function pointer initialization
186140
//-------------------------------------------------------------------------------------------------
@@ -3185,208 +3139,6 @@ SQLRETURN SQLBindColums(SQLHSTMT hStmt, ColumnBuffers& buffers, py::list& column
31853139
return ret;
31863140
}
31873141

3188-
// OPTIMIZATION #5: Column processor function type - processes one cell
3189-
// Using function pointers eliminates switch statement overhead in the hot loop
3190-
typedef void (*ColumnProcessor)(PyObject* row, ColumnBuffers& buffers, const void* colInfo,
3191-
SQLUSMALLINT col, SQLULEN rowIdx, SQLHSTMT hStmt);
3192-
3193-
// Extended column info struct for processor functions
3194-
struct ColumnInfoExt {
3195-
SQLSMALLINT dataType;
3196-
SQLULEN columnSize;
3197-
SQLULEN processedColumnSize;
3198-
uint64_t fetchBufferSize;
3199-
bool isLob;
3200-
};
3201-
3202-
// Specialized column processors for each data type (eliminates switch in hot loop)
3203-
namespace ColumnProcessors {
3204-
3205-
// Stores cell (col, rowIdx) of the int buffers into `row` as a Python int,
// or None when the indicator is SQL_NULL_DATA. `col` is 1-based; buffers and
// list slots are 0-based. PyList_SET_ITEM hands the new reference to `row`.
inline void ProcessInteger(PyObject* row, ColumnBuffers& buffers, const void*, SQLUSMALLINT col,
                           SQLULEN rowIdx, SQLHSTMT) {
    const Py_ssize_t slot = col - 1;
    PyObject* cell;
    if (buffers.indicators[slot][rowIdx] != SQL_NULL_DATA) {
        // OPTIMIZATION #2: build the object with the C API directly (no pybind11 wrapper)
        cell = PyLong_FromLong(buffers.intBuffers[slot][rowIdx]);
    } else {
        Py_INCREF(Py_None);
        cell = Py_None;
    }
    PyList_SET_ITEM(row, slot, cell);
}
3216-
3217-
// Stores cell (col, rowIdx) of the smallint buffers into `row` as a Python int,
// or None when the indicator is SQL_NULL_DATA. `col` is 1-based; buffers and
// list slots are 0-based. PyList_SET_ITEM hands the new reference to `row`.
inline void ProcessSmallInt(PyObject* row, ColumnBuffers& buffers, const void*, SQLUSMALLINT col,
                            SQLULEN rowIdx, SQLHSTMT) {
    const Py_ssize_t slot = col - 1;
    PyObject* cell;
    if (buffers.indicators[slot][rowIdx] != SQL_NULL_DATA) {
        // OPTIMIZATION #2: build the object with the C API directly
        cell = PyLong_FromLong(buffers.smallIntBuffers[slot][rowIdx]);
    } else {
        Py_INCREF(Py_None);
        cell = Py_None;
    }
    PyList_SET_ITEM(row, slot, cell);
}
3228-
3229-
// Stores cell (col, rowIdx) of the bigint buffers into `row` as a Python int
// (via PyLong_FromLongLong), or None when the indicator is SQL_NULL_DATA.
// `col` is 1-based; buffers and list slots are 0-based.
inline void ProcessBigInt(PyObject* row, ColumnBuffers& buffers, const void*, SQLUSMALLINT col,
                          SQLULEN rowIdx, SQLHSTMT) {
    const Py_ssize_t slot = col - 1;
    PyObject* cell;
    if (buffers.indicators[slot][rowIdx] != SQL_NULL_DATA) {
        // OPTIMIZATION #2: build the object with the C API directly
        cell = PyLong_FromLongLong(buffers.bigIntBuffers[slot][rowIdx]);
    } else {
        Py_INCREF(Py_None);
        cell = Py_None;
    }
    PyList_SET_ITEM(row, slot, cell);
}
3240-
3241-
// Stores cell (col, rowIdx) into `row` as a Python int, or None when the
// indicator is SQL_NULL_DATA. Tinyint values are read from charBuffers (one
// element per row). `col` is 1-based; buffers and list slots are 0-based.
inline void ProcessTinyInt(PyObject* row, ColumnBuffers& buffers, const void*, SQLUSMALLINT col,
                           SQLULEN rowIdx, SQLHSTMT) {
    const Py_ssize_t slot = col - 1;
    PyObject* cell;
    if (buffers.indicators[slot][rowIdx] != SQL_NULL_DATA) {
        // OPTIMIZATION #2: build the object with the C API directly
        cell = PyLong_FromLong(buffers.charBuffers[slot][rowIdx]);
    } else {
        Py_INCREF(Py_None);
        cell = Py_None;
    }
    PyList_SET_ITEM(row, slot, cell);
}
3252-
3253-
// Stores cell (col, rowIdx) into `row` as a Python bool, or None when the
// indicator is SQL_NULL_DATA. Bit values are read from charBuffers (one element
// per row) and converted with PyBool_FromLong. `col` is 1-based; buffers and
// list slots are 0-based.
inline void ProcessBit(PyObject* row, ColumnBuffers& buffers, const void*, SQLUSMALLINT col,
                       SQLULEN rowIdx, SQLHSTMT) {
    const Py_ssize_t slot = col - 1;
    PyObject* cell;
    if (buffers.indicators[slot][rowIdx] != SQL_NULL_DATA) {
        // OPTIMIZATION #2: build the object with the C API directly
        cell = PyBool_FromLong(buffers.charBuffers[slot][rowIdx]);
    } else {
        Py_INCREF(Py_None);
        cell = Py_None;
    }
    PyList_SET_ITEM(row, slot, cell);
}
3264-
3265-
// Stores cell (col, rowIdx) of the real buffers into `row` as a Python float
// (via PyFloat_FromDouble), or None when the indicator is SQL_NULL_DATA.
// `col` is 1-based; buffers and list slots are 0-based.
inline void ProcessReal(PyObject* row, ColumnBuffers& buffers, const void*, SQLUSMALLINT col,
                        SQLULEN rowIdx, SQLHSTMT) {
    const Py_ssize_t slot = col - 1;
    PyObject* cell;
    if (buffers.indicators[slot][rowIdx] != SQL_NULL_DATA) {
        // OPTIMIZATION #2: build the object with the C API directly
        cell = PyFloat_FromDouble(buffers.realBuffers[slot][rowIdx]);
    } else {
        Py_INCREF(Py_None);
        cell = Py_None;
    }
    PyList_SET_ITEM(row, slot, cell);
}
3276-
3277-
// Stores cell (col, rowIdx) of the double buffers into `row` as a Python float,
// or None when the indicator is SQL_NULL_DATA. `col` is 1-based; buffers and
// list slots are 0-based. PyList_SET_ITEM hands the new reference to `row`.
inline void ProcessDouble(PyObject* row, ColumnBuffers& buffers, const void*, SQLUSMALLINT col,
                          SQLULEN rowIdx, SQLHSTMT) {
    const Py_ssize_t slot = col - 1;
    PyObject* cell;
    if (buffers.indicators[slot][rowIdx] != SQL_NULL_DATA) {
        // OPTIMIZATION #2: build the object with the C API directly
        cell = PyFloat_FromDouble(buffers.doubleBuffers[slot][rowIdx]);
    } else {
        Py_INCREF(Py_None);
        cell = Py_None;
    }
    PyList_SET_ITEM(row, slot, cell);
}
3288-
3289-
// Stores cell (col, rowIdx) of a narrow-character column into `row` as a Python str.
//
//   - SQL_NULL_DATA / SQL_NO_TOTAL indicator -> None
//   - zero-length value                      -> ""
//   - value fits in the bound buffer         -> decoded straight from charBuffers
//   - LOB column or value too long           -> fetched via FetchLobColumnData
//
// colInfoPtr must point at the ColumnInfoExt describing this column.
// `col` is 1-based; buffers and list slots are 0-based.
inline void ProcessChar(PyObject* row, ColumnBuffers& buffers, const void* colInfoPtr,
                        SQLUSMALLINT col, SQLULEN rowIdx, SQLHSTMT hStmt) {
    const ColumnInfoExt* colInfo = static_cast<const ColumnInfoExt*>(colInfoPtr);
    const SQLLEN dataLen = buffers.indicators[col - 1][rowIdx];

    if (dataLen == SQL_NULL_DATA || dataLen == SQL_NO_TOTAL) {
        Py_INCREF(Py_None);
        PyList_SET_ITEM(row, col - 1, Py_None);
        return;
    }
    if (dataLen == 0) {
        PyList_SET_ITEM(row, col - 1, PyUnicode_FromStringAndSize("", 0));
        return;
    }

    const uint64_t numCharsInData = dataLen / sizeof(SQLCHAR);
    // fetchBufferSize includes null-terminator, numCharsInData doesn't. Hence '<'
    if (!colInfo->isLob && numCharsInData < colInfo->fetchBufferSize) {
        // OPTIMIZATION #2: Direct Python C API call
        PyObject* pyStr = PyUnicode_FromStringAndSize(
            reinterpret_cast<char*>(&buffers.charBuffers[col - 1][rowIdx * colInfo->fetchBufferSize]),
            static_cast<Py_ssize_t>(numCharsInData));
        if (!pyStr) {
            // Decoding fails (returns NULL with an exception set) when the bytes
            // are not valid UTF-8. Never store NULL in the list: clear the error
            // and fall back to an empty string, matching ProcessWChar's handling.
            PyErr_Clear();
            pyStr = PyUnicode_FromStringAndSize("", 0);
        }
        PyList_SET_ITEM(row, col - 1, pyStr);
    } else {
        PyList_SET_ITEM(row, col - 1, FetchLobColumnData(hStmt, col, SQL_C_CHAR, false, false).release().ptr());
    }
}
3316-
3317-
// Stores cell (col, rowIdx) of a wide-character column into `row` as a Python str.
//
//   - SQL_NULL_DATA / SQL_NO_TOTAL indicator -> None
//   - zero-length value                      -> ""
//   - value fits in the bound buffer         -> decoded straight from wcharBuffers
//   - LOB column or value too long           -> fetched via FetchLobColumnData
//
// If in-buffer decoding fails, the Python error is cleared and "" is stored;
// this now applies on every platform, not only Apple/Linux.
// colInfoPtr must point at the ColumnInfoExt describing this column.
// `col` is 1-based; buffers and list slots are 0-based.
inline void ProcessWChar(PyObject* row, ColumnBuffers& buffers, const void* colInfoPtr,
                         SQLUSMALLINT col, SQLULEN rowIdx, SQLHSTMT hStmt) {
    const ColumnInfoExt* colInfo = static_cast<const ColumnInfoExt*>(colInfoPtr);
    const SQLLEN dataLen = buffers.indicators[col - 1][rowIdx];

    if (dataLen == SQL_NULL_DATA || dataLen == SQL_NO_TOTAL) {
        Py_INCREF(Py_None);
        PyList_SET_ITEM(row, col - 1, Py_None);
        return;
    }
    if (dataLen == 0) {
        PyList_SET_ITEM(row, col - 1, PyUnicode_FromStringAndSize("", 0));
        return;
    }

    const uint64_t numCharsInData = dataLen / sizeof(SQLWCHAR);
    // fetchBufferSize includes null-terminator, numCharsInData doesn't. Hence '<'
    if (!colInfo->isLob && numCharsInData < colInfo->fetchBufferSize) {
        SQLWCHAR* wcharData = &buffers.wcharBuffers[col - 1][rowIdx * colInfo->fetchBufferSize];
        PyObject* pyStr = nullptr;
#if defined(__APPLE__) || defined(__linux__)
        // OPTIMIZATION #1: Direct UTF-16 decode
        pyStr = PyUnicode_DecodeUTF16(
            reinterpret_cast<const char*>(wcharData),
            static_cast<Py_ssize_t>(numCharsInData * sizeof(SQLWCHAR)),
            nullptr,
            nullptr);
#else
        // OPTIMIZATION #2: Direct Python C API call
        pyStr = PyUnicode_FromWideChar(
            reinterpret_cast<wchar_t*>(wcharData),
            static_cast<Py_ssize_t>(numCharsInData));
#endif
        if (!pyStr) {
            // Both decoders return NULL with an exception set on failure.
            // Never store NULL in the list: clear the error and use "".
            PyErr_Clear();
            pyStr = PyUnicode_FromStringAndSize("", 0);
        }
        PyList_SET_ITEM(row, col - 1, pyStr);
    } else {
        PyList_SET_ITEM(row, col - 1, FetchLobColumnData(hStmt, col, SQL_C_WCHAR, true, false).release().ptr());
    }
}
3361-
3362-
// Stores cell (col, rowIdx) of a binary column into `row` as a Python bytes object.
//
//   - SQL_NULL_DATA / SQL_NO_TOTAL indicator        -> None
//   - zero-length value                             -> b""
//   - LOB column or value > processedColumnSize     -> fetched via FetchLobColumnData
//   - otherwise                                     -> copied straight from charBuffers
//
// colInfoPtr must point at the ColumnInfoExt describing this column.
// `col` is 1-based; buffers and list slots are 0-based.
inline void ProcessBinary(PyObject* row, ColumnBuffers& buffers, const void* colInfoPtr,
                          SQLUSMALLINT col, SQLULEN rowIdx, SQLHSTMT hStmt) {
    const auto* info = static_cast<const ColumnInfoExt*>(colInfoPtr);
    const Py_ssize_t slot = col - 1;
    const SQLLEN byteCount = buffers.indicators[slot][rowIdx];

    PyObject* cell;
    if (byteCount == SQL_NULL_DATA || byteCount == SQL_NO_TOTAL) {
        Py_INCREF(Py_None);
        cell = Py_None;
    } else if (byteCount == 0) {
        cell = PyBytes_FromStringAndSize("", 0);
    } else if (info->isLob || static_cast<size_t>(byteCount) > info->processedColumnSize) {
        // Value is not (fully) in the bound buffer: stream it from the driver.
        cell = FetchLobColumnData(hStmt, col, SQL_C_BINARY, false, true).release().ptr();
    } else {
        // OPTIMIZATION #2: build the object with the C API directly.
        // Rows are laid out back to back, processedColumnSize bytes apart.
        const SQLCHAR* start = &buffers.charBuffers[slot][rowIdx * info->processedColumnSize];
        cell = PyBytes_FromStringAndSize(reinterpret_cast<const char*>(start), byteCount);
    }
    PyList_SET_ITEM(row, slot, cell);
}
3387-
3388-
} // namespace ColumnProcessors
3389-
33903142
// Fetch rows in batches
33913143
// TODO: Move to anonymous namespace, since it is not used outside this file
33923144
SQLRETURN FetchBatchData(SQLHSTMT hStmt, ColumnBuffers& buffers, py::list& columnNames,

0 commit comments

Comments
 (0)