Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
081f3e2
OPTIMIZATION #1: Direct PyUnicode_DecodeUTF16 for NVARCHAR conversion…
bewithgaurav Nov 10, 2025
c7d1aa3
OPTIMIZATION #1: Direct PyUnicode_DecodeUTF16 for NVARCHAR conversion…
bewithgaurav Nov 10, 2025
94b8a69
OPTIMIZATION #2: Direct Python C API for numeric types
bewithgaurav Nov 10, 2025
7159d81
docs: Update OPTIMIZATION_PR_SUMMARY with OPT #2 details
bewithgaurav Nov 10, 2025
ef095fd
OPTIMIZATION #3: Metadata prefetch caching
bewithgaurav Nov 10, 2025
7ad0947
OPTIMIZATION #3 (FIX): Remove unused columnSize variables (Windows bu…
bewithgaurav Nov 10, 2025
55fb898
OPTIMIZATION #4: Batch row allocation with direct Python C API
bewithgaurav Nov 10, 2025
e1e827a
docs: Update OPTIMIZATION_PR_SUMMARY with OPT #4 details
bewithgaurav Nov 10, 2025
18e5350
OPTIMIZATION #4 (FIX): Remove unused columnSize variables (Windows bu…
bewithgaurav Nov 10, 2025
3c195f6
OPTIMIZATION #5: Function pointer dispatch for column processors
bewithgaurav Nov 10, 2025
c30974c
docs: Complete OPTIMIZATION_PR_SUMMARY with OPT #3 and OPT #5 details
bewithgaurav Nov 10, 2025
201025f
Fix script
bewithgaurav Nov 10, 2025
5e9a427
PERFORMANCE FIX: Use single-pass batch row allocation
bewithgaurav Nov 10, 2025
797a617
test: Add comprehensive NULL handling test for all numeric types
bewithgaurav Nov 10, 2025
81551d4
test: Add LOB and NULL tests for GUID/DATETIMEOFFSET to improve coverage
bewithgaurav Nov 10, 2025
3e9ab3a
perf: Remove wasteful OPT #3 metadata duplication
bewithgaurav Nov 10, 2025
9b0ff30
docs: Simplify PR summary focusing on implemented optimizations
bewithgaurav Nov 10, 2025
1d712e5
Suppress s360 for WChars to make it faster
bewithgaurav Nov 10, 2025
cc7282e
Suppress s360 for WChars to make it faster
bewithgaurav Nov 10, 2025
02fc960
Restore s360 fix
bewithgaurav Nov 10, 2025
757ef84
more tests
bewithgaurav Nov 10, 2025
8e84080
Update PR Summary
bewithgaurav Nov 10, 2025
b6ea039
more tests for coverage
bewithgaurav Nov 10, 2025
ceaa5ba
PR Summary reformat
bewithgaurav Nov 10, 2025
0730e1d
PR Summary
bewithgaurav Nov 10, 2025
414151f
PR Summary
bewithgaurav Nov 10, 2025
1276aa6
10 averages and pyodbc conn string fix
bewithgaurav Nov 10, 2025
e94365f
refactor: Move inline processor functions to header file
bewithgaurav Nov 12, 2025
c9364e8
refactor: Move inline processor functions and required structs to hea…
bewithgaurav Nov 12, 2025
fd6c8e8
Merge remote-tracking branch 'origin/bewithgaurav/perf-improvements'
bewithgaurav Nov 12, 2025
15ce44e
fix: Remove static from FetchLobColumnData to fix linker error
bewithgaurav Nov 12, 2025
ea19bd0
feat: Add NULL checks for all Python C API calls
bewithgaurav Nov 12, 2025
54a3f99
OPTIMIZATION #6: Consistent NULL checking before all processor calls
bewithgaurav Nov 12, 2025
2045263
Fix two-phase allocation pattern and PyList_Append reallocation issue
bewithgaurav Nov 12, 2025
e3258cd
Improvements and comments fixed
bewithgaurav Nov 12, 2025
7450314
Add comprehensive stress tests and fix NULL check in UTF-16 decode
bewithgaurav Nov 12, 2025
c443a82
Improve test coverage and Windows compatibility
bewithgaurav Nov 12, 2025
26f8157
Remove unused unix_buffers.h
bewithgaurav Nov 12, 2025
6542674
PR Summary changed
bewithgaurav Nov 12, 2025
b5a2d82
Add SQL_DOUBLE and NULL GUID coverage tests
bewithgaurav Nov 12, 2025
2d34574
gitignore restored
bewithgaurav Nov 12, 2025
5d319d1
removed PR summary
bewithgaurav Nov 12, 2025
55e20c6
remove the buggy GUID test
bewithgaurav Nov 12, 2025
c26902c
kicking off the pipelines again
bewithgaurav Nov 12, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions benchmarks/perf-benchmarking.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,11 @@

# Ensure pyodbc connection string has ODBC driver specified
if CONN_STR and 'Driver=' not in CONN_STR:
CONN_STR = f"Driver={{ODBC Driver 18 for SQL Server}};{CONN_STR}"
CONN_STR_PYODBC = f"Driver={{ODBC Driver 18 for SQL Server}};{CONN_STR}"
else:
CONN_STR_PYODBC = CONN_STR

NUM_ITERATIONS = 5 # Number of times to run each test for averaging
NUM_ITERATIONS = 10 # Number of times to run each test for averaging

# SQL Queries
COMPLEX_JOIN_AGGREGATION = """
Expand Down Expand Up @@ -187,7 +189,7 @@ def run_benchmark_pyodbc(query: str, name: str, iterations: int) -> BenchmarkRes
for i in range(iterations):
try:
start_time = time.time()
conn = pyodbc.connect(CONN_STR)
conn = pyodbc.connect(CONN_STR_PYODBC)
cursor = conn.cursor()
cursor.execute(query)
rows = cursor.fetchall()
Expand Down
15 changes: 15 additions & 0 deletions mssql_python/pybind/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,21 @@ else
else
echo "[WARNING] macOS dylib configuration encountered issues"
fi

# Codesign the Python extension module (.so file) to prevent SIP crashes
echo "[ACTION] Codesigning Python extension module..."
SO_FILE="$PARENT_DIR/"*.so
for so in $SO_FILE; do
if [ -f "$so" ]; then
echo " Signing: $so"
codesign -s - -f "$so" 2>/dev/null
if [ $? -eq 0 ]; then
echo "[SUCCESS] Python extension codesigned: $so"
else
echo "[WARNING] Failed to codesign: $so"
fi
fi
done
fi
else
echo "[ERROR] Failed to copy .so file"
Expand Down
312 changes: 137 additions & 175 deletions mssql_python/pybind/ddbc_bindings.cpp

Large diffs are not rendered by default.

313 changes: 312 additions & 1 deletion mssql_python/pybind/ddbc_bindings.h
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,6 @@ inline std::vector<SQLWCHAR> WStringToSQLWCHAR(const std::wstring& str) {

#if defined(__APPLE__) || defined(__linux__)
#include "unix_utils.h" // Unix-specific fixes
#include "unix_buffers.h" // Unix-specific buffers
#endif

//-------------------------------------------------------------------------------------------------
Expand Down Expand Up @@ -563,3 +562,315 @@ inline std::string GetDecimalSeparator() {

// Function to set the decimal separator
void DDBCSetDecimalSeparator(const std::string& separator);

//-------------------------------------------------------------------------------------------------
// INTERNAL: Performance Optimization Helpers for Fetch Path
// (Used internally by ddbc_bindings.cpp - not part of public API)
//-------------------------------------------------------------------------------------------------

// Plain-data record for one DATETIMEOFFSET value: a calendar date, a time of
// day with a sub-second fraction, and the value's offset from UTC.
// NOTE(review): presumably laid out to match the struct the ODBC driver writes
// for this type so it can be used directly as a fetch buffer - confirm before
// reordering, resizing or adding fields.
struct DateTimeOffset
{
SQLSMALLINT year; // Calendar year (signed)
SQLUSMALLINT month; // Calendar month
SQLUSMALLINT day; // Day of month
SQLUSMALLINT hour; // Hour of day
SQLUSMALLINT minute; // Minute of hour
SQLUSMALLINT second; // Second of minute
SQLUINTEGER fraction; // Nanoseconds
SQLSMALLINT timezone_hour; // Offset hours from UTC
SQLSMALLINT timezone_minute; // Offset minutes from UTC
};

// Per-column fetch buffers plus length/NULL indicators for one batch of rows.
//
// Every member is a vector with one entry per result-set column; the column
// processors index it with the zero-based column (col - 1). Only the family
// that matches a column's C type is expected to hold data for that column.
//
// The constructor sizes the OUTER vectors only. The per-column value buffers
// start out empty and must be resized by whoever binds the columns; the
// indicator arrays are the exception and are pre-sized to fetchSize entries.
struct ColumnBuffers {
    std::vector<std::vector<SQLCHAR>> charBuffers;
    std::vector<std::vector<SQLWCHAR>> wcharBuffers;
    std::vector<std::vector<SQLINTEGER>> intBuffers;
    std::vector<std::vector<SQLSMALLINT>> smallIntBuffers;
    std::vector<std::vector<SQLREAL>> realBuffers;
    std::vector<std::vector<SQLDOUBLE>> doubleBuffers;
    std::vector<std::vector<SQL_TIMESTAMP_STRUCT>> timestampBuffers;
    std::vector<std::vector<SQLBIGINT>> bigIntBuffers;
    std::vector<std::vector<SQL_DATE_STRUCT>> dateBuffers;
    std::vector<std::vector<SQL_TIME_STRUCT>> timeBuffers;
    std::vector<std::vector<SQLGUID>> guidBuffers;
    std::vector<std::vector<SQLLEN>> indicators;  // per column, per row: byte length or NULL marker
    std::vector<std::vector<DateTimeOffset>> datetimeoffsetBuffers;

    // numCols   - number of columns in the result set
    // fetchSize - number of rows fetched per batch (length of each indicator array)
    //
    // The initializer list is written in member declaration order. C++ always
    // initializes members in declaration order regardless of how the list is
    // written, so listing them any other way only misleads readers and
    // triggers -Wreorder.
    ColumnBuffers(SQLSMALLINT numCols, int fetchSize)
        : charBuffers(numCols),
          wcharBuffers(numCols),
          intBuffers(numCols),
          smallIntBuffers(numCols),
          realBuffers(numCols),
          doubleBuffers(numCols),
          timestampBuffers(numCols),
          bigIntBuffers(numCols),
          dateBuffers(numCols),
          timeBuffers(numCols),
          guidBuffers(numCols),
          indicators(numCols, std::vector<SQLLEN>(fetchSize)),
          datetimeoffsetBuffers(numCols) {}
};

// Performance: Column processor function type for fast type conversion
// Using function pointers eliminates switch statement overhead in the hot loop
typedef void (*ColumnProcessor)(PyObject* row, ColumnBuffers& buffers, const void* colInfo,
SQLUSMALLINT col, SQLULEN rowIdx, SQLHSTMT hStmt);

// Extended column info struct for processor functions.
// A pointer to one of these is passed to each processor through its untyped
// `const void*` parameter; ProcessChar, ProcessWChar and ProcessBinary cast it
// back to ColumnInfoExt, the numeric processors ignore it.
struct ColumnInfoExt {
SQLSMALLINT dataType; // NOTE(review): presumably the column's SQL type code; not read by the processors in this header
SQLULEN columnSize; // NOTE(review): presumably the size reported by column metadata; not read by the processors in this header
SQLULEN processedColumnSize; // ProcessBinary: per-row stride into charBuffers and the largest length served from the bound buffer
uint64_t fetchBufferSize; // ProcessChar/ProcessWChar: per-row stride in elements; includes the null terminator
bool isLob; // true => processors skip the bound buffer and call FetchLobColumnData
};

// Forward declare FetchLobColumnData (defined in ddbc_bindings.cpp) - MUST be outside namespace
// The ColumnProcessors functions call it for values they do not read from the bound buffers
// (LOB columns, or data that did not fit the buffer).
// Arguments as passed by the callers in this header:
//   hStmt      - statement handle the processor received
//   col        - column number as received by the processor (processors use col - 1 as buffer index)
//   cType      - SQL_C_CHAR, SQL_C_WCHAR or SQL_C_BINARY
//   isWideChar - true only in the SQL_C_WCHAR call
//   isBinary   - true only in the SQL_C_BINARY call
// Returns a py::object; the callers release() it and store the raw pointer in the row list.
py::object FetchLobColumnData(SQLHSTMT hStmt, SQLUSMALLINT col, SQLSMALLINT cType,
bool isWideChar, bool isBinary);

// Specialized column processors for each data type (eliminates switch in hot loop)
namespace ColumnProcessors {

// Convert one SQL INTEGER (4-byte int) cell into a Python int and store it in `row`.
//   row    - list for the current row; slot (col - 1) is filled by this call
//   col    - column number; (col - 1) indexes both the buffers and the row
//   rowIdx - row position inside the fetched batch
// SAFETY: PyList_SET_ITEM is safe here because row is freshly allocated with PyList_New()
// and each slot is filled exactly once (NULL -> value).
// SQL NULL is not checked here - it is handled centrally before the processor is called.
// If the int cannot be created, None is stored instead; the Python error
// indicator is left exactly as the C API set it.
inline void ProcessInteger(PyObject* row, ColumnBuffers& buffers, const void*, SQLUSMALLINT col,
                           SQLULEN rowIdx, SQLHSTMT) {
    const Py_ssize_t slot = col - 1;
    // Direct Python C API call (bypasses pybind11 overhead)
    PyObject* cell = PyLong_FromLong(buffers.intBuffers[slot][rowIdx]);
    if (cell == nullptr) {
        // Allocation failed: substitute a new reference to None
        Py_INCREF(Py_None);
        cell = Py_None;
    }
    PyList_SET_ITEM(row, slot, cell);  // the list takes over the reference
}

// Convert one SQL SMALLINT (2-byte int) cell into a Python int and store it in `row`.
//   row    - list for the current row; slot (col - 1) is filled by this call
//   col    - column number; (col - 1) indexes both the buffers and the row
//   rowIdx - row position inside the fetched batch
// SQL NULL is not checked here - it is handled centrally before the processor is called.
// If the int cannot be created, None is stored instead; the Python error
// indicator is left exactly as the C API set it.
inline void ProcessSmallInt(PyObject* row, ColumnBuffers& buffers, const void*, SQLUSMALLINT col,
                            SQLULEN rowIdx, SQLHSTMT) {
    const Py_ssize_t slot = col - 1;
    // Direct Python C API call; the 2-byte value widens to long
    PyObject* cell = PyLong_FromLong(buffers.smallIntBuffers[slot][rowIdx]);
    if (cell == nullptr) {
        // Allocation failed: substitute a new reference to None
        Py_INCREF(Py_None);
        cell = Py_None;
    }
    PyList_SET_ITEM(row, slot, cell);  // the list takes over the reference
}

// Convert one SQL BIGINT (8-byte int) cell into a Python int and store it in `row`.
//   row    - list for the current row; slot (col - 1) is filled by this call
//   col    - column number; (col - 1) indexes both the buffers and the row
//   rowIdx - row position inside the fetched batch
// SQL NULL is not checked here - it is handled centrally before the processor is called.
// If the int cannot be created, None is stored instead; the Python error
// indicator is left exactly as the C API set it.
inline void ProcessBigInt(PyObject* row, ColumnBuffers& buffers, const void*, SQLUSMALLINT col,
                          SQLULEN rowIdx, SQLHSTMT) {
    const Py_ssize_t slot = col - 1;
    // Direct Python C API call; long long keeps the full 64-bit range
    PyObject* cell = PyLong_FromLongLong(buffers.bigIntBuffers[slot][rowIdx]);
    if (cell == nullptr) {
        // Allocation failed: substitute a new reference to None
        Py_INCREF(Py_None);
        cell = Py_None;
    }
    PyList_SET_ITEM(row, slot, cell);  // the list takes over the reference
}

// Convert one SQL TINYINT (1-byte unsigned int) cell into a Python int and store it in `row`.
// The value is read from charBuffers, one element per row.
//   row    - list for the current row; slot (col - 1) is filled by this call
//   col    - column number; (col - 1) indexes both the buffers and the row
//   rowIdx - row position inside the fetched batch
// SQL NULL is not checked here - it is handled centrally before the processor is called.
// If the int cannot be created, None is stored instead; the Python error
// indicator is left exactly as the C API set it.
inline void ProcessTinyInt(PyObject* row, ColumnBuffers& buffers, const void*, SQLUSMALLINT col,
                           SQLULEN rowIdx, SQLHSTMT) {
    const Py_ssize_t slot = col - 1;
    // Direct Python C API call
    PyObject* cell = PyLong_FromLong(buffers.charBuffers[slot][rowIdx]);
    if (cell == nullptr) {
        // Allocation failed: substitute a new reference to None
        Py_INCREF(Py_None);
        cell = Py_None;
    }
    PyList_SET_ITEM(row, slot, cell);  // the list takes over the reference
}

// Convert one SQL BIT cell into a Python bool and store it in `row`.
// The value is read from charBuffers, one element per row; any non-zero value
// becomes True.
//   row    - list for the current row; slot (col - 1) is filled by this call
//   col    - column number; (col - 1) indexes both the buffers and the row
//   rowIdx - row position inside the fetched batch
// SQL NULL is not checked here - it is handled centrally before the processor is called.
// Should PyBool_FromLong return NULL, None is stored instead.
inline void ProcessBit(PyObject* row, ColumnBuffers& buffers, const void*, SQLUSMALLINT col,
                       SQLULEN rowIdx, SQLHSTMT) {
    const Py_ssize_t slot = col - 1;
    // Direct Python C API call (converts 0/1 to False/True)
    PyObject* cell = PyBool_FromLong(buffers.charBuffers[slot][rowIdx]);
    if (cell == nullptr) {
        // Defensive fallback: substitute a new reference to None
        Py_INCREF(Py_None);
        cell = Py_None;
    }
    PyList_SET_ITEM(row, slot, cell);  // the list takes over the reference
}

// Convert one SQL REAL (4-byte float) cell into a Python float and store it in `row`.
// The stored value is widened to double when handed to PyFloat_FromDouble.
//   row    - list for the current row; slot (col - 1) is filled by this call
//   col    - column number; (col - 1) indexes both the buffers and the row
//   rowIdx - row position inside the fetched batch
// SQL NULL is not checked here - it is handled centrally before the processor is called.
// If the float cannot be created, None is stored instead; the Python error
// indicator is left exactly as the C API set it.
inline void ProcessReal(PyObject* row, ColumnBuffers& buffers, const void*, SQLUSMALLINT col,
                        SQLULEN rowIdx, SQLHSTMT) {
    const Py_ssize_t slot = col - 1;
    // Direct Python C API call
    PyObject* cell = PyFloat_FromDouble(buffers.realBuffers[slot][rowIdx]);
    if (cell == nullptr) {
        // Allocation failed: substitute a new reference to None
        Py_INCREF(Py_None);
        cell = Py_None;
    }
    PyList_SET_ITEM(row, slot, cell);  // the list takes over the reference
}

// Convert one SQL DOUBLE/FLOAT (8-byte float) cell into a Python float and store it in `row`.
//   row    - list for the current row; slot (col - 1) is filled by this call
//   col    - column number; (col - 1) indexes both the buffers and the row
//   rowIdx - row position inside the fetched batch
// SQL NULL is not checked here - it is handled centrally before the processor is called.
// If the float cannot be created, None is stored instead; the Python error
// indicator is left exactly as the C API set it.
inline void ProcessDouble(PyObject* row, ColumnBuffers& buffers, const void*, SQLUSMALLINT col,
                          SQLULEN rowIdx, SQLHSTMT) {
    const Py_ssize_t slot = col - 1;
    // Direct Python C API call
    PyObject* cell = PyFloat_FromDouble(buffers.doubleBuffers[slot][rowIdx]);
    if (cell == nullptr) {
        // Allocation failed: substitute a new reference to None
        Py_INCREF(Py_None);
        cell = Py_None;
    }
    PyList_SET_ITEM(row, slot, cell);  // the list takes over the reference
}

// Convert one SQL CHAR/VARCHAR (single-byte string) cell into a Python str and store it in `row`.
//   row        - list for the current row; slot (col - 1) is filled by this call
//   buffers    - charBuffers holds the data, indicators holds the byte length per row
//   colInfoPtr - points at the column's ColumnInfoExt
//   col        - column number; (col - 1) indexes both the buffers and the row
//   rowIdx     - row position inside the fetched batch
//   hStmt      - statement handle, only used when the value has to be fetched separately
// NULL/NO_TOTAL indicators are not checked here - handled centrally before the processor is called.
// Three outcomes:
//   * length 0                     -> empty str
//   * LOB column or value too long -> whatever FetchLobColumnData returns
//   * otherwise                    -> str built from the bound buffer
// If str creation fails, None is stored instead; the Python error indicator is left as set.
inline void ProcessChar(PyObject* row, ColumnBuffers& buffers, const void* colInfoPtr,
                        SQLUSMALLINT col, SQLULEN rowIdx, SQLHSTMT hStmt) {
    const ColumnInfoExt* meta = static_cast<const ColumnInfoExt*>(colInfoPtr);
    const SQLLEN byteLen = buffers.indicators[col - 1][rowIdx];

    PyObject* cell = nullptr;
    if (byteLen == 0) {
        // Empty string needs no buffer access
        cell = PyUnicode_FromStringAndSize("", 0);
    } else {
        const uint64_t charCount = byteLen / sizeof(SQLCHAR);
        // fetchBufferSize includes the null terminator and charCount does not,
        // so a value fits only while charCount is strictly smaller.
        if (meta->isLob || charCount >= meta->fetchBufferSize) {
            // Slow path: LOB data requires a separate fetch call
            PyList_SET_ITEM(row, col - 1,
                            FetchLobColumnData(hStmt, col, SQL_C_CHAR, false, false).release().ptr());
            return;
        }
        // Fast path: build the str straight from this row's slice of the buffer
        cell = PyUnicode_FromStringAndSize(
            reinterpret_cast<char*>(&buffers.charBuffers[col - 1][rowIdx * meta->fetchBufferSize]),
            charCount);
    }

    if (cell == nullptr) {
        // Creation failed: substitute a new reference to None
        Py_INCREF(Py_None);
        cell = Py_None;
    }
    PyList_SET_ITEM(row, col - 1, cell);  // the list takes over the reference
}

// Process SQL NCHAR/NVARCHAR (wide/Unicode string) column into Python str
//   row        - list for the current row; slot (col - 1) is filled by this call
//   buffers    - wcharBuffers holds the data, indicators holds the byte length per row
//   colInfoPtr - points at the column's ColumnInfoExt
//   col        - column number; (col - 1) indexes both the buffers and the row
//   rowIdx     - row position inside the fetched batch
//   hStmt      - statement handle, only used when the value has to be fetched separately
// Performance: NULL/NO_TOTAL checks removed - handled centrally before processor is called
// Three outcomes:
//   * length 0                     -> empty str
//   * LOB column or value too long -> whatever FetchLobColumnData returns
//   * otherwise                    -> str decoded from the bound buffer
// On Linux/macOS a value that fails strict UTF-16 decoding becomes an empty str
// (the decode error is cleared). If a str cannot be created at all, None is stored.
inline void ProcessWChar(PyObject* row, ColumnBuffers& buffers, const void* colInfoPtr,
                         SQLUSMALLINT col, SQLULEN rowIdx, SQLHSTMT hStmt) {
    const ColumnInfoExt* colInfo = static_cast<const ColumnInfoExt*>(colInfoPtr);
    SQLLEN dataLen = buffers.indicators[col - 1][rowIdx];

    // Handle empty strings
    if (dataLen == 0) {
        PyObject* emptyStr = PyUnicode_FromStringAndSize("", 0);
        if (!emptyStr) {
            Py_INCREF(Py_None);
            PyList_SET_ITEM(row, col - 1, Py_None);
        } else {
            PyList_SET_ITEM(row, col - 1, emptyStr);
        }
        return;
    }

    uint64_t numCharsInData = dataLen / sizeof(SQLWCHAR);
    // Fast path: Data fits in buffer (not LOB or truncated)
    // fetchBufferSize includes null-terminator, numCharsInData doesn't. Hence '<'
    if (!colInfo->isLob && numCharsInData < colInfo->fetchBufferSize) {
#if defined(__APPLE__) || defined(__linux__)
        // Performance: Direct UTF-16 decode (SQLWCHAR is 2 bytes on Linux/macOS)
        SQLWCHAR* wcharData = &buffers.wcharBuffers[col - 1][rowIdx * colInfo->fetchBufferSize];

        // Pass the byte order explicitly instead of NULL. With NULL the decoder
        // treats a leading U+FEFF / 0xFFFE code unit as a byte-order mark: a
        // value that really starts with U+FEFF would lose that character, and
        // one starting with 0xFFFE would be decoded byte-swapped. Column data
        // carries no BOM, so decode in the machine's native order - the same
        // order the NULL default used whenever no BOM was present.
        const uint16_t endianProbe = 1;
        int byteOrder =
            (*reinterpret_cast<const unsigned char*>(&endianProbe) == 1) ? -1   // little-endian
                                                                         : 1;  // big-endian
        PyObject* pyStr = PyUnicode_DecodeUTF16(
            reinterpret_cast<const char*>(wcharData),
            numCharsInData * sizeof(SQLWCHAR),
            NULL,       // errors (use default strict)
            &byteOrder  // fixed native order, no BOM detection
        );
        if (pyStr) {
            PyList_SET_ITEM(row, col - 1, pyStr);
        } else {
            PyErr_Clear();  // Ignore decode error, return empty string
            PyObject* emptyStr = PyUnicode_FromStringAndSize("", 0);
            if (!emptyStr) {
                Py_INCREF(Py_None);
                PyList_SET_ITEM(row, col - 1, Py_None);
            } else {
                PyList_SET_ITEM(row, col - 1, emptyStr);
            }
        }
#else
        // Performance: Direct Python C API call (Windows where SQLWCHAR == wchar_t)
        PyObject* pyStr = PyUnicode_FromWideChar(
            reinterpret_cast<wchar_t*>(&buffers.wcharBuffers[col - 1][rowIdx * colInfo->fetchBufferSize]),
            numCharsInData);
        if (!pyStr) {
            Py_INCREF(Py_None);
            PyList_SET_ITEM(row, col - 1, Py_None);
        } else {
            PyList_SET_ITEM(row, col - 1, pyStr);
        }
#endif
    } else {
        // Slow path: LOB data requires separate fetch call
        PyList_SET_ITEM(row, col - 1, FetchLobColumnData(hStmt, col, SQL_C_WCHAR, true, false).release().ptr());
    }
}

// Convert one SQL BINARY/VARBINARY cell into a Python bytes object and store it in `row`.
//   row        - list for the current row; slot (col - 1) is filled by this call
//   buffers    - charBuffers holds the data, indicators holds the byte length per row
//   colInfoPtr - points at the column's ColumnInfoExt
//   col        - column number; (col - 1) indexes both the buffers and the row
//   rowIdx     - row position inside the fetched batch
//   hStmt      - statement handle, only used when the value has to be fetched separately
// NULL/NO_TOTAL indicators are not checked here - handled centrally before the processor is called.
// Three outcomes:
//   * length 0                     -> empty bytes
//   * LOB column or value too long -> whatever FetchLobColumnData returns
//   * otherwise                    -> bytes copied from the bound buffer
// If bytes creation fails, None is stored instead; the Python error indicator is left as set.
inline void ProcessBinary(PyObject* row, ColumnBuffers& buffers, const void* colInfoPtr,
                          SQLUSMALLINT col, SQLULEN rowIdx, SQLHSTMT hStmt) {
    const ColumnInfoExt* meta = static_cast<const ColumnInfoExt*>(colInfoPtr);
    const SQLLEN byteLen = buffers.indicators[col - 1][rowIdx];

    PyObject* cell = nullptr;
    if (byteLen == 0) {
        // Empty value needs no buffer access
        cell = PyBytes_FromStringAndSize("", 0);
    } else {
        // A value is served from the buffer only when it fits in processedColumnSize bytes
        if (meta->isLob || static_cast<size_t>(byteLen) > meta->processedColumnSize) {
            // Slow path: LOB data requires a separate fetch call
            PyList_SET_ITEM(row, col - 1,
                            FetchLobColumnData(hStmt, col, SQL_C_BINARY, false, true).release().ptr());
            return;
        }
        // Fast path: copy this row's slice of the buffer into a bytes object
        cell = PyBytes_FromStringAndSize(
            reinterpret_cast<const char*>(
                &buffers.charBuffers[col - 1][rowIdx * meta->processedColumnSize]),
            byteLen);
    }

    if (cell == nullptr) {
        // Creation failed: substitute a new reference to None
        Py_INCREF(Py_None);
        cell = Py_None;
    }
    PyList_SET_ITEM(row, col - 1, cell);  // the list takes over the reference
}

} // namespace ColumnProcessors
Loading