Skip to content

Commit 57feeff

Browse files
ilejnslabko
authored and committed
Data compression between the CH servers and the drivers (#523)
* support_compression: initial * support_compression: slightly works * support_compression: LZ4 seems to be working * support_compression: code cleanup * support_compression: RowBinaryWithNamesAndTypes, tiny cleanup * support_compression: some doc, test * support_compression: need_more_input + minor improvements * support_compression: minor things per windsurf-bot' review * support_compression: irrelevant session_id change reverted, minor * support_compression: one more null pointer check * support_compression: destruction order fix (sanitizer complain) * support_compression: add lz4 library * support_compression: include vector (for Win and mac) * support_compression: tiny cleanup * support_compression: stream owning model modified * Revert "support_compression: stream owning model modified" This reverts commit 8c82dc3. * Revert "support_compression: tiny cleanup" This reverts commit 638008d. * Revert "support_compression: include vector (for Win and mac)" This reverts commit 9e98da6. * Revert "support_compression: add lz4 library" This reverts commit 19a0829. * Revert "support_compression: destruction order fix (sanitizer complain)" This reverts commit b15b466. * Revert "support_compression: one more null pointer check" This reverts commit b31874f. * Revert "support_compression: irrelevant session_id change reverted, minor" This reverts commit db61e10. * Revert "support_compression: minor things per windsurf-bot' review" This reverts commit 937b78f. * Revert "support_compression: need_more_input + minor improvements" This reverts commit 347db71. * Revert "support_compression: RowBinaryWithNamesAndTypes, tiny cleanup" This reverts commit a089e5d. * Revert "support_compression: code cleanup" This reverts commit da17f18. * Revert "support_compression: LZ4 seems to be working" This reverts commit 7b92ea0. * support_compression: reapplied everything non-LZ4 related
1 parent c16d70f commit 57feeff

File tree

14 files changed

+77
-6
lines changed

14 files changed

+77
-6
lines changed

README.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -109,8 +109,9 @@ The list of DSN parameters recognized by the driver is as follows:
109109
| `HugeIntAsString` | `off` | Report integer column types that may underflow or overflow 64-bit signed integer (`SQL_BIGINT`) as a `String`/`SQL_VARCHAR` |
110110
| `DriverLog` | `on` if `CMAKE_BUILD_TYPE` is `Debug`, `off` otherwise | Enable or disable the extended driver logging |
111111
| `DriverLogFile` | `\temp\clickhouse-odbc-driver.log` on Windows, `/tmp/clickhouse-odbc-driver.log` otherwise | Path to the extended driver log file (used when `DriverLog` is `on`) |
112-
| `AutoSessionId` | `off` | Auto generate session_id required to use some features of CH (e.g. TEMPORARY TABLE) |
113-
| `ClientName` | empty | Sets additional information about the calling application. This string will be used as a prefix for the User-Agent header.
112+
| `AutoSessionId` | `off` | Auto generate session_id required to use some features of CH (e.g. TEMPORARY TABLE) |
113+
| `ClientName` | empty | Sets additional information about the calling application. This string will be used as a prefix for the User-Agent header. |
114+
| `Compress` | `off` | Pass the `enable_http_compression=1` parameter to the server (the key `UseCompression` is accepted as an alias) |
114115

115116

116117
### URL query string

driver/config/config.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,9 @@ key_value_map_t readDSNInfo(const std::string & dsn_utf8) {
253253
INI_DRIVERLOG,
254254
INI_DRIVERLOGFILE,
255255
INI_AUTO_SESSION_ID,
256-
INI_CLIENT_NAME
256+
INI_CLIENT_NAME,
257+
INI_COMPRESS,
258+
INI_USE_COMPRESSION
257259
}
258260
) {
259261
if (

driver/config/ini_defines.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@
3333
#define INI_DRIVERLOGFILE "DriverLogFile"
3434
#define INI_AUTO_SESSION_ID "AutoSessionId"
3535
#define INI_CLIENT_NAME "ClientName"
36+
#define INI_COMPRESS "Compress"
37+
#define INI_USE_COMPRESSION "UseCompression"
3638

3739
#if defined(UNICODE)
3840
# define INI_DSN_DEFAULT DSN_DEFAULT_UNICODE
@@ -54,6 +56,8 @@
5456
#define INI_STRINGMAXLENGTH_DEFAULT "1048575"
5557
#define INI_AUTO_SESSION_ID_DEFAULT "off"
5658
#define INI_CLIENT_NAME_DEFAULT ""
59+
#define INI_COMPRESS_DEFAULT "0"
60+
#define INI_USE_COMPRESSION_DEFAULT "0"
5761

5862
#ifdef NDEBUG
5963
# define INI_DRIVERLOG_DEFAULT "off"

driver/connection.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ Poco::URI Connection::getUri() const {
9393
bool database_set = false;
9494
bool default_format_set = false;
9595
bool session_id_set = false;
96+
bool enable_http_compression_set = false;
9697

9798
for (const auto& parameter : uri.getQueryParameters()) {
9899
if (Poco::UTF8::icompare(parameter.first, "default_format") == 0) {
@@ -118,6 +119,10 @@ Poco::URI Connection::getUri() const {
118119
uri.addQueryParameter("session_id", session_id);
119120
}
120121

122+
if (enable_http_compression && !enable_http_compression_set) {
123+
uri.addQueryParameter("enable_http_compression", enable_http_compression ? "1" : "0");
124+
}
125+
121126
return uri;
122127
}
123128

@@ -409,6 +414,20 @@ void Connection::setConfiguration(const key_value_map_t & cs_fields, const key_v
409414
client_name = value;
410415
}
411416
}
417+
else if (Poco::UTF8::icompare(key, INI_COMPRESS) == 0 || Poco::UTF8::icompare(key, INI_USE_COMPRESSION) == 0) {
418+
recognized_key = true;
419+
unsigned int typed_value = 0;
420+
valid_value =
421+
(value.empty() ||
422+
(
423+
Poco::NumberParser::tryParseUnsigned(value, typed_value) &&
424+
(typed_value == 1 || typed_value == 0)
425+
) ||
426+
isYesOrNo(value));
427+
if (valid_value) {
428+
enable_http_compression = (typed_value == 1 || isYes(value));
429+
}
430+
}
412431

413432
return std::make_tuple(recognized_key, valid_value);
414433
};
@@ -426,6 +445,8 @@ void Connection::setConfiguration(const key_value_map_t & cs_fields, const key_v
426445
const auto & recognized_key = std::get<0>(res);
427446
const auto & valid_value = std::get<1>(res);
428447

448+
// LOG("DSN: known attribute '" << key << "', valid value, '" << valid_value << "'");
449+
429450
if (recognized_key) {
430451
if (!valid_value)
431452
throw std::runtime_error("DSN: bad value '" + value + "' for attribute '" + key + "'");

driver/connection.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ class Connection
4343
std::int32_t stringmaxlength = 0;
4444
bool auto_session_id = false;
4545
std::string client_name;
46+
bool enable_http_compression = false;
4647

4748
public:
4849
std::unique_ptr<Poco::Net::HTTPClientSession> session;

driver/format/ODBCDriver2.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,15 @@ ODBCDriver2ResultReader::ODBCDriver2ResultReader(
263263
result_set = std::make_unique<ODBCDriver2ResultSet>(timezone, stream, releaseMutator());
264264
}
265265

266+
// Overload that additionally receives `inflating_input_stream`; the timezone, raw stream, mutator and
// inflating stream are all forwarded (the two unique_ptrs by move) to the ResultReader base constructor.
// NOTE(review): presumably used when the HTTP response body is compressed — confirm against the caller.
ODBCDriver2ResultReader::ODBCDriver2ResultReader(const std::string & timezone_, std::istream & raw_stream, std::unique_ptr<ResultMutator> && mutator, std::unique_ptr<std::istream> && inflating_input_stream)
267+
: ResultReader(timezone_, raw_stream, std::move(mutator), std::move(inflating_input_stream))
268+
{
269+
// If the stream already reports end-of-file, return without assigning result_set.
if (stream.eof())
270+
return;
271+
272+
// Otherwise build the result set over the base-class stream, handing it the mutator from releaseMutator().
result_set = std::make_unique<ODBCDriver2ResultSet>(timezone, stream, releaseMutator());
273+
}
274+
266275
bool ODBCDriver2ResultReader::advanceToNextResultSet() {
267276
// ODBCDriver2 format doesn't support multiple result sets in the response,
268277
// so only a basic cleanup is done here.

driver/format/ODBCDriver2.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
#pragma once
22

3-
#include "driver/platform/platform.h"
43
#include "driver/result_set.h"
54

65
#include <Poco/Net/HTTPClientSession.h>

driver/format/RowBinaryWithNamesAndTypes.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -319,6 +319,16 @@ RowBinaryWithNamesAndTypesResultReader::RowBinaryWithNamesAndTypesResultReader(
319319
result_set = std::make_unique<RowBinaryWithNamesAndTypesResultSet>(timezone, stream, releaseMutator());
320320
}
321321

322+
// Overload that additionally receives `inflating_input_stream`; the timezone, raw stream, mutator and
// inflating stream are all forwarded (the two unique_ptrs by move) to the ResultReader base constructor.
// NOTE(review): presumably used when the HTTP response body is compressed — confirm against the caller.
RowBinaryWithNamesAndTypesResultReader::RowBinaryWithNamesAndTypesResultReader(const std::string & timezone_, std::istream & raw_stream, std::unique_ptr<ResultMutator> && mutator, std::unique_ptr<std::istream> && inflating_input_stream)
323+
: ResultReader(timezone_, raw_stream, std::move(mutator), std::move(inflating_input_stream))
324+
{
325+
// If the stream already reports end-of-file, return without assigning result_set.
if (stream.eof())
326+
return;
327+
328+
// Otherwise build the result set over the base-class stream, handing it the mutator from releaseMutator().
result_set = std::make_unique<RowBinaryWithNamesAndTypesResultSet>(timezone, stream, releaseMutator());
329+
330+
}
331+
322332
bool RowBinaryWithNamesAndTypesResultReader::advanceToNextResultSet() {
323333
// RowBinaryWithNamesAndTypes format doesn't support multiple result sets in the response,
324334
// so only a basic cleanup is done here.

driver/result_set.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#include "driver/result_set.h"
2+
#include "driver/driver.h"
23
#include "driver/format/ODBCDriver2.h"
34
#include "driver/format/RowBinaryWithNamesAndTypes.h"
45

@@ -313,6 +314,13 @@ ResultReader::ResultReader(
313314
{
314315
}
315316

317+
// Overload that also receives `inflating_input_stream_`.
// Initializes `timezone` from timezone_, constructs the `stream` member from raw_stream together with the
// moved-in inflating_input_stream_, and moves the mutator into `result_mutator`. The body is intentionally empty.
// NOTE(review): how `stream` chooses between the raw and the inflating stream is not visible here — check its constructor.
ResultReader::ResultReader(const std::string & timezone_, std::istream & raw_stream, std::unique_ptr<ResultMutator> && mutator, std::unique_ptr<std::istream> && inflating_input_stream_)
318+
: timezone(timezone_)
319+
, stream(raw_stream, std::move(inflating_input_stream_))
320+
, result_mutator(std::move(mutator))
321+
{
322+
}
323+
316324
bool ResultReader::hasResultSet() const {
317325
return static_cast<bool>(result_set);
318326
}

driver/result_set.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,8 @@ std::unique_ptr<ResultReader> make_result_reader(
169169
);
170170

171171
template <typename ConversionContext>
172-
SQLRETURN Field::extract(BindingInfo & binding_info, ConversionContext && context) const {
172+
SQLRETURN Field::extract(BindingInfo & binding_info, ConversionContext && context) const
173+
{
173174
return std::visit([&binding_info, &context] (auto & value) {
174175
if constexpr (std::is_same_v<DataSourceType<DataSourceTypeId::Nothing>, std::decay_t<decltype(value)>>) {
175176
return fillOutputNULL(binding_info.value, binding_info.value_max_size, binding_info.indicator);

0 commit comments

Comments
 (0)