Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Rdkafka Changelog

## 0.25.1 (Unreleased)
- [Fix] Fix key partitioner to use `#bytesize` instead of `#size` to ensure correct partition assignment for multi-byte character keys (#629)
- [Enhancement] Use native ARM64 runners instead of QEMU emulation for Alpine musl aarch64 builds, improving build performance and reliability.
- [Enhancement] Enable parallel compilation (`make -j$(nproc)`) for ARM64 Alpine musl builds.

Expand Down
2 changes: 1 addition & 1 deletion lib/rdkafka/bindings.rb
Original file line number Diff line number Diff line change
Expand Up @@ -447,7 +447,7 @@ def self.partitioner(topic_ptr, str, partition_count, partitioner = "consistent_
raise Rdkafka::Config::ConfigError.new("Unknown partitioner: #{partitioner}")
end

public_send(method_name, topic_ptr, str_ptr, str.size, partition_count, nil, nil)
public_send(method_name, topic_ptr, str_ptr, str.bytesize, partition_count, nil, nil)
end

# Create Topics
Expand Down
64 changes: 64 additions & 0 deletions spec/lib/rdkafka/producer_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -1441,5 +1441,69 @@ def call(_, handle)
expect(zero_count).to be < all_partitioners.size
end
end

context "multi-byte character handling" do
  # Verifies that a key whose character count differs from its byte count is
  # routed to a single partition on repeated sends, for every listed partitioner.
  it "correctly uses bytesize for partition key with multi-byte characters" do
    # "测试" is 2 characters long but occupies 6 bytes in UTF-8, so `#size`
    # and `#bytesize` disagree for this key.
    multibyte_key = "测试"
    expect(multibyte_key.size).to eq(2)
    expect(multibyte_key.bytesize).to eq(6)

    %w[consistent murmur2 fnv1a].each do |partitioner|
      # Send the same key three times, waiting for each delivery report before
      # producing the next message, and record the partition each one landed on.
      seen_partitions = Array.new(3) do
        producer.produce(
          topic: TestTopics.partitioner_test_topic,
          payload: "test payload",
          partition_key: multibyte_key,
          partitioner: partitioner
        ).wait(max_wait_timeout_ms: 5_000).partition
      end

      # Identical keys must collapse to exactly one distinct partition.
      expect(seen_partitions.uniq.size).to eq(1), "#{partitioner} should consistently route multi-byte keys to the same partition"
    end
  end

  # Produces with an ASCII key and a multi-byte key of equal character count.
  # Note: this example only asserts that both deliveries report a non-negative
  # partition; it does not compare the two partitions with each other.
  it "handles different multi-byte strings with same character count but different byte sizes" do
    ascii_key = "ab" # 2 chars, 2 bytes
    multibyte_key = "测试" # 2 chars, 6 bytes

    # Same length in characters, different length in bytes.
    expect(ascii_key.size).to eq(multibyte_key.size)
    expect(ascii_key.bytesize).not_to eq(multibyte_key.bytesize)

    %w[consistent murmur2 fnv1a].each do |partitioner|
      # Enqueue both messages first, then wait on the handles in the same order.
      handles = [ascii_key, multibyte_key].map do |key|
        producer.produce(
          topic: TestTopics.partitioner_test_topic,
          payload: "test payload",
          partition_key: key,
          partitioner: partitioner
        )
      end

      reports = handles.map { |handle| handle.wait(max_wait_timeout_ms: 5_000) }

      # Each delivery must have been assigned a valid partition.
      reports.each do |report|
        expect(report.partition).to be >= 0
      end
    end
  end
end
end
end
Loading