Commit 2ce555c

Purge events tied to closed partitions
After a partition rebalance, acks from HEC can arrive late for partitions this task no longer owns. In that case, all references to events tied to the closed partitions must be removed. The purge covers buffered events, failed events, offsets, and topic/partition records. It is safe because those events will be picked up by whichever task opens the partitions that were closed on the original task.
1 parent a1a1f4a commit 2ce555c
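
For context, Kafka Connect notifies a sink task of rebalances through the SinkTask open()/close() callbacks, and this commit hooks its purge into them. Below is a minimal sketch of that lifecycle; the callback signatures come from the Connect API, while the class name and body are hypothetical illustration, not code from this commit.

import java.util.Collection;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.connect.sink.SinkRecord;
import org.apache.kafka.connect.sink.SinkTask;

/* Hypothetical minimal task (not the connector's real class): Kafka Connect
 * calls close() with revoked partitions before a rebalance and open() with
 * the newly assigned ones after it. */
public class PartitionLifecycleSketch extends SinkTask {
    private final Set<TopicPartition> assigned = new HashSet<>();

    @Override
    public void open(Collection<TopicPartition> partitions) {
        assigned.addAll(partitions); // partitions this task now owns
    }

    @Override
    public void close(Collection<TopicPartition> partitions) {
        assigned.removeAll(partitions);
        // Purge state keyed by the revoked partitions here, so a late
        // HEC ack can no longer surface an offset this task can't commit.
    }

    @Override
    public void put(Collection<SinkRecord> records) { /* buffer and send */ }

    @Override
    public void start(Map<String, String> props) { }

    @Override
    public void stop() { }

    @Override
    public String version() { return "0-sketch"; }
}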

4 files changed, +120 −25 lines


src/main/java/com/splunk/kafka/connect/KafkaRecordTracker.java

Lines changed: 79 additions & 24 deletions
@@ -35,39 +35,42 @@ final class KafkaRecordTracker {
     private AtomicLong total;
     private ConcurrentLinkedQueue<EventBatch> failed;
     private volatile Map<TopicPartition, OffsetAndMetadata> offsets;
+    private Collection<TopicPartition> partitions;
 
     public KafkaRecordTracker() {
         all = new ConcurrentHashMap<>();
         failed = new ConcurrentLinkedQueue<>();
         total = new AtomicLong();
         offsets = new HashMap<>();
+        partitions = new ArrayList<TopicPartition>();
     }
 
+    /**
+     * Remove acked events and update the corresponding offsets, finding the
+     * lowest consecutive HEC-committed offsets.
+     *
+     * @param batches the acked event batches
+     */
     public void removeAckedEventBatches(final List<EventBatch> batches) {
-        for (final EventBatch batch: batches) {
-            //log.debug("Processing batch {}", batch.getUUID());
-            removeAckedEventBatch(batch);
-        }
-    }
-
-    public void removeAckedEventBatch(final EventBatch batch) {
-        final List<Event> events = batch.getEvents();
-        final Event event = events.get(0);
-        if (event.getTied() instanceof SinkRecord) {
-            final SinkRecord record = (SinkRecord) event.getTied();
-            TopicPartition tp = new TopicPartition(record.topic(), record.kafkaPartition());
-            //log.debug("Processing topic {} partition {}", record.topic(), record.kafkaPartition());
+        log.debug("received acked event batches={}", batches);
+        /* Loop over all *assigned* partitions to find the lowest consecutive
+         * HEC-committed offsets. A batch could contain events coming from a
+         * variety of topic/partitions, and scanning those events could be
+         * expensive.
+         * Note that if some events are tied to an unassigned partition, those
+         * offsets won't be able to be committed.
+         */
+        for (TopicPartition tp : partitions) {
             TreeMap<Long, EventBatch> tpRecords = all.get(tp);
             if (tpRecords == null) {
-                log.error("KafkaRecordTracker removing a batch in an unknown partition {} {} {}", record.topic(), record.kafkaPartition(), record.kafkaOffset());
-                return;
+                continue; // nothing to remove in this case
             }
             long offset = -1;
             Iterator<Map.Entry<Long, EventBatch>> iter = tpRecords.entrySet().iterator();
             for (; iter.hasNext();) {
                 Map.Entry<Long, EventBatch> e = iter.next();
                 if (e.getValue().isCommitted()) {
-                    //log.debug("processing offset {}", e.getKey());
+                    log.debug("processing offset {}", e.getKey());
                     offset = e.getKey();
                     iter.remove();
                     total.decrementAndGet();
@@ -76,11 +79,7 @@ public void removeAckedEventBatch(final EventBatch batch) {
                 }
             }
             if (offset >= 0) {
-                if (offsets.containsKey(tp)) {
-                    offsets.replace(tp, new OffsetAndMetadata(offset + 1));
-                } else {
-                    offsets.put(tp, new OffsetAndMetadata(offset + 1));
-                }
+                offsets.put(tp, new OffsetAndMetadata(offset + 1));
             }
         }
     }
@@ -116,21 +115,77 @@ public Collection<EventBatch> getAndRemoveFailedRecords() {
         Collection<EventBatch> records = new ArrayList<>();
         while (!failed.isEmpty()) {
             final EventBatch batch = failed.poll();
+            /* Don't return null batches. */
             if (batch != null) {
+                /* Purge events from closed partitions because it won't be
+                 * possible to commit their offsets. */
+                batch.getEvents().removeIf(e -> !partitions.contains(getPartitionFromEvent(e)));
                 records.add(batch);
             }
         }
         return records;
     }
 
-    // Loop through all SinkRecords for all topic partitions to
-    // find all lowest consecutive committed offsets, calculate
-    // the topic/partition offsets and then remove them
+    /**
+     * Return offsets computed when event batches are acked.
+     *
+     * @return map of topic/partition to offset/metadata
+     */
     public Map<TopicPartition, OffsetAndMetadata> computeOffsets() {
         return offsets;
     }
 
     public long totalEvents() {
         return total.get();
     }
+
+    public void open(Collection<TopicPartition> partitions) {
+        this.partitions.addAll(partitions);
+        log.debug("open partitions={} so currently assigned partitions={}",
+            partitions, this.partitions);
+    }
+
+    public void close(Collection<TopicPartition> partitions) {
+        this.partitions.removeAll(partitions);
+        log.debug("close partitions={} so currently assigned partitions={}",
+            partitions, this.partitions);
+        cleanupAfterClosedPartitions(partitions);
+    }
+
+    private TopicPartition getPartitionFromEvent(Event event) {
+        if (event.getTied() instanceof SinkRecord) {
+            final SinkRecord r = (SinkRecord) event.getTied();
+            return new TopicPartition(r.topic(), r.kafkaPartition());
+        } else return null;
+    }
+
+    /**
+     * Clean up and purge everything related to partitions that are closed
+     * (i.e. became unassigned) for this task, as reported via SinkTask.close().
+     * This avoids race conditions caused by acks that arrive late, after a
+     * partition rebalance.
+     *
+     * @param partitions partitions closed and now unassigned for this task
+     */
+    public void cleanupAfterClosedPartitions(Collection<TopicPartition> partitions)
+    {
+        /* Purge offsets. */
+        offsets.keySet().removeAll(partitions);
+        log.warn("purge offsets for closed partitions={} leaving offsets={}",
+            partitions, offsets);
+
+        /* Count and purge outstanding event topic/partition records. */
+        long countOfEventsToRemove = partitions.stream()
+            .map(tp -> all.get(tp))      // get unassigned topic/partition records
+            .filter(Objects::nonNull)    // filter out null values
+            .map(tpr -> tpr.size())      // get number of tp records
+            .mapToInt(Integer::intValue) // map to int
+            .sum();
+        if (countOfEventsToRemove > 0) {
+            log.warn("purge events={} from closed partitions={}",
                countOfEventsToRemove, partitions);
+            all.keySet().removeAll(partitions);
+            total.addAndGet(-1L * countOfEventsToRemove);
+        }
+    }
 }
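
To make the ack scan above concrete: each assigned partition's outstanding batches live in a TreeMap keyed by offset, so finding the lowest consecutive HEC-committed offset means walking from the smallest key, removing entries while they are committed, and stopping at the first gap; the next safe commit point is then the last removed offset plus one. Below is a self-contained demo of that scan, with a hypothetical class name, sample offsets, and plain booleans standing in for EventBatch.isCommitted().

import java.util.Iterator;
import java.util.Map;
import java.util.TreeMap;

/* Hypothetical demo (not code from this commit): the "lowest consecutive
 * committed offset" scan over an offset-sorted map. */
public class LowestConsecutiveOffsetDemo {
    public static void main(String[] args) {
        TreeMap<Long, Boolean> acked = new TreeMap<>(); // offset -> acked?
        acked.put(10L, true);
        acked.put(11L, true);
        acked.put(12L, false); // gap: not acked yet
        acked.put(13L, true);  // acked, but blocked behind the gap

        long offset = -1;
        Iterator<Map.Entry<Long, Boolean>> iter = acked.entrySet().iterator();
        while (iter.hasNext()) {
            Map.Entry<Long, Boolean> e = iter.next();
            if (!e.getValue()) {
                break;          // stop at the first uncommitted offset
            }
            offset = e.getKey();
            iter.remove();      // drop the fully acked entry
        }
        // Offsets 10 and 11 were removed; 12 blocks the rest, so the
        // next safe commit point is offset + 1 == 12.
        System.out.println("commit offset = " + (offset + 1));
    }
}

As a side note, the counting pipeline in cleanupAfterClosedPartitions could equivalently be written as partitions.stream().map(all::get).filter(Objects::nonNull).mapToInt(Map::size).sum(); the commit's longer chain just spells out each step.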

src/main/java/com/splunk/kafka/connect/SplunkSinkTask.java

Lines changed: 15 additions & 0 deletions
@@ -325,6 +325,21 @@ private EventBatch createRawEventBatch(final TopicPartition tp) {
             .build();
     }
 
+    @Override
+    public void open(Collection<TopicPartition> partitions) {
+        tracker.open(partitions);
+    }
+
+    @Override
+    public void close(Collection<TopicPartition> partitions) {
+        /* Purge buffered events tied to closed partitions because this task
+         * won't be able to commit their offsets. */
+        bufferedRecords.removeIf(r -> partitions.contains(
+            new TopicPartition(r.topic(), r.kafkaPartition())));
+        /* Tell the tracker about the now-closed partitions so it can clean up. */
+        tracker.close(partitions);
+    }
+
     @Override
     public Map<TopicPartition, OffsetAndMetadata> preCommit(Map<TopicPartition, OffsetAndMetadata> meta) {
         // tell Kafka Connect framework what are offsets we can safely commit to Kafka now
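
The bufferedRecords.removeIf(...) purge works because TopicPartition has value equality: a fresh instance built from a record's topic and partition compares equal to the one stored in the closed-partitions collection. A tiny self-contained check with hypothetical values:

import java.util.ArrayList;
import java.util.List;
import org.apache.kafka.common.TopicPartition;

/* Hypothetical check (not code from this commit): TopicPartition instances
 * compare by value, which is what removeIf/contains rely on above. */
public class TopicPartitionEqualityDemo {
    public static void main(String[] args) {
        List<TopicPartition> closed = new ArrayList<>();
        closed.add(new TopicPartition("t", 1));

        // A fresh instance rebuilt from a record's (topic, partition)
        // still matches the stored one.
        System.out.println(closed.contains(new TopicPartition("t", 1))); // true
        System.out.println(closed.contains(new TopicPartition("t", 2))); // false
    }
}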

src/test/java/com/splunk/kafka/connect/KafkaRecordTrackerTest.java

Lines changed: 9 additions & 1 deletion
@@ -33,8 +33,9 @@ public class KafkaRecordTrackerTest {
     public void addFailedEventBatch() {
         EventBatch batch = UnitUtil.createBatch();
         batch.fail();
-
+        batch.getEvents().get(0).setTied(createSinkRecord(1));
         KafkaRecordTracker tracker = new KafkaRecordTracker();
+        tracker.open(createTopicPartitionList());
         tracker.addFailedEventBatch(batch);
         Collection<EventBatch> failed = tracker.getAndRemoveFailedRecords();
         Assert.assertEquals(1, failed.size());
@@ -55,6 +56,7 @@ public void addEventBatch() {
             EventBatch batch = UnitUtil.createBatch();
             batch.getEvents().get(0).setTied(createSinkRecord(i));
             batches.add(batch);
+            tracker.open(createTopicPartitionList());
             tracker.addEventBatch(batch);
         }
         Map<TopicPartition, OffsetAndMetadata> offsets = tracker.computeOffsets();
@@ -96,4 +98,10 @@ public void addEventBatchWithNonSinkRecord() {
     private SinkRecord createSinkRecord(long offset) {
         return new SinkRecord("t", 1, null, null, null, "ni, hao", offset);
     }
+
+    private List<TopicPartition> createTopicPartitionList() {
+        ArrayList<TopicPartition> tps = new ArrayList<>();
+        tps.add(new TopicPartition("t", 1));
+        return tps;
+    }
 }
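
These test changes are not incidental: with the new purge, a tracker that has never seen open(...) treats every partition as unassigned and strips the events from returned failed batches. A hypothetical sketch of that behavior, assuming the repo's test utilities (UnitUtil, EventBatch) and the same package as the tests:

import java.util.Collection;
import org.apache.kafka.connect.sink.SinkRecord;

/* Hypothetical sketch (not a test from this commit), same package as the
 * tests so UnitUtil, EventBatch and KafkaRecordTracker resolve. */
public class WhyOpenMattersSketch {
    public static void main(String[] args) {
        EventBatch batch = UnitUtil.createBatch();
        batch.fail();
        batch.getEvents().get(0).setTied(
            new SinkRecord("t", 1, null, null, null, "ni, hao", 0));

        KafkaRecordTracker tracker = new KafkaRecordTracker();
        // No tracker.open(...) here, so partition ("t", 1) is unassigned.
        tracker.addFailedEventBatch(batch);

        Collection<EventBatch> failed = tracker.getAndRemoveFailedRecords();
        // The batch comes back, but its events were purged because they
        // are tied to an unassigned partition.
        System.out.println(failed.iterator().next().getEvents().size()); // 0
    }
}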

src/test/java/com/splunk/kafka/connect/SplunkSinkTaskTest.java

Lines changed: 17 additions & 0 deletions
@@ -78,16 +78,21 @@ public void putWithoutMaxBatchAligned() {
 
         SplunkSinkTask task = new SplunkSinkTask();
         HecMock hec = new HecMock(task);
+        TopicPartition tp = new TopicPartition(uu.configProfile.getTopics(), 1);
+        List<TopicPartition> partitions = new ArrayList<>();
+        partitions.add(tp);
         // success
         hec.setSendReturnResult(HecMock.success);
         task.setHec(hec);
         task.start(config);
+        task.open(partitions);
         task.put(createSinkRecords(120));
         Assert.assertEquals(2, hec.getBatches().size());
         Map<TopicPartition, OffsetAndMetadata> offsets = new HashMap<>();
         offsets.put(new TopicPartition(uu.configProfile.getTopics(), 1), new OffsetAndMetadata(120));
         Assert.assertEquals(offsets, task.preCommit(new HashMap<>()));
         Assert.assertTrue(task.getTracker().getAndRemoveFailedRecords().isEmpty());
+        task.close(partitions);
         task.stop();
     }
 
@@ -105,6 +110,7 @@ public void putWithFailure() {
         hec.setSendReturnResult(HecMock.failure);
         task.setHec(hec);
         task.start(config);
+        task.open(createTopicPartitionList());
         task.put(createSinkRecords(1000));
         Assert.assertEquals(10, hec.getBatches().size());
         Assert.assertTrue(task.getTracker().computeOffsets().isEmpty());
@@ -266,10 +272,14 @@ private void putWithSuccess(boolean raw, boolean withMeta) {
 
         SplunkSinkTask task = new SplunkSinkTask();
         HecMock hec = new HecMock(task);
+        TopicPartition tp = new TopicPartition(uu.configProfile.getTopics(), 1);
+        List<TopicPartition> partitions = new ArrayList<>();
+        partitions.add(tp);
         // success
         hec.setSendReturnResult(HecMock.success);
         task.setHec(hec);
         task.start(config);
+        task.open(partitions);
         task.put(createSinkRecords(total));
         Assert.assertEquals(10, hec.getBatches().size());
         if (raw && withMeta) {
@@ -303,6 +313,7 @@ private void putWithSuccess(boolean raw, boolean withMeta) {
         offsets.put(new TopicPartition(uu.configProfile.getTopics(), 1), new OffsetAndMetadata(1000));
         Assert.assertEquals(offsets, task.preCommit(new HashMap<>()));
         Assert.assertTrue(task.getTracker().getAndRemoveFailedRecords().isEmpty());
+        task.close(partitions);
         task.stop();
     }
 
@@ -329,4 +340,10 @@ private Collection<SinkRecord> createNullSinkRecord() {
         records.add(rec);
         return records;
     }
+
+    private List<TopicPartition> createTopicPartitionList() {
+        ArrayList<TopicPartition> tps = new ArrayList<>();
+        tps.add(new TopicPartition("mytopic", 1));
+        return tps;
+    }
 }
