Skip to content

Commit e134735

Browse files
committed
Draft: test non-exact batch size
1 parent 8bbadaf commit e134735

File tree

3 files changed

+40
-39
lines changed

3 files changed

+40
-39
lines changed

Cargo.lock

Lines changed: 17 additions & 34 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,3 +229,21 @@ unexpected_cfgs = { level = "warn", check-cfg = [
229229
"cfg(tarpaulin_include)",
230230
] }
231231
unused_qualifications = "deny"
232+
233+
234+
# Patch the crates.io arrow crates to use a testing version of arrow-rs
235+
# Pinned to a fixed revision (presumably from the zhuqi-lucas/support_exact_size_config branch)
236+
[patch.crates-io]
237+
arrow = { git = "https://github.com/zhuqi-lucas/arrow-rs.git", rev = "60a239ae614568bacff819494b9332c98052badc" }
238+
arrow-array = { git = "https://github.com/zhuqi-lucas/arrow-rs.git", rev = "60a239ae614568bacff819494b9332c98052badc" }
239+
arrow-buffer = { git = "https://github.com/zhuqi-lucas/arrow-rs.git", rev = "60a239ae614568bacff819494b9332c98052badc" }
240+
arrow-cast = { git = "https://github.com/zhuqi-lucas/arrow-rs.git", rev = "60a239ae614568bacff819494b9332c98052badc" }
241+
arrow-data = { git = "https://github.com/zhuqi-lucas/arrow-rs.git", rev = "60a239ae614568bacff819494b9332c98052badc" }
242+
arrow-ipc = { git = "https://github.com/zhuqi-lucas/arrow-rs.git", rev = "60a239ae614568bacff819494b9332c98052badc" }
243+
arrow-schema = { git = "https://github.com/zhuqi-lucas/arrow-rs.git", rev = "60a239ae614568bacff819494b9332c98052badc" }
244+
arrow-select = { git = "https://github.com/zhuqi-lucas/arrow-rs.git", rev = "60a239ae614568bacff819494b9332c98052badc" }
245+
arrow-string = { git = "https://github.com/zhuqi-lucas/arrow-rs.git", rev = "60a239ae614568bacff819494b9332c98052badc" }
246+
arrow-ord = { git = "https://github.com/zhuqi-lucas/arrow-rs.git", rev = "60a239ae614568bacff819494b9332c98052badc" }
247+
arrow-flight = { git = "https://github.com/zhuqi-lucas/arrow-rs.git", rev = "60a239ae614568bacff819494b9332c98052badc" }
248+
parquet = { git = "https://github.com/zhuqi-lucas/arrow-rs.git", rev = "60a239ae614568bacff819494b9332c98052badc" }
249+

datafusion/physical-plan/src/coalesce/mod.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ impl LimitedBatchCoalescer {
6060
fetch: Option<usize>,
6161
) -> Self {
6262
Self {
63-
inner: BatchCoalescer::new(schema, target_batch_size),
63+
inner: BatchCoalescer::new(schema, target_batch_size).with_exact_size(false),
6464
total_rows: 0,
6565
fetch,
6666
finished: false,
@@ -156,7 +156,7 @@ mod tests {
156156
.with_batches(std::iter::repeat_n(batch, 10))
157157
// exact sizing is disabled, so output batches hold at least 21 rows (24 here: three 8-row inputs), except for the final batch
158158
.with_target_batch_size(21)
159-
.with_expected_output_sizes(vec![21, 21, 21, 17])
159+
.with_expected_output_sizes(vec![24, 24, 24, 8])
160160
.run()
161161
}
162162

@@ -169,7 +169,7 @@ mod tests {
169169
// expected to behave the same as `test_concat_batches`
170170
.with_target_batch_size(21)
171171
.with_fetch(Some(100))
172-
.with_expected_output_sizes(vec![21, 21, 21, 17])
172+
.with_expected_output_sizes(vec![24, 24, 24, 8])
173173
.run();
174174
}
175175

@@ -181,7 +181,7 @@ mod tests {
181181
// input is 10 batches x 8 rows (80 rows) with fetch limit of 50
182182
.with_target_batch_size(21)
183183
.with_fetch(Some(50))
184-
.with_expected_output_sizes(vec![21, 21, 8])
184+
.with_expected_output_sizes(vec![24, 24, 2])
185185
.run();
186186
}
187187

@@ -191,7 +191,7 @@ mod tests {
191191
Test::new()
192192
.with_batches(std::iter::repeat_n(batch, 10))
193193
// input is 10 batches x 8 rows (80 rows) with fetch limit of 48
194-
.with_target_batch_size(24)
194+
.with_target_batch_size(21)
195195
.with_fetch(Some(48))
196196
.with_expected_output_sizes(vec![24, 24])
197197
.run();

0 commit comments

Comments
 (0)