
Commit 47d2431

andygrove authored and dharanad committed

remove dead code (apache#1155)

1 parent 210f380 · commit 47d2431

24 files changed (+27, -281 lines)
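This commit appears to be driven by rustc's `dead_code` lint, which flags items (functions, struct fields, type aliases) that are never read or called. A minimal illustration of the kind of warning behind these removals (toy code, not from Comet):

```rust
struct Config {
    used: i32,
    unused: i32, // rustc: warning: field `unused` is never read
}

fn main() {
    let c = Config { used: 1, unused: 2 };
    println!("{}", c.used);
}
```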

native/core/benches/bloom_filter_agg.rs

Lines changed: 0 additions & 2 deletions

```diff
@@ -61,10 +61,8 @@ fn criterion_benchmark(c: &mut Criterion) {
         group.bench_function(agg_mode.0, |b| {
             let comet_bloom_filter_agg =
                 Arc::new(AggregateUDF::new_from_impl(BloomFilterAgg::new(
-                    Arc::clone(&c0),
                     Arc::clone(&num_items),
                     Arc::clone(&num_bits),
-                    "bloom_filter_agg",
                     DataType::Binary,
                 )));
             b.to_async(&rt).iter(|| {
```

native/core/benches/parquet_read.rs

Lines changed: 2 additions & 3 deletions

```diff
@@ -44,7 +44,7 @@ fn bench(c: &mut Criterion) {
     let mut group = c.benchmark_group("comet_parquet_read");
     let schema = build_test_schema();

-    let pages = build_plain_int32_pages(schema.clone(), schema.column(0), 0.0);
+    let pages = build_plain_int32_pages(schema.column(0), 0.0);
     group.bench_function("INT/PLAIN/NOT_NULL", |b| {
         let t = TypePtr::new(
             PrimitiveTypeBuilder::new("f", PhysicalType::INT32)
@@ -107,7 +107,6 @@ const VALUES_PER_PAGE: usize = 10_000;
 const BATCH_SIZE: usize = 4096;

 fn build_plain_int32_pages(
-    schema: SchemaDescPtr,
     column_desc: ColumnDescPtr,
     null_density: f32,
 ) -> impl PageIterator + Clone {
@@ -143,7 +142,7 @@ fn build_plain_int32_pages(

     // Since `InMemoryPageReader` is not exposed from parquet crate, here we use
     // `InMemoryPageIterator` instead which is a Iter<Iter<Page>>.
-    InMemoryPageIterator::new(schema, column_desc, vec![pages])
+    InMemoryPageIterator::new(vec![pages])
 }

 struct TestColumnReader {
```

native/core/src/errors.rs

Lines changed: 1 addition & 43 deletions

```diff
@@ -485,23 +485,6 @@ where
     || f(t)
 }

-// This is a duplicate of `try_unwrap_or_throw`, which is used to work around Arrow's lack of
-// `UnwindSafe` handling.
-pub fn try_assert_unwind_safe_or_throw<T, F>(env: &JNIEnv, f: F) -> T
-where
-    T: JNIDefault,
-    F: FnOnce(JNIEnv) -> Result<T, CometError>,
-{
-    let mut env1 = unsafe { JNIEnv::from_raw(env.get_raw()).unwrap() };
-    let env2 = unsafe { JNIEnv::from_raw(env.get_raw()).unwrap() };
-    unwrap_or_throw_default(
-        &mut env1,
-        flatten(
-            catch_unwind(std::panic::AssertUnwindSafe(curry(f, env2))).map_err(CometError::from),
-        ),
-    )
-}
-
 // It is currently undefined behavior to unwind from Rust code into foreign code, so we can wrap
 // our JNI functions and turn these panics into a `RuntimeException`.
 pub fn try_unwrap_or_throw<T, F>(env: &JNIEnv, f: F) -> T
@@ -534,10 +517,7 @@ mod tests {
         AttachGuard, InitArgsBuilder, JNIEnv, JNIVersion, JavaVM,
     };

-    use assertables::{
-        assert_contains, assert_contains_as_result, assert_starts_with,
-        assert_starts_with_as_result,
-    };
+    use assertables::{assert_starts_with, assert_starts_with_as_result};

     pub fn jvm() -> &'static Arc<JavaVM> {
         static mut JVM: Option<Arc<JavaVM>> = None;
@@ -890,26 +870,4 @@ mod tests {
         // first line.
         assert_starts_with!(msg_rust, expected_message);
     }
-
-    // Asserts that exception's message matches `expected_message`.
-    fn assert_exception_message_with_stacktrace(
-        env: &mut JNIEnv,
-        exception: JThrowable,
-        expected_message: &str,
-        stacktrace_contains: &str,
-    ) {
-        let message = env
-            .call_method(exception, "getMessage", "()Ljava/lang/String;", &[])
-            .unwrap()
-            .l()
-            .unwrap();
-        let message_string = message.into();
-        let msg_rust: String = env.get_string(&message_string).unwrap().into();
-        // Since panics result in multi-line messages which include the backtrace, just use the
-        // first line.
-        assert_starts_with!(msg_rust, expected_message);
-
-        // Check that the stacktrace is included by checking for a specific element
-        assert_contains!(msg_rust, stacktrace_contains);
-    }
 }
```
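The surviving `try_unwrap_or_throw` is the pattern that matters here: panics must not unwind across the JNI boundary, so each native entry point runs its body under `catch_unwind` and rethrows failures as a Java exception. A sketch of how such an entry point is typically written (the exported function name and body are hypothetical; only `try_unwrap_or_throw` and `CometError` come from this module and are assumed in scope):

```rust
use jni::objects::JClass;
use jni::sys::jlong;
use jni::JNIEnv;

// Hypothetical JNI export: `try_unwrap_or_throw` converts both `Err` results
// and panics into a Java RuntimeException instead of unwinding into the JVM.
#[no_mangle]
pub extern "system" fn Java_org_example_Native_doWork(
    e: JNIEnv,
    _class: JClass,
) -> jlong {
    try_unwrap_or_throw(&e, |_env| {
        // ... real work that returns Result<jlong, CometError> ...
        Ok(42)
    })
}
```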

native/core/src/execution/datafusion/expressions/bloom_filter_agg.rs

Lines changed: 0 additions & 6 deletions

```diff
@@ -34,9 +34,7 @@ use datafusion_physical_expr::expressions::Literal;

 #[derive(Debug, Clone)]
 pub struct BloomFilterAgg {
-    name: String,
     signature: Signature,
-    expr: Arc<dyn PhysicalExpr>,
     num_items: i32,
     num_bits: i32,
 }
@@ -53,15 +51,12 @@ fn extract_i32_from_literal(expr: Arc<dyn PhysicalExpr>) -> i32 {

 impl BloomFilterAgg {
     pub fn new(
-        expr: Arc<dyn PhysicalExpr>,
         num_items: Arc<dyn PhysicalExpr>,
         num_bits: Arc<dyn PhysicalExpr>,
-        name: impl Into<String>,
         data_type: DataType,
     ) -> Self {
         assert!(matches!(data_type, DataType::Binary));
         Self {
-            name: name.into(),
             signature: Signature::uniform(
                 1,
                 vec![
@@ -73,7 +68,6 @@ impl BloomFilterAgg {
                 ],
                 Volatility::Immutable,
             ),
-            expr,
             num_items: extract_i32_from_literal(num_items),
             num_bits: extract_i32_from_literal(num_bits),
         }
```
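Why these fields were dead: DataFusion resolves a UDAF's name through the `AggregateUDFImpl::name()` trait method rather than a stored field, and the planner binds the input column when it builds the aggregate expression (note the `vec![child]` in the planner.rs hunk below), so neither `name` nor `expr` was ever read.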

native/core/src/execution/datafusion/planner.rs

Lines changed: 0 additions & 3 deletions

```diff
@@ -116,7 +116,6 @@ use std::cmp::max;
 use std::{collections::HashMap, sync::Arc};

 // For clippy error on type_complexity.
-type ExecResult<T> = Result<T, ExecutionError>;
 type PhyAggResult = Result<Vec<AggregateFunctionExpr>, ExecutionError>;
 type PhyExprResult = Result<Vec<(Arc<dyn PhysicalExpr>, String)>, ExecutionError>;
 type PartitionPhyExprResult = Result<Vec<Arc<dyn PhysicalExpr>>, ExecutionError>;
@@ -1773,10 +1772,8 @@ impl PhysicalPlanner {
             self.create_expr(expr.num_bits.as_ref().unwrap(), Arc::clone(&schema))?;
         let datatype = to_arrow_datatype(expr.datatype.as_ref().unwrap());
         let func = AggregateUDF::new_from_impl(BloomFilterAgg::new(
-            Arc::clone(&child),
             Arc::clone(&num_items),
             Arc::clone(&num_bits),
-            "bloom_filter_agg",
             datatype,
         ));
         Self::create_aggr_func_expr("bloom_filter_agg", schema, vec![child], func)
```

native/core/src/execution/datafusion/util/spark_bit_array.rs

Lines changed: 1 addition & 0 deletions

```diff
@@ -70,6 +70,7 @@ impl SparkBitArray {
         self.data.len()
     }

+    #[allow(dead_code)] // this is only called from tests
     pub fn cardinality(&self) -> usize {
         self.bit_count
     }
```
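`#[allow(dead_code)]` keeps the method compiled into non-test builds but silences the lint. An alternative for test-only helpers is `#[cfg(test)]`, which removes the item from release builds entirely; a minimal standalone sketch (toy type, not Comet's):

```rust
struct Counter {
    hits: usize,
}

impl Counter {
    fn record(&mut self) {
        self.hits += 1;
    }

    // Compiled only for `cargo test`; release builds never see this item,
    // so the dead_code lint has nothing to flag.
    #[cfg(test)]
    fn hits(&self) -> usize {
        self.hits
    }
}

fn main() {
    let mut c = Counter { hits: 0 };
    c.record();
}
```

The trade-off: `#[cfg(test)]` would make `cardinality` unavailable to benches and downstream crates, which the `#[allow(dead_code)]` chosen here preserves.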

native/core/src/execution/jni_api.rs

Lines changed: 0 additions & 8 deletions

```diff
@@ -207,14 +207,6 @@ fn prepare_datafusion_session_context(
     Ok(session_ctx)
 }

-fn parse_bool(conf: &HashMap<String, String>, name: &str) -> CometResult<bool> {
-    conf.get(name)
-        .map(String::as_str)
-        .unwrap_or("false")
-        .parse::<bool>()
-        .map_err(|e| CometError::Config(format!("Failed to parse boolean config {name}: {e}")))
-}
-
 /// Prepares arrow arrays for output.
 fn prepare_output(
     env: &mut JNIEnv,
```
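The deleted `parse_bool` leaned on `str::parse::<bool>`, which accepts exactly "true" or "false" (case-sensitive) and errors on anything else; a minimal reminder of that behavior in isolation:

```rust
fn main() {
    assert_eq!("true".parse::<bool>(), Ok(true));
    assert_eq!("false".parse::<bool>(), Ok(false));
    // Anything else, including different casing, is an error.
    assert!("TRUE".parse::<bool>().is_err());
    assert!("1".parse::<bool>().is_err());
}
```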

native/core/src/execution/kernels/strings.rs

Lines changed: 1 addition & 116 deletions

```diff
@@ -21,7 +21,7 @@ use std::sync::Arc;

 use arrow::{
     array::*,
-    buffer::{Buffer, MutableBuffer},
+    buffer::MutableBuffer,
     compute::kernels::substring::{substring as arrow_substring, substring_by_char},
     datatypes::{DataType, Int32Type},
 };
@@ -87,43 +87,6 @@ pub fn substring(array: &dyn Array, start: i64, length: u64) -> Result<ArrayRef,
     }
 }

-/// Returns an ArrayRef with a substring starting from `start` and length.
-///
-/// # Preconditions
-///
-/// - `start` can be negative, in which case the start counts from the end of the string.
-/// - `array` must be either [`StringArray`] or [`LargeStringArray`].
-///
-/// Note: this is different from arrow-rs `substring` kernel in that both `start` and `length` are
-/// `Int32Array` here.
-pub fn substring_with_array(
-    array: &dyn Array,
-    start: &Int32Array,
-    length: &Int32Array,
-) -> ArrayRef {
-    match array.data_type() {
-        DataType::LargeUtf8 => generic_substring(
-            array
-                .as_any()
-                .downcast_ref::<LargeStringArray>()
-                .expect("A large string is expected"),
-            start,
-            length,
-            |i| i as i64,
-        ),
-        DataType::Utf8 => generic_substring(
-            array
-                .as_any()
-                .downcast_ref::<StringArray>()
-                .expect("A string is expected"),
-            start,
-            length,
-            |i| i,
-        ),
-        _ => panic!("substring does not support type {:?}", array.data_type()),
-    }
-}
-
 fn generic_string_space<OffsetSize: OffsetSizeTrait>(length: &Int32Array) -> ArrayRef {
     let array_len = length.len();
     let mut offsets = MutableBuffer::new((array_len + 1) * std::mem::size_of::<OffsetSize>());
@@ -163,81 +126,3 @@ fn generic_string_space<OffsetSize: OffsetSizeTrait>(length: &Int32Array) -> Arr
     };
     make_array(data)
 }
-
-fn generic_substring<OffsetSize: OffsetSizeTrait, F>(
-    array: &GenericStringArray<OffsetSize>,
-    start: &Int32Array,
-    length: &Int32Array,
-    f: F,
-) -> ArrayRef
-where
-    F: Fn(i32) -> OffsetSize,
-{
-    assert_eq!(array.len(), start.len());
-    assert_eq!(array.len(), length.len());
-
-    // compute current offsets
-    let offsets = array.to_data().buffers()[0].clone();
-    let offsets: &[OffsetSize] = offsets.typed_data::<OffsetSize>();
-
-    // compute null bitmap (copy)
-    let null_bit_buffer = array.to_data().nulls().map(|b| b.buffer().clone());
-
-    // Gets slices of start and length arrays to access them directly for performance.
-    let start_data = start.to_data();
-    let length_data = length.to_data();
-    let starts = start_data.buffers()[0].typed_data::<i32>();
-    let lengths = length_data.buffers()[0].typed_data::<i32>();
-
-    // compute values
-    let array_data = array.to_data();
-    let values = &array_data.buffers()[1];
-    let data = values.as_slice();
-
-    // we have no way to estimate how much this will be.
-    let mut new_values = MutableBuffer::new(0);
-    let mut new_offsets: Vec<OffsetSize> = Vec::with_capacity(array.len() + 1);
-
-    let mut length_so_far = OffsetSize::zero();
-    new_offsets.push(length_so_far);
-    (0..array.len()).for_each(|i| {
-        // the length of this entry
-        let length_i: OffsetSize = offsets[i + 1] - offsets[i];
-        // compute where we should start slicing this entry
-        let start_pos: OffsetSize = f(starts[i]);
-
-        let start = offsets[i]
-            + if start_pos >= OffsetSize::zero() {
-                start_pos
-            } else {
-                length_i + start_pos
-            };
-
-        let start = start.clamp(offsets[i], offsets[i + 1]);
-        // compute the length of the slice
-        let slice_length: OffsetSize = f(lengths[i].max(0)).min(offsets[i + 1] - start);
-
-        length_so_far += slice_length;
-
-        new_offsets.push(length_so_far);
-
-        // we need usize for ranges
-        let start = start.to_usize().unwrap();
-        let slice_length = slice_length.to_usize().unwrap();
-
-        new_values.extend_from_slice(&data[start..start + slice_length]);
-    });
-
-    let data = unsafe {
-        ArrayData::new_unchecked(
-            GenericStringArray::<OffsetSize>::DATA_TYPE,
-            array.len(),
-            None,
-            null_bit_buffer,
-            0,
-            vec![Buffer::from_slice_ref(&new_offsets), new_values.into()],
-            vec![],
-        )
-    };
-    make_array(data)
-}
```
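The per-row variant is gone, but the scalar case Comet still uses goes through the arrow-rs kernel imported above as `arrow_substring`. A sketch of that kernel in isolation (plain arrow-rs outside Comet, assuming an arrow version contemporary with this commit):

```rust
use arrow::array::{Array, StringArray};
use arrow::compute::kernels::substring::substring;

fn main() {
    let input = StringArray::from(vec![Some("hello"), None, Some("world")]);
    // Scalar start/length for the whole array; the deleted
    // `substring_with_array` accepted per-row Int32Array arguments instead.
    let result = substring(&input, 1, Some(3)).unwrap();
    let result = result.as_any().downcast_ref::<StringArray>().unwrap();
    assert_eq!(result.value(0), "ell");
    assert!(result.is_null(1));
    assert_eq!(result.value(2), "orl");
}
```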

native/core/src/execution/operators/scan.rs

Lines changed: 0 additions & 8 deletions

```diff
@@ -525,12 +525,4 @@ impl InputBatch {

         InputBatch::Batch(columns, num_rows)
     }
-
-    /// Get the number of rows in this batch
-    fn num_rows(&self) -> usize {
-        match self {
-            Self::EOF => 0,
-            Self::Batch(_, num_rows) => *num_rows,
-        }
-    }
 }
```

native/core/src/execution/shuffle/list.rs

Lines changed: 1 addition & 3 deletions

```diff
@@ -28,7 +28,6 @@ use arrow_schema::{DataType, TimeUnit};

 pub struct SparkUnsafeArray {
     row_addr: i64,
-    row_size: i32,
     num_elements: usize,
     element_offset: i64,
 }
@@ -45,7 +44,7 @@ impl SparkUnsafeObject for SparkUnsafeArray {

 impl SparkUnsafeArray {
     /// Creates a `SparkUnsafeArray` which points to the given address and size in bytes.
-    pub fn new(addr: i64, size: i32) -> Self {
+    pub fn new(addr: i64) -> Self {
         // Read the number of elements from the first 8 bytes.
         let slice: &[u8] = unsafe { std::slice::from_raw_parts(addr as *const u8, 8) };
         let num_elements = i64::from_le_bytes(slice.try_into().unwrap());
@@ -60,7 +59,6 @@ impl SparkUnsafeArray {

         Self {
             row_addr: addr,
-            row_size: size,
             num_elements: num_elements as usize,
             element_offset: addr + Self::get_header_portion_in_bytes(num_elements),
         }
```
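The retained constructor reads the element count from the first 8 bytes at `addr` as a little-endian i64, presumably matching Spark's UnsafeArrayData layout. That header read in isolation, with a stack buffer standing in for the raw row address:

```rust
fn main() {
    // The array region begins with the element count as a little-endian i64
    // in its first 8 bytes; here a local buffer stands in for `addr`.
    let header: [u8; 8] = 3i64.to_le_bytes();
    let num_elements = i64::from_le_bytes(header);
    assert_eq!(num_elements, 3);
}
```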
