Skip to content

Commit

Permalink
refactor: Rename decimal_float to decimal_comma (#15817)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 authored Apr 21, 2024
1 parent 522e659 commit 5b11f28
Show file tree
Hide file tree
Showing 18 changed files with 86 additions and 86 deletions.
12 changes: 6 additions & 6 deletions crates/polars-io/src/csv/buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -486,7 +486,7 @@ pub(crate) fn init_buffers(
schema: &Schema,
quote_char: Option<u8>,
encoding: CsvEncoding,
decimal_float: bool,
decimal_comma: bool,
) -> PolarsResult<Vec<Buffer>> {
projection
.iter()
Expand All @@ -507,7 +507,7 @@ pub(crate) fn init_buffers(
&DataType::UInt32 => Buffer::UInt32(PrimitiveChunkedBuilder::new(name, capacity)),
&DataType::UInt64 => Buffer::UInt64(PrimitiveChunkedBuilder::new(name, capacity)),
&DataType::Float32 => {
if decimal_float {
if decimal_comma {
Buffer::DecimalFloat32(
PrimitiveChunkedBuilder::new(name, capacity),
Default::default(),
Expand All @@ -517,7 +517,7 @@ pub(crate) fn init_buffers(
}
},
&DataType::Float64 => {
if decimal_float {
if decimal_comma {
Buffer::DecimalFloat64(
PrimitiveChunkedBuilder::new(name, capacity),
Default::default(),
Expand Down Expand Up @@ -825,7 +825,7 @@ impl Buffer {
None,
),
DecimalFloat32(buf, scratch) => {
prepare_decimal_float(bytes, scratch);
prepare_decimal_comma(bytes, scratch);
<PrimitiveChunkedBuilder<Float32Type> as ParsedBuffer>::parse_bytes(
buf,
scratch,
Expand All @@ -836,7 +836,7 @@ impl Buffer {
)
},
DecimalFloat64(buf, scratch) => {
prepare_decimal_float(bytes, scratch);
prepare_decimal_comma(bytes, scratch);
<PrimitiveChunkedBuilder<Float64Type> as ParsedBuffer>::parse_bytes(
buf,
scratch,
Expand Down Expand Up @@ -891,7 +891,7 @@ impl Buffer {
}

#[inline]
fn prepare_decimal_float(bytes: &[u8], scratch: &mut Vec<u8>) {
fn prepare_decimal_comma(bytes: &[u8], scratch: &mut Vec<u8>) {
scratch.clear();
scratch.reserve(bytes.len());

Expand Down
2 changes: 1 addition & 1 deletion crates/polars-io/src/csv/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ pub struct CsvParserOptions {
pub schema_overwrite: Option<SchemaRef>,
pub schema: Option<SchemaRef>,
pub null_values: Option<NullValues>,
pub decimal_float: bool,
pub decimal_comma: bool,
}

/// Read the number of rows without parsing columns
Expand Down
14 changes: 7 additions & 7 deletions crates/polars-io/src/csv/read.rs
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ where
has_header: bool,
ignore_errors: bool,
eol_char: u8,
decimal_float: bool,
decimal_comma: bool,
}

impl<'a, R> CsvReader<'a, R>
Expand Down Expand Up @@ -374,8 +374,8 @@ where
}

/// Parse floats that use a comma as the decimal separator (e.g. `1,5` instead of `1.5`).
pub fn with_decimal_float(mut self, toggle: bool) -> Self {
self.decimal_float = toggle;
pub fn with_decimal_comma(mut self, toggle: bool) -> Self {
self.decimal_comma = toggle;
self
}
}
Expand Down Expand Up @@ -429,7 +429,7 @@ impl<'a, R: MmapBytesReader + 'a> CsvReader<'a, R> {
self.try_parse_dates,
self.raise_if_empty,
self.truncate_ragged_lines,
self.decimal_float,
self.decimal_comma,
)
}

Expand Down Expand Up @@ -537,7 +537,7 @@ impl<'a> CsvReader<'a, Box<dyn MmapBytesReader>> {
self.try_parse_dates,
self.raise_if_empty,
&mut self.n_threads,
self.decimal_float,
self.decimal_comma,
)?;
let schema = Arc::new(inferred_schema);
Ok(to_batched_owned_mmap(self, schema))
Expand Down Expand Up @@ -568,7 +568,7 @@ impl<'a> CsvReader<'a, Box<dyn MmapBytesReader>> {
self.try_parse_dates,
self.raise_if_empty,
&mut self.n_threads,
self.decimal_float,
self.decimal_comma,
)?;
let schema = Arc::new(inferred_schema);
Ok(to_batched_owned_read(self, schema))
Expand Down Expand Up @@ -614,7 +614,7 @@ where
row_index: None,
raise_if_empty: true,
truncate_ragged_lines: false,
decimal_float: false,
decimal_comma: false,
}
}

Expand Down
6 changes: 3 additions & 3 deletions crates/polars-io/src/csv/read_impl/batched_mmap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ impl<'a> CoreReader<'a> {
schema: self.schema,
rows_read: 0,
_cat_lock,
decimal_float: self.decimal_float,
decimal_comma: self.decimal_comma,
})
}
}
Expand Down Expand Up @@ -194,7 +194,7 @@ pub struct BatchedCsvReaderMmap<'a> {
_cat_lock: Option<polars_core::StringCacheHolder>,
#[cfg(not(feature = "dtype-categorical"))]
_cat_lock: Option<u8>,
decimal_float: bool,
decimal_comma: bool,
}

impl<'a> BatchedCsvReaderMmap<'a> {
Expand Down Expand Up @@ -245,7 +245,7 @@ impl<'a> BatchedCsvReaderMmap<'a> {
self.chunk_size,
stop_at_nbytes,
self.starting_point_offset,
self.decimal_float,
self.decimal_comma,
)?;

cast_columns(&mut df, &self.to_cast, false, self.ignore_errors)?;
Expand Down
6 changes: 3 additions & 3 deletions crates/polars-io/src/csv/read_impl/batched_read.rs
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ impl<'a> CoreReader<'a> {
schema: self.schema,
rows_read: 0,
_cat_lock,
decimal_float: self.decimal_float,
decimal_comma: self.decimal_comma,
})
}
}
Expand Down Expand Up @@ -277,7 +277,7 @@ pub struct BatchedCsvReaderRead<'a> {
_cat_lock: Option<polars_core::StringCacheHolder>,
#[cfg(not(feature = "dtype-categorical"))]
_cat_lock: Option<u8>,
decimal_float: bool,
decimal_comma: bool,
}
//
impl<'a> BatchedCsvReaderRead<'a> {
Expand Down Expand Up @@ -342,7 +342,7 @@ impl<'a> BatchedCsvReaderRead<'a> {
self.chunk_size,
stop_at_n_bytes,
self.starting_point_offset,
self.decimal_float,
self.decimal_comma,
)?;

cast_columns(&mut df, &self.to_cast, false, self.ignore_errors)?;
Expand Down
20 changes: 10 additions & 10 deletions crates/polars-io/src/csv/read_impl/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ pub(crate) struct CoreReader<'a> {
sample_size: usize,
chunk_size: usize,
low_memory: bool,
decimal_float: bool,
decimal_comma: bool,
comment_prefix: Option<CommentPrefix>,
quote_char: Option<u8>,
eol_char: u8,
Expand Down Expand Up @@ -160,9 +160,9 @@ impl<'a> CoreReader<'a> {
try_parse_dates: bool,
raise_if_empty: bool,
truncate_ragged_lines: bool,
decimal_float: bool,
decimal_comma: bool,
) -> PolarsResult<CoreReader<'a>> {
check_decimal_float(decimal_float, separator.unwrap_or(b','))?;
check_decimal_comma(decimal_comma, separator.unwrap_or(b','))?;
#[cfg(any(feature = "decompress", feature = "decompress-fast"))]
let mut reader_bytes = reader_bytes;

Expand Down Expand Up @@ -209,7 +209,7 @@ impl<'a> CoreReader<'a> {
try_parse_dates,
raise_if_empty,
&mut n_threads,
decimal_float,
decimal_comma,
)?;
Arc::new(inferred_schema)
},
Expand Down Expand Up @@ -264,7 +264,7 @@ impl<'a> CoreReader<'a> {
to_cast,
row_index,
truncate_ragged_lines,
decimal_float,
decimal_comma,
})
}

Expand Down Expand Up @@ -510,7 +510,7 @@ impl<'a> CoreReader<'a> {
schema,
self.quote_char,
self.encoding,
self.decimal_float,
self.decimal_comma,
)?;

let local_bytes = &bytes[read..stop_at_nbytes];
Expand Down Expand Up @@ -595,7 +595,7 @@ impl<'a> CoreReader<'a> {
usize::MAX,
stop_at_nbytes,
starting_point_offset,
self.decimal_float,
self.decimal_comma,
)?;

cast_columns(&mut df, &self.to_cast, false, self.ignore_errors)?;
Expand All @@ -619,7 +619,7 @@ impl<'a> CoreReader<'a> {
self.schema.as_ref(),
self.quote_char,
self.encoding,
self.decimal_float,
self.decimal_comma,
)?;

parse_lines(
Expand Down Expand Up @@ -702,7 +702,7 @@ fn read_chunk(
chunk_size: usize,
stop_at_nbytes: usize,
starting_point_offset: Option<usize>,
decimal_float: bool,
decimal_comma: bool,
) -> PolarsResult<DataFrame> {
let mut read = bytes_offset_thread;
// There's an off-by-one error somewhere in the reading code, where it reads
Expand All @@ -716,7 +716,7 @@ fn read_chunk(
schema,
quote_char,
encoding,
decimal_float,
decimal_comma,
)?;

let mut last_read = usize::MAX;
Expand Down
34 changes: 17 additions & 17 deletions crates/polars-io/src/csv/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ pub(crate) fn get_file_chunks(
}

/// Infer the data type of a record
fn infer_field_schema(string: &str, try_parse_dates: bool, decimal_float: bool) -> DataType {
fn infer_field_schema(string: &str, try_parse_dates: bool, decimal_comma: bool) -> DataType {
// when quoting is enabled in the reader, these quotes aren't escaped, we default to
// String for them
if string.starts_with('"') {
Expand Down Expand Up @@ -91,8 +91,8 @@ fn infer_field_schema(string: &str, try_parse_dates: bool, decimal_float: bool)
// match regex in a particular order
else if BOOLEAN_RE.is_match(string) {
DataType::Boolean
} else if !decimal_float && FLOAT_RE.is_match(string)
|| decimal_float && FLOAT_RE_DECIMAL.is_match(string)
} else if !decimal_comma && FLOAT_RE.is_match(string)
|| decimal_comma && FLOAT_RE_DECIMAL.is_match(string)
{
DataType::Float64
} else if INTEGER_RE.is_match(string) {
Expand Down Expand Up @@ -154,7 +154,7 @@ pub fn infer_file_schema_inner(
recursion_count: u8,
raise_if_empty: bool,
n_threads: &mut Option<usize>,
decimal_float: bool,
decimal_comma: bool,
) -> PolarsResult<(Schema, usize, usize)> {
// keep track so that we can determine the amount of bytes read
let start_ptr = reader_bytes.as_ptr() as usize;
Expand Down Expand Up @@ -255,7 +255,7 @@ pub fn infer_file_schema_inner(
recursion_count + 1,
raise_if_empty,
n_threads,
decimal_float,
decimal_comma,
);
} else if !raise_if_empty {
return Ok((Schema::new(), 0, 0));
Expand Down Expand Up @@ -333,17 +333,17 @@ pub fn infer_file_schema_inner(
};
let s = parse_bytes_with_encoding(slice_escaped, encoding)?;
let dtype = match &null_values {
None => Some(infer_field_schema(&s, try_parse_dates, decimal_float)),
None => Some(infer_field_schema(&s, try_parse_dates, decimal_comma)),
Some(NullValues::AllColumns(names)) => {
if !names.iter().any(|nv| nv == s.as_ref()) {
Some(infer_field_schema(&s, try_parse_dates, decimal_float))
Some(infer_field_schema(&s, try_parse_dates, decimal_comma))
} else {
None
}
},
Some(NullValues::AllColumnsSingle(name)) => {
if s.as_ref() != name {
Some(infer_field_schema(&s, try_parse_dates, decimal_float))
Some(infer_field_schema(&s, try_parse_dates, decimal_comma))
} else {
None
}
Expand All @@ -356,12 +356,12 @@ pub fn infer_file_schema_inner(

if let Some(null_name) = null_name {
if null_name.1 != s.as_ref() {
Some(infer_field_schema(&s, try_parse_dates, decimal_float))
Some(infer_field_schema(&s, try_parse_dates, decimal_comma))
} else {
None
}
} else {
Some(infer_field_schema(&s, try_parse_dates, decimal_float))
Some(infer_field_schema(&s, try_parse_dates, decimal_comma))
}
},
};
Expand Down Expand Up @@ -459,16 +459,16 @@ pub fn infer_file_schema_inner(
recursion_count + 1,
raise_if_empty,
n_threads,
decimal_float,
decimal_comma,
);
}

Ok((Schema::from_iter(fields), rows_count, end_ptr - start_ptr))
}

pub(super) fn check_decimal_float(decimal_float: bool, separator: u8) -> PolarsResult<()> {
if decimal_float {
polars_ensure!(b',' != separator, InvalidOperation: "'decimal_float' argument cannot be combined with ',' quote char")
/// Validate that the `decimal_comma` option is compatible with the field separator.
///
/// When `decimal_comma` is enabled, a `,` inside a numeric field is the decimal
/// mark, so it cannot also be the byte that splits fields.
///
/// # Errors
///
/// Returns an `InvalidOperation` error when `decimal_comma` is `true` and
/// `separator` is `b','`.
pub(super) fn check_decimal_comma(decimal_comma: bool, separator: u8) -> PolarsResult<()> {
    if decimal_comma {
        // The value being checked is the field separator, not the quote char,
        // so the message names the option the user actually has to change.
        polars_ensure!(b',' != separator, InvalidOperation: "'decimal_comma' argument cannot be combined with ',' separator")
    }
    Ok(())
}
Expand Down Expand Up @@ -500,9 +500,9 @@ pub fn infer_file_schema(
try_parse_dates: bool,
raise_if_empty: bool,
n_threads: &mut Option<usize>,
decimal_float: bool,
decimal_comma: bool,
) -> PolarsResult<(Schema, usize, usize)> {
check_decimal_float(decimal_float, separator)?;
check_decimal_comma(decimal_comma, separator)?;
infer_file_schema_inner(
reader_bytes,
separator,
Expand All @@ -519,7 +519,7 @@ pub fn infer_file_schema(
0,
raise_if_empty,
n_threads,
decimal_float,
decimal_comma,
)
}

Expand Down
2 changes: 1 addition & 1 deletion crates/polars-lazy/src/physical_plan/executors/scan/csv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ impl CsvExec {
.with_try_parse_dates(self.options.try_parse_dates)
.with_n_threads(self.options.n_threads)
.truncate_ragged_lines(self.options.truncate_ragged_lines)
.with_decimal_float(self.options.decimal_float)
.with_decimal_comma(self.options.decimal_comma)
.raise_if_empty(self.options.raise_if_empty)
.finish()
}
Expand Down
Loading

0 comments on commit 5b11f28

Please sign in to comment.