|
16 | 16 | // under the License.
|
17 | 17 |
|
18 | 18 | use crate::schema::{Attributes, ComplexType, PrimitiveType, Record, Schema, TypeName};
|
| 19 | +use arrow_schema::DataType::{Decimal128, Decimal256}; |
19 | 20 | use arrow_schema::{
|
20 |
| - ArrowError, DataType, Field, FieldRef, Fields, IntervalUnit, SchemaBuilder, SchemaRef, TimeUnit, |
| 21 | + ArrowError, DataType, Field, FieldRef, Fields, IntervalUnit, SchemaBuilder, SchemaRef, |
| 22 | + TimeUnit, DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE, |
21 | 23 | };
|
22 | 24 | use std::borrow::Cow;
|
23 | 25 | use std::collections::HashMap;
|
@@ -192,6 +194,13 @@ pub enum Codec {
|
192 | 194 | /// Represents Avro fixed type, maps to Arrow's FixedSizeBinary data type
|
193 | 195 | /// The i32 parameter indicates the fixed binary size
|
194 | 196 | Fixed(i32),
|
| 197 | + /// Represents Avro decimal type, maps to Arrow's Decimal128 or Decimal256 data types |
| 198 | + /// |
| 199 | + /// The fields are `(precision, scale, fixed_size)`. |
| 200 | + /// - `precision` (`usize`): Total number of digits. |
| 201 | + /// - `scale` (`Option<usize>`): Number of fractional digits. |
| 202 | + /// - `fixed_size` (`Option<usize>`): Size in bytes if backed by a `fixed` type, otherwise `None`. |
| 203 | + Decimal(usize, Option<usize>, Option<usize>), |
195 | 204 | /// Represents Avro Uuid type, a FixedSizeBinary with a length of 16
|
196 | 205 | Uuid,
|
197 | 206 | /// Represents Avro array type, maps to Arrow's List data type
|
@@ -227,6 +236,22 @@ impl Codec {
|
227 | 236 | }
|
228 | 237 | Self::Interval => DataType::Interval(IntervalUnit::MonthDayNano),
|
229 | 238 | Self::Fixed(size) => DataType::FixedSizeBinary(*size),
|
| 239 | + Self::Decimal(precision, scale, size) => { |
| 240 | + let p = *precision as u8; |
| 241 | + let s = scale.unwrap_or(0) as i8; |
| 242 | + let too_large_for_128 = match *size { |
| 243 | + Some(sz) => sz > 16, |
| 244 | + None => { |
| 245 | + (p as usize) > DECIMAL128_MAX_PRECISION as usize |
| 246 | + || (s as usize) > DECIMAL128_MAX_SCALE as usize |
| 247 | + } |
| 248 | + }; |
| 249 | + if too_large_for_128 { |
| 250 | + Decimal256(p, s) |
| 251 | + } else { |
| 252 | + Decimal128(p, s) |
| 253 | + } |
| 254 | + } |
230 | 255 | Self::Uuid => DataType::FixedSizeBinary(16),
|
231 | 256 | Self::List(f) => {
|
232 | 257 | DataType::List(Arc::new(f.field_with_name(Field::LIST_FIELD_DEFAULT_NAME)))
|
@@ -267,6 +292,32 @@ impl From<PrimitiveType> for Codec {
|
267 | 292 | }
|
268 | 293 | }
|
269 | 294 |
|
| 295 | +fn parse_decimal_attributes( |
| 296 | + attributes: &Attributes, |
| 297 | + fallback_size: Option<usize>, |
| 298 | + precision_required: bool, |
| 299 | +) -> Result<(usize, usize, Option<usize>), ArrowError> { |
| 300 | + let precision = attributes |
| 301 | + .additional |
| 302 | + .get("precision") |
| 303 | + .and_then(|v| v.as_u64()) |
| 304 | + .or(if precision_required { None } else { Some(10) }) |
| 305 | + .ok_or_else(|| ArrowError::ParseError("Decimal requires precision".to_string()))? |
| 306 | + as usize; |
| 307 | + let scale = attributes |
| 308 | + .additional |
| 309 | + .get("scale") |
| 310 | + .and_then(|v| v.as_u64()) |
| 311 | + .unwrap_or(0) as usize; |
| 312 | + let size = attributes |
| 313 | + .additional |
| 314 | + .get("size") |
| 315 | + .and_then(|v| v.as_u64()) |
| 316 | + .map(|s| s as usize) |
| 317 | + .or(fallback_size); |
| 318 | + Ok((precision, scale, size)) |
| 319 | +} |
| 320 | + |
270 | 321 | impl Codec {
|
271 | 322 | /// Converts a string codec to use Utf8View if requested
|
272 | 323 | ///
|
@@ -412,7 +463,6 @@ fn make_data_type<'a>(
|
412 | 463 | let size = f.size.try_into().map_err(|e| {
|
413 | 464 | ArrowError::ParseError(format!("Overflow converting size to i32: {e}"))
|
414 | 465 | })?;
|
415 |
| - |
416 | 466 | let field = AvroDataType {
|
417 | 467 | nullability: None,
|
418 | 468 | metadata: f.attributes.field_metadata(),
|
@@ -443,11 +493,27 @@ fn make_data_type<'a>(
|
443 | 493 |
|
444 | 494 | // https://avro.apache.org/docs/1.11.1/specification/#logical-types
|
445 | 495 | match (t.attributes.logical_type, &mut field.codec) {
|
446 |
| - (Some("decimal"), c @ Codec::Fixed(_)) => { |
447 |
| - return Err(ArrowError::NotYetImplemented( |
448 |
| - "Decimals are not currently supported".to_string(), |
449 |
| - )) |
450 |
| - } |
| 496 | + (Some("decimal"), c) => match *c { |
| 497 | + Codec::Fixed(sz_val) => { |
| 498 | + let (prec, sc, size_opt) = |
| 499 | + parse_decimal_attributes(&t.attributes, Some(sz_val as usize), true)?; |
| 500 | + let final_sz = if let Some(sz_actual) = size_opt { |
| 501 | + sz_actual |
| 502 | + } else { |
| 503 | + sz_val as usize |
| 504 | + }; |
| 505 | + *c = Codec::Decimal(prec, Some(sc), Some(final_sz)); |
| 506 | + } |
| 507 | + Codec::Binary => { |
| 508 | + let (prec, sc, _) = parse_decimal_attributes(&t.attributes, None, false)?; |
| 509 | + *c = Codec::Decimal(prec, Some(sc), None); |
| 510 | + } |
| 511 | + _ => { |
| 512 | + return Err(ArrowError::SchemaError(format!( |
| 513 | + "Decimal logical type can only be backed by Fixed or Bytes, found {c:?}" |
| 514 | + ))) |
| 515 | + } |
| 516 | + }, |
451 | 517 | (Some("date"), c @ Codec::Int32) => *c = Codec::Date32,
|
452 | 518 | (Some("time-millis"), c @ Codec::Int32) => *c = Codec::TimeMillis,
|
453 | 519 | (Some("time-micros"), c @ Codec::Int64) => *c = Codec::TimeMicros,
|
|
0 commit comments