@@ -426,6 +426,90 @@ Result<std::shared_ptr<Buffer>> DecompressBuffer(const std::shared_ptr<Buffer>&
426426 return std::move (uncompressed);
427427}
428428
429+ Status DecompressBufferByType (const Buffer& buffer, util::Codec* codec,
430+ std::shared_ptr<Buffer>* out, MemoryPool* pool) {
431+ const uint8_t * data = buffer.data ();
432+ int64_t compressed_size = buffer.size () - sizeof (int64_t );
433+ int64_t uncompressed_size = BitUtil::FromLittleEndian (util::SafeLoadAs<int64_t >(data));
434+
435+ ARROW_ASSIGN_OR_RAISE (auto uncompressed, AllocateBuffer (uncompressed_size, pool));
436+
437+ int64_t actual_decompressed;
438+ ARROW_ASSIGN_OR_RAISE (
439+ actual_decompressed,
440+ codec->Decompress (compressed_size, data + sizeof (int64_t ), uncompressed_size,
441+ uncompressed->mutable_data ()));
442+ if (actual_decompressed != uncompressed_size) {
443+ return Status::Invalid (" Failed to fully decompress buffer, expected " ,
444+ uncompressed_size, " bytes but decompressed " ,
445+ actual_decompressed);
446+ }
447+ *out = std::move (uncompressed);
448+ return Status::OK ();
449+ }
450+
451+ Status DecompressBuffersByType (Compression::type compression,
452+ const IpcReadOptions& options,
453+ std::vector<std::shared_ptr<ArrayData>>* arrs,
454+ const std::vector<std::shared_ptr<Field>>& schema_fields) {
455+ ARROW_CHECK_EQ (arrs->size (), schema_fields.size ());
456+
457+ std::unique_ptr<util::Codec> codec;
458+ std::unique_ptr<util::Codec> fastpfor32_codec;
459+ std::unique_ptr<util::Codec> fastpfor64_codec;
460+ ARROW_ASSIGN_OR_RAISE (codec, util::Codec::Create (Compression::LZ4_FRAME));
461+ ARROW_ASSIGN_OR_RAISE (fastpfor32_codec, util::Codec::CreateInt32 (compression));
462+ ARROW_ASSIGN_OR_RAISE (fastpfor64_codec, util::Codec::CreateInt64 (compression));
463+
464+ for (size_t field_idx = 0 ; field_idx < schema_fields.size (); ++field_idx) {
465+ const auto & field = schema_fields[field_idx];
466+ auto & arr = (*arrs)[field_idx];
467+ if (field->type ()->id () == Type::NA) continue ;
468+
469+ const auto & layout_buffers = field->type ()->layout ().buffers ;
470+ for (size_t i = 0 ; i < layout_buffers.size (); ++i) {
471+ const auto & layout = layout_buffers[i];
472+ if (arr->buffers [i] == nullptr ) {
473+ continue ;
474+ }
475+ if (arr->buffers [i]->size () == 0 ) {
476+ continue ;
477+ }
478+ if (arr->buffers [i]->size () < 8 ) {
479+ return Status::Invalid (
480+ " Likely corrupted message, compressed buffers "
481+ " are larger than 8 bytes by construction" );
482+ }
483+ auto & buffer = arr->buffers [i];
484+ switch (layout.kind ) {
485+ case DataTypeLayout::BufferKind::FIXED_WIDTH:
486+ if (layout.byte_width == 4 && field->type ()->id () != Type::FLOAT) {
487+ RETURN_NOT_OK (DecompressBufferByType (*buffer, fastpfor32_codec.get (), &buffer,
488+ options.memory_pool ));
489+ } else if (layout.byte_width == 8 && field->type ()->id () != Type::DOUBLE) {
490+ RETURN_NOT_OK (DecompressBufferByType (*buffer, fastpfor64_codec.get (), &buffer,
491+ options.memory_pool ));
492+ } else {
493+ RETURN_NOT_OK (
494+ DecompressBufferByType (*buffer, codec.get (), &buffer, options.memory_pool ));
495+ }
496+ break ;
497+ case DataTypeLayout::BufferKind::BITMAP:
498+ case DataTypeLayout::BufferKind::VARIABLE_WIDTH: {
499+ RETURN_NOT_OK (
500+ DecompressBufferByType (*buffer, codec.get (), &buffer, options.memory_pool ));
501+ break ;
502+ }
503+ case DataTypeLayout::BufferKind::ALWAYS_NULL:
504+ break ;
505+ default :
506+ return Status::Invalid (" Wrong buffer layout." );
507+ }
508+ }
509+ }
510+ return arrow::Status::OK ();
511+ }
512+
429513Status DecompressBuffers (Compression::type compression, const IpcReadOptions& options,
430514 ArrayDataVector* fields) {
431515 struct BufferAccumulator {
@@ -507,8 +591,13 @@ Result<std::shared_ptr<RecordBatch>> LoadRecordBatchSubset(
507591 filtered_columns = std::move (columns);
508592 }
509593 if (context.compression != Compression::UNCOMPRESSED) {
510- RETURN_NOT_OK (
511- DecompressBuffers (context.compression , context.options , &filtered_columns));
594+
595+ if (context.compression == Compression::FASTPFOR) {
596+ RETURN_NOT_OK (
597+ DecompressBuffersByType (context.compression , context.options , &filtered_columns, filtered_fields));
598+ } else {
599+ RETURN_NOT_OK (DecompressBuffers (context.compression , context.options , &filtered_columns));
600+ }
512601 }
513602
514603 // swap endian in a set of ArrayData if necessary (swap_endian == true)
0 commit comments