Skip to content

Blosc2 in BP5: invalid blosc buffer version #3504

Closed
@franzpoeschel

Description

Describe the bug
Compressing with Blosc2 in BP5 will sometimes lead to unreadable datasets.

To Reproduce
I observed the bug when trying to compress a PIConGPU dataset post hoc with BP5. The following code snippet reads the metadata of a BP5 dataset written by PIConGPU (uncompressed.zip; metadata only, I removed the data.* files) and recreates every variable that it finds, filling each one with zeroes and compressing it with the blosc operator using BLOSC_BITSHUFFLE.

#include <adios2.h>
#include <adios2/common/ADIOSMacros.h>
#include <cstdlib>
#include <iostream>
#include <memory>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

// Bundle of the ADIOS2 handles used for one file (main() keeps one bundle for
// the input and one for the output).
// Member order matters: members are destroyed in reverse declaration order,
// so the engine goes first, then the IO, then the ADIOS object.
struct ADIOS2Params
{
    // Top-level ADIOS2 object; main() calls DeclareIO on it to obtain `IO`.
    adios2::ADIOS adios;
    // IO handle; main() selects the "bp5" engine on it and opens `engine`.
    adios2::IO IO;
    // Engine returned by IO.Open(); used for BeginStep/Put/EndStep/Close.
    adios2::Engine engine;
};

// Complete state of the reproducer: the input side, the output side, and the
// write buffers that back the pending Put() calls.
struct Params
{
    // Handles for the dataset opened in adios2::Mode::Read.
    ADIOS2Params adios_in;
    // Handles for the dataset opened in adios2::Mode::Write.
    ADIOS2Params adios_out;
    // Zero-filled buffers handed to Put() by handle_var(). They are kept here
    // until main() clears the vector after the output step's EndStep().
    std::vector<std::unique_ptr<char[]>> buffers;
};

/**
 * Mirror one input variable into the output file as an all-zero,
 * blosc-compressed dataset of identical shape.
 *
 * The variable is looked up in the input IO, its shape is read for the
 * current step, and a zero-initialised buffer covering the whole shape is
 * handed to Put() on the output engine. The buffer is appended to
 * params.buffers so that it stays alive after this function returns; the
 * caller releases it once the output step has been ended.
 *
 * @tparam T     element type of the variable
 * @param params input/output ADIOS2 handles and the buffer store
 * @param name   name of the variable in the input IO; reused for the output
 * @throws std::runtime_error if the input IO has no variable `name` of type T
 */
template <typename T>
void handle_var(Params &params, std::string const &name)
{
    auto var = params.adios_in.IO.InquireVariable<T>(name);
    if (!var)
    {
        throw std::runtime_error("Failed inquiring variable '" + name + "'");
    }

    // Query the shape once and reuse it instead of asking the variable again
    // for every use below.
    adios2::Dims const shape = var.Shape();
    // The whole dataset is written as a single block starting at the origin.
    adios2::Dims const offset(shape.size(), 0);
    size_t flat_extent = 1;
    for (auto const ext : shape)
    {
        flat_extent *= ext;
    }

    // This function runs once per variable *per step*. DefineVariable does
    // not accept a name that already exists in the IO, so on later steps the
    // variable defined during the first step is reused.
    auto out_var = params.adios_out.IO.InquireVariable<T>(name);
    if (!out_var)
    {
        out_var =
            params.adios_out.IO.DefineVariable<T>(name, shape, offset, shape);
        // The operator is attached once, when the variable is created.
        out_var.AddOperation("blosc", {{"doshuffle", "BLOSC_BITSHUFFLE"}});
    }
    else if (!shape.empty())
    {
        // Follow the input in case its extent differs in this step. Variables
        // with an empty shape have no shape or selection to update.
        out_var.SetShape(shape);
        out_var.SetSelection({offset, shape});
    }

    // The trailing `{}` value-initialises the array, i.e. fills it with zeroes.
    std::unique_ptr<char[]> writeBuffer{new char[flat_extent * sizeof(T)]{}};
    params.adios_out.engine.Put(
        out_var, reinterpret_cast<T *>(writeBuffer.get()));
    // Put() is called without a mode argument and the caller only drops the
    // buffers after EndStep(), so ownership moves into the shared store.
    params.buffers.push_back(std::move(writeBuffer));
}

/**
 * Reproducer entry point.
 *
 * Usage: <program> <sample data location> <out file>
 *
 * Opens the input with the "bp5" engine in read mode and the output with the
 * "bp5" engine in write mode. For every step of the input, each variable
 * reported by AvailableVariables() is dispatched on its "Type" entry to the
 * matching handle_var<T> instantiation.
 *
 * @return 1 when the argument count is wrong; otherwise falls off the end of
 *         main after closing both engines.
 */
int main(int argsc, char **argsv)
{
    if (argsc != 3)
    {
        // Usage errors go to stderr so they do not mix with regular output.
        std::cerr << "Usage: " << argsv[0]
                  << " <sample data location> <out file>" << '\n';
        return 1;
    }

    Params params;

    char const *in_file = argsv[1];
    char const *out_file = argsv[2];

    params.adios_in.adios = adios2::ADIOS{};
    params.adios_in.IO = params.adios_in.adios.DeclareIO("IO");
    params.adios_in.IO.SetEngine("bp5");
    params.adios_in.engine =
        params.adios_in.IO.Open(in_file, adios2::Mode::Read);

    params.adios_out.adios = adios2::ADIOS{};
    params.adios_out.IO = params.adios_out.adios.DeclareIO("IO");
    params.adios_out.IO.SetEngine("bp5");
    params.adios_out.engine =
        params.adios_out.IO.Open(out_file, adios2::Mode::Write);

    while (params.adios_in.engine.BeginStep() == adios2::StepStatus::OK)
    {
        params.adios_out.engine.BeginStep();

        for (auto const &[var, var_meta] :
             params.adios_in.IO.AvailableVariables())
        {
            std::string actualType = var_meta.at("Type");

            // Each expansion of SWITCH_TYPE contributes one `if (...) {...}
            // else` link; the braces after the FOREACH macro form the final
            // else-branch, reached when no type name matched. A type matches
            // on its short name or on the short name followed by "_t".
#define SWITCH_TYPE(ctype, repr)                                               \
    if (#repr == actualType || std::string(#repr) + "_t" == actualType)        \
    {                                                                          \
        handle_var<ctype>(params, var);                                        \
    }                                                                          \
    else

            ADIOS2_FOREACH_PRIMITVE_STDTYPE_2ARGS(SWITCH_TYPE)
            {
                throw std::runtime_error("Unknown type: '" + actualType + "'");
            }
#undef SWITCH_TYPE
        }
        params.adios_in.engine.EndStep();
        params.adios_out.engine.EndStep();
        // The buffers handed to Put() are released only after the output
        // step has been ended.
        params.buffers.clear();
    }

    params.adios_in.engine.Close();
    params.adios_out.engine.Close();
}
cmake_minimum_required(VERSION 3.12.0)

project(adios2_mock_stream)

find_package(ADIOS2 REQUIRED)

# Build one executable per listed name from the source file <name>.cpp.
foreach(binary bp5_blosc)
    add_executable(${binary} ${binary}.cpp)
    # The source uses structured bindings, so C++17 is mandatory; without
    # CXX_STANDARD_REQUIRED CMake may silently fall back to an older standard.
    set_target_properties(${binary} PROPERTIES
        CXX_STANDARD 17
        CXX_STANDARD_REQUIRED ON)

    target_link_libraries(${binary} PRIVATE adios2::cxx11)
endforeach()

Trying to read some of the variables will now fail:

$ ./bp5_blosc uncompressed_000500.bp5/ compressed.bp5
$ bpls -d compressed.bp5/ -e /data/500/particles/i/momentum/x
  float     /data/500/particles/i/momentum/x                          {164659200}

bpls caught an exception
[Fri Feb 24 14:12:10 2023] [ADIOS2 EXCEPTION] <Operator> <CompressBlosc> <InverseOperate> : invalid blosc buffer version

Expected behavior
No reading failure

Desktop (please complete the following information):

  • OS/Platform: Ubuntu 22.04.1 LTS Singularity container on an Ubuntu 20.04.5 LTS system
  • Build: v2.9.0-rc1, gcc 11.3.0

Additional context
The same workflow works fine with BP4.

Following up
Was the issue fixed? Please report back.

Activity

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Metadata

Assignees

Labels

No labels
No labels

Type

No type

Projects

No projects

Milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions