Closed
Description
Describe the bug
Compressing with Blosc2 in BP5 will sometimes lead to unreadable datasets.
To Reproduce
I observed the bug when trying to post-hoc-compress a PIConGPU dataset with BP5. The following code snippet reads the metadata of a BP5 dataset written by PIConGPU (uncompressed.zip; metadata only, as I removed the data.* files) and recreates every variable that it finds, filling each one with zeroes and compressing it with BLOSC_BITSHUFFLE.
#include <adios2.h>
#include <adios2/common/ADIOSMacros.h>
#include <cstdlib>
#include <iostream>
#include <memory>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
/// Bundles the three ADIOS2 handles that belong to one opened dataset.
struct ADIOS2Params
{
    /// ADIOS2 context object.
    adios2::ADIOS adios;
    /// IO object declared on `adios`.
    adios2::IO IO;
    /// Engine opened through `IO`.
    adios2::Engine engine;
};
/// Whole program state: the dataset being read, the dataset being written,
/// and the write buffers that are currently alive.
struct Params
{
    /// Handles of the dataset that is read.
    ADIOS2Params adios_in;
    /// Handles of the dataset that is written.
    ADIOS2Params adios_out;
    /// Owning storage for the buffers handed to Put(); entries are appended
    /// per variable and dropped by the caller once they are no longer needed.
    std::vector<std::unique_ptr<char[]>> buffers;
};
template <typename T>
void handle_var(Params ¶ms, std::string const &name)
{
auto var = params.adios_in.IO.InquireVariable<T>(name);
if (!var)
{
throw std::runtime_error("Failed inquiring variable '" + name + "'");
}
adios2::Dims offset(var.Shape().size(), 0);
size_t flat_extent = 1;
for (auto ext : var.Shape())
{
flat_extent *= ext;
}
auto out_var = params.adios_out.IO.DefineVariable<T>(
name, var.Shape(), offset, var.Shape());
out_var.AddOperation("blosc", {{"doshuffle", "BLOSC_BITSHUFFLE"}});
std::unique_ptr<char[]> writeBuffer{new char[flat_extent * sizeof(T)]{}};
params.adios_out.engine.Put(
out_var, reinterpret_cast<T *>(writeBuffer.get()));
params.buffers.push_back(std::move(writeBuffer));
}
/**
 * Entry point of the reproducer.
 *
 * Opens argsv[1] for reading and argsv[2] for writing, both with the "bp5"
 * engine. For every step of the input, every variable reported by
 * AvailableVariables() is recreated in the output via handle_var<T>(), where
 * T is selected from the variable's "Type" metadata string.
 *
 * @param argsc  Argument count; must be exactly 3.
 * @param argsv  argsv[1] is the input dataset, argsv[2] the output dataset.
 * @return 1 after printing a usage message if the argument count is wrong,
 *         otherwise 0 on normal completion.
 * @throws std::runtime_error if a variable has a type string that matches
 *         none of the types handled by the dispatch macro.
 */
int main(int argsc, char **argsv)
{
    if (argsc != 3)
    {
        std::cout << "Usage: " << argsv[0]
                  << " <sample data location> <out file>" << '\n';
        // Returning from main flushes std::cout and yields the same exit
        // status as the former std::exit(1).
        return 1;
    }

    Params params;
    char const *in_file = argsv[1];
    char const *out_file = argsv[2];

    params.adios_in.adios = adios2::ADIOS{};
    params.adios_in.IO = params.adios_in.adios.DeclareIO("IO");
    params.adios_in.IO.SetEngine("bp5");
    params.adios_in.engine =
        params.adios_in.IO.Open(in_file, adios2::Mode::Read);

    params.adios_out.adios = adios2::ADIOS{};
    params.adios_out.IO = params.adios_out.adios.DeclareIO("IO");
    params.adios_out.IO.SetEngine("bp5");
    params.adios_out.engine =
        params.adios_out.IO.Open(out_file, adios2::Mode::Write);

    while (params.adios_in.engine.BeginStep() == adios2::StepStatus::OK)
    {
        params.adios_out.engine.BeginStep();
        for (auto const &[var, var_meta] :
             params.adios_in.IO.AvailableVariables())
        {
            std::string actualType = var_meta.at("Type");
            // Builds an if / else-if chain over the (ctype, repr) pairs that
            // the ADIOS2 macro supplies; a type string matches either as
            // "repr" or as "repr_t". The braces after the macro invocation
            // form the final else branch.
#define SWITCH_TYPE(ctype, repr)                                               \
    if (#repr == actualType || std::string(#repr) + "_t" == actualType)        \
    {                                                                          \
        handle_var<ctype>(params, var);                                        \
    }                                                                          \
    else
            ADIOS2_FOREACH_PRIMITVE_STDTYPE_2ARGS(SWITCH_TYPE)
            {
                throw std::runtime_error(
                    "Unknown type: '" + actualType + "'");
            }
#undef SWITCH_TYPE
        }
        params.adios_in.engine.EndStep();
        params.adios_out.engine.EndStep();
        // The write buffers are released only after the output step ended.
        params.buffers.clear();
    }

    params.adios_in.engine.Close();
    params.adios_out.engine.Close();
}
# Build script for the reproducer: builds bp5_blosc from bp5_blosc.cpp as
# C++17 and links it against the ADIOS2 C++11 bindings.
cmake_minimum_required(VERSION 3.12.0)
project(adios2_mock_stream)
find_package(ADIOS2 REQUIRED)
foreach(binary bp5_blosc)
    add_executable(${binary} ${binary}.cpp)
    set_property(TARGET ${binary} PROPERTY CXX_STANDARD 17)
    target_link_libraries(${binary} PRIVATE adios2::cxx11)
# endforeach() takes no list; the previous arguments did not repeat the
# opening foreach() and named binaries that are not built here.
endforeach()
Trying to read some of the variables will now fail:
$ ./bp5_blosc uncompressed_000500.bp5/ compressed.bp5
$ bpls -d compressed.bp5/ -e /data/500/particles/i/momentum/x
float /data/500/particles/i/momentum/x {164659200}
bpls caught an exception
[Fri Feb 24 14:12:10 2023] [ADIOS2 EXCEPTION] <Operator> <CompressBlosc> <InverseOperate> : invalid blosc buffer version
Expected behavior
No reading failure
Desktop (please complete the following information):
- OS/Platform: Ubuntu 22.04.1 LTS Singularity container on an Ubuntu 20.04.5 LTS system
- Build: v2.9.0-rc1, gcc 11.3.0
Additional context
The same workflow works fine with BP4.
Following up
Was the issue fixed? Please report back.
Activity