Skip to content

Commit

Permalink
Pickle API: Avoid static Series hack, allow multiple Series (#1633)
Browse files Browse the repository at this point in the history
* Defer iteration parsing

* Add first attempt of createOwningCopy

* Continue...

* seems to work??

* Reenable the Python tests

* Add pickling for Iteration and Series too

* Add pickle support for Series and Iteration

* Document new internal function
  • Loading branch information
franzpoeschel authored Jul 23, 2024
1 parent ff6cf66 commit 74fdc47
Show file tree
Hide file tree
Showing 15 changed files with 166 additions and 20 deletions.
7 changes: 7 additions & 0 deletions include/openPMD/Iteration.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,8 @@ class Iteration : public Attributable
friend class WriteIterations;
friend class SeriesIterator;
friend class internal::AttributableData;
template <typename T>
friend T &internal::makeOwning(T &self, Series);

public:
Iteration(Iteration const &) = default;
Expand Down Expand Up @@ -258,6 +260,11 @@ class Iteration : public Attributable
return *m_iterationData;
}

/*
 * Returns a copy of the internal shared pointer to this Iteration's data
 * (m_iterationData), i.e. the caller becomes a co-owner of the data object.
 * Counterpart of setData(); called by internal::makeOwning() to obtain the
 * pointer that it subsequently wraps and re-installs via setData().
 */
inline std::shared_ptr<Data_t> getShared()
{
return m_iterationData;
}

inline void setData(std::shared_ptr<Data_t> data)
{
m_iterationData = std::move(data);
Expand Down
9 changes: 9 additions & 0 deletions include/openPMD/ParticleSpecies.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ class ParticleSpecies : public Container<Record>
friend class Container<ParticleSpecies>;
friend class Container<Record>;
friend class Iteration;
template <typename T>
friend T &internal::makeOwning(T &self, Series);

public:
ParticlePatches particlePatches;
Expand All @@ -44,6 +46,13 @@ class ParticleSpecies : public Container<Record>

void read();
void flush(std::string const &, internal::FlushParams const &) override;

using Data_t = Container<Record>::ContainerData;

/*
 * Returns a copy of the internal shared pointer to the container data
 * (m_containerData), i.e. the caller becomes a co-owner of the data object.
 * Data_t is the ContainerData type of the Container<Record> base class.
 * Called by internal::makeOwning() as `self.T::getShared()`.
 */
inline std::shared_ptr<Data_t> getShared()
{
return m_containerData;
}
};

namespace traits
Expand Down
8 changes: 8 additions & 0 deletions include/openPMD/RecordComponent.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "openPMD/auxiliary/ShareRaw.hpp"
#include "openPMD/auxiliary/TypeTraits.hpp"
#include "openPMD/auxiliary/UniquePtr.hpp"
#include "openPMD/backend/Attributable.hpp"
#include "openPMD/backend/BaseRecordComponent.hpp"

#include <array>
Expand Down Expand Up @@ -132,6 +133,8 @@ class RecordComponent : public BaseRecordComponent
friend class DynamicMemoryView;
friend class internal::RecordComponentData;
friend class MeshRecordComponent;
template <typename T>
friend T &internal::makeOwning(T &self, Series);

public:
enum class Allocation
Expand Down Expand Up @@ -523,6 +526,11 @@ OPENPMD_protected
return *m_recordComponentData;
}

/*
 * Returns a copy of the internal shared pointer to this component's data
 * (m_recordComponentData), i.e. the caller becomes a co-owner of the data
 * object. Counterpart of setData(); called by internal::makeOwning() to
 * obtain the pointer that it subsequently wraps and re-installs.
 */
inline std::shared_ptr<Data_t> getShared()
{
return m_recordComponentData;
}

inline void setData(std::shared_ptr<internal::RecordComponentData> data)
{
m_recordComponentData = std::move(data);
Expand Down
17 changes: 17 additions & 0 deletions include/openPMD/backend/Attributable.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,21 @@ namespace internal
class BaseRecordData;

class RecordComponentData;

/*
* Internal function to turn a handle into an owning handle that keeps
* not only itself, but the entire Series alive. Works by hiding a copy of
* the Series in the deleter lambda of the internal shared pointer. The
* returned handle is entirely safe to use in just the same ways as a normal
* handle, except that the surrounding Series no longer needs to be kept
* alive separately, since it is stored within the handle. By storing the
* Series in the handle, not in the actual data, reference cycles are avoided.
*
* Instantiations for T exist for types RecordComponent,
* MeshRecordComponent, Mesh, Record, ParticleSpecies, Iteration.
*/
template <typename T>
T &makeOwning(T &self, Series);
} // namespace internal

namespace debug
Expand Down Expand Up @@ -157,6 +172,8 @@ class Attributable
friend class WriteIterations;
friend class internal::RecordComponentData;
friend void debug::printDirty(Series const &);
template <typename T>
friend T &internal::makeOwning(T &self, Series);

protected:
// tag for internal constructor
Expand Down
7 changes: 7 additions & 0 deletions include/openPMD/backend/BaseRecord.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,8 @@ class BaseRecord
friend class internal::BaseRecordData;
template <typename, typename, typename>
friend class internal::ScalarIterator;
template <typename T>
friend T &internal::makeOwning(T &self, Series);

using Data_t =
internal::BaseRecordData<T_elem, typename T_RecordComponent::Data_t>;
Expand All @@ -256,6 +258,11 @@ class BaseRecord
return *m_baseRecordData;
}

/*
 * Returns a copy of the internal shared pointer to this record's data
 * (m_baseRecordData), i.e. the caller becomes a co-owner of the data
 * object. Called by internal::makeOwning() as `self.T::getShared()`.
 */
inline std::shared_ptr<Data_t> getShared()
{
return m_baseRecordData;
}

BaseRecord();

protected:
Expand Down
9 changes: 3 additions & 6 deletions include/openPMD/binding/python/Pickle.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,9 @@ add_pickle(pybind11::class_<T_Args...> &cl, T_SeriesAccessor &&seriesAccessor)
std::vector<std::string> const group =
t[1].cast<std::vector<std::string> >();

// Create a new openPMD Series and keep it alive.
// This is a big hack for now, but it works for our use
// case, which is spinning up remote serial read series
// for DASK.
static auto series = openPMD::Series(filename, Access::READ_ONLY);
return seriesAccessor(series, group);
openPMD::Series series(
filename, Access::READ_ONLY, "defer_iteration_parsing = true");
return seriesAccessor(std::move(series), group);
}));
}
} // namespace openPMD
52 changes: 52 additions & 0 deletions src/backend/Attributable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,12 @@
*/
#include "openPMD/backend/Attributable.hpp"
#include "openPMD/Iteration.hpp"
#include "openPMD/ParticleSpecies.hpp"
#include "openPMD/RecordComponent.hpp"
#include "openPMD/Series.hpp"
#include "openPMD/auxiliary/DerefDynamicCast.hpp"
#include "openPMD/auxiliary/StringManip.hpp"
#include "openPMD/backend/Attribute.hpp"

#include <algorithm>
#include <complex>
Expand Down Expand Up @@ -505,4 +508,53 @@ void Attributable::linkHierarchy(Writable &w)
writable().parent = &w;
setDirty(true);
}

namespace internal
{
template <typename T>
T &makeOwning(T &self, Series s)
{
    using Data = typename T::Data_t;

    /*
     * `self` is one of the handle types instantiated below (RecordComponent,
     * Mesh, ...). Such a handle does not by itself keep the Series alive;
     * this function rewires it so that it does, without changing how it
     * behaves otherwise.
     *
     * Step 1: fetch the handle's current internal shared pointer and
     * remember the address it manages. The address must be read before the
     * pointer is moved into the deleter below.
     */
    std::shared_ptr<Data> original = self.T::getShared();
    Data *const address = original.get();

    /*
     * Step 2: build a deleter whose only job is to own things.
     * Its captures hold
     *   (a) the Series, and
     *   (b) the original shared pointer, which stays responsible for
     *       actually destroying the data object.
     * Both live exclusively inside the handle's new shared pointer and are
     * never stored in the openPMD object model itself, hence no reference
     * cycle can be formed.
     */
    auto keepAlive = [series = std::move(s),
                      inner = std::move(original)](auto const *) {
        /* intentionally empty: releasing the captures does the cleanup */
    };

    /*
     * Step 3: install a new shared pointer that refers to the very same
     * address, but carries the deleter from step 2.
     */
    self.setData(std::shared_ptr<Data>{address, std::move(keepAlive)});
    return self;
}

/*
 * Explicit instantiations. The header only declares makeOwning() while the
 * definition lives in this source file, so each handle type that should be
 * usable with makeOwning() is instantiated explicitly here.
 */
template RecordComponent &makeOwning(RecordComponent &, Series);
template MeshRecordComponent &makeOwning(MeshRecordComponent &, Series);
template Mesh &makeOwning(Mesh &, Series);
template Record &makeOwning(Record &, Series);
template ParticleSpecies &makeOwning(ParticleSpecies &, Series);
template Iteration &makeOwning(Iteration &, Series);
} // namespace internal
} // namespace openPMD
15 changes: 15 additions & 0 deletions src/binding/python/Iteration.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include "openPMD/backend/Attributable.hpp"
#include "openPMD/binding/python/Common.hpp"
#include "openPMD/binding/python/Container.H"
#include "openPMD/binding/python/Pickle.hpp"

#include <ios>
#include <sstream>
Expand All @@ -33,6 +34,13 @@ void init_Iteration(py::module &m)
auto py_it_cont = declare_container<PyIterationContainer, Attributable>(
m, "Iteration_Container");

// `clang-format on/off` doesn't help here.
// Writing this without a macro would lead to a huge diff due to
// clang-format.
#define OPENPMD_AVOID_CLANG_FORMAT auto cl =
OPENPMD_AVOID_CLANG_FORMAT
#undef OPENPMD_AVOID_CLANG_FORMAT

py::class_<Iteration, Attributable>(m, "Iteration")
.def(py::init<Iteration const &>())

Expand Down Expand Up @@ -99,5 +107,12 @@ void init_Iteration(py::module &m)
// garbage collection: return value must be freed before Iteration
py::keep_alive<1, 0>());

add_pickle(
cl, [](openPMD::Series series, std::vector<std::string> const &group) {
uint64_t const n_it = std::stoull(group.at(1));
auto &res = series.iterations[n_it];
return internal::makeOwning(res, std::move(series));
});

finalize_container<PyIterationContainer>(py_it_cont);
}
5 changes: 3 additions & 2 deletions src/binding/python/Mesh.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,9 +115,10 @@ void init_Mesh(py::module &m)
.def("set_grid_global_offset", &Mesh::setGridGlobalOffset)
.def("set_grid_unit_SI", &Mesh::setGridUnitSI);
add_pickle(
cl, [](openPMD::Series &series, std::vector<std::string> const &group) {
cl, [](openPMD::Series series, std::vector<std::string> const &group) {
uint64_t const n_it = std::stoull(group.at(1));
return series.iterations[n_it].meshes[group.at(3)];
auto &res = series.iterations[n_it].open().meshes[group.at(3)];
return internal::makeOwning(res, std::move(series));
});

finalize_container<PyMeshContainer>(py_m_cont);
Expand Down
13 changes: 8 additions & 5 deletions src/binding/python/MeshRecordComponent.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,12 +82,15 @@ void init_MeshRecordComponent(py::module &m)
"Relative position of the component on an element "
"(node/cell/voxel) of the mesh");
add_pickle(
cl, [](openPMD::Series &series, std::vector<std::string> const &group) {
cl, [](openPMD::Series series, std::vector<std::string> const &group) {
uint64_t const n_it = std::stoull(group.at(1));
return series.iterations[n_it]
.meshes[group.at(3)]
[group.size() < 5 ? MeshRecordComponent::SCALAR
: group.at(4)];
auto &res =
series.iterations[n_it]
.open()
.meshes[group.at(3)]
[group.size() < 5 ? MeshRecordComponent::SCALAR
: group.at(4)];
return internal::makeOwning(res, std::move(series));
});

finalize_container<PyMeshRecordComponentContainer>(py_mrc_cnt);
Expand Down
6 changes: 4 additions & 2 deletions src/binding/python/ParticleSpecies.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,11 @@ void init_ParticleSpecies(py::module &m)
// garbage collection: return value must be freed before Series
py::keep_alive<1, 0>());
add_pickle(
cl, [](openPMD::Series &series, std::vector<std::string> const &group) {
cl, [](openPMD::Series series, std::vector<std::string> const &group) {
uint64_t const n_it = std::stoull(group.at(1));
return series.iterations[n_it].particles[group.at(3)];
ParticleSpecies &res =
series.iterations[n_it].open().particles[group.at(3)];
return internal::makeOwning(res, std::move(series));
});

finalize_container<PyPartContainer>(py_ps_cnt);
Expand Down
6 changes: 4 additions & 2 deletions src/binding/python/Record.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,11 @@ void init_Record(py::module &m)
.def("set_time_offset", &Record::setTimeOffset<double>)
.def("set_time_offset", &Record::setTimeOffset<long double>);
add_pickle(
cl, [](openPMD::Series &series, std::vector<std::string> const &group) {
cl, [](openPMD::Series series, std::vector<std::string> const &group) {
uint64_t const n_it = std::stoull(group.at(1));
return series.iterations[n_it].particles[group.at(3)][group.at(4)];
auto &res = series.iterations[n_it].open().particles[group.at(3)]
[group.at(4)];
return internal::makeOwning(res, std::move(series));
});

finalize_container<PyRecordContainer>(py_r_cnt);
Expand Down
11 changes: 8 additions & 3 deletions src/binding/python/RecordComponent.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1122,10 +1122,15 @@ void init_RecordComponent(py::module &m)
.def("set_unit_SI", &RecordComponent::setUnitSI) // deprecated
;
add_pickle(
cl, [](openPMD::Series &series, std::vector<std::string> const &group) {
cl, [](openPMD::Series series, std::vector<std::string> const &group) {
uint64_t const n_it = std::stoull(group.at(1));
return series.iterations[n_it].particles[group.at(3)][group.at(
4)][group.size() < 6 ? RecordComponent::SCALAR : group.at(5)];
auto &res =
series.iterations[n_it]
.open()
.particles[group.at(3)][group.at(4)]
[group.size() < 6 ? RecordComponent::SCALAR
: group.at(5)];
return internal::makeOwning(res, std::move(series));
});

addRecordComponentSetGet(cl);
Expand Down
13 changes: 13 additions & 0 deletions src/binding/python/Series.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "openPMD/IO/Access.hpp"
#include "openPMD/IterationEncoding.hpp"
#include "openPMD/auxiliary/JSON.hpp"
#include "openPMD/binding/python/Pickle.hpp"
#include "openPMD/config.hpp"

#include "openPMD/binding/python/Common.hpp"
Expand Down Expand Up @@ -150,6 +151,13 @@ not possible once it has been closed.
// keep handle alive while iterator exists
py::keep_alive<0, 1>());

// `clang-format on/off` doesn't help here.
// Writing this without a macro would lead to a huge diff due to
// clang-format.
#define OPENPMD_AVOID_CLANG_FORMAT auto cl =
OPENPMD_AVOID_CLANG_FORMAT
#undef OPENPMD_AVOID_CLANG_FORMAT

py::class_<Series, Attributable>(m, "Series")

.def(
Expand Down Expand Up @@ -394,6 +402,11 @@ this method twice.
Look for the WriteIterations class for further documentation.
)END");

add_pickle(
cl, [](openPMD::Series series, std::vector<std::string> const &) {
return series;
});

m.def(
"merge_json",
&json::merge,
Expand Down
8 changes: 8 additions & 0 deletions test/python/unittest/API/APITest.py
Original file line number Diff line number Diff line change
Expand Up @@ -971,6 +971,8 @@ def testPickle(self):
series.flush()

# Pickle
pickled_s = pickle.dumps(series)
pickled_i = pickle.dumps(i)
pickled_E = pickle.dumps(E)
pickled_E_x = pickle.dumps(E_x)
pickled_electrons = pickle.dumps(electrons)
Expand All @@ -980,16 +982,20 @@ def testPickle(self):
pickled_w = pickle.dumps(w)
print(f"This is my pickled object:\n{pickled_E_x}\n")

series.close()
del E
del E_x
del electrons
del momentum
del pos
del pos_y
del w
del i
del series

# Unpickling the object
series = pickle.loads(pickled_s)
i = pickle.loads(pickled_i)
E = pickle.loads(pickled_E)
E_x = pickle.loads(pickled_E_x)
electrons = pickle.loads(pickled_electrons)
Expand All @@ -1000,6 +1006,8 @@ def testPickle(self):
print(
f"This is E_x.position of the unpickled object:\n{E_x.position}\n")

self.assertIsInstance(series, io.Series)
self.assertIsInstance(i, io.Iteration)
self.assertIsInstance(E, io.Mesh)
self.assertIsInstance(E_x, io.Mesh_Record_Component)
self.assertIsInstance(electrons, io.ParticleSpecies)
Expand Down

0 comments on commit 74fdc47

Please sign in to comment.