From 74fdc479a1008fd0d848ad123006c74f47fc6232 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 23 Jul 2024 15:56:39 +0200 Subject: [PATCH] Pickle API: Avoid static Series hack, allow multiple Series (#1633) * Defer iteration parsing * Add first attempt of createOwningCopy * Continue... * seems to work?? * Reenable the Python tests * Add pickling for Iteration and Series too * Add pickle support for Series and Iteration * Document new internal function --- include/openPMD/Iteration.hpp | 7 +++ include/openPMD/ParticleSpecies.hpp | 9 ++++ include/openPMD/RecordComponent.hpp | 8 ++++ include/openPMD/backend/Attributable.hpp | 17 +++++++ include/openPMD/backend/BaseRecord.hpp | 7 +++ include/openPMD/binding/python/Pickle.hpp | 9 ++-- src/backend/Attributable.cpp | 52 ++++++++++++++++++++++ src/binding/python/Iteration.cpp | 15 +++++++ src/binding/python/Mesh.cpp | 5 ++- src/binding/python/MeshRecordComponent.cpp | 13 +++--- src/binding/python/ParticleSpecies.cpp | 6 ++- src/binding/python/Record.cpp | 6 ++- src/binding/python/RecordComponent.cpp | 11 +++-- src/binding/python/Series.cpp | 13 ++++++ test/python/unittest/API/APITest.py | 8 ++++ 15 files changed, 166 insertions(+), 20 deletions(-) diff --git a/include/openPMD/Iteration.hpp b/include/openPMD/Iteration.hpp index a8f4d7e43d..52bf43293a 100644 --- a/include/openPMD/Iteration.hpp +++ b/include/openPMD/Iteration.hpp @@ -131,6 +131,8 @@ class Iteration : public Attributable friend class WriteIterations; friend class SeriesIterator; friend class internal::AttributableData; + template + friend T &internal::makeOwning(T &self, Series); public: Iteration(Iteration const &) = default; @@ -258,6 +260,11 @@ class Iteration : public Attributable return *m_iterationData; } + inline std::shared_ptr getShared() + { + return m_iterationData; + } + inline void setData(std::shared_ptr data) { m_iterationData = std::move(data); diff --git a/include/openPMD/ParticleSpecies.hpp b/include/openPMD/ParticleSpecies.hpp index 210f81d260..af7aa50375 100644 --- a/include/openPMD/ParticleSpecies.hpp +++ b/include/openPMD/ParticleSpecies.hpp @@ -35,6 +35,8 @@ class ParticleSpecies : public Container friend class Container; friend class Container; friend class Iteration; + template + friend T &internal::makeOwning(T &self, Series); public: ParticlePatches particlePatches; @@ -44,6 +46,13 @@ class ParticleSpecies : public Container void read(); void flush(std::string const &, internal::FlushParams const &) override; + + using Data_t = Container::ContainerData; + + inline std::shared_ptr getShared() + { + return m_containerData; + } }; namespace traits diff --git a/include/openPMD/RecordComponent.hpp b/include/openPMD/RecordComponent.hpp index 1c96981bb5..ebb5a80ca8 100644 --- a/include/openPMD/RecordComponent.hpp +++ b/include/openPMD/RecordComponent.hpp @@ -25,6 +25,7 @@ #include "openPMD/auxiliary/ShareRaw.hpp" #include "openPMD/auxiliary/TypeTraits.hpp" #include "openPMD/auxiliary/UniquePtr.hpp" +#include "openPMD/backend/Attributable.hpp" #include "openPMD/backend/BaseRecordComponent.hpp" #include @@ -132,6 +133,8 @@ class RecordComponent : public BaseRecordComponent friend class DynamicMemoryView; friend class internal::RecordComponentData; friend class MeshRecordComponent; + template + friend T &internal::makeOwning(T &self, Series); public: enum class Allocation @@ -523,6 +526,11 @@ OPENPMD_protected return *m_recordComponentData; } + inline std::shared_ptr getShared() + { + return m_recordComponentData; + } + inline void setData(std::shared_ptr data) { m_recordComponentData = std::move(data); diff --git a/include/openPMD/backend/Attributable.hpp b/include/openPMD/backend/Attributable.hpp index d8255b6e7b..0f7b722ae5 100644 --- a/include/openPMD/backend/Attributable.hpp +++ b/include/openPMD/backend/Attributable.hpp @@ -124,6 +124,21 @@ namespace internal class BaseRecordData; class RecordComponentData; + + /* + * Internal function to turn a handle into an owning handle that will keep + * not only itself, but the entire Series alive. Works by hiding a copy of + * the Series into the destructor lambda of the internal shared pointer. The + * returned handle is entirely safe to use in just the same ways as a normal + * handle, just the surrounding Series needs not be kept alive any more + * since it is stored within the handle. By storing the Series in the + * handle, not in the actual data, reference cycles are avoided. + * + * Instantiations for T exist for types RecordComponent, + * MeshRecordComponent, Mesh, Record, ParticleSpecies, Iteration. + */ + template + T &makeOwning(T &self, Series); } // namespace internal namespace debug @@ -157,6 +172,8 @@ class Attributable friend class WriteIterations; friend class internal::RecordComponentData; friend void debug::printDirty(Series const &); + template + friend T &internal::makeOwning(T &self, Series); protected: // tag for internal constructor diff --git a/include/openPMD/backend/BaseRecord.hpp b/include/openPMD/backend/BaseRecord.hpp index 6d17ae4eae..ba137b10db 100644 --- a/include/openPMD/backend/BaseRecord.hpp +++ b/include/openPMD/backend/BaseRecord.hpp @@ -237,6 +237,8 @@ class BaseRecord friend class internal::BaseRecordData; template friend class internal::ScalarIterator; + template + friend T &internal::makeOwning(T &self, Series); using Data_t = internal::BaseRecordData; @@ -256,6 +258,11 @@ class BaseRecord return *m_baseRecordData; } + inline std::shared_ptr getShared() + { + return m_baseRecordData; + } + BaseRecord(); protected: diff --git a/include/openPMD/binding/python/Pickle.hpp b/include/openPMD/binding/python/Pickle.hpp index 70a8ec1cdd..3d3b233eb4 100644 --- a/include/openPMD/binding/python/Pickle.hpp +++ b/include/openPMD/binding/python/Pickle.hpp @@ -67,12 +67,9 @@ add_pickle(pybind11::class_ &cl, T_SeriesAccessor &&seriesAccessor) std::vector const group = t[1].cast >(); - // Create a new openPMD Series and keep it alive. - // This is a big hack for now, but it works for our use - // case, which is spinning up remote serial read series - // for DASK. - static auto series = openPMD::Series(filename, Access::READ_ONLY); - return seriesAccessor(series, group); + openPMD::Series series( + filename, Access::READ_ONLY, "defer_iteration_parsing = true"); + return seriesAccessor(std::move(series), group); })); } } // namespace openPMD diff --git a/src/backend/Attributable.cpp b/src/backend/Attributable.cpp index 9e4d1b4fd3..d5ff005389 100644 --- a/src/backend/Attributable.cpp +++ b/src/backend/Attributable.cpp @@ -20,9 +20,12 @@ */ #include "openPMD/backend/Attributable.hpp" #include "openPMD/Iteration.hpp" +#include "openPMD/ParticleSpecies.hpp" +#include "openPMD/RecordComponent.hpp" #include "openPMD/Series.hpp" #include "openPMD/auxiliary/DerefDynamicCast.hpp" #include "openPMD/auxiliary/StringManip.hpp" +#include "openPMD/backend/Attribute.hpp" #include #include @@ -505,4 +508,53 @@ void Attributable::linkHierarchy(Writable &w) writable().parent = &w; setDirty(true); } + +namespace internal +{ + template + T &makeOwning(T &self, Series s) + { + /* + * `self` is a handle object such as RecordComponent or Mesh (see + * instantiations below). + * These objects don't normally keep alive the Series, i.e. as soon as + * the Series is destroyed, the handle becomes invalid. + * This function modifies the handle such that it actually keeps the + * Series alive and behaves otherwise identically. + * First, get the internal shared pointer of the handle. + */ + std::shared_ptr data_ptr = self.T::getShared(); + auto raw_ptr = data_ptr.get(); + /* + * Now, create a new shared pointer pointing to the same address as the + * actual pointer and replace the old internal shared pointer by the new + * one. + */ + self.setData(std::shared_ptr{ + raw_ptr, + /* + * Here comes the main trick. + * The new shared pointer stores (and thus keeps alive) two items + * via lambda capture in its destructor: + * 1. The old shared pointer. + * 2. The Series. + * It's important to notice that these two items are only stored + * within the newly created handle, and not internally within the + * actual openPMD object model. This means that no reference cycles + * can occur. + */ + [s_lambda = std::move(s), + data_ptr_lambda = std::move(data_ptr)](auto const *) { + /* no-op, the lambda captures simply go out of scope */ + }}); + return self; + } + + template RecordComponent &makeOwning(RecordComponent &, Series); + template MeshRecordComponent &makeOwning(MeshRecordComponent &, Series); + template Mesh &makeOwning(Mesh &, Series); + template Record &makeOwning(Record &, Series); + template ParticleSpecies &makeOwning(ParticleSpecies &, Series); + template Iteration &makeOwning(Iteration &, Series); +} // namespace internal } // namespace openPMD diff --git a/src/binding/python/Iteration.cpp b/src/binding/python/Iteration.cpp index df017114e6..85eddd6037 100644 --- a/src/binding/python/Iteration.cpp +++ b/src/binding/python/Iteration.cpp @@ -23,6 +23,7 @@ #include "openPMD/backend/Attributable.hpp" #include "openPMD/binding/python/Common.hpp" #include "openPMD/binding/python/Container.H" +#include "openPMD/binding/python/Pickle.hpp" #include #include @@ -33,6 +34,13 @@ void init_Iteration(py::module &m) auto py_it_cont = declare_container( m, "Iteration_Container"); + // `clang-format on/off` doesn't help here. + // Writing this without a macro would lead to a huge diff due to + // clang-format. +#define OPENPMD_AVOID_CLANG_FORMAT auto cl = + OPENPMD_AVOID_CLANG_FORMAT +#undef OPENPMD_AVOID_CLANG_FORMAT + py::class_(m, "Iteration") .def(py::init()) @@ -99,5 +107,12 @@ void init_Iteration(py::module &m) // garbage collection: return value must be freed before Iteration py::keep_alive<1, 0>()); + add_pickle( + cl, [](openPMD::Series series, std::vector const &group) { + uint64_t const n_it = std::stoull(group.at(1)); + auto &res = series.iterations[n_it]; + return internal::makeOwning(res, std::move(series)); + }); + finalize_container(py_it_cont); } diff --git a/src/binding/python/Mesh.cpp b/src/binding/python/Mesh.cpp index 55c6fd13a4..4c939068f4 100644 --- a/src/binding/python/Mesh.cpp +++ b/src/binding/python/Mesh.cpp @@ -115,9 +115,10 @@ void init_Mesh(py::module &m) .def("set_grid_global_offset", &Mesh::setGridGlobalOffset) .def("set_grid_unit_SI", &Mesh::setGridUnitSI); add_pickle( - cl, [](openPMD::Series &series, std::vector const &group) { + cl, [](openPMD::Series series, std::vector const &group) { uint64_t const n_it = std::stoull(group.at(1)); - return series.iterations[n_it].meshes[group.at(3)]; + auto &res = series.iterations[n_it].open().meshes[group.at(3)]; + return internal::makeOwning(res, std::move(series)); }); finalize_container(py_m_cont); diff --git a/src/binding/python/MeshRecordComponent.cpp b/src/binding/python/MeshRecordComponent.cpp index 825753402d..f59f5b3fc1 100644 --- a/src/binding/python/MeshRecordComponent.cpp +++ b/src/binding/python/MeshRecordComponent.cpp @@ -82,12 +82,15 @@ void init_MeshRecordComponent(py::module &m) "Relative position of the component on an element " "(node/cell/voxel) of the mesh"); add_pickle( - cl, [](openPMD::Series &series, std::vector const &group) { + cl, [](openPMD::Series series, std::vector const &group) { uint64_t const n_it = std::stoull(group.at(1)); - return series.iterations[n_it] - .meshes[group.at(3)] - [group.size() < 5 ? MeshRecordComponent::SCALAR - : group.at(4)]; + auto &res = + series.iterations[n_it] + .open() + .meshes[group.at(3)] + [group.size() < 5 ? MeshRecordComponent::SCALAR + : group.at(4)]; + return internal::makeOwning(res, std::move(series)); }); finalize_container(py_mrc_cnt); diff --git a/src/binding/python/ParticleSpecies.cpp b/src/binding/python/ParticleSpecies.cpp index 55fe0aaef0..debc80776e 100644 --- a/src/binding/python/ParticleSpecies.cpp +++ b/src/binding/python/ParticleSpecies.cpp @@ -55,9 +55,11 @@ void init_ParticleSpecies(py::module &m) // garbage collection: return value must be freed before Series py::keep_alive<1, 0>()); add_pickle( - cl, [](openPMD::Series &series, std::vector const &group) { + cl, [](openPMD::Series series, std::vector const &group) { uint64_t const n_it = std::stoull(group.at(1)); - return series.iterations[n_it].particles[group.at(3)]; + ParticleSpecies &res = + series.iterations[n_it].open().particles[group.at(3)]; + return internal::makeOwning(res, std::move(series)); }); finalize_container(py_ps_cnt); diff --git a/src/binding/python/Record.cpp b/src/binding/python/Record.cpp index 9cad75d03a..c56f4b6f0f 100644 --- a/src/binding/python/Record.cpp +++ b/src/binding/python/Record.cpp @@ -72,9 +72,11 @@ void init_Record(py::module &m) .def("set_time_offset", &Record::setTimeOffset) .def("set_time_offset", &Record::setTimeOffset); add_pickle( - cl, [](openPMD::Series &series, std::vector const &group) { + cl, [](openPMD::Series series, std::vector const &group) { uint64_t const n_it = std::stoull(group.at(1)); - return series.iterations[n_it].particles[group.at(3)][group.at(4)]; + auto &res = series.iterations[n_it].open().particles[group.at(3)] + [group.at(4)]; + return internal::makeOwning(res, std::move(series)); }); finalize_container(py_r_cnt); diff --git a/src/binding/python/RecordComponent.cpp b/src/binding/python/RecordComponent.cpp index e323127885..0cede898da 100644 --- a/src/binding/python/RecordComponent.cpp +++ b/src/binding/python/RecordComponent.cpp @@ -1122,10 +1122,15 @@ void init_RecordComponent(py::module &m) .def("set_unit_SI", &RecordComponent::setUnitSI) // deprecated ; add_pickle( - cl, [](openPMD::Series &series, std::vector const &group) { + cl, [](openPMD::Series series, std::vector const &group) { uint64_t const n_it = std::stoull(group.at(1)); - return series.iterations[n_it].particles[group.at(3)][group.at( - 4)][group.size() < 6 ? RecordComponent::SCALAR : group.at(5)]; + auto &res = + series.iterations[n_it] + .open() + .particles[group.at(3)][group.at(4)] + [group.size() < 6 ? RecordComponent::SCALAR + : group.at(5)]; + return internal::makeOwning(res, std::move(series)); }); addRecordComponentSetGet(cl); diff --git a/src/binding/python/Series.cpp b/src/binding/python/Series.cpp index 9a87da3bdb..37de823f2a 100644 --- a/src/binding/python/Series.cpp +++ b/src/binding/python/Series.cpp @@ -22,6 +22,7 @@ #include "openPMD/IO/Access.hpp" #include "openPMD/IterationEncoding.hpp" #include "openPMD/auxiliary/JSON.hpp" +#include "openPMD/binding/python/Pickle.hpp" #include "openPMD/config.hpp" #include "openPMD/binding/python/Common.hpp" @@ -150,6 +151,13 @@ not possible once it has been closed. // keep handle alive while iterator exists py::keep_alive<0, 1>()); + // `clang-format on/off` doesn't help here. + // Writing this without a macro would lead to a huge diff due to + // clang-format. +#define OPENPMD_AVOID_CLANG_FORMAT auto cl = + OPENPMD_AVOID_CLANG_FORMAT +#undef OPENPMD_AVOID_CLANG_FORMAT + py::class_(m, "Series") .def( @@ -394,6 +402,11 @@ this method twice. Look for the WriteIterations class for further documentation. )END"); + add_pickle( + cl, [](openPMD::Series series, std::vector const &) { + return series; + }); + m.def( "merge_json", &json::merge, diff --git a/test/python/unittest/API/APITest.py b/test/python/unittest/API/APITest.py index 6ff987f657..59e6b5c97e 100644 --- a/test/python/unittest/API/APITest.py +++ b/test/python/unittest/API/APITest.py @@ -971,6 +971,8 @@ def testPickle(self): series.flush() # Pickle + pickled_s = pickle.dumps(series) + pickled_i = pickle.dumps(i) pickled_E = pickle.dumps(E) pickled_E_x = pickle.dumps(E_x) pickled_electrons = pickle.dumps(electrons) @@ -980,6 +982,7 @@ def testPickle(self): pickled_w = pickle.dumps(w) print(f"This is my pickled object:\n{pickled_E_x}\n") + series.close() del E del E_x del electrons @@ -987,9 +990,12 @@ def testPickle(self): del pos del pos_y del w + del i del series # Unpickling the object + series = pickle.loads(pickled_s) + i = pickle.loads(pickled_i) E = pickle.loads(pickled_E) E_x = pickle.loads(pickled_E_x) electrons = pickle.loads(pickled_electrons) @@ -1000,6 +1006,8 @@ def testPickle(self): print( f"This is E_x.position of the unpickled object:\n{E_x.position}\n") + self.assertIsInstance(series, io.Series) + self.assertIsInstance(i, io.Iteration) self.assertIsInstance(E, io.Mesh) self.assertIsInstance(E_x, io.Mesh_Record_Component) self.assertIsInstance(electrons, io.ParticleSpecies)