From e668e8629c07b9d94c7bad573def3df12974ccbe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 22 Dec 2023 15:59:59 +0100 Subject: [PATCH 01/37] Adios2 warn groupbased encoding (#1498) * Warning: group-based iteration encoding in ADIOS2 * Remove adios2.usesteps, set it always to true This uncovered loads of bugs * Remove requireActiveStep * Remove StreamStatus::Parsing * Remove usesteps = true/false from tests * Fix CI error * Testing: ADIOS2 < v2.9 compatibility * Less misleading warning message * Unify struct/class * Transition a bit more leniently (part 1) * Transition a bit more leniently (part 2) * Remove usesteps option from documentation * Cleanup and fixes * Fix tests * Fix the warning text --- CMakeLists.txt | 14 +- docs/source/backends/adios2.rst | 2 - docs/source/details/backendconfig.rst | 1 - include/openPMD/IO/ADIOS/ADIOS2IOHandler.hpp | 50 +--- include/openPMD/IO/AbstractIOHandler.hpp | 12 + include/openPMD/IO/IOTask.hpp | 7 - src/IO/ADIOS/ADIOS2IOHandler.cpp | 294 ++++++++----------- src/IO/AbstractIOHandlerImpl.cpp | 18 +- src/ReadIterations.cpp | 1 - src/Series.cpp | 4 - test/ParallelIOTest.cpp | 24 +- test/SerialIOTest.cpp | 159 ++++------ 12 files changed, 237 insertions(+), 349 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 20673b1c50..1d81e94d83 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1360,16 +1360,18 @@ if(openPMD_BUILD_TESTING) ${MPI_TEST_EXE} ${Python_EXECUTABLE} \ ${openPMD_RUNTIME_OUTPUT_DIRECTORY}/openpmd-pipe \ --infile ../samples/git-sample/data00000100.h5 \ - --outfile ../samples/git-sample/single_iteration.bp && \ + --outfile \ + ../samples/git-sample/single_iteration_%T.bp && \ \ ${MPI_TEST_EXE} ${Python_EXECUTABLE} \ ${openPMD_RUNTIME_OUTPUT_DIRECTORY}/openpmd-pipe \ --infile ../samples/git-sample/thetaMode/data%T.h5 \ - --outfile ../samples/git-sample/thetaMode/data.bp && \ + --outfile \ + ../samples/git-sample/thetaMode/data_%T.bp && \ \ ${Python_EXECUTABLE} \ ${openPMD_RUNTIME_OUTPUT_DIRECTORY}/openpmd-pipe \ - --infile ../samples/git-sample/thetaMode/data.bp \ + --infile ../samples/git-sample/thetaMode/data_%T.bp \ --outfile ../samples/git-sample/thetaMode/data%T.json \ " WORKING_DIRECTORY ${openPMD_RUNTIME_OUTPUT_DIRECTORY} @@ -1378,17 +1380,17 @@ if(openPMD_BUILD_TESTING) add_test(NAME CLI.pipe.py COMMAND sh -c "${Python_EXECUTABLE} \ - ${openPMD_RUNTIME_OUTPUT_DIRECTORY}/openpmd-pipe \ + ${openPMD_RUNTIME_OUTPUT_DIRECTORY}/openpmd-pipe \ --infile ../samples/git-sample/data%T.h5 \ --outfile ../samples/git-sample/data%T.bp && \ \ ${Python_EXECUTABLE} \ - ${openPMD_RUNTIME_OUTPUT_DIRECTORY}/openpmd-pipe \ + ${openPMD_RUNTIME_OUTPUT_DIRECTORY}/openpmd-pipe \ --infile ../samples/git-sample/thetaMode/data%T.h5 \ --outfile ../samples/git-sample/thetaMode/data%T.bp && \ \ ${Python_EXECUTABLE} \ - ${openPMD_RUNTIME_OUTPUT_DIRECTORY}/openpmd-pipe \ + ${openPMD_RUNTIME_OUTPUT_DIRECTORY}/openpmd-pipe \ --infile ../samples/git-sample/thetaMode/data%T.bp \ --outfile ../samples/git-sample/thetaMode/data%T.json \ " diff --git a/docs/source/backends/adios2.rst b/docs/source/backends/adios2.rst index 13e357022c..a6161ed9dc 100644 --- a/docs/source/backends/adios2.rst +++ b/docs/source/backends/adios2.rst @@ -56,7 +56,6 @@ In order to activate steps, it is imperative to use the :ref:`Streaming API `_) that disallows random-accessing steps in file-based engines. With this ADIOS2 release, files written with steps may only be read using the streaming API. 
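As a minimal sketch (file name and loop body chosen purely for illustration), step-wise reading through the streaming API looks like this:

.. code-block:: cpp

   #include <openPMD/openPMD.hpp>

   int main()
   {
       // READ_LINEAR requests step-by-step (streaming) read semantics
       openPMD::Series series("data_%T.bp", openPMD::Access::READ_LINEAR);
       for (auto iteration : series.readIterations())
       {
           // access iteration.meshes / iteration.particles here
           iteration.close(); // closes the underlying ADIOS2 step
       }
   }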
-In order to keep compatibility with older codes reading ADIOS2 files, step-based processing must currently be opted in to via use of the :ref:`JSON parameter` ``adios2.engine.usesteps = true`` when using a file-based engine such as BP3 or BP4 (usesteps). Upon reading a file, the ADIOS2 backend will automatically recognize whether it has been written with or without steps, ignoring the JSON option mentioned above. Steps are mandatory for streaming-based engines and trying to switch them off will result in a runtime error. @@ -183,7 +182,6 @@ This feature can be activated via the JSON/TOML key ``adios2.use_group_table = t It is fully backward-compatible with the old layout of openPMD in ADIOS2 and mostly forward-compatible (except the support for steps). The variable-based encoding of openPMD automatically activates the group table feature. -The group table feature automatically activates the use of ADIOS2 steps (which until version 0.15 was an opt-in feature via ``adios2.engine.usesteps = true``). Memory usage ------------ diff --git a/docs/source/details/backendconfig.rst b/docs/source/details/backendconfig.rst index 57d577af10..ae2a2d4f63 100644 --- a/docs/source/details/backendconfig.rst +++ b/docs/source/details/backendconfig.rst @@ -121,7 +121,6 @@ Explanation of the single keys: * ``adios2.engine.parameters``: An associative array of string-formatted engine parameters, passed directly through to ``adios2::IO::SetParameters``. Please refer to the `official ADIOS2 documentation `_ for the available engine parameters. The openPMD-api does not interpret these values and instead simply forwards them to ADIOS2. -* ``adios2.engine.usesteps``: Described more closely in the documentation for the :ref:`ADIOS2 backend` (usesteps). * ``adios2.engine.preferred_flush_target`` Only relevant for BP5 engine, possible values are ``"disk"`` and ``"buffer"`` (default: ``"disk"``). * If ``"disk"``, data will be moved to disk on every flush. diff --git a/include/openPMD/IO/ADIOS/ADIOS2IOHandler.hpp b/include/openPMD/IO/ADIOS/ADIOS2IOHandler.hpp index cdd7983312..269d908360 100644 --- a/include/openPMD/IO/ADIOS/ADIOS2IOHandler.hpp +++ b/include/openPMD/IO/ADIOS/ADIOS2IOHandler.hpp @@ -225,12 +225,6 @@ class ADIOS2IOHandlerImpl #if openPMD_HAVE_MPI std::optional m_communicator; #endif - /* - * If the iteration encoding is variableBased, we default to using a group - * table, since it is the only reliable way to recover currently active - * groups. - */ - IterationEncoding m_iterationEncoding = IterationEncoding::groupBased; /** * The ADIOS2 engine type, to be passed to adios2::IO::SetEngine */ @@ -439,6 +433,8 @@ namespace ADIOS2Defaults "__openPMD_internal/openPMD2_adios2_schema"; constexpr const_str str_isBoolean = "__is_boolean__"; constexpr const_str str_activeTablePrefix = "__openPMD_groups"; + constexpr const_str str_groupBasedWarning = + "__openPMD_internal/warning_bugprone_groupbased_encoding"; } // namespace ADIOS2Defaults namespace detail @@ -914,7 +910,6 @@ namespace detail UseGroupTable detectGroupTable(); adios2::Engine &getEngine(); - adios2::Engine &requireActiveStep(); template void enqueue(BA &&ba); @@ -976,15 +971,9 @@ namespace detail * @brief Begin or end an ADIOS step. * * @param mode Whether to begin or end a step. - * @param calledExplicitly True if called due to a public API call. - * False if called from requireActiveStep. - * Some engines (BP5) require that every interaction happens within - * an active step, meaning that we need to call advance() - * implicitly at times. 
When doing that, do not tag the dataset - * with __openPMD_internal/useSteps (yet). * @return AdvanceStatus */ - AdvanceStatus advance(AdvanceMode mode, bool calledExplicitly); + AdvanceStatus advance(AdvanceMode mode); /* * Delete all buffered actions without running them. @@ -1069,34 +1058,7 @@ namespace detail * without steps. This is not a workaround since not using steps, * while inefficient in ADIOS2, is something that we support. */ - NoStream, - /** - * Rationale behind this state: - * When user code opens a Series, series.iterations should contain - * all available iterations. - * If accessing a file without opening a step, ADIOS2 will grant - * access to variables and attributes from all steps, allowing us - * to parse the complete dump. - * This state indicates that no step should be opened for parsing - * purposes (which is necessary in streaming engines, hence they - * are initialized with the OutsideOfStep state). - * A step should only be opened if an explicit ADVANCE task arrives - * at the backend. - * - * @todo If the streaming API is used on files, parsing the whole - * Series up front is unnecessary work. - * Our frontend does not yet allow to distinguish whether - * parsing the whole series will be necessary since parsing - * happens upon construction time of Series, - * but the classical and the streaming API are both activated - * afterwards from the created Series object. - * Hence, improving this requires refactoring in our - * user-facing API. Ideas: - * (1) Delayed lazy parsing of iterations upon accessing - * (would bring other benefits also). - * (2) Introduce a restricted class StreamingSeries. - */ - Parsing, + ReadWithoutStream, /** * The stream status of a file-based engine will be decided upon * opening the engine if in read mode. Up until then, this right @@ -1157,8 +1119,8 @@ namespace detail void create_IO(); void configure_IO(ADIOS2IOHandlerImpl &impl); - void configure_IO_Read(std::optional userSpecifiedUsesteps); - void configure_IO_Write(std::optional userSpecifiedUsesteps); + void configure_IO_Read(); + void configure_IO_Write(); }; } // namespace detail diff --git a/include/openPMD/IO/AbstractIOHandler.hpp b/include/openPMD/IO/AbstractIOHandler.hpp index 1106f78f16..71a0587b0f 100644 --- a/include/openPMD/IO/AbstractIOHandler.hpp +++ b/include/openPMD/IO/AbstractIOHandler.hpp @@ -23,6 +23,7 @@ #include "openPMD/IO/Access.hpp" #include "openPMD/IO/Format.hpp" #include "openPMD/IO/IOTask.hpp" +#include "openPMD/IterationEncoding.hpp" #include "openPMD/config.hpp" #if openPMD_HAVE_MPI @@ -168,6 +169,11 @@ namespace internal } } // namespace internal +namespace detail +{ + struct BufferedActions; +} + /** Interface for communicating between logical and physically persistent data. * * Input and output operations are channeled through a task queue that is @@ -179,8 +185,12 @@ namespace internal class AbstractIOHandler { friend class Series; + friend class ADIOS2IOHandlerImpl; + friend struct detail::BufferedActions; private: + IterationEncoding m_encoding = IterationEncoding::groupBased; + void setIterationEncoding(IterationEncoding encoding) { /* @@ -193,6 +203,8 @@ class AbstractIOHandler // do we really want to have those as const members..? 
*const_cast(&m_backendAccess) = Access::CREATE; } + + m_encoding = encoding; } public: diff --git a/include/openPMD/IO/IOTask.hpp b/include/openPMD/IO/IOTask.hpp index 7ca9e490a3..d2fc05f379 100644 --- a/include/openPMD/IO/IOTask.hpp +++ b/include/openPMD/IO/IOTask.hpp @@ -125,7 +125,6 @@ struct OPENPMDAPI_EXPORT Parameter } std::string name = ""; - IterationEncoding encoding = IterationEncoding::groupBased; }; template <> @@ -172,12 +171,6 @@ struct OPENPMDAPI_EXPORT Parameter } std::string name = ""; - /* - * The backends might need to ensure availability of certain features - * for some iteration encodings, e.g. availability of ADIOS steps for - * variableBased encoding. - */ - IterationEncoding encoding = IterationEncoding::groupBased; using ParsePreference = internal::ParsePreference; std::shared_ptr out_parsePreference = std::make_shared(ParsePreference::UpFront); diff --git a/src/IO/ADIOS/ADIOS2IOHandler.cpp b/src/IO/ADIOS/ADIOS2IOHandler.cpp index 534e364f53..708fbbdef0 100644 --- a/src/IO/ADIOS/ADIOS2IOHandler.cpp +++ b/src/IO/ADIOS/ADIOS2IOHandler.cpp @@ -26,6 +26,7 @@ #include "openPMD/IO/ADIOS/ADIOS2Auxiliary.hpp" #include "openPMD/IO/ADIOS/ADIOS2FilePosition.hpp" #include "openPMD/IO/ADIOS/ADIOS2IOHandler.hpp" +#include "openPMD/IterationEncoding.hpp" #include "openPMD/auxiliary/Environment.hpp" #include "openPMD/auxiliary/Filesystem.hpp" #include "openPMD/auxiliary/Mpi.hpp" @@ -38,6 +39,7 @@ #include #include #include +#include #include #include @@ -543,6 +545,33 @@ ADIOS2IOHandlerImpl::flush(internal::ParsedFlushParams &flushParams) return res; } +/* + * If the iteration encoding is variableBased, we default to using a group + * table, since it is the only reliable way to recover currently active + * groups. + * If group-based encoding is used without group table, then + * READ_LINEAR is forbidden as it will be unreliable in reporting + * currently available data. + * Use AbstractIOHandler::m_encoding for implementing this logic. + */ + +static constexpr char const *warningADIOS2NoGroupbasedEncoding = &R"( +[Warning] Use of group-based encoding in ADIOS2 is discouraged as it can lead +to drastic performance issues, no matter if I/O steps are used or not. + +* If not using I/O steps: A crash will corrupt all data since there is only + one atomic logical write operation upon closing the file. + Memory performance can be pathological depending on the setup. +* If using I/O steps: Each step will add new variables and attributes instead + of reusing those from earlier steps. ADIOS2 is not optimized for this and + especially the BP5 engine will show a quadratic increase in metadata size + as the number of steps increase. +We advise you to pick either file-based encoding or variable-based encoding +(variable-based encoding is not yet feature-complete in the openPMD-api). 
+For more details, refer to +https://openpmd-api.readthedocs.io/en/latest/usage/concepts.html#iteration-and-series)" + [1]; + void ADIOS2IOHandlerImpl::createFile( Writable *writable, Parameter const ¶meters) { @@ -578,7 +607,6 @@ void ADIOS2IOHandlerImpl::createFile( VERIFY(success, "[ADIOS2] Could not create directory."); } - m_iterationEncoding = parameters.encoding; associateWithFile(writable, shared_name); this->m_dirty.emplace(shared_name); @@ -586,7 +614,26 @@ void ADIOS2IOHandlerImpl::createFile( writable->abstractFilePosition = std::make_shared(); // enforce opening the file // lazy opening is deathly in parallel situations - getFileData(shared_name, IfFileNotOpen::OpenImplicitly); + auto &fileData = + getFileData(shared_name, IfFileNotOpen::OpenImplicitly); + + if (!printedWarningsAlready.noGroupBased && + m_writeAttributesFromThisRank && + m_handler->m_encoding == IterationEncoding::groupBased) + { + // For a peaceful phase-out of group-based encoding in ADIOS2, + // print this warning only in the new layout (with group table) + if (m_useGroupTable.value_or(UseGroupTable::No) == + UseGroupTable::Yes) + { + std::cerr << warningADIOS2NoGroupbasedEncoding << std::endl; + printedWarningsAlready.noGroupBased = true; + } + fileData.m_IO.DefineAttribute( + ADIOS2Defaults::str_groupBasedWarning, + std::string("Consider using file-based or variable-based " + "encoding instead in ADIOS2.")); + } } } @@ -832,7 +879,6 @@ void ADIOS2IOHandlerImpl::openFile( writable->written = true; writable->abstractFilePosition = std::make_shared(); - m_iterationEncoding = parameters.encoding; // enforce opening the file // lazy opening is deathly in parallel situations auto &fileData = getFileData(file, IfFileNotOpen::OpenImplicitly); @@ -1109,8 +1155,6 @@ void ADIOS2IOHandlerImpl::getBufferView( break; } - ba.requireActiveStep(); - if (parameters.update) { detail::I_UpdateSpan &updater = @@ -1145,7 +1189,6 @@ void ADIOS2IOHandlerImpl::readAttribute( auto file = refreshFileFromParent(writable, /* preferParentFile = */ false); auto pos = setAndGetFilePosition(writable); detail::BufferedActions &ba = getFileData(file, IfFileNotOpen::ThrowError); - ba.requireActiveStep(); auto name = nameOfAttribute(writable, parameters.name); auto type = detail::attributeInfo(ba.m_IO, name, /* verbose = */ true); @@ -1183,7 +1226,6 @@ void ADIOS2IOHandlerImpl::listPaths( * from variables and attributes. 
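     * (Illustrative example: with the standard openPMD basePath "/data/%T/",
     * a mesh record component may be stored as an ADIOS2 variable named
     * "/data/100/meshes/E/x"; the paths listed here are then recovered by
     * string-splitting such variable and attribute names.)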
*/ auto &fileData = getFileData(file, IfFileNotOpen::ThrowError); - fileData.requireActiveStep(); std::unordered_set subdirs; /* @@ -1315,7 +1357,6 @@ void ADIOS2IOHandlerImpl::listDatasets( */ auto &fileData = getFileData(file, IfFileNotOpen::ThrowError); - fileData.requireActiveStep(); std::unordered_set subdirs; for (auto var : fileData.availableVariablesPrefixed(myName)) @@ -1351,7 +1392,6 @@ void ADIOS2IOHandlerImpl::listAttributes( attributePrefix = ""; } auto &ba = getFileData(file, IfFileNotOpen::ThrowError); - ba.requireActiveStep(); // make sure that the attributes are present std::vector attrs = ba.availableAttributesPrefixed(attributePrefix); @@ -1371,8 +1411,7 @@ void ADIOS2IOHandlerImpl::advance( { auto file = m_files.at(writable); auto &ba = getFileData(file, IfFileNotOpen::ThrowError); - *parameters.status = - ba.advance(parameters.mode, /* calledExplicitly = */ true); + *parameters.status = ba.advance(parameters.mode); } void ADIOS2IOHandlerImpl::closePath( @@ -1416,8 +1455,8 @@ void ADIOS2IOHandlerImpl::availableChunks( std::string varName = nameOfVariable(writable); auto engine = ba.getEngine(); // make sure that data are present auto datatype = detail::fromADIOS2Type(ba.m_IO.VariableType(varName)); - bool allSteps = m_handler->m_frontendAccess != Access::READ_LINEAR && - ba.streamStatus == detail::BufferedActions::StreamStatus::NoStream; + bool allSteps = ba.streamStatus == + detail::BufferedActions::StreamStatus::ReadWithoutStream; switchAdios2VariableType( datatype, parameters, @@ -1786,7 +1825,6 @@ namespace detail auto &filedata = impl->getFileData( file, ADIOS2IOHandlerImpl::IfFileNotOpen::ThrowError); - filedata.requireActiveStep(); filedata.invalidateAttributesMap(); adios2::IO IO = filedata.m_IO; impl->m_dirty.emplace(std::move(file)); @@ -1946,7 +1984,6 @@ namespace detail { auto &fileData = impl->getFileData( file, ADIOS2IOHandlerImpl::IfFileNotOpen::ThrowError); - fileData.requireActiveStep(); auto &IO = fileData.m_IO; adios2::Variable var = IO.InquireVariable(varName); if (!var) @@ -2230,9 +2267,7 @@ namespace detail // might have been closed previously if (engine) { - if (streamStatus == StreamStatus::DuringStep || - (streamStatus == StreamStatus::NoStream && - m_mode == adios2::Mode::Write)) + if (streamStatus == StreamStatus::DuringStep) { engine.EndStep(); } @@ -2329,36 +2364,6 @@ namespace detail } return false; } - - bool useStepsInWriting( - UseGroupTable groupTable, std::string const &engineType) - { - if (engineType == "bp5") - { - /* - * BP5 does not require steps when reading, but it requires - * them when writing. - */ - return true; - } - switch (supportsPerstepParsing(Access::CREATE, engineType)) - { - case PerstepParsing::Required: - return true; - case PerstepParsing::Supported: - switch (groupTable) - { - case UseGroupTable::No: - return false; - case UseGroupTable::Yes: - return true; - } - break; - case PerstepParsing::Unsupported: - return false; - } - return false; // unreachable - } } // namespace size_t BufferedActions::currentStep() @@ -2373,18 +2378,8 @@ namespace detail } } - void BufferedActions::configure_IO_Read( - std::optional userSpecifiedUsesteps) + void BufferedActions::configure_IO_Read() { - if (userSpecifiedUsesteps.has_value() && - m_impl->m_handler->m_backendAccess != Access::READ_WRITE) - { - std::cerr << "Explicitly specified `adios2.usesteps` in Read mode. " - "Usage of steps will be determined by what is found " - "in the file being read." 
- << std::endl; - } - bool upfrontParsing = supportsUpfrontParsing( m_impl->m_handler->m_backendAccess, m_engineType); PerstepParsing perstepParsing = supportsPerstepParsing( @@ -2411,18 +2406,9 @@ namespace detail m_IO.SetParameter("StreamReader", "On"); break; case PerstepParsing::Unsupported: - streamStatus = StreamStatus::NoStream; - parsePreference = ParsePreference::UpFront; - /* - * Note that in BP4 with linear access mode, we set the - * StreamReader option, disabling upfrontParsing capability. - * So, this branch is only taken by niche engines, such as - * BP3 or HDF5, or by BP5 without group table and normal read - * mode. Need to fall back to random access parsing. - */ -#if openPMD_HAS_ADIOS_2_8 - m_mode = adios2::Mode::ReadRandomAccess; -#endif + throw error::Internal( + "Internal control flow error: Per-Step parsing cannot be " + "unsupported when access type is READ_LINEAR"); break; } break; @@ -2441,7 +2427,7 @@ namespace detail } if (upfrontParsing) { - streamStatus = StreamStatus::NoStream; + streamStatus = StreamStatus::ReadWithoutStream; parsePreference = ParsePreference::UpFront; } else @@ -2464,8 +2450,7 @@ namespace detail } } - void BufferedActions::configure_IO_Write( - std::optional userSpecifiedUsesteps) + void BufferedActions::configure_IO_Write() { optimizeAttributesStreaming = // Also, it should only be done when truly streaming, not @@ -2473,20 +2458,7 @@ namespace detail // streaming engine (otherwise attributes might vanish) nonpersistentEngine(m_engineType); - bool useSteps = useStepsInWriting(useGroupTable(), m_engineType); - if (userSpecifiedUsesteps.has_value()) - { - useSteps = userSpecifiedUsesteps.value(); - if (!useSteps && nonpersistentEngine(m_engineType)) - { - throw error::WrongAPIUsage( - "Cannot switch off IO steps for non-persistent stream " - "engines in ADIOS2."); - } - } - - streamStatus = - useSteps ? StreamStatus::OutsideOfStep : StreamStatus::NoStream; + streamStatus = StreamStatus::OutsideOfStep; } void BufferedActions::configure_IO(ADIOS2IOHandlerImpl &impl) @@ -2501,8 +2473,12 @@ namespace detail #if openPMD_HAS_ADIOS_2_9 if (!m_impl->m_useGroupTable.has_value()) { - switch (m_impl->m_iterationEncoding) + switch (m_impl->m_handler->m_encoding) { + /* + * For variable-based encoding, this does not matter as it is + * new and requires >= v2.9 features anyway. + */ case IterationEncoding::variableBased: m_impl->m_useGroupTable = UseGroupTable::Yes; break; @@ -2516,7 +2492,8 @@ namespace detail if (m_impl->m_modifiableAttributes == ADIOS2IOHandlerImpl::ModifiableAttributes::Unspecified) { - m_impl->m_modifiableAttributes = m_impl->m_iterationEncoding == + m_impl->m_modifiableAttributes = + m_impl->m_handler->m_encoding == IterationEncoding::variableBased ? ADIOS2IOHandlerImpl::ModifiableAttributes::Yes : ADIOS2IOHandlerImpl::ModifiableAttributes::No; @@ -2567,7 +2544,6 @@ namespace detail // set engine parameters std::set alreadyConfigured; - std::optional userSpecifiedUsesteps; bool wasTheFlushTargetSpecifiedViaJSON = false; auto engineConfig = impl.config(ADIOS2Defaults::str_engine); if (!engineConfig.json().is_null()) @@ -2599,8 +2575,10 @@ namespace detail impl.config(ADIOS2Defaults::str_usesteps, engineConfig); if (!_useAdiosSteps.json().is_null() && writeOnly(m_mode)) { - userSpecifiedUsesteps = - std::make_optional(_useAdiosSteps.json().get()); + std::cerr << "[ADIOS2 backend] WARNING: Parameter " + "`adios2.engine.usesteps` is deprecated since use " + "of steps is now always enabled." 
+ << std::endl; } if (engineConfig.json().contains(ADIOS2Defaults::str_flushtarget)) @@ -2643,21 +2621,21 @@ namespace detail { case Access::READ_LINEAR: case Access::READ_ONLY: - configure_IO_Read(userSpecifiedUsesteps); + configure_IO_Read(); break; case Access::READ_WRITE: if (readOnly(m_mode)) { - configure_IO_Read(userSpecifiedUsesteps); + configure_IO_Read(); } else { - configure_IO_Write(userSpecifiedUsesteps); + configure_IO_Write(); } break; case Access::APPEND: case Access::CREATE: - configure_IO_Write(userSpecifiedUsesteps); + configure_IO_Write(); break; } @@ -2828,11 +2806,8 @@ namespace detail // the streaming API was used. m_engine = std::make_optional( adios2::Engine(m_IO.Open(m_file, tempMode))); - if (streamStatus == StreamStatus::NoStream) - { - // Write everything into one big step - m_engine->BeginStep(); - } + m_engine->BeginStep(); + streamStatus = StreamStatus::DuringStep; break; } #if openPMD_HAS_ADIOS_2_8 @@ -2911,10 +2886,45 @@ namespace detail "anything in an engine that supports " "up-front parsing."); } - streamStatus = StreamStatus::Parsing; + streamStatus = StreamStatus::ReadWithoutStream; } else { + // If the iteration encoding is group-based and + // no group table is used, we're now at a dead-end. + // Step-by-Step parsing is unreliable in that mode + // since groups might be reported that are not + // there. + // But we were only able to find this out by opening + // the ADIOS2 file with an access mode that was + // possibly wrong, so we would have to close and + // reopen here. + // Since group-based encoding is a bag of trouble in + // ADIOS2 anyway, we just don't support this + // particular use case. + // This failure will only arise when the following + // conditions are met: + // + // 1) group-based encoding + // 2) no group table (i.e. old "ADIOS2 schema") + // 3) LINEAR access mode + // + // This is a relatively lenient restriction compared + // to forbidding group-based encoding in ADIOS2 + // altogether. + if (m_impl->m_useGroupTable.value() == + UseGroupTable::No && + m_IO.InquireAttribute( + ADIOS2Defaults::str_groupBasedWarning)) + { + throw error::OperationUnsupportedInBackend( + "ADIOS2", + "Trying to open a group-based ADIOS2 file " + "that does not have a group table with " + "LINEAR access type. That combination is " + "very buggy, so please use " + "READ_ONLY/READ_RANDOM_ACCESS instead."); + } if (!openedANewStep && m_engine.value().BeginStep() != adios2::StepStatus::OK) @@ -2932,11 +2942,11 @@ namespace detail * If openedANewStep is true, then the file consists * of one large step, we just leave it open. */ - streamStatus = StreamStatus::NoStream; + streamStatus = StreamStatus::ReadWithoutStream; } break; } - case StreamStatus::NoStream: + case StreamStatus::ReadWithoutStream: // using random-access mode break; case StreamStatus::DuringStep: @@ -2972,31 +2982,6 @@ namespace detail return m_engine.value(); } - adios2::Engine &BufferedActions::requireActiveStep() - { - adios2::Engine &eng = getEngine(); - /* - * If streamStatus is Parsing, do NOT open the step. 
- */ - if (streamStatus == StreamStatus::OutsideOfStep) - { - switch ( - advance(AdvanceMode::BEGINSTEP, /* calledExplicitly = */ false)) - { - case AdvanceStatus::OVER: - throw std::runtime_error( - "[ADIOS2] Operation requires active step but no step is " - "left."); - case AdvanceStatus::OK: - case AdvanceStatus::RANDOMACCESS: - // pass - break; - } - streamStatus = StreamStatus::DuringStep; - } - return eng; - } - template void BufferedActions::enqueue(BA &&ba) { @@ -3068,10 +3053,6 @@ namespace detail } return; } - else - { - requireActiveStep(); - } } for (auto &ba : m_buffer) { @@ -3207,48 +3188,23 @@ namespace detail /* flushUnconditionally = */ false); } - AdvanceStatus - BufferedActions::advance(AdvanceMode mode, bool calledExplicitly) + AdvanceStatus BufferedActions::advance(AdvanceMode mode) { if (streamStatus == StreamStatus::Undecided) { - // stream status gets decided on upon opening an engine - getEngine(); + throw error::Internal( + "[BufferedActions::advance()] StreamStatus Undecided before " + "beginning/ending a step?"); } // sic! no else - if (streamStatus == StreamStatus::NoStream) + if (streamStatus == StreamStatus::ReadWithoutStream) { - if (writeOnly(m_mode) && - !m_IO.InquireAttribute( - ADIOS2Defaults::str_usesstepsAttribute) && - m_impl->m_writeAttributesFromThisRank) - { - m_IO.DefineAttribute( - ADIOS2Defaults::str_usesstepsAttribute, 0); - } flush( ADIOS2FlushParams{FlushLevel::UserFlush}, /* writeLatePuts = */ false); return AdvanceStatus::RANDOMACCESS; } - /* - * If advance() is called implicitly (by requireActiveStep()), the - * Series is not necessarily using steps (logically). - * But in some ADIOS2 engines, at least one step must be opened - * (physically) to do anything. - * The usessteps tag should only be set when the Series is *logically* - * using steps. 
- */ - if (calledExplicitly && writeOnly(m_mode) && - !m_IO.InquireAttribute( - ADIOS2Defaults::str_usesstepsAttribute) && - m_impl->m_writeAttributesFromThisRank) - { - m_IO.DefineAttribute( - ADIOS2Defaults::str_usesstepsAttribute, 1); - } - switch (mode) { case AdvanceMode::ENDSTEP: { @@ -3270,6 +3226,15 @@ namespace detail "opened."); } } + + if (writeOnly(m_mode) && m_impl->m_writeAttributesFromThisRank && + !m_IO.InquireAttribute( + ADIOS2Defaults::str_usesstepsAttribute)) + { + m_IO.DefineAttribute( + ADIOS2Defaults::str_usesstepsAttribute, 1); + } + flush( ADIOS2FlushParams{FlushLevel::UserFlush}, [](BufferedActions &, adios2::Engine &eng) { eng.EndStep(); }, @@ -3412,7 +3377,6 @@ namespace detail { if (writeOnly(m_mode) && m_impl->m_writeAttributesFromThisRank) { - requireActiveStep(); auto currentStepBuffered = currentStep(); do { diff --git a/src/IO/AbstractIOHandlerImpl.cpp b/src/IO/AbstractIOHandlerImpl.cpp index 9a12a45b35..bbab360b4d 100644 --- a/src/IO/AbstractIOHandlerImpl.cpp +++ b/src/IO/AbstractIOHandlerImpl.cpp @@ -25,6 +25,7 @@ #include "openPMD/backend/Writable.hpp" #include +#include namespace openPMD { @@ -317,7 +318,22 @@ std::future AbstractIOHandlerImpl::flush() auto ¶meter = deref_dynamic_cast>( i.parameter.get()); writeToStderr( - "[", i.writable->parent, "->", i.writable, "] ADVANCE"); + "[", + i.writable->parent, + "->", + i.writable, + "] ADVANCE ", + [&]() { + switch (parameter.mode) + { + + case AdvanceMode::BEGINSTEP: + return "BEGINSTEP"; + case AdvanceMode::ENDSTEP: + return "ENDSTEP"; + } + throw std::runtime_error("Unreachable!"); + }()); advance(i.writable, parameter); break; } diff --git a/src/ReadIterations.cpp b/src/ReadIterations.cpp index 74e963eb55..fff398c8cd 100644 --- a/src/ReadIterations.cpp +++ b/src/ReadIterations.cpp @@ -74,7 +74,6 @@ void SeriesIterator::initSeriesInLinearReadMode() case IE::variableBased: { Parameter fOpen; fOpen.name = series.get().m_name; - fOpen.encoding = series.iterationEncoding(); series.IOHandler()->enqueue(IOTask(&series, fOpen)); series.IOHandler()->flush(internal::defaultFlushParams); using PP = Parameter::ParsePreference; diff --git a/src/Series.cpp b/src/Series.cpp index e89841db7d..5d698ffbcf 100644 --- a/src/Series.cpp +++ b/src/Series.cpp @@ -924,7 +924,6 @@ void Series::flushGorVBased( } Parameter fCreate; fCreate.name = series.m_name; - fCreate.encoding = iterationEncoding(); IOHandler()->enqueue(IOTask(this, fCreate)); } @@ -1004,7 +1003,6 @@ void Series::readFileBased() auto &series = get(); Parameter fOpen; Parameter aRead; - fOpen.encoding = iterationEncoding(); if (!auxiliary::directory_exists(IOHandler()->directory)) throw error::ReadError( @@ -1320,7 +1318,6 @@ auto Series::readGorVBased( auto &series = get(); Parameter fOpen; fOpen.name = series.m_name; - fOpen.encoding = iterationEncoding(); IOHandler()->enqueue(IOTask(this, fOpen)); IOHandler()->flush(internal::defaultFlushParams); series.m_parsePreference = *fOpen.out_parsePreference; @@ -2107,7 +2104,6 @@ void Series::openIteration(IterationIndex_t index, Iteration iteration) auto &series = get(); // open the iteration's file again Parameter fOpen; - fOpen.encoding = iterationEncoding(); fOpen.name = iterationFilename(index); IOHandler()->enqueue(IOTask(this, fOpen)); diff --git a/test/ParallelIOTest.cpp b/test/ParallelIOTest.cpp index ce7d6cc565..a82a300b0e 100644 --- a/test/ParallelIOTest.cpp +++ b/test/ParallelIOTest.cpp @@ -1403,7 +1403,8 @@ void append_mode( std::string const &extension, bool variableBased, ParseMode parseMode, - 
std::string const &jsonConfig = "{}") + std::string const &jsonConfig = "{}", + bool test_read_linear = true) { std::string filename = (variableBased ? "../samples/append/append_variablebased." @@ -1500,6 +1501,7 @@ void append_mode( } }; + if (test_read_linear) { switch (parseMode) { @@ -1624,6 +1626,8 @@ void append_mode( write.flush(); } MPI_Barrier(MPI_COMM_WORLD); + + if (test_read_linear) { Series read(filename, Access::READ_LINEAR, MPI_COMM_WORLD); switch (parseMode) @@ -1689,22 +1693,14 @@ TEST_CASE("append_mode", "[serial]") { "adios2": { - "use_group_table": false, - "engine": - { - "usesteps" : true - } + "use_group_table": false } })END"; std::string jsonConfigNew = R"END( { "adios2": { - "use_group_table": true, - "engine": - { - "usesteps" : true - } + "use_group_table": true } })END"; if (t == "bp" || t == "bp4" || t == "bp5") @@ -1718,7 +1714,11 @@ TEST_CASE("append_mode", "[serial]") */ #if HAS_ADIOS_2_8 append_mode( - t, false, ParseMode::LinearWithoutSnapshot, jsonConfigOld); + t, + false, + ParseMode::LinearWithoutSnapshot, + jsonConfigOld, + /* test_read_linear = */ false); #endif #if HAS_ADIOS_2_9 append_mode(t, false, ParseMode::WithSnapshot, jsonConfigNew); diff --git a/test/SerialIOTest.cpp b/test/SerialIOTest.cpp index 19cad91155..7f126e104f 100644 --- a/test/SerialIOTest.cpp +++ b/test/SerialIOTest.cpp @@ -4260,8 +4260,6 @@ TEST_CASE("adios2_bp5_flush", "[serial][adios2]") [adios2] [adios2.engine] -# Check that BP5 can also be used without steps -usesteps = false type = "bp5" preferred_flush_target = "disk" @@ -4280,7 +4278,6 @@ BufferChunkSize = 2147483646 # 2^31 - 2 [adios2] [adios2.engine] -usesteps = true type = "bp5" preferred_flush_target = "buffer" @@ -4298,7 +4295,6 @@ BufferChunkSize = 2147483646 # 2^31 - 2 [adios2] [adios2.engine] -usesteps = true type = "bp5" # preferred_flush_target = @@ -4321,7 +4317,6 @@ BufferChunkSize = 2147483646 # 2^31 - 2 [adios2] [adios2.engine] -usesteps = true type = "bp5" preferred_flush_target = "buffer_override" @@ -4339,7 +4334,6 @@ BufferChunkSize = 2147483646 # 2^31 - 2 [adios2] [adios2.engine] -usesteps = true type = "bp5" preferred_flush_target = "disk_override" @@ -4859,8 +4853,7 @@ this = "should not warn" void bp4_steps( std::string const &file, std::string const &options_write, - std::string const &options_read, - Access access = Access::READ_ONLY) + std::optional access = Access::READ_ONLY) { { Series writeSeries(file, Access::CREATE, options_write); @@ -4880,12 +4873,12 @@ void bp4_steps( } } - if (options_read.empty()) + if (!access.has_value()) { return; } - Series readSeries(file, access, options_read); + Series readSeries(file, *access); size_t last_iteration_index = 0; for (auto iteration : readSeries.readIterations()) @@ -4911,104 +4904,62 @@ void bp4_steps( TEST_CASE("bp4_steps", "[serial][adios2]") { - std::string useSteps = R"( + std::string bp4 = json::merge( + R"( { "ADIOS2": { "engine": { - "type": "bp4", - "usesteps": true + "type": "bp4" } } } - )"; + )", +#if openPMD_HAS_ADIOS_2_9 + R"({"ADIOS2":{"use_group_table": true}})" +#else + R"({"ADIOS2":{"use_group_table": false}})" +#endif + ); std::string nullcore = R"( { "adios2": { - "type": "nullcore", - "ENGINE": { - "type": "bp4", - "usesteps": true - } + "type": "nullcore" } } )"; - std::string dontUseSteps = R"( - # let's use TOML for this one - [adios2.engine] - type = "bp4" - UseSteps = false - )"; - // sing the yes no song - bp4_steps("../samples/bp4steps_yes_yes.bp", useSteps, useSteps); - bp4_steps("../samples/bp4steps_no_yes.bp", 
dontUseSteps, useSteps); - bp4_steps("../samples/bp4steps_yes_no.bp", useSteps, dontUseSteps); - bp4_steps("../samples/bp4steps_no_no.bp", dontUseSteps, dontUseSteps); - bp4_steps("../samples/nullcore.bp", nullcore, ""); - bp4_steps("../samples/bp4steps_default.bp", "{}", "{}"); - - // bp4_steps( - // "../samples/newlayout_bp4steps_yes_yes.bp", - // useSteps, - // useSteps, - // Access::READ_LINEAR); - // bp4_steps( - // "../samples/newlayout_bp4steps_yes_no.bp", - // useSteps, - // dontUseSteps, - // Access::READ_LINEAR); + bp4_steps("../samples/bp4steps.bp", bp4); + bp4_steps("../samples/nullcore.bp", nullcore, std::nullopt); + bp4_steps("../samples/bp4steps_default.bp", "{}"); + // Can use READ_LINEAR with ADIOS2 v2.9 because then we have the group table + // feature and can sensibly parse group-based encoding in step-based mode + bp4_steps( + "../samples/bp4steps.bp", + bp4, +#if openPMD_HAS_ADIOS_2_9 + Access::READ_LINEAR +#else + Access::READ_ONLY +#endif + ); #if openPMD_HAS_ADIOS_2_9 /* * Do this whole thing once more, but this time use the new attribute * layout. */ - useSteps = R"( - { - "adios2": { - "use_group_table": true, - "engine": { - "type": "bp4", - "usesteps": true - } - } - } - )"; - dontUseSteps = R"( + bp4 = R"( { "adios2": { "use_group_table": true, "engine": { - "type": "bp4", - "usesteps": false + "type": "bp4" } } } )"; - // sing the yes no song - bp4_steps( - "../samples/newlayout_bp4steps_yes_yes.bp", - useSteps, - useSteps, - Access::READ_LINEAR); - bp4_steps("../samples/newlayout_bp4steps_yes_yes.bp", useSteps, useSteps); - bp4_steps( - "../samples/newlayout_bp4steps_yes_no.bp", useSteps, dontUseSteps); - bp4_steps( - "../samples/newlayout_bp4steps_no_yes.bp", dontUseSteps, useSteps); - bp4_steps( - "../samples/newlayout_bp4steps_no_no.bp", dontUseSteps, dontUseSteps); - - bp4_steps( - "../samples/newlayout_bp4steps_yes_yes.bp", - useSteps, - useSteps, - Access::READ_LINEAR); - bp4_steps( - "../samples/newlayout_bp4steps_yes_no.bp", - useSteps, - dontUseSteps, - Access::READ_LINEAR); + bp4_steps("../samples/newlayout_bp4steps.bp", bp4, Access::READ_LINEAR); + bp4_steps("../samples/newlayout_bp4steps.bp", bp4); #endif } #endif @@ -6478,10 +6429,7 @@ void chaotic_stream(std::string filename, bool variableBased) std::string jsonConfig = R"( { "adios2": { - "use_group_table": true, - "engine": { - "usesteps": true - } + "use_group_table": true } })"; @@ -6552,7 +6500,8 @@ TEST_CASE("chaotic_stream", "[serial]") void unfinished_iteration_test( std::string const &ext, IterationEncoding encoding, - std::string const &config = "{}") + std::string const &config = "{}", + bool test_linear_access = true) { std::cout << "\n\nTESTING " << ext << "\n\n" << std::endl; std::string file = std::string("../samples/unfinished_iteration") + @@ -6641,8 +6590,11 @@ void unfinished_iteration_test( } }; - tryReading(Access::READ_LINEAR); - tryReading(Access::READ_LINEAR, R"({"defer_iteration_parsing": true})"); + if (test_linear_access) + { + tryReading(Access::READ_LINEAR); + tryReading(Access::READ_LINEAR, R"({"defer_iteration_parsing": true})"); + } if (encoding != IterationEncoding::variableBased) { /* @@ -6661,7 +6613,10 @@ TEST_CASE("unfinished_iteration_test", "[serial]") { #if openPMD_HAVE_ADIOS2 unfinished_iteration_test( - "bp", IterationEncoding::groupBased, R"({"backend": "adios2"})"); + "bp", + IterationEncoding::groupBased, + R"({"backend": "adios2"})", + /* test_linear_access = */ false); #if openPMD_HAS_ADIOS_2_9 unfinished_iteration_test( "bp5", @@ -6822,7 +6777,8 @@ 
void append_mode( std::string const &filename, bool variableBased, ParseMode parseMode, - std::string const &jsonConfig = "{}") + std::string const &jsonConfig = "{}", + bool test_read_linear = true) { if (auxiliary::directory_exists("../samples/append")) { @@ -6905,6 +6861,7 @@ void append_mode( } }; + if (test_read_linear) { switch (parseMode) { @@ -7027,6 +6984,7 @@ void append_mode( write.writeIterations(), std::vector{4, 5}); write.flush(); } + if (test_read_linear) { Series read(filename, Access::READ_LINEAR); switch (parseMode) @@ -7091,22 +7049,14 @@ TEST_CASE("append_mode", "[serial]") { "adios2": { - "use_group_table": false, - "engine": - { - "usesteps" : true - } + "use_group_table": false } })END"; std::string jsonConfigNew = R"END( { "adios2": { - "use_group_table": true, - "engine": - { - "usesteps" : true - } + "use_group_table": true } })END"; if (t == "bp" || t == "bp4" || t == "bp5") @@ -7115,7 +7065,8 @@ TEST_CASE("append_mode", "[serial]") "../samples/append/append_groupbased." + t, false, ParseMode::LinearWithoutSnapshot, - jsonConfigOld); + jsonConfigOld, + /* test_read_linear = */ false); #if openPMD_HAS_ADIOS_2_9 append_mode( "../samples/append/append_groupbased." + t, @@ -7152,11 +7103,7 @@ void append_mode_filebased(std::string const &extension) { "adios2": { - "use_group_table": true, - "engine": - { - "usesteps" : true - } + "use_group_table": true } })END"; auto writeSomeIterations = [](WriteIterations &&writeIterations, From 7296948d146b05fddef355f1a5cde4801a0ad37a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 22 Dec 2023 16:02:36 +0100 Subject: [PATCH 02/37] Add documentation for typical use cases of openpmd-pipe (#1578) * Add documentation for use cases of openpmd-pipe * Update docs/source/analysis/pipe.rst * Move this documentation to cli.rst * Revert "Update docs/source/analysis/pipe.rst" This reverts commit 993b2259a81926be77a894a77b8f809e6e75bb7e. * Revert "Add documentation for use cases of openpmd-pipe" This reverts commit e3e433650a03486f6f9ad2ddc2eae90ec7e2a518. * Headers --> paragraphs --------- Co-authored-by: Axel Huebl --- docs/source/utilities/cli.rst | 147 +++++++++++++++++++++++++++++++--- 1 file changed, 134 insertions(+), 13 deletions(-) diff --git a/docs/source/utilities/cli.rst b/docs/source/utilities/cli.rst index d661793545..6aeef7178d 100644 --- a/docs/source/utilities/cli.rst +++ b/docs/source/utilities/cli.rst @@ -28,24 +28,145 @@ With some ``pip``-based python installations, you might have to run this as a mo Redirect openPMD data from any source to any sink. -The script can be used in parallel via MPI. -Datasets will be split into chunks of equal size to be loaded and written by the single processes. +Any Python-enabled openPMD-api installation with enabled CLI tools comes with a command-line tool named ``openpmd-pipe``. +Naming and use are inspired from the `piping concept `__ known from UNIX shells. -Possible uses include: +With some ``pip``-based python installations, you might have to run this as a module: -* Conversion of a dataset between two openPMD-based backends, such as ADIOS and HDF5. -* Decompression and compression of a dataset. -* Capture of a stream into a file. -* Template for simpler loosely-coupled post-processing scripts. +.. code-block:: bash -The syntax of the command line tool is printed via: + python3 -m openpmd_api.pipe --help -.. code-block:: bash +The fundamental idea is to redirect data from an openPMD data source to another openPMD data sink. 
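In essence, a reading ``Series`` and a writing ``Series`` are opened and iterations, records and datasets are copied from one to the other.
A deliberately simplified C++ sketch of that idea (assuming double-precision mesh data only and ignoring particles and attributes; the actual tool is a Python script that handles all datatypes generically) might look like:

.. code-block:: cpp

   #include <openPMD/openPMD.hpp>

   using namespace openPMD;

   int main()
   {
       Series in("simData_%T.bp", Access::READ_LINEAR);
       Series out("simData_%T.h5", Access::CREATE);

       for (auto inIt : in.readIterations())
       {
           auto outIt = out.writeIterations()[inIt.iterationIndex];
           for (auto &[meshName, mesh] : inIt.meshes)
           {
               for (auto &[compName, rc] : mesh)
               {
                   auto outRc = outIt.meshes[meshName][compName];
                   outRc.resetDataset({rc.getDatatype(), rc.getExtent()});
                   // this sketch assumes the stored data is double precision
                   auto chunk = rc.loadChunk<double>();
                   inIt.seriesFlush(); // make the loaded chunk available
                   outRc.storeChunk(
                       chunk, Offset(rc.getExtent().size(), 0), rc.getExtent());
               }
           }
           outIt.close(); // flushes the copied data to the sink
           inIt.close();
       }
   }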
+This concept becomes useful through the openPMD-api's ability to use different backends in different configurations; ``openpmd-pipe`` can hence be understood as a translation from one I/O configuration to another one. - openpmd-pipe --help -With some ``pip``-based python installations, you might have to run this as a module: +.. note:: -.. code-block:: bash + ``openpmd-pipe`` is (currently) optimized for streaming workflows in order to minimize the number of back-and-forth communications between writer and reader. + All data load operations are issued in a single ``flush()`` per iteration. + Data is loaded directly loaded into backend-provided buffers of the writer (if supported by the writer), where again only one ``flush()`` per iteration is used to put data to disk again. + This means that the peak memory usage will be roughly equivalent to the data size of each single iteration. - python3 -m openpmd_api.pipe --help +The reader Series is configured by the parameters ``--infile`` and ``--inconfig`` which are both forwarded to the ``filepath`` and ``options`` parameters of the ``Series`` constructor. +The writer Series is likewise controlled by ``--outfile`` and ``--outconfig``. + +Use of MPI is controlled by the ``--mpi`` and ``--no-mpi`` switches. +If left unspecified, MPI will be used automatically if the MPI size is greater than 1. + +.. note:: + + Required parameters are ``--infile`` and ``--outfile``. Otherwise also refer to the output of ``--openpmd-pipe --help``. + +When using MPI, each dataset will be sliced into roughly equally-sized hyperslabs along the dimension with highest item count for load distribution across worker ranks. + +If you are interested in further chunk distribution strategies (e.g. node-aware distribution, chunking-aware distribution) that are used/tested on development branches, feel free to contact us, e.g. on GitHub. + +The remainder of this page discusses a select number of use cases and examples for the ``openpmd-pipe`` tool. + + +Conversion between backends +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Converting from ADIOS2 to HDF5: + +.. code:: bash + + $ openpmd-pipe --infile simData_%T.bp --outfile simData_%T.h5 + +Converting from the ADIOS2 BP3 engine to the (newer) ADIOS2 BP5 engine: + +.. code:: bash + + $ openpmd-pipe --infile simData_%T.bp --outfile simData_%T.bp5 + + # or e.g. via inline TOML specification (also possible: JSON) + $ openpmd-pipe --infile simData_%T.bp --outfile output_folder/simData_%T.bp \ + --outconfig 'adios2.engine.type = "bp5"' + # the config can also be read from a file, e.g. --outconfig @cfg.toml + # or --outconfig @cfg.json + +Converting between iteration encodings +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Converting to group-based iteration encoding: + +.. code:: bash + + $ openpmd-pipe --infile simData_%T.h5 --outfile simData.h5 + +Converting to variable-based iteration encoding (not yet feature-complete): + +.. code:: bash + + # e.g. specified via inline JSON + $ openpmd-pipe --infile simData_%T.bp --outfile simData.bp \ + --outconfig '{"iteration_encoding": "variable_based"}' + + +Capturing a stream +^^^^^^^^^^^^^^^^^^ + +Since the openPMD-api also supports streaming/staging I/O transports from ADIOS2, ``openpmd-pipe`` can be used to capture a stream in order to write it to disk. +In the ADIOS2 `SST engine `_, a stream can have any number of readers. +This makes it possible to intercept a stream in a data processing pipeline. + +.. 
code:: bash + + $ cat << EOF > streamParams.toml + [adios2.engine.parameters] + DataTransport = "fabric" + OpenTimeoutSecs = 600 + EOF + + $ openpmd-pipe --infile streamContactFile.sst --inconfig @streamParams.toml \ + --outfile capturedStreamData_%06T.bp + + # Just loading and discarding streaming data, e.g. for performance benchmarking: + $ openpmd-pipe --infile streamContactFile.sst --inconfig @streamParams.toml \ + --outfile null.bp --outconfig 'adios2.engine.type = "nullcore"' + + +Defragmenting a file +^^^^^^^^^^^^^^^^^^^^ + +Due to the file layout of ADIOS2, especially mesh-refinement-enabled simulation codes can create file output that is very strongly fragmented. +Since only one ``load_chunk()`` and one ``store_chunk()`` call is issued per MPI rank, per dataset and per iteration, the file is implicitly defragmented by the backend when passed through ``openpmd-pipe``: + +.. code:: bash + + $ openpmd-pipe --infile strongly_fragmented_%T.bp --outfile defragmented_%T.bp + +Post-hoc compression +^^^^^^^^^^^^^^^^^^^^ + +The openPMD-api can be directly used to compress data already when originally creating it. +When however intending to compress data that has been written without compression enabled, ``openpmd-pipe`` can help: + +.. code:: bash + + $ cat << EOF > compression_cfg.json + { + "adios2": { + "dataset": { + "operators": [ + { + "type": "blosc", + "parameters": { + "clevel": 1, + "doshuffle": "BLOSC_BITSHUFFLE" + } + } + ] + } + } + } + EOF + + $ openpmd-pipe --infile not_compressed_%T.bp --outfile compressed_%T.bp \ + --outconfig @compression_cfg.json + +Starting point for custom transformation and analysis +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``openpmd-pipe`` is a Python script that can serve as basis for custom extensions, e.g. for adding, modifying, transforming or reducing data. The typical use case would be as a building block in a domain-specific data processing pipeline. From a3fe9b75d81aa1df569b9fb73ad67b772638961f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 22 Dec 2023 18:57:59 +0100 Subject: [PATCH 03/37] Remove constexpr from RecordComponent::visit() (#1582) RecordComponent has a virtual base class, so it cannot be constexpr --- include/openPMD/RecordComponent.hpp | 5 ++--- include/openPMD/RecordComponent.tpp | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/include/openPMD/RecordComponent.hpp b/include/openPMD/RecordComponent.hpp index 8b81ae67b0..7fe1651409 100644 --- a/include/openPMD/RecordComponent.hpp +++ b/include/openPMD/RecordComponent.hpp @@ -476,9 +476,8 @@ class RecordComponent : public BaseRecordComponent * be implicitly converted. 
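     *
     * Illustrative sketch (the struct below is hypothetical and not part of
     * the API): a visitor provides a static template method
     * call<T>(RecordComponent &, Args &&...), for example:
     *
     * @code{.cpp}
     * struct DatasetByteSize
     * {
     *     template <typename T>
     *     static std::size_t call(RecordComponent &rc)
     *     {
     *         std::size_t numElements = 1;
     *         for (auto const &extent : rc.getExtent())
     *         {
     *             numElements *= extent;
     *         }
     *         return numElements * sizeof(T);
     *     }
     * };
     * // usage: auto bytes = recordComponent.visit<DatasetByteSize>();
     * @endcode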
*/ template - constexpr auto visit(Args &&...args) - -> decltype(Visitor::template call( - std::declval(), std::forward(args)...)); + auto visit(Args &&...args) -> decltype(Visitor::template call( + std::declval(), std::forward(args)...)); static constexpr char const *const SCALAR = "\vScalar"; diff --git a/include/openPMD/RecordComponent.tpp b/include/openPMD/RecordComponent.tpp index e34013327f..e8ba6006ab 100644 --- a/include/openPMD/RecordComponent.tpp +++ b/include/openPMD/RecordComponent.tpp @@ -397,7 +397,7 @@ namespace detail } // namespace detail template -constexpr auto RecordComponent::visit(Args &&...args) +auto RecordComponent::visit(Args &&...args) -> decltype(Visitor::template call( std::declval(), std::forward(args)...)) { From 71512ffc0d526d96ebb195a511032a65f7218117 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 7 Jul 2023 16:03:28 +0200 Subject: [PATCH 04/37] Plumberwork for MPI communicator in JSON backend --- include/openPMD/IO/JSON/JSONIOHandler.hpp | 15 ++++++++++++++- include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp | 16 ++++++++++++++++ src/IO/AbstractIOHandlerHelper.cpp | 19 +++++++++++++++++-- src/IO/JSON/JSONIOHandler.cpp | 18 ++++++++++++++++-- src/IO/JSON/JSONIOHandlerImpl.cpp | 17 +++++++++++++++++ 5 files changed, 80 insertions(+), 5 deletions(-) diff --git a/include/openPMD/IO/JSON/JSONIOHandler.hpp b/include/openPMD/IO/JSON/JSONIOHandler.hpp index 7fdea5b6f0..7cb6870f5b 100644 --- a/include/openPMD/IO/JSON/JSONIOHandler.hpp +++ b/include/openPMD/IO/JSON/JSONIOHandler.hpp @@ -24,17 +24,30 @@ #include "openPMD/IO/AbstractIOHandler.hpp" #include "openPMD/IO/JSON/JSONIOHandlerImpl.hpp" +#if openPMD_HAVE_MPI +#include +#endif + namespace openPMD { class JSONIOHandler : public AbstractIOHandler { public: JSONIOHandler( - std::string const &path, + std::string path, + Access at, + openPMD::json::TracingJSON config, + JSONIOHandlerImpl::FileFormat, + std::string originalExtension); +#if openPMD_HAVE_MPI + JSONIOHandler( + std::string path, Access at, + MPI_Comm, openPMD::json::TracingJSON config, JSONIOHandlerImpl::FileFormat, std::string originalExtension); +#endif ~JSONIOHandler() override; diff --git a/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp b/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp index 5ce9d057c3..81dc9c39f3 100644 --- a/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp +++ b/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp @@ -31,6 +31,9 @@ #include #include +#if openPMD_HAVE_MPI +#include +#endif #include #include @@ -167,6 +170,15 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl FileFormat, std::string originalExtension); +#if openPMD_HAVE_MPI + JSONIOHandlerImpl( + AbstractIOHandler *, + MPI_Comm, + openPMD::json::TracingJSON config, + FileFormat, + std::string originalExtension); +#endif + ~JSONIOHandlerImpl() override; void @@ -230,6 +242,10 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl std::future flush(); private: +#if openPMD_HAVE_MPI + std::optional m_communicator; +#endif + using FILEHANDLE = std::fstream; // map each Writable to its associated file diff --git a/src/IO/AbstractIOHandlerHelper.cpp b/src/IO/AbstractIOHandlerHelper.cpp index 699dfd3619..8576343e5d 100644 --- a/src/IO/AbstractIOHandlerHelper.cpp +++ b/src/IO/AbstractIOHandlerHelper.cpp @@ -125,8 +125,23 @@ std::unique_ptr createIOHandler( "ssc", std::move(originalExtension)); case Format::JSON: - throw error::WrongAPIUsage( - "JSON backend not available in parallel openPMD."); + return constructIOHandler( + "JSON", + path, + access, + 
comm, + std::move(options), + JSONIOHandlerImpl::FileFormat::Json, + std::move(originalExtension)); + case Format::TOML: + return constructIOHandler( + "JSON", + path, + access, + comm, + std::move(options), + JSONIOHandlerImpl::FileFormat::Toml, + std::move(originalExtension)); default: throw error::WrongAPIUsage( "Unknown file format! Did you specify a file ending? Specified " diff --git a/src/IO/JSON/JSONIOHandler.cpp b/src/IO/JSON/JSONIOHandler.cpp index 041b236340..d2a6217eb5 100644 --- a/src/IO/JSON/JSONIOHandler.cpp +++ b/src/IO/JSON/JSONIOHandler.cpp @@ -26,15 +26,29 @@ namespace openPMD JSONIOHandler::~JSONIOHandler() = default; JSONIOHandler::JSONIOHandler( - std::string const &path, + std::string path, Access at, openPMD::json::TracingJSON jsonCfg, JSONIOHandlerImpl::FileFormat format, std::string originalExtension) - : AbstractIOHandler{path, at} + : AbstractIOHandler{std::move(path), at} , m_impl{this, std::move(jsonCfg), format, std::move(originalExtension)} {} +#if openPMD_HAVE_MPI +JSONIOHandler::JSONIOHandler( + std::string path, + Access at, + MPI_Comm comm, + openPMD::json::TracingJSON jsonCfg, + JSONIOHandlerImpl::FileFormat format, + std::string originalExtension) + : AbstractIOHandler{std::move(path), at} + , m_impl{JSONIOHandlerImpl{ + this, comm, std::move(jsonCfg), format, std::move(originalExtension)}} +{} +#endif + std::future JSONIOHandler::flush(internal::ParsedFlushParams &) { return m_impl.flush(); diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index a4e1bb39ab..5112b0bb2b 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -23,6 +23,8 @@ #include "openPMD/Datatype.hpp" #include "openPMD/DatatypeHelpers.hpp" #include "openPMD/Error.hpp" +#include "openPMD/IO/AbstractIOHandler.hpp" +#include "openPMD/IO/AbstractIOHandlerImpl.hpp" #include "openPMD/auxiliary/Filesystem.hpp" #include "openPMD/auxiliary/Memory.hpp" #include "openPMD/auxiliary/StringManip.hpp" @@ -133,6 +135,21 @@ JSONIOHandlerImpl::JSONIOHandlerImpl( , m_originalExtension{std::move(originalExtension)} {} +#if openPMD_HAVE_MPI +JSONIOHandlerImpl::JSONIOHandlerImpl( + AbstractIOHandler *handler, + MPI_Comm comm, + // NOLINTNEXTLINE(performance-unnecessary-value-param) + [[maybe_unused]] openPMD::json::TracingJSON config, + FileFormat format, + std::string originalExtension) + : AbstractIOHandlerImpl(handler) + , m_communicator{comm} + , m_fileFormat{format} + , m_originalExtension{std::move(originalExtension)} +{} +#endif + JSONIOHandlerImpl::~JSONIOHandlerImpl() = default; std::future JSONIOHandlerImpl::flush() From f918e78d6d9132bec7eb58a69b66bc1f7f804a84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 7 Jul 2023 16:12:58 +0200 Subject: [PATCH 05/37] Parallel reading --- src/IO/JSON/JSONIOHandlerImpl.cpp | 70 +++++++++++++++++++++++++------ src/auxiliary/Filesystem.cpp | 3 +- 2 files changed, 60 insertions(+), 13 deletions(-) diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index 5112b0bb2b..f6887dc34c 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -26,11 +26,13 @@ #include "openPMD/IO/AbstractIOHandler.hpp" #include "openPMD/IO/AbstractIOHandlerImpl.hpp" #include "openPMD/auxiliary/Filesystem.hpp" +#include "openPMD/auxiliary/JSON_internal.hpp" #include "openPMD/auxiliary/Memory.hpp" #include "openPMD/auxiliary/StringManip.hpp" #include "openPMD/auxiliary/TypeTraits.hpp" #include "openPMD/backend/Writable.hpp" +#include 
#include #include @@ -1260,21 +1262,65 @@ JSONIOHandlerImpl::obtainJsonContents(File const &file) { return it->second; } + auto serialImplementation = [&file, this]() { + auto [fh, fh_with_precision, _] = + getFilehandle(file, Access::READ_ONLY); + (void)_; + std::shared_ptr res = + std::make_shared(); + switch (m_fileFormat) + { + case FileFormat::Json: + *fh_with_precision >> *res; + break; + case FileFormat::Toml: + *res = openPMD::json::tomlToJson( + toml::parse(*fh_with_precision, *file)); + break; + } + VERIFY(fh->good(), "[JSON] Failed reading from a file."); + return res; + }; + auto parallelImplementation = [&file, this](MPI_Comm comm) { + auto path = fullPath(*file); + std::string collectivelyReadRawData = + auxiliary::collective_file_read(path, comm); + std::shared_ptr res = + std::make_shared(); + switch (m_fileFormat) + { + case FileFormat::Json: + *res = nlohmann::json::parse(collectivelyReadRawData); + break; + case FileFormat::Toml: + std::istringstream istream( + collectivelyReadRawData.c_str(), + std::ios_base::binary | std::ios_base::in); + auto as_toml = toml::parse( + istream >> std::setprecision( + std::numeric_limits::digits10 + 1), + *file); + *res = openPMD::json::tomlToJson(as_toml); + break; + } + return res; + }; // read from file - auto [fh, fh_with_precision, _] = getFilehandle(file, Access::READ_ONLY); - (void)_; - std::shared_ptr res = std::make_shared(); - switch (m_fileFormat) +#if openPMD_HAVE_MPI + std::shared_ptr res; + if (m_communicator.has_value()) { - case FileFormat::Json: - *fh_with_precision >> *res; - break; - case FileFormat::Toml: - *res = - openPMD::json::tomlToJson(toml::parse(*fh_with_precision, *file)); - break; + res = parallelImplementation(m_communicator.value()); } - VERIFY(fh->good(), "[JSON] Failed reading from a file."); + else + { + res = serialImplementation(); + } + +#else + auto res = serialImplementation(); +#endif + m_jsonVals.emplace(file, res); return res; } diff --git a/src/auxiliary/Filesystem.cpp b/src/auxiliary/Filesystem.cpp index cce80b9d17..564d266ee3 100644 --- a/src/auxiliary/Filesystem.cpp +++ b/src/auxiliary/Filesystem.cpp @@ -195,7 +195,8 @@ std::string collective_file_read(std::string const &path, MPI_Comm comm) if (!handle.good()) { throw std::runtime_error( - "Failed reading JSON config from file " + path + "."); + "[collective_file_read] Failed acessing file '" + path + + "' on MPI rank 0."); } stringLength = res.size() + 1; } From 8f1d2a24b149470e11bbaac0ba1933fe48ecdaa7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 10 Jul 2023 14:46:09 +0200 Subject: [PATCH 06/37] ... 
and writing --- include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp | 3 +- src/IO/JSON/JSONIOHandlerImpl.cpp | 81 ++++++++++++++----- 2 files changed, 65 insertions(+), 19 deletions(-) diff --git a/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp b/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp index 81dc9c39f3..9598602869 100644 --- a/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp +++ b/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp @@ -339,7 +339,8 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl // write to disk the json contents associated with the file // remove from m_dirty if unsetDirty == true - void putJsonContents(File const &, bool unsetDirty = true); + auto putJsonContents(File const &, bool unsetDirty = true) + -> decltype(m_jsonVals)::iterator; // figure out the file position of the writable // (preferring the parent's file position) and extend it diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index f6887dc34c..fba3fd7d8a 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -33,6 +33,7 @@ #include "openPMD/backend/Writable.hpp" #include +#include #include #include @@ -631,7 +632,11 @@ void JSONIOHandlerImpl::closeFile( auto fileIterator = m_files.find(writable); if (fileIterator != m_files.end()) { - putJsonContents(fileIterator->second); + auto it = putJsonContents(fileIterator->second); + if (it != m_jsonVals.end()) + { + m_jsonVals.erase(it); + } m_dirty.erase(fileIterator->second); // do not invalidate the file // it still exists, it is just not open @@ -1262,6 +1267,7 @@ JSONIOHandlerImpl::obtainJsonContents(File const &file) { return it->second; } + // read from file auto serialImplementation = [&file, this]() { auto [fh, fh_with_precision, _] = getFilehandle(file, Access::READ_ONLY); @@ -1281,6 +1287,7 @@ JSONIOHandlerImpl::obtainJsonContents(File const &file) VERIFY(fh->good(), "[JSON] Failed reading from a file."); return res; }; +#if openPMD_HAVE_MPI auto parallelImplementation = [&file, this](MPI_Comm comm) { auto path = fullPath(*file); std::string collectivelyReadRawData = @@ -1305,8 +1312,6 @@ JSONIOHandlerImpl::obtainJsonContents(File const &file) } return res; }; - // read from file -#if openPMD_HAVE_MPI std::shared_ptr res; if (m_communicator.has_value()) { @@ -1332,10 +1337,10 @@ nlohmann::json &JSONIOHandlerImpl::obtainJsonContents(Writable *writable) return (*obtainJsonContents(file))[filePosition->id]; } -void JSONIOHandlerImpl::putJsonContents( +auto JSONIOHandlerImpl::putJsonContents( File const &filename, bool unsetDirty // = true -) + ) -> decltype(m_jsonVals)::iterator { VERIFY_ALWAYS( filename.valid(), @@ -1343,29 +1348,69 @@ void JSONIOHandlerImpl::putJsonContents( auto it = m_jsonVals.find(filename); if (it != m_jsonVals.end()) { - auto [fh, _, fh_with_precision] = - getFilehandle(filename, Access::CREATE); - (void)_; (*it->second)["platform_byte_widths"] = platformSpecifics(); - switch (m_fileFormat) + auto writeSingleFile = [this, &it](std::string const &writeThisFile) { + auto [fh, _, fh_with_precision] = + getFilehandle(File(writeThisFile), Access::CREATE); + (void)_; + + switch (m_fileFormat) + { + case FileFormat::Json: + *fh_with_precision << *it->second << std::endl; + break; + case FileFormat::Toml: + *fh_with_precision << openPMD::json::jsonToToml(*it->second) + << std::endl; + break; + } + + VERIFY(fh->good(), "[JSON] Failed writing data to disk.") + }; + + auto serialImplementation = [&filename, &writeSingleFile]() { + writeSingleFile(*filename); + }; + +#if 
openPMD_HAVE_MPI + auto parallelImplementation = + [this, &filename, &writeSingleFile](MPI_Comm comm) { + auto path = fullPath(*filename); + auto dirpath = path + ".parallel"; + if (!auxiliary::create_directories(dirpath)) + { + throw std::runtime_error( + "Failed creating directory '" + dirpath + + "' for parallel JSON output"); + } + int rank = 0; + MPI_Comm_rank(comm, &rank); + std::stringstream subfilePath; + subfilePath << dirpath << "/mpi_rank_" << std::setw(6) + << std::setfill('0') << rank << ".json"; + writeSingleFile(subfilePath.str()); + }; + + std::shared_ptr res; + if (m_communicator.has_value()) { - case FileFormat::Json: - *fh_with_precision << *it->second << std::endl; - break; - case FileFormat::Toml: - *fh_with_precision << openPMD::json::jsonToToml(*it->second) - << std::endl; - break; + parallelImplementation(m_communicator.value()); + } + else + { + serialImplementation(); } - VERIFY(fh->good(), "[JSON] Failed writing data to disk.") - m_jsonVals.erase(it); +#else + serialImplementation(); +#endif if (unsetDirty) { m_dirty.erase(filename); } } + return it; } std::shared_ptr JSONIOHandlerImpl::setAndGetFilePosition( From ee32cb078936aef56a1be6d2e1e5129a4fefc8e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 10 Jul 2023 15:18:26 +0200 Subject: [PATCH 07/37] Set padding according to MPI rank --- src/IO/JSON/JSONIOHandlerImpl.cpp | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index fba3fd7d8a..91e9eac6fb 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -1374,8 +1374,24 @@ auto JSONIOHandlerImpl::putJsonContents( }; #if openPMD_HAVE_MPI + auto num_digits = [](unsigned n) -> unsigned { + constexpr auto max = std::numeric_limits::max(); + unsigned base_10 = 1; + unsigned res = 1; + while (base_10 < max) + { + base_10 *= 10; + if (n / base_10 == 0) + { + return res; + } + ++res; + } + return res; + }; + auto parallelImplementation = - [this, &filename, &writeSingleFile](MPI_Comm comm) { + [this, &filename, &writeSingleFile, &num_digits](MPI_Comm comm) { auto path = fullPath(*filename); auto dirpath = path + ".parallel"; if (!auxiliary::create_directories(dirpath)) @@ -1384,10 +1400,12 @@ auto JSONIOHandlerImpl::putJsonContents( "Failed creating directory '" + dirpath + "' for parallel JSON output"); } - int rank = 0; + int rank = 0, size = 0; MPI_Comm_rank(comm, &rank); + MPI_Comm_size(comm, &size); std::stringstream subfilePath; - subfilePath << dirpath << "/mpi_rank_" << std::setw(6) + subfilePath << dirpath << "/mpi_rank_" + << std::setw(num_digits(size - 1)) << std::setfill('0') << rank << ".json"; writeSingleFile(subfilePath.str()); }; From 5dbc1ae06afbb8d0fe630a8d46add958db89fa07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 10 Jul 2023 15:49:34 +0200 Subject: [PATCH 08/37] Write README.txt file --- include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp | 1 + src/IO/JSON/JSONIOHandlerImpl.cpp | 67 ++++++++++++++----- 2 files changed, 51 insertions(+), 17 deletions(-) diff --git a/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp b/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp index 9598602869..da7e296d59 100644 --- a/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp +++ b/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp @@ -73,6 +73,7 @@ struct File std::string name; bool valid = true; + bool printedReadmeWarningAlready = false; }; std::shared_ptr fileState; diff --git 
a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index 91e9eac6fb..8f775832be 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -1390,25 +1390,58 @@ auto JSONIOHandlerImpl::putJsonContents( return res; }; - auto parallelImplementation = - [this, &filename, &writeSingleFile, &num_digits](MPI_Comm comm) { - auto path = fullPath(*filename); - auto dirpath = path + ".parallel"; - if (!auxiliary::create_directories(dirpath)) + auto parallelImplementation = [this, + &filename, + &writeSingleFile, + &num_digits](MPI_Comm comm) { + auto path = fullPath(*filename); + auto dirpath = path + ".parallel"; + if (!auxiliary::create_directories(dirpath)) + { + throw std::runtime_error( + "Failed creating directory '" + dirpath + + "' for parallel JSON output"); + } + int rank = 0, size = 0; + MPI_Comm_rank(comm, &rank); + MPI_Comm_size(comm, &size); + std::stringstream subfilePath; + subfilePath << dirpath << "/mpi_rank_" + << std::setw(num_digits(size - 1)) << std::setfill('0') + << rank << ".json"; + writeSingleFile(subfilePath.str()); + if (rank == 0) + { + constexpr char const *readme_msg = R"( +This folder has been created by a parallel instance of the JSON backend in +openPMD. There is one JSON file for each parallel writer MPI rank. +The parallel JSON backend performs no metadata or data aggregation at all. + +This functionality is intended mainly for debugging and prototyping workflows. +There is no support in the openPMD-api for reading this folder as a single +dataset. For reading purposes, either pick a single .json file and read that, or +merge the .json files somehow (no tooling provided for this (yet)). +)"; + std::fstream readme_file; + readme_file.open( + dirpath + "/README.txt", + std::ios_base::out | std::ios_base::trunc); + readme_file << readme_msg + 1; + readme_file.close(); + if (!readme_file.good() && + !filename.fileState->printedReadmeWarningAlready) { - throw std::runtime_error( - "Failed creating directory '" + dirpath + - "' for parallel JSON output"); + std::cerr + << "[Warning] Something went wrong in trying to create " + "README file at '" + << dirpath + << "/README.txt'. Will ignore and continue. 
The README " + "message would have been:\n----------\n" + << readme_msg + 1 << "----------" << std::endl; + filename.fileState->printedReadmeWarningAlready = true; } - int rank = 0, size = 0; - MPI_Comm_rank(comm, &rank); - MPI_Comm_size(comm, &size); - std::stringstream subfilePath; - subfilePath << dirpath << "/mpi_rank_" - << std::setw(num_digits(size - 1)) - << std::setfill('0') << rank << ".json"; - writeSingleFile(subfilePath.str()); - }; + } + }; std::shared_ptr res; if (m_communicator.has_value()) From 7fdfc54ed6124631c66fa81e79cdffc6db6365cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 22 Sep 2023 17:55:53 +0200 Subject: [PATCH 09/37] Bug fix: don't double prepend base dir --- src/IO/JSON/JSONIOHandlerImpl.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index 8f775832be..8a70d4ca97 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -1406,7 +1406,8 @@ auto JSONIOHandlerImpl::putJsonContents( MPI_Comm_rank(comm, &rank); MPI_Comm_size(comm, &size); std::stringstream subfilePath; - subfilePath << dirpath << "/mpi_rank_" + // writeSingleFile will prepend the base dir + subfilePath << *filename << ".parallel/mpi_rank_" << std::setw(num_digits(size - 1)) << std::setfill('0') << rank << ".json"; writeSingleFile(subfilePath.str()); From d859f3a711969bff92688b0663b5304240b68c51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 12 Oct 2023 09:41:24 +0200 Subject: [PATCH 10/37] Test parallel output in openpmd-pipe test --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1d81e94d83..f76d15d8e5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1369,7 +1369,7 @@ if(openPMD_BUILD_TESTING) --outfile \ ../samples/git-sample/thetaMode/data_%T.bp && \ \ - ${Python_EXECUTABLE} \ + ${MPI_TEST_EXE} ${Python_EXECUTABLE} \ ${openPMD_RUNTIME_OUTPUT_DIRECTORY}/openpmd-pipe \ --infile ../samples/git-sample/thetaMode/data_%T.bp \ --outfile ../samples/git-sample/thetaMode/data%T.json \ From 561736cbfb904912a0156b90128a3012fe4363db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 12 Oct 2023 11:01:22 +0200 Subject: [PATCH 11/37] Bug fix: use mpi_rank_%i.toml when writing to TOML --- src/IO/JSON/JSONIOHandlerImpl.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index 8a70d4ca97..5518b1b98f 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -1409,7 +1409,16 @@ auto JSONIOHandlerImpl::putJsonContents( // writeSingleFile will prepend the base dir subfilePath << *filename << ".parallel/mpi_rank_" << std::setw(num_digits(size - 1)) << std::setfill('0') - << rank << ".json"; + << rank << [&]() { + switch (m_fileFormat) + { + case FileFormat::Json: + return ".json"; + case FileFormat::Toml: + return ".toml"; + } + throw std::runtime_error("Unreachable!"); + }(); writeSingleFile(subfilePath.str()); if (rank == 0) { From c7cc20a4660a78f57729d8d624736670b43c1275 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 24 Nov 2023 13:37:12 +0100 Subject: [PATCH 12/37] Refactor `if` statement --- src/IO/JSON/JSONIOHandlerImpl.cpp | 104 +++++++++++++++--------------- 1 file changed, 52 insertions(+), 52 deletions(-) diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp 
b/src/IO/JSON/JSONIOHandlerImpl.cpp index 5518b1b98f..9910714c4b 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -1346,54 +1346,55 @@ auto JSONIOHandlerImpl::putJsonContents( filename.valid(), "[JSON] File has been overwritten/deleted before writing"); auto it = m_jsonVals.find(filename); - if (it != m_jsonVals.end()) + if (it == m_jsonVals.end()) { - (*it->second)["platform_byte_widths"] = platformSpecifics(); + return it; + } - auto writeSingleFile = [this, &it](std::string const &writeThisFile) { - auto [fh, _, fh_with_precision] = - getFilehandle(File(writeThisFile), Access::CREATE); - (void)_; + (*it->second)["platform_byte_widths"] = platformSpecifics(); - switch (m_fileFormat) - { - case FileFormat::Json: - *fh_with_precision << *it->second << std::endl; - break; - case FileFormat::Toml: - *fh_with_precision << openPMD::json::jsonToToml(*it->second) - << std::endl; - break; - } + auto writeSingleFile = [this, &it](std::string const &writeThisFile) { + auto [fh, _, fh_with_precision] = + getFilehandle(File(writeThisFile), Access::CREATE); + (void)_; - VERIFY(fh->good(), "[JSON] Failed writing data to disk.") - }; + switch (m_fileFormat) + { + case FileFormat::Json: + *fh_with_precision << *it->second << std::endl; + break; + case FileFormat::Toml: + *fh_with_precision << openPMD::json::jsonToToml(*it->second) + << std::endl; + break; + } - auto serialImplementation = [&filename, &writeSingleFile]() { - writeSingleFile(*filename); - }; + VERIFY(fh->good(), "[JSON] Failed writing data to disk.") + }; + + auto serialImplementation = [&filename, &writeSingleFile]() { + writeSingleFile(*filename); + }; #if openPMD_HAVE_MPI - auto num_digits = [](unsigned n) -> unsigned { - constexpr auto max = std::numeric_limits::max(); - unsigned base_10 = 1; - unsigned res = 1; - while (base_10 < max) + auto num_digits = [](unsigned n) -> unsigned { + constexpr auto max = std::numeric_limits::max(); + unsigned base_10 = 1; + unsigned res = 1; + while (base_10 < max) + { + base_10 *= 10; + if (n / base_10 == 0) { - base_10 *= 10; - if (n / base_10 == 0) - { - return res; - } - ++res; + return res; } - return res; - }; + ++res; + } + return res; + }; - auto parallelImplementation = [this, - &filename, - &writeSingleFile, - &num_digits](MPI_Comm comm) { + auto parallelImplementation = + [this, &filename, &writeSingleFile, &num_digits](MPI_Comm comm) { auto path = fullPath(*filename); auto dirpath = path + ".parallel"; if (!auxiliary::create_directories(dirpath)) @@ -1453,23 +1454,22 @@ merge the .json files somehow (no tooling provided for this (yet)). 
} }; - std::shared_ptr res; - if (m_communicator.has_value()) - { - parallelImplementation(m_communicator.value()); - } - else - { - serialImplementation(); - } + std::shared_ptr res; + if (m_communicator.has_value()) + { + parallelImplementation(m_communicator.value()); + } + else + { + serialImplementation(); + } #else - serialImplementation(); + serialImplementation(); #endif - if (unsetDirty) - { - m_dirty.erase(filename); - } + if (unsetDirty) + { + m_dirty.erase(filename); } return it; } From 38fd114eeb613c49266aa4e3ab0656ab65df35cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 24 Nov 2023 14:33:37 +0100 Subject: [PATCH 13/37] Add documentation --- docs/source/backends/json.rst | 37 +++++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/docs/source/backends/json.rst b/docs/source/backends/json.rst index 48ec6b1f44..bbae92aaf6 100644 --- a/docs/source/backends/json.rst +++ b/docs/source/backends/json.rst @@ -92,7 +92,6 @@ propagate the exception thrown by Niels Lohmann's library. The (keys) names ``"attributes"``, ``"data"`` and ``"datatype"`` are reserved and must not be used for base/mesh/particles path, records and their components. -A parallel (i.e. MPI) implementation is *not* available. TOML Restrictions ----------------- @@ -106,7 +105,41 @@ TOML does not support null values. The (keys) names ``"attributes"``, ``"data"`` and ``"datatype"`` are reserved and must not be used for base/mesh/particles path, records and their components. -A parallel (i.e. MPI) implementation is *not* available. + +Using in parallel (MPI) +----------------------- + +Parallel I/O is not a first-class citizen in the JSON and TOML backends, and neither backend will "go out of its way" to support parallel workflows. + +However there is a rudimentary form of read and write support in parallel: + +Parallel reading +................ + +In order not to overload the parallel filesystem with parallel reads, read access to JSON datasets is done by rank 0 and then broadcast to all other ranks. +Note that there is no granularity whatsoever in reading a JSON file. +A JSON file is always read into memory and broadcast to all other ranks in its entirety. + +Parallel writing +................ + +When executed in an MPI context, the JSON/TOML backends will not directly output a single text file, but instead a folder containing one file per MPI rank. +Neither backend will perform any data aggregation at all. + +.. note:: + + The parallel write support of the JSON/TOML backends is intended mainly for debugging and prototyping workflows. + +The folder will use the specified Series name, but append the postfix ``.parallel``. +(This is a deliberate indication that this folder cannot directly be opened again by the openPMD-api as a JSON/TOML dataset.) +This folder contains for each MPI rank *i* a file ``mpi_rank_.json`` (resp. ``mpi_rank_.toml``), containing the serial output of that rank. +A ``README.txt`` with basic usage instructions is also written. + +.. note:: + + There is no direct support in the openPMD-api to read a JSON/TOML dataset written in this parallel fashion. The single files (e.g. ``data.json.parallel/mpi_rank_0.json``) are each valid openPMD files and can be read separately, however. + + Note that the auxiliary function ``json::merge()`` (or in Python ``openpmd_api.merge_json()``) is not adequate for merging the single JSON/TOML files back into one, since it does not merge anything below the array level. 
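+
+For illustration, a minimal parallel write sketch is shown below (assuming an MPI-enabled build of the openPMD-api with this feature; the file name, mesh name and values are purely illustrative):
+
+.. code-block:: cpp
+
+   #include <openPMD/openPMD.hpp>
+
+   #include <mpi.h>
+
+   #include <cstdint>
+   #include <vector>
+
+   int main(int argc, char **argv)
+   {
+       MPI_Init(&argc, &argv);
+       int rank = 0, size = 1;
+       MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+       MPI_Comm_size(MPI_COMM_WORLD, &size);
+       {
+           // creates ../samples/parallel_write.json.parallel/ containing
+           // one mpi_rank_<i>.json per MPI rank plus a README.txt
+           openPMD::Series series(
+               "../samples/parallel_write.json",
+               openPMD::Access::CREATE,
+               MPI_COMM_WORLD);
+           auto E_x = series.writeIterations()[0].meshes["E"]["x"];
+           E_x.resetDataset(
+               {openPMD::Datatype::DOUBLE, {std::uint64_t(size), 10}});
+           // each rank contributes one row of the dataset
+           std::vector<double> local(10, double(rank));
+           E_x.storeChunk(local, {std::uint64_t(rank), 0}, {1, 10});
+           series.flush();
+       } // Series is flushed and closed at scope exit
+       MPI_Finalize();
+   }
+
+Since no aggregation is performed, each rank's file simply contains that rank's local (in-memory) view of the Series: typically the full metadata tree, but only the data chunks written by that rank.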
Example From 0b755623c2f76d049a4c09a3642f7d1adfdb35ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 4 Aug 2023 11:55:37 +0200 Subject: [PATCH 14/37] Introduce dataset template mode to JSON backend --- CMakeLists.txt | 1 + include/openPMD/IO/JSON/JSONIOHandler.hpp | 1 + include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp | 23 +- src/IO/JSON/JSONIOHandlerImpl.cpp | 362 ++++++++++++++---- test/SerialIOTest.cpp | 36 +- 5 files changed, 341 insertions(+), 82 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f76d15d8e5..fffcf395c7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -761,6 +761,7 @@ set(openPMD_EXAMPLE_NAMES 10_streaming_read 12_span_write 13_write_dynamic_configuration + 14_toml_template ) set(openPMD_PYTHON_EXAMPLE_NAMES 2_read_serial diff --git a/include/openPMD/IO/JSON/JSONIOHandler.hpp b/include/openPMD/IO/JSON/JSONIOHandler.hpp index 7cb6870f5b..e22fdb93d1 100644 --- a/include/openPMD/IO/JSON/JSONIOHandler.hpp +++ b/include/openPMD/IO/JSON/JSONIOHandler.hpp @@ -23,6 +23,7 @@ #include "openPMD/IO/AbstractIOHandler.hpp" #include "openPMD/IO/JSON/JSONIOHandlerImpl.hpp" +#include "openPMD/auxiliary/JSON_internal.hpp" #if openPMD_HAVE_MPI #include diff --git a/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp b/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp index da7e296d59..75fe7ab8a3 100644 --- a/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp +++ b/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp @@ -263,8 +263,27 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl */ FileFormat m_fileFormat{}; + std::string backendConfigKey() const; + + /* + * First return value: The location of the JSON value (either "json" or + * "toml") Second return value: The value that was maybe found at this place + */ + std::pair> + getBackendConfig(openPMD::json::TracingJSON &) const; + std::string m_originalExtension; + enum class IOMode + { + Dataset, + Template + }; + + IOMode m_mode = IOMode::Dataset; + + IOMode retrieveDatasetMode(openPMD::json::TracingJSON &config) const; + // HELPER FUNCTIONS // will use the IOHandler to retrieve the correct directory. 
@@ -311,7 +330,7 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl // essentially: m_i = \prod_{j=0}^{i-1} extent_j static Extent getMultiplicators(Extent const &extent); - static Extent getExtent(nlohmann::json &j); + static std::pair getExtent(nlohmann::json &j); // remove single '/' in the beginning and end of a string static std::string removeSlashes(std::string); @@ -369,7 +388,7 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl // check whether the json reference contains a valid dataset template - void verifyDataset(Param const ¶meters, nlohmann::json &); + IOMode verifyDataset(Param const ¶meters, nlohmann::json &); static nlohmann::json platformSpecifics(); diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index 9910714c4b..58ba4462b1 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -125,18 +125,120 @@ namespace } return *accum_ptr; } + + void warnUnusedJson(openPMD::json::TracingJSON const &jsonConfig) + { + auto shadow = jsonConfig.invertShadow(); + if (shadow.size() > 0) + { + switch (jsonConfig.originallySpecifiedAs) + { + case openPMD::json::SupportedLanguages::JSON: + std::cerr << "Warning: parts of the backend configuration for " + "JSON/TOML backend remain unused:\n" + << shadow << std::endl; + break; + case openPMD::json::SupportedLanguages::TOML: { + auto asToml = openPMD::json::jsonToToml(shadow); + std::cerr << "Warning: parts of the backend configuration for " + "JSON/TOML backend remain unused:\n" + << asToml << std::endl; + break; + } + } + } + } } // namespace +auto JSONIOHandlerImpl::retrieveDatasetMode( + openPMD::json::TracingJSON &config) const -> IOMode +{ + IOMode res = m_mode; + if (auto [configLocation, maybeConfig] = getBackendConfig(config); + maybeConfig.has_value()) + { + auto jsonConfig = maybeConfig.value(); + if (jsonConfig.json().contains("dataset")) + { + auto datasetConfig = jsonConfig["dataset"]; + if (datasetConfig.json().contains("mode")) + { + auto modeOption = openPMD::json::asLowerCaseStringDynamic( + datasetConfig["mode"].json()); + if (!modeOption.has_value()) + { + throw error::BackendConfigSchema( + {configLocation, "mode"}, + "Invalid value of non-string type (accepted values are " + "'dataset' and 'template'."); + } + auto mode = modeOption.value(); + if (mode == "dataset") + { + res = IOMode::Dataset; + } + else if (mode == "template") + { + res = IOMode::Template; + } + else + { + throw error::BackendConfigSchema( + {configLocation, "dataset", "mode"}, + "Invalid value: '" + mode + + "' (accepted values are 'dataset' and 'template'."); + } + } + } + } + return res; +} + +std::string JSONIOHandlerImpl::backendConfigKey() const +{ + switch (m_fileFormat) + { + case FileFormat::Json: + return "json"; + case FileFormat::Toml: + return "toml"; + } + throw std::runtime_error("Unreachable!"); +} + +std::pair> +JSONIOHandlerImpl::getBackendConfig(openPMD::json::TracingJSON &config) const +{ + std::string configLocation = backendConfigKey(); + if (config.json().contains(configLocation)) + { + return std::make_pair( + std::move(configLocation), config[configLocation]); + } + else + { + return std::make_pair(std::move(configLocation), std::nullopt); + } +} + JSONIOHandlerImpl::JSONIOHandlerImpl( AbstractIOHandler *handler, - // NOLINTNEXTLINE(performance-unnecessary-value-param) - [[maybe_unused]] openPMD::json::TracingJSON config, + openPMD::json::TracingJSON config, FileFormat format, std::string originalExtension) : AbstractIOHandlerImpl(handler) , 
m_fileFormat{format} , m_originalExtension{std::move(originalExtension)} -{} +{ + m_mode = retrieveDatasetMode(config); + + if (auto [_, backendConfig] = getBackendConfig(config); + backendConfig.has_value()) + { + (void)_; + warnUnusedJson(backendConfig.value()); + } +} #if openPMD_HAVE_MPI JSONIOHandlerImpl::JSONIOHandlerImpl( @@ -280,6 +382,19 @@ void JSONIOHandlerImpl::createDataset( "[JSON] Creating a dataset in a file opened as read only is not " "possible."); } + + openPMD::json::TracingJSON config = openPMD::json::parseOptions( + parameter.options, /* considerFiles = */ false); + // Retrieves mode from dataset-specific configuration, falls back to global + // value if not defined + IOMode localMode = retrieveDatasetMode(config); + + parameter.warnUnusedParameters( + config, + backendConfigKey(), + "Warning: parts of the dataset-specific backend configuration for " + "JSON/TOML backend remain unused"); + if (!writable->written) { /* Sanitize name */ @@ -296,23 +411,41 @@ void JSONIOHandlerImpl::createDataset( setAndGetFilePosition(writable, name); auto &dset = jsonVal[name]; dset["datatype"] = datatypeToString(parameter.dtype); - auto extent = parameter.extent; - switch (parameter.dtype) + + switch (localMode) { - case Datatype::CFLOAT: - case Datatype::CDOUBLE: - case Datatype::CLONG_DOUBLE: { - extent.push_back(2); + case IOMode::Dataset: { + auto extent = parameter.extent; + switch (parameter.dtype) + { + case Datatype::CFLOAT: + case Datatype::CDOUBLE: + case Datatype::CLONG_DOUBLE: { + extent.push_back(2); + break; + } + default: + break; + } + // TOML does not support nulls, so initialize with zero + dset["data"] = initializeNDArray( + extent, + m_fileFormat == FileFormat::Json ? std::optional{} + : parameter.dtype); break; } - default: + case IOMode::Template: + if (parameter.extent != Extent{0}) + { + dset["extent"] = parameter.extent; + } + else + { + // no-op + // If extent is empty, don't bother writing it + } break; } - // TOML does not support nulls, so initialize with zero - dset["data"] = initializeNDArray( - extent, - m_fileFormat == FileFormat::Json ? std::optional() - : parameter.dtype); writable->written = true; m_dirty.emplace(file); } @@ -351,9 +484,11 @@ void JSONIOHandlerImpl::extendDataset( refreshFileFromParent(writable); auto &j = obtainJsonContents(writable); + IOMode localIOMode; try { - auto datasetExtent = getExtent(j); + Extent datasetExtent; + std::tie(datasetExtent, localIOMode) = getExtent(j); VERIFY_ALWAYS( datasetExtent.size() == parameters.extent.size(), "[JSON] Cannot change dimensionality of a dataset") @@ -370,28 +505,40 @@ void JSONIOHandlerImpl::extendDataset( throw std::runtime_error( "[JSON] The specified location contains no valid dataset"); } - auto extent = parameters.extent; - auto datatype = stringToDatatype(j["datatype"].get()); - switch (datatype) + + switch (localIOMode) { - case Datatype::CFLOAT: - case Datatype::CDOUBLE: - case Datatype::CLONG_DOUBLE: { - extent.push_back(2); - break; + case IOMode::Dataset: { + auto extent = parameters.extent; + auto datatype = stringToDatatype(j["datatype"].get()); + switch (datatype) + { + case Datatype::CFLOAT: + case Datatype::CDOUBLE: + case Datatype::CLONG_DOUBLE: { + extent.push_back(2); + break; + } + default: + // nothing to do + break; + } + // TOML does not support nulls, so initialize with zero + nlohmann::json newData = initializeNDArray( + extent, + m_fileFormat == FileFormat::Json ? 
std::optional{} + : datatype); + nlohmann::json &oldData = j["data"]; + mergeInto(newData, oldData); + j["data"] = newData; } - default: - // nothing to do - break; + break; + case IOMode::Template: { + j["extent"] = parameters.extent; + } + break; } - // TOML does not support nulls, so initialize with zero - nlohmann::json newData = initializeNDArray( - extent, - m_fileFormat == FileFormat::Json ? std::optional() - : datatype); - nlohmann::json &oldData = j["data"]; - mergeInto(newData, oldData); - j["data"] = newData; + writable->written = true; } @@ -687,7 +834,7 @@ void JSONIOHandlerImpl::openDataset( *parameters.dtype = Datatype(stringToDatatype(datasetJson["datatype"].get())); - *parameters.extent = getExtent(datasetJson); + *parameters.extent = getExtent(datasetJson).first; writable->written = true; } @@ -870,7 +1017,16 @@ void JSONIOHandlerImpl::writeDataset( auto file = refreshFileFromParent(writable); auto &j = obtainJsonContents(writable); - verifyDataset(parameters, j); + switch (verifyDataset(parameters, j)) + { + case IOMode::Dataset: + break; + case IOMode::Template: + std::cerr << "[JSON/TOML backend: Warning] Trying to write data to a " + "template dataset. Will skip." + << std::endl; + return; + } switchType(parameters.dtype, j, parameters); @@ -912,22 +1068,55 @@ void JSONIOHandlerImpl::writeAttribute( m_dirty.emplace(file); } +namespace +{ + struct FillWithZeroes + { + template + static void call(void *ptr, Extent const &extent) + { + T *casted = static_cast(ptr); + size_t flattenedExtent = std::accumulate( + extent.begin(), + extent.end(), + size_t(1), + [](size_t left, size_t right) { return left * right; }); + std::fill_n(casted, flattenedExtent, T{}); + } + + static constexpr char const *errorMsg = + "[JSON Backend] Fill with zeroes."; + }; +} // namespace + void JSONIOHandlerImpl::readDataset( Writable *writable, Parameter ¶meters) { refreshFileFromParent(writable); setAndGetFilePosition(writable); auto &j = obtainJsonContents(writable); - verifyDataset(parameters, j); + IOMode localMode = verifyDataset(parameters, j); - try + switch (localMode) { - switchType(parameters.dtype, j["data"], parameters); - } - catch (json::basic_json::type_error &) - { - throw std::runtime_error( - "[JSON] The given path does not contain a valid dataset."); + case IOMode::Template: + std::cerr << "[Warning] Cannot read chunks in Template mode of JSON " + "backend. Will fill with zeroes instead." 
+ << std::endl; + switchNonVectorType( + parameters.dtype, parameters.data.get(), parameters.extent); + return; + case IOMode::Dataset: + try + { + switchType(parameters.dtype, j["data"], parameters); + } + catch (json::basic_json::type_error &) + { + throw std::runtime_error( + "[JSON] The given path does not contain a valid dataset."); + } + break; } } @@ -1168,28 +1357,44 @@ Extent JSONIOHandlerImpl::getMultiplicators(Extent const &extent) return res; } -Extent JSONIOHandlerImpl::getExtent(nlohmann::json &j) +auto JSONIOHandlerImpl::getExtent(nlohmann::json &j) + -> std::pair { Extent res; - nlohmann::json *ptr = &j["data"]; - while (ptr->is_array()) + IOMode ioMode; + if (j.contains("data")) + { + ioMode = IOMode::Dataset; + nlohmann::json *ptr = &j["data"]; + while (ptr->is_array()) + { + res.push_back(ptr->size()); + ptr = &(*ptr)[0]; + } + switch (stringToDatatype(j["datatype"].get())) + { + case Datatype::CFLOAT: + case Datatype::CDOUBLE: + case Datatype::CLONG_DOUBLE: + // the last "dimension" is only the two entries for the complex + // number, so remove that again + res.erase(res.end() - 1); + break; + default: + break; + } + } + else if (j.contains("extent")) { - res.push_back(ptr->size()); - ptr = &(*ptr)[0]; + ioMode = IOMode::Template; + res = j["extent"].get(); } - switch (stringToDatatype(j["datatype"].get())) + else { - case Datatype::CFLOAT: - case Datatype::CDOUBLE: - case Datatype::CLONG_DOUBLE: - // the last "dimension" is only the two entries for the complex - // number, so remove that again - res.erase(res.end() - 1); - break; - default: - break; + ioMode = IOMode::Template; + res = {0}; } - return res; + return std::make_pair(std::move(res), ioMode); } std::string JSONIOHandlerImpl::removeSlashes(std::string s) @@ -1351,7 +1556,14 @@ auto JSONIOHandlerImpl::putJsonContents( return it; } - (*it->second)["platform_byte_widths"] = platformSpecifics(); + switch (m_mode) + { + case IOMode::Dataset: + (*it->second)["platform_byte_widths"] = platformSpecifics(); + break; + case IOMode::Template: + break; + } auto writeSingleFile = [this, &it](std::string const &writeThisFile) { auto [fh, _, fh_with_precision] = @@ -1553,8 +1765,8 @@ bool JSONIOHandlerImpl::isDataset(nlohmann::json const &j) { return false; } - auto i = j.find("data"); - return i != j.end() && i.value().is_array(); + auto i = j.find("datatype"); + return i != j.end() && i.value().is_string(); } bool JSONIOHandlerImpl::isGroup(nlohmann::json::const_iterator const &it) @@ -1565,21 +1777,24 @@ bool JSONIOHandlerImpl::isGroup(nlohmann::json::const_iterator const &it) { return false; } - auto i = j.find("data"); - return i == j.end() || !i.value().is_array(); + + auto i = j.find("datatype"); + return i == j.end() || !i.value().is_string(); } template -void JSONIOHandlerImpl::verifyDataset( - Param const ¶meters, nlohmann::json &j) +auto JSONIOHandlerImpl::verifyDataset( + Param const ¶meters, nlohmann::json &j) -> IOMode { VERIFY_ALWAYS( isDataset(j), "[JSON] Specified dataset does not exist or is not a dataset."); + IOMode res; try { - auto datasetExtent = getExtent(j); + Extent datasetExtent; + std::tie(datasetExtent, res) = getExtent(j); VERIFY_ALWAYS( datasetExtent.size() == parameters.extent.size(), "[JSON] Read/Write request does not fit the dataset's dimension"); @@ -1601,6 +1816,7 @@ void JSONIOHandlerImpl::verifyDataset( throw std::runtime_error( "[JSON] The given path does not contain a valid dataset."); } + return res; } nlohmann::json JSONIOHandlerImpl::platformSpecifics() @@ -1682,7 +1898,7 @@ 
nlohmann::json JSONIOHandlerImpl::CppToJSON::operator()(const T &val) } template -nlohmann::json JSONIOHandlerImpl::CppToJSON >::operator()( +nlohmann::json JSONIOHandlerImpl::CppToJSON>::operator()( const std::vector &v) { nlohmann::json j; @@ -1695,7 +1911,7 @@ nlohmann::json JSONIOHandlerImpl::CppToJSON >::operator()( } template -nlohmann::json JSONIOHandlerImpl::CppToJSON >::operator()( +nlohmann::json JSONIOHandlerImpl::CppToJSON>::operator()( const std::array &v) { nlohmann::json j; @@ -1714,7 +1930,7 @@ T JSONIOHandlerImpl::JsonToCpp::operator()(nlohmann::json const &json) } template -std::vector JSONIOHandlerImpl::JsonToCpp >::operator()( +std::vector JSONIOHandlerImpl::JsonToCpp>::operator()( nlohmann::json const &json) { std::vector v; @@ -1727,7 +1943,7 @@ std::vector JSONIOHandlerImpl::JsonToCpp >::operator()( } template -std::array JSONIOHandlerImpl::JsonToCpp >::operator()( +std::array JSONIOHandlerImpl::JsonToCpp>::operator()( nlohmann::json const &json) { std::array a; diff --git a/test/SerialIOTest.cpp b/test/SerialIOTest.cpp index 7f126e104f..2f6b3fedd6 100644 --- a/test/SerialIOTest.cpp +++ b/test/SerialIOTest.cpp @@ -1263,12 +1263,19 @@ TEST_CASE("particle_patches", "[serial]") } } -inline void dtype_test(const std::string &backend) +inline void dtype_test( + const std::string &backend, + std::optional activateTemplateMode = {}) { bool test_long_double = backend != "json" && backend != "toml"; bool test_long_long = (backend != "json") || sizeof(long long) <= 8; { - Series s = Series("../samples/dtype_test." + backend, Access::CREATE); + Series s = activateTemplateMode.has_value() + ? Series( + "../samples/dtype_test." + backend, + Access::CREATE, + activateTemplateMode.value()) + : Series("../samples/dtype_test." + backend, Access::CREATE); char c = 'c'; s.setAttribute("char", c); @@ -1390,8 +1397,12 @@ inline void dtype_test(const std::string &backend) } } - Series s = Series("../samples/dtype_test." + backend, Access::READ_ONLY); - + Series s = activateTemplateMode.has_value() + ? Series( + "../samples/dtype_test." + backend, + Access::READ_ONLY, + activateTemplateMode.value()) + : Series("../samples/dtype_test." + backend, Access::READ_ONLY); REQUIRE(s.getAttribute("char").get() == 'c'); REQUIRE(s.getAttribute("uchar").get() == 'u'); REQUIRE(s.getAttribute("schar").get() == 's'); @@ -1461,6 +1472,10 @@ inline void dtype_test(const std::string &backend) REQUIRE(s.getAttribute("bool").get() == true); REQUIRE(s.getAttribute("boolF").get() == false); + if (activateTemplateMode.has_value()) + { + return; + } // same implementation types (not necessary aliases) detection #if !defined(_MSC_VER) REQUIRE(s.getAttribute("short").dtype == Datatype::SHORT); @@ -1533,6 +1548,7 @@ TEST_CASE("dtype_test", "[serial]") { dtype_test(t); } + dtype_test("json", R"({"json":{"dataset":{"mode":"template"}}})"); if (auto extensions = getFileExtensions(); std::find(extensions.begin(), extensions.end(), "toml") != extensions.end()) @@ -1541,6 +1557,7 @@ TEST_CASE("dtype_test", "[serial]") * testing it here. */ dtype_test("toml"); + dtype_test("toml", R"({"toml":{"dataset":{"mode":"template"}}})"); } } @@ -1555,7 +1572,10 @@ struct ReadFromAnyType inline void write_test(const std::string &backend) { - Series o = Series("../samples/serial_write." + backend, Access::CREATE); + Series o = Series( + "../samples/serial_write." 
+ backend, + Access::CREATE, + R"({"json":{"dataset":{"mode":"template"}}})"); ParticleSpecies &e_1 = o.iterations[1].particles["e"]; @@ -1581,8 +1601,10 @@ inline void write_test(const std::string &backend) return posOff++; }); std::shared_ptr positionOffset_local_1(new uint64_t); - e_1["positionOffset"]["x"].resetDataset( - Dataset(determineDatatype(positionOffset_local_1), {4})); + e_1["positionOffset"]["x"].resetDataset(Dataset( + determineDatatype(positionOffset_local_1), + {4}, + R"({"json":{"dataset":{"mode":"dataset"}}})")); for (uint64_t i = 0; i < 4; ++i) { From 8bbf44443ca7a3bcaa36357448ca9120c8397c98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 4 Aug 2023 13:33:24 +0200 Subject: [PATCH 15/37] Write used mode to JSON file --- include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp | 10 ++- src/IO/JSON/JSONIOHandlerImpl.cpp | 63 +++++++++++++++++-- 2 files changed, 67 insertions(+), 6 deletions(-) diff --git a/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp b/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp index 75fe7ab8a3..1c13e01615 100644 --- a/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp +++ b/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp @@ -282,7 +282,15 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl IOMode m_mode = IOMode::Dataset; - IOMode retrieveDatasetMode(openPMD::json::TracingJSON &config) const; + enum class SpecificationVia + { + DefaultValue, + Manually + }; + SpecificationVia m_IOModeSpecificationVia = SpecificationVia::DefaultValue; + + std::pair + retrieveDatasetMode(openPMD::json::TracingJSON &config) const; // HELPER FUNCTIONS diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index 58ba4462b1..875a6a7436 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -63,6 +63,13 @@ namespace openPMD throw std::runtime_error((TEXT)); \ } +namespace JSONDefaults +{ + using const_str = char const *const; + constexpr const_str openpmd_internal = "__openPMD_internal"; + constexpr const_str IOMode = "IO_mode"; +} // namespace JSONDefaults + namespace { struct DefaultValue @@ -150,10 +157,11 @@ namespace } } // namespace -auto JSONIOHandlerImpl::retrieveDatasetMode( - openPMD::json::TracingJSON &config) const -> IOMode +auto JSONIOHandlerImpl::retrieveDatasetMode(openPMD::json::TracingJSON &config) + const -> std::pair { IOMode res = m_mode; + SpecificationVia res_2 = SpecificationVia::DefaultValue; if (auto [configLocation, maybeConfig] = getBackendConfig(config); maybeConfig.has_value()) { @@ -176,10 +184,12 @@ auto JSONIOHandlerImpl::retrieveDatasetMode( if (mode == "dataset") { res = IOMode::Dataset; + res_2 = SpecificationVia::Manually; } else if (mode == "template") { res = IOMode::Template; + res_2 = SpecificationVia::Manually; } else { @@ -191,7 +201,7 @@ auto JSONIOHandlerImpl::retrieveDatasetMode( } } } - return res; + return std::make_pair(res, res_2); } std::string JSONIOHandlerImpl::backendConfigKey() const @@ -230,7 +240,7 @@ JSONIOHandlerImpl::JSONIOHandlerImpl( , m_fileFormat{format} , m_originalExtension{std::move(originalExtension)} { - m_mode = retrieveDatasetMode(config); + std::tie(m_mode, m_IOModeSpecificationVia) = retrieveDatasetMode(config); if (auto [_, backendConfig] = getBackendConfig(config); backendConfig.has_value()) @@ -387,7 +397,7 @@ void JSONIOHandlerImpl::createDataset( parameter.options, /* considerFiles = */ false); // Retrieves mode from dataset-specific configuration, falls back to global // value if not defined - IOMode localMode = 
retrieveDatasetMode(config); + IOMode localMode = retrieveDatasetMode(config).first; parameter.warnUnusedParameters( config, @@ -1531,6 +1541,45 @@ JSONIOHandlerImpl::obtainJsonContents(File const &file) auto res = serialImplementation(); #endif + if (res->contains(JSONDefaults::openpmd_internal)) + { + auto const &openpmd_internal = res->at(JSONDefaults::openpmd_internal); + if (openpmd_internal.contains(JSONDefaults::IOMode)) + { + auto modeOption = openPMD::json::asLowerCaseStringDynamic( + openpmd_internal.at(JSONDefaults::IOMode)); + if (!modeOption.has_value()) + { + std::cerr + << "[JSON/TOML backend] Warning: Invalid value of " + "non-string type at internal meta table for entry '" + << JSONDefaults::IOMode << "'. Will ignore and continue." + << std::endl; + } + else if (modeOption.value() == "dataset") + { + if (m_IOModeSpecificationVia == SpecificationVia::DefaultValue) + { + m_mode = IOMode::Dataset; + } + } + else if (modeOption.value() == "template") + { + if (m_IOModeSpecificationVia == SpecificationVia::DefaultValue) + { + m_mode = IOMode::Template; + } + } + else + { + std::cerr << "[JSON/TOML backend] Warning: Invalid value '" + << modeOption.value() + << "' at internal meta table for entry '" + << JSONDefaults::IOMode + << "'. Will ignore and continue." << std::endl; + } + } + } m_jsonVals.emplace(file, res); return res; } @@ -1560,8 +1609,12 @@ auto JSONIOHandlerImpl::putJsonContents( { case IOMode::Dataset: (*it->second)["platform_byte_widths"] = platformSpecifics(); + (*it->second)[JSONDefaults::openpmd_internal][JSONDefaults::IOMode] = + "dataset"; break; case IOMode::Template: + (*it->second)[JSONDefaults::openpmd_internal][JSONDefaults::IOMode] = + "template"; break; } From bd456978d62fe25c44286594a1bcdcde45e5dd07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 23 Feb 2023 11:05:39 +0100 Subject: [PATCH 16/37] Use Attribute::getOptional for snapshot attribute --- src/Series.cpp | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/src/Series.cpp b/src/Series.cpp index 5d698ffbcf..32dc0b3a40 100644 --- a/src/Series.cpp +++ b/src/Series.cpp @@ -2412,19 +2412,13 @@ auto Series::currentSnapshot() const if (series.iterations.containsAttribute("snapshot")) { auto const &attribute = series.iterations.getAttribute("snapshot"); - switch (attribute.dtype) + auto res = attribute.getOptional(); + if (res.has_value()) { - case Datatype::ULONGLONG: - case Datatype::VEC_ULONGLONG: { - auto const &vec = attribute.get>(); - return vec_t{vec.begin(), vec.end()}; + return res.value(); } - case Datatype::ULONG: - case Datatype::VEC_ULONG: { - auto const &vec = attribute.get>(); - return vec_t{vec.begin(), vec.end()}; - } - default: { + else + { std::stringstream s; s << "Unexpected datatype for '/data/snapshot': " << attribute.dtype << " (expected a vector of integer, found " + @@ -2436,7 +2430,6 @@ auto Series::currentSnapshot() const {}, s.str()); } - } } else { From b493a11e0b3cd9ed5a676b0c54af4624eb1c9ec3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 4 Aug 2023 14:49:16 +0200 Subject: [PATCH 17/37] Introduce attribute mode --- include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp | 33 +- src/IO/JSON/JSONIOHandlerImpl.cpp | 330 +++++++++++++++++- 2 files changed, 342 insertions(+), 21 deletions(-) diff --git a/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp b/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp index 1c13e01615..c8d2b4db4c 100644 --- a/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp +++ 
b/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp @@ -274,6 +274,16 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl std::string m_originalExtension; + enum class SpecificationVia + { + DefaultValue, + Manually + }; + + ///////////////////// + // Dataset IO mode // + ///////////////////// + enum class IOMode { Dataset, @@ -281,17 +291,28 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl }; IOMode m_mode = IOMode::Dataset; - - enum class SpecificationVia - { - DefaultValue, - Manually - }; SpecificationVia m_IOModeSpecificationVia = SpecificationVia::DefaultValue; std::pair retrieveDatasetMode(openPMD::json::TracingJSON &config) const; + /////////////////////// + // Attribute IO mode // + /////////////////////// + + enum class AttributeMode + { + Short, + Long + }; + + AttributeMode m_attributeMode = AttributeMode::Long; + SpecificationVia m_attributeModeSpecificationVia = + SpecificationVia::DefaultValue; + + std::pair + retrieveAttributeMode(openPMD::json::TracingJSON &config) const; + // HELPER FUNCTIONS // will use the IOHandler to retrieve the correct directory. diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index 875a6a7436..ffda974b68 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -30,6 +30,7 @@ #include "openPMD/auxiliary/Memory.hpp" #include "openPMD/auxiliary/StringManip.hpp" #include "openPMD/auxiliary/TypeTraits.hpp" +#include "openPMD/backend/Attribute.hpp" #include "openPMD/backend/Writable.hpp" #include @@ -67,7 +68,8 @@ namespace JSONDefaults { using const_str = char const *const; constexpr const_str openpmd_internal = "__openPMD_internal"; - constexpr const_str IOMode = "IO_mode"; + constexpr const_str IOMode = "dataset_mode"; + constexpr const_str AttributeMode = "attribute_mode"; } // namespace JSONDefaults namespace @@ -204,6 +206,54 @@ auto JSONIOHandlerImpl::retrieveDatasetMode(openPMD::json::TracingJSON &config) return std::make_pair(res, res_2); } +auto JSONIOHandlerImpl::retrieveAttributeMode( + openPMD::json::TracingJSON &config) const + -> std::pair +{ + AttributeMode res = m_attributeMode; + SpecificationVia res_2 = SpecificationVia::DefaultValue; + if (auto [configLocation, maybeConfig] = getBackendConfig(config); + maybeConfig.has_value()) + { + auto jsonConfig = maybeConfig.value(); + if (jsonConfig.json().contains("attribute")) + { + auto attributeConfig = jsonConfig["attribute"]; + if (attributeConfig.json().contains("mode")) + { + auto modeOption = openPMD::json::asLowerCaseStringDynamic( + attributeConfig["mode"].json()); + if (!modeOption.has_value()) + { + throw error::BackendConfigSchema( + {configLocation, "mode"}, + "Invalid value of non-string type (accepted values are " + "'dataset' and 'template'."); + } + auto mode = modeOption.value(); + if (mode == "short") + { + res = AttributeMode::Short; + res_2 = SpecificationVia::Manually; + } + else if (mode == "long") + { + res = AttributeMode::Long; + res_2 = SpecificationVia::Manually; + } + else + { + throw error::BackendConfigSchema( + {configLocation, "attribute", "mode"}, + "Invalid value: '" + mode + + "' (accepted values are 'short' and 'long'."); + } + } + } + } + return std::make_pair(res, res_2); +} + std::string JSONIOHandlerImpl::backendConfigKey() const { switch (m_fileFormat) @@ -241,6 +291,8 @@ JSONIOHandlerImpl::JSONIOHandlerImpl( , m_originalExtension{std::move(originalExtension)} { std::tie(m_mode, m_IOModeSpecificationVia) = retrieveDatasetMode(config); + std::tie(m_attributeMode, 
m_attributeModeSpecificationVia) = + retrieveAttributeMode(config); if (auto [_, backendConfig] = getBackendConfig(config); backendConfig.has_value()) @@ -1072,8 +1124,17 @@ void JSONIOHandlerImpl::writeAttribute( } nlohmann::json value; switchType(parameter.dtype, value, parameter.resource); - (*jsonVal)[filePosition->id]["attributes"][parameter.name] = { - {"datatype", datatypeToString(parameter.dtype)}, {"value", value}}; + switch (m_attributeMode) + { + case AttributeMode::Long: + (*jsonVal)[filePosition->id]["attributes"][parameter.name] = { + {"datatype", datatypeToString(parameter.dtype)}, {"value", value}}; + break; + case AttributeMode::Short: + // short form + (*jsonVal)[filePosition->id]["attributes"][parameter.name] = value; + break; + } writable->written = true; m_dirty.emplace(file); } @@ -1130,6 +1191,195 @@ void JSONIOHandlerImpl::readDataset( } } +namespace +{ + template + Attribute recoverVectorAttributeFromJson(nlohmann::json const &j) + { + if (!j.is_array()) + { + throw std::runtime_error( + "[JSON backend: recoverVectorAttributeFromJson] Internal " + "control flow error."); + } + + if (j.size() == 7 && + (std::is_same_v || + std::is_same_v || + std::is_same_v)) + { + /* + * The frontend must deal with wrong type reports here. + */ + std::array res; + for (size_t i = 0; i < 7; ++i) + { + res[i] = j[i].get(); + } + return res; + } + else + { + std::vector res; + res.reserve(j.size()); + for (auto const &i : j) + { + res.push_back(i.get()); + } + return res; + } + } + + nlohmann::json::value_t unifyNumericType(nlohmann::json const &j) + { + if (!j.is_array() || j.empty()) + { + throw std::runtime_error( + "[JSON backend: recoverVectorAttributeFromJson] Internal " + "control flow error."); + } + auto dtypeRanking = [](nlohmann::json::value_t dtype) -> unsigned { + switch (dtype) + { + case nlohmann::json::value_t::number_unsigned: + return 0; + case nlohmann::json::value_t::number_integer: + return 1; + case nlohmann::json::value_t::number_float: + return 2; + default: + throw std::runtime_error( + "[JSON backend] Encountered vector with mixed number and " + "non-number datatypes."); + } + }; + auto higherDtype = + [&dtypeRanking]( + nlohmann::json::value_t dt1, + nlohmann::json::value_t dt2) -> nlohmann::json::value_t { + if (dtypeRanking(dt1) > dtypeRanking(dt2)) + { + return dt1; + } + else + { + return dt2; + } + }; + + nlohmann::json::value_t res = j[0].type(); + for (size_t i = 1; i < j.size(); ++i) + { + res = higherDtype(res, j[i].type()); + } + return res; + } + + Attribute recoverAttributeFromJson( + nlohmann::json const &j, std::string const &nameForErrorMessages) + { + // @todo use ReadError once it's mainlined + switch (j.type()) + { + case nlohmann::json::value_t::null: + throw std::runtime_error( + "[JSON backend] Attribute must not be null: '" + + nameForErrorMessages + "'."); + case nlohmann::json::value_t::object: + throw std::runtime_error( + "[JSON backend] Shorthand-style attribute must not be an " + "object: '" + + nameForErrorMessages + "'."); + case nlohmann::json::value_t::array: + if (j.empty()) + { + std::cerr << "Cannot recover datatype of empty vector without " + "explicit type annotation for attribute '" + << nameForErrorMessages + << "'. Will continue with VEC_INT datatype." + << std::endl; + return std::vector{}; + } + else + { + auto valueType = j[0].type(); + /* + * If the vector is of numeric type, it might happen that the + * first entry is an integer, but a later entry is a float. 
+ * We need to pick the most generic datatype in that case. + */ + if (valueType == nlohmann::json::value_t::number_float || + valueType == nlohmann::json::value_t::number_unsigned || + valueType == nlohmann::json::value_t::number_integer) + { + valueType = unifyNumericType(j); + } + switch (valueType) + { + case nlohmann::json::value_t::null: + throw std::runtime_error( + "[JSON backend] Attribute must not be null: '" + + nameForErrorMessages + "'."); + case nlohmann::json::value_t::object: + throw std::runtime_error( + "[JSON backend] Invalid contained datatype (object) " + "inside vector-type attribute: '" + + nameForErrorMessages + "'."); + case nlohmann::json::value_t::array: + throw std::runtime_error( + "[JSON backend] Invalid contained datatype (array) " + "inside vector-type attribute: '" + + nameForErrorMessages + "'."); + case nlohmann::json::value_t::string: + return recoverVectorAttributeFromJson(j); + case nlohmann::json::value_t::boolean: + throw std::runtime_error( + "[JSON backend] Attribute must not be vector of bool: " + "'" + + nameForErrorMessages + "'."); + case nlohmann::json::value_t::number_integer: + return recoverVectorAttributeFromJson< + nlohmann::json::number_integer_t>(j); + case nlohmann::json::value_t::number_unsigned: + return recoverVectorAttributeFromJson< + nlohmann::json::number_unsigned_t>(j); + case nlohmann::json::value_t::number_float: + return recoverVectorAttributeFromJson< + nlohmann::json::number_float_t>(j); + case nlohmann::json::value_t::binary: + throw std::runtime_error( + "[JSON backend] Attribute must not have binary type: " + "'" + + nameForErrorMessages + "'."); + case nlohmann::json::value_t::discarded: + throw std::runtime_error( + "Internal JSON parser datatype leaked into JSON " + "value."); + } + throw std::runtime_error("Unreachable!"); + } + case nlohmann::json::value_t::string: + return j.get(); + case nlohmann::json::value_t::boolean: + return j.get(); + case nlohmann::json::value_t::number_integer: + return j.get(); + case nlohmann::json::value_t::number_unsigned: + return j.get(); + case nlohmann::json::value_t::number_float: + return j.get(); + case nlohmann::json::value_t::binary: + throw std::runtime_error( + "[JSON backend] Attribute must not have binary type: '" + + nameForErrorMessages + "'."); + case nlohmann::json::value_t::discarded: + throw std::runtime_error( + "Internal JSON parser datatype leaked into JSON value."); + } + throw std::runtime_error("Unreachable!"); + } +} // namespace + void JSONIOHandlerImpl::readAttribute( Writable *writable, Parameter ¶meters) { @@ -1154,9 +1404,19 @@ void JSONIOHandlerImpl::readAttribute( auto &j = jsonLoc[name]; try { - *parameters.dtype = - Datatype(stringToDatatype(j["datatype"].get())); - switchType(*parameters.dtype, j["value"], parameters); + if (j.is_object()) + { + *parameters.dtype = + Datatype(stringToDatatype(j["datatype"].get())); + switchType( + *parameters.dtype, j["value"], parameters); + } + else + { + Attribute attr = recoverAttributeFromJson(j, name); + *parameters.dtype = attr.dtype; + *parameters.resource = attr.getResource(); + } } catch (json::type_error &) { @@ -1544,7 +1804,10 @@ JSONIOHandlerImpl::obtainJsonContents(File const &file) if (res->contains(JSONDefaults::openpmd_internal)) { auto const &openpmd_internal = res->at(JSONDefaults::openpmd_internal); - if (openpmd_internal.contains(JSONDefaults::IOMode)) + + // Init dataset mode according to file's default + if (m_IOModeSpecificationVia == SpecificationVia::DefaultValue && + 
openpmd_internal.contains(JSONDefaults::IOMode)) { auto modeOption = openPMD::json::asLowerCaseStringDynamic( openpmd_internal.at(JSONDefaults::IOMode)); @@ -1558,17 +1821,42 @@ JSONIOHandlerImpl::obtainJsonContents(File const &file) } else if (modeOption.value() == "dataset") { - if (m_IOModeSpecificationVia == SpecificationVia::DefaultValue) - { - m_mode = IOMode::Dataset; - } + m_mode = IOMode::Dataset; } else if (modeOption.value() == "template") { - if (m_IOModeSpecificationVia == SpecificationVia::DefaultValue) - { - m_mode = IOMode::Template; - } + m_mode = IOMode::Template; + } + else + { + std::cerr << "[JSON/TOML backend] Warning: Invalid value '" + << modeOption.value() + << "' at internal meta table for entry '" + << JSONDefaults::IOMode + << "'. Will ignore and continue." << std::endl; + } + } + + if (m_IOModeSpecificationVia == SpecificationVia::DefaultValue && + openpmd_internal.contains(JSONDefaults::AttributeMode)) + { + auto modeOption = openPMD::json::asLowerCaseStringDynamic( + openpmd_internal.at(JSONDefaults::AttributeMode)); + if (!modeOption.has_value()) + { + std::cerr + << "[JSON/TOML backend] Warning: Invalid value of " + "non-string type at internal meta table for entry '" + << JSONDefaults::AttributeMode + << "'. Will ignore and continue." << std::endl; + } + else if (modeOption.value() == "long") + { + m_attributeMode = AttributeMode::Long; + } + else if (modeOption.value() == "short") + { + m_attributeMode = AttributeMode::Short; } else { @@ -1618,6 +1906,18 @@ auto JSONIOHandlerImpl::putJsonContents( break; } + switch (m_attributeMode) + { + case AttributeMode::Short: + (*it->second)[JSONDefaults::openpmd_internal] + [JSONDefaults::AttributeMode] = "short"; + break; + case AttributeMode::Long: + (*it->second)[JSONDefaults::openpmd_internal] + [JSONDefaults::AttributeMode] = "long"; + break; + } + auto writeSingleFile = [this, &it](std::string const &writeThisFile) { auto [fh, _, fh_with_precision] = getFilehandle(File(writeThisFile), Access::CREATE); From f91d770eec2489fa959f7f6c47127c8db3f86941 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 4 Aug 2023 14:50:34 +0200 Subject: [PATCH 18/37] Add example 14_toml_template.cpp --- examples/14_toml_template.cpp | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 examples/14_toml_template.cpp diff --git a/examples/14_toml_template.cpp b/examples/14_toml_template.cpp new file mode 100644 index 0000000000..33b9115129 --- /dev/null +++ b/examples/14_toml_template.cpp @@ -0,0 +1,22 @@ +#include + +int main() +{ + std::string config = R"( +{ + "iteration_encoding": "variable_based", + "toml": { + "dataset": {"mode": "template"}, + "attribute": {"mode": "short"} + } +} +)"; + + openPMD::Series writeTemplate( + "../samples/tomlTemplate.toml", openPMD::Access::CREATE, config); + auto iteration = writeTemplate.writeIterations()[0]; + + auto temperature = + iteration.meshes["temperature"][openPMD::RecordComponent::SCALAR]; + temperature.resetDataset({openPMD::Datatype::FLOAT, {5, 5}}); +} From 2c9539d03c238a2d45ec4ee2ddf8c41cd755f2c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 10 Mar 2023 15:42:59 +0100 Subject: [PATCH 19/37] Use Datatype::UNDEFINED to indicate no dataset definition in template --- include/openPMD/Dataset.hpp | 2 +- src/RecordComponent.cpp | 26 ++++++++++++++++++-------- src/backend/PatchRecordComponent.cpp | 19 +++++++++++-------- 3 files changed, 30 insertions(+), 17 deletions(-) diff --git a/include/openPMD/Dataset.hpp 
b/include/openPMD/Dataset.hpp index 8757a3cf0a..78ac6f0995 100644 --- a/include/openPMD/Dataset.hpp +++ b/include/openPMD/Dataset.hpp @@ -37,7 +37,7 @@ class Dataset friend class RecordComponent; public: - Dataset(Datatype, Extent, std::string options = "{}"); + Dataset(Datatype, Extent = {1}, std::string options = "{}"); /** * @brief Constructor that sets the datatype to undefined. diff --git a/src/RecordComponent.cpp b/src/RecordComponent.cpp index fed6fe60d4..ca0c50b81e 100644 --- a/src/RecordComponent.cpp +++ b/src/RecordComponent.cpp @@ -88,18 +88,28 @@ RecordComponent &RecordComponent::resetDataset(Dataset d) rc.m_hasBeenExtended = true; } - if (d.dtype == Datatype::UNDEFINED) - { - throw error::WrongAPIUsage( - "[RecordComponent] Must set specific datatype."); - } - // if( d.extent.empty() ) - // throw std::runtime_error("Dataset extent must be at least 1D."); + // if (d.dtype == Datatype::UNDEFINED) + // { + // throw error::WrongAPIUsage( + // "[RecordComponent] Must set specific datatype."); + // } + if (d.extent.empty()) + throw std::runtime_error("Dataset extent must be at least 1D."); if (std::any_of( d.extent.begin(), d.extent.end(), [](Extent::value_type const &i) { return i == 0u; })) - return makeEmpty(std::move(d)); + { + if (d.dtype != Datatype::UNDEFINED) + { + return makeEmpty(std::move(d)); + } + else + { + rc.m_dataset = std::move(d); + return *this; + } + } rc.m_isEmpty = false; if (written()) diff --git a/src/backend/PatchRecordComponent.cpp b/src/backend/PatchRecordComponent.cpp index 3277bac550..227af6d442 100644 --- a/src/backend/PatchRecordComponent.cpp +++ b/src/backend/PatchRecordComponent.cpp @@ -43,14 +43,17 @@ PatchRecordComponent &PatchRecordComponent::resetDataset(Dataset d) throw std::runtime_error( "A Records Dataset can not (yet) be changed after it has been " "written."); - if (d.extent.empty()) - throw std::runtime_error("Dataset extent must be at least 1D."); - if (std::any_of( - d.extent.begin(), d.extent.end(), [](Extent::value_type const &i) { - return i == 0u; - })) - throw std::runtime_error( - "Dataset extent must not be zero in any dimension."); + if (d.dtype != Datatype::UNDEFINED) + { + if (d.extent.empty()) + throw std::runtime_error("Dataset extent must be at least 1D."); + if (std::any_of( + d.extent.begin(), + d.extent.end(), + [](Extent::value_type const &i) { return i == 0u; })) + throw std::runtime_error( + "Dataset extent must not be zero in any dimension."); + } get().m_dataset = d; dirty() = true; From 6077cc93d9da820007fbc3899ce6ccb48e16efb2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 19 May 2022 17:29:24 +0200 Subject: [PATCH 20/37] Extend example --- examples/14_toml_template.cpp | 91 +++++++++++++++++++++++++++++++++-- 1 file changed, 88 insertions(+), 3 deletions(-) diff --git a/examples/14_toml_template.cpp b/examples/14_toml_template.cpp index 33b9115129..284db9ee84 100644 --- a/examples/14_toml_template.cpp +++ b/examples/14_toml_template.cpp @@ -1,6 +1,21 @@ #include -int main() +std::string backendEnding() +{ + auto extensions = openPMD::getFileExtensions(); + if (auto it = std::find(extensions.begin(), extensions.end(), "toml"); + it != extensions.end()) + { + return *it; + } + else + { + // Fallback for buggy old NVidia compiler + return "json"; + } +} + +void write() { std::string config = R"( { @@ -13,10 +28,80 @@ int main() )"; openPMD::Series writeTemplate( - "../samples/tomlTemplate.toml", openPMD::Access::CREATE, config); + "../samples/tomlTemplate." 
+ backendEnding(), + openPMD::Access::CREATE, + config); auto iteration = writeTemplate.writeIterations()[0]; + openPMD::Dataset ds{openPMD::Datatype::FLOAT, {5, 5}}; + auto temperature = iteration.meshes["temperature"][openPMD::RecordComponent::SCALAR]; - temperature.resetDataset({openPMD::Datatype::FLOAT, {5, 5}}); + temperature.resetDataset(ds); + + auto E = iteration.meshes["E"]; + E["x"].resetDataset(ds); + E["y"].resetDataset(ds); + /* + * Don't specify datatype and extent for this one to indicate that this + * information is not yet known. + */ + E["z"].resetDataset({openPMD::Datatype::UNDEFINED}); + + ds.extent = {10}; + + auto electrons = iteration.particles["e"]; + electrons["position"]["x"].resetDataset(ds); + electrons["position"]["y"].resetDataset(ds); + electrons["position"]["z"].resetDataset(ds); + + electrons["positionOffset"]["x"].resetDataset(ds); + electrons["positionOffset"]["y"].resetDataset(ds); + electrons["positionOffset"]["z"].resetDataset(ds); + electrons["positionOffset"]["x"].makeConstant(3.14); + electrons["positionOffset"]["y"].makeConstant(3.14); + electrons["positionOffset"]["z"].makeConstant(3.14); + + ds.dtype = openPMD::determineDatatype(); + electrons.particlePatches["numParticles"][openPMD::RecordComponent::SCALAR] + .resetDataset(ds); + electrons + .particlePatches["numParticlesOffset"][openPMD::RecordComponent::SCALAR] + .resetDataset(ds); + electrons.particlePatches["offset"]["x"].resetDataset(ds); + electrons.particlePatches["offset"]["y"].resetDataset(ds); + electrons.particlePatches["offset"]["z"].resetDataset(ds); + electrons.particlePatches["extent"]["x"].resetDataset(ds); + electrons.particlePatches["extent"]["y"].resetDataset(ds); + electrons.particlePatches["extent"]["z"].resetDataset(ds); +} + +void read() +{ + /* + * The config is entirely optional, these things are also detected + * automatically when reading + */ + + // std::string config = R"( + // { + // "iteration_encoding": "variable_based", + // "toml": { + // "dataset": {"mode": "template"}, + // "attribute": {"mode": "short"} + // } + // } + // )"; + + openPMD::Series read( + "../samples/tomlTemplate." + backendEnding(), + openPMD::Access::READ_LINEAR); + read.readIterations(); // @todo change to read.parseBase() + openPMD::helper::listSeries(read); +} + +int main() +{ + write(); + read(); } From 401875fd4e092e6dff04b2a818db21a88c0c87c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 7 Aug 2023 11:04:07 +0200 Subject: [PATCH 21/37] Test short attribute mode --- test/SerialIOTest.cpp | 36 +++++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/test/SerialIOTest.cpp b/test/SerialIOTest.cpp index 2f6b3fedd6..443db6eade 100644 --- a/test/SerialIOTest.cpp +++ b/test/SerialIOTest.cpp @@ -1548,7 +1548,17 @@ TEST_CASE("dtype_test", "[serial]") { dtype_test(t); } - dtype_test("json", R"({"json":{"dataset":{"mode":"template"}}})"); + dtype_test("json", R"( +{ + "json": { + "dataset": { + "mode": "template" + }, + "attribute": { + "mode": "short" + } + } +})"); if (auto extensions = getFileExtensions(); std::find(extensions.begin(), extensions.end(), "toml") != extensions.end()) @@ -1557,7 +1567,17 @@ TEST_CASE("dtype_test", "[serial]") * testing it here. 
*/ dtype_test("toml"); - dtype_test("toml", R"({"toml":{"dataset":{"mode":"template"}}})"); + dtype_test("toml", R"( +{ + "toml": { + "dataset": { + "mode": "template" + }, + "attribute": { + "mode": "short" + } + } +})"); } } @@ -1575,7 +1595,17 @@ inline void write_test(const std::string &backend) Series o = Series( "../samples/serial_write." + backend, Access::CREATE, - R"({"json":{"dataset":{"mode":"template"}}})"); + R"( +{ + "json": { + "dataset": { + "mode": "template" + }, + "attribute": { + "mode": "short" + } + } +})"); ParticleSpecies &e_1 = o.iterations[1].particles["e"]; From 7606ddde2f7bfdddde6a8fc3e64f937aa2fb6481 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 7 Aug 2023 11:26:07 +0200 Subject: [PATCH 22/37] Copy datatypeToString to JSON implementation --- src/IO/JSON/JSONIOHandlerImpl.cpp | 96 ++++++++++++++++++++++++++++++- 1 file changed, 93 insertions(+), 3 deletions(-) diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index ffda974b68..c15013611e 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -157,6 +157,95 @@ namespace } } } + + // Does the same as datatypeToString(), but this makes sure that we don't + // accidentally change the JSON schema by modifying datatypeToString() + std::string jsonDatatypeToString(Datatype dt) + { + switch (dt) + { + using DT = Datatype; + case DT::CHAR: + return "CHAR"; + case DT::UCHAR: + return "UCHAR"; + case DT::SCHAR: + return "SCHAR"; + case DT::SHORT: + return "SHORT"; + case DT::INT: + return "INT"; + case DT::LONG: + return "LONG"; + case DT::LONGLONG: + return "LONGLONG"; + case DT::USHORT: + return "USHORT"; + case DT::UINT: + return "UINT"; + case DT::ULONG: + return "ULONG"; + case DT::ULONGLONG: + return "ULONGLONG"; + case DT::FLOAT: + return "FLOAT"; + case DT::DOUBLE: + return "DOUBLE"; + case DT::LONG_DOUBLE: + return "LONG_DOUBLE"; + case DT::CFLOAT: + return "CFLOAT"; + case DT::CDOUBLE: + return "CDOUBLE"; + case DT::CLONG_DOUBLE: + return "CLONG_DOUBLE"; + case DT::STRING: + return "STRING"; + case DT::VEC_CHAR: + return "VEC_CHAR"; + case DT::VEC_SHORT: + return "VEC_SHORT"; + case DT::VEC_INT: + return "VEC_INT"; + case DT::VEC_LONG: + return "VEC_LONG"; + case DT::VEC_LONGLONG: + return "VEC_LONGLONG"; + case DT::VEC_UCHAR: + return "VEC_UCHAR"; + case DT::VEC_USHORT: + return "VEC_USHORT"; + case DT::VEC_UINT: + return "VEC_UINT"; + case DT::VEC_ULONG: + return "VEC_ULONG"; + case DT::VEC_ULONGLONG: + return "VEC_ULONGLONG"; + case DT::VEC_FLOAT: + return "VEC_FLOAT"; + case DT::VEC_DOUBLE: + return "VEC_DOUBLE"; + case DT::VEC_LONG_DOUBLE: + return "VEC_LONG_DOUBLE"; + case DT::VEC_CFLOAT: + return "VEC_CFLOAT"; + case DT::VEC_CDOUBLE: + return "VEC_CDOUBLE"; + case DT::VEC_CLONG_DOUBLE: + return "VEC_CLONG_DOUBLE"; + case DT::VEC_SCHAR: + return "VEC_SCHAR"; + case DT::VEC_STRING: + return "VEC_STRING"; + case DT::ARR_DBL_7: + return "ARR_DBL_7"; + case DT::BOOL: + return "BOOL"; + case DT::UNDEFINED: + return "UNDEFINED"; + } + return "Unreachable!"; + } } // namespace auto JSONIOHandlerImpl::retrieveDatasetMode(openPMD::json::TracingJSON &config) @@ -472,7 +561,7 @@ void JSONIOHandlerImpl::createDataset( } setAndGetFilePosition(writable, name); auto &dset = jsonVal[name]; - dset["datatype"] = datatypeToString(parameter.dtype); + dset["datatype"] = jsonDatatypeToString(parameter.dtype); switch (localMode) { @@ -1128,7 +1217,8 @@ void JSONIOHandlerImpl::writeAttribute( { case AttributeMode::Long: 
(*jsonVal)[filePosition->id]["attributes"][parameter.name] = { - {"datatype", datatypeToString(parameter.dtype)}, {"value", value}}; + {"datatype", jsonDatatypeToString(parameter.dtype)}, + {"value", value}}; break; case AttributeMode::Short: // short form @@ -2195,7 +2285,7 @@ nlohmann::json JSONIOHandlerImpl::platformSpecifics() Datatype::BOOL}; for (auto it = std::begin(datatypes); it != std::end(datatypes); it++) { - res[datatypeToString(*it)] = toBytes(*it); + res[jsonDatatypeToString(*it)] = toBytes(*it); } return res; } From 0de03b9b0281c0b7cd25f627e522d68f03a3847f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 22 Sep 2023 17:31:34 +0200 Subject: [PATCH 23/37] Fix after rebase: Init JSON config in parallel mode --- src/IO/JSON/JSONIOHandlerImpl.cpp | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index c15013611e..5560c3bafe 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -394,16 +394,25 @@ JSONIOHandlerImpl::JSONIOHandlerImpl( #if openPMD_HAVE_MPI JSONIOHandlerImpl::JSONIOHandlerImpl( AbstractIOHandler *handler, - MPI_Comm comm, - // NOLINTNEXTLINE(performance-unnecessary-value-param) - [[maybe_unused]] openPMD::json::TracingJSON config, + MPI_Comm comm,openPMD::json::TracingJSON config, FileFormat format, std::string originalExtension) : AbstractIOHandlerImpl(handler) , m_communicator{comm} , m_fileFormat{format} , m_originalExtension{std::move(originalExtension)} -{} +{ + std::tie(m_mode, m_IOModeSpecificationVia) = retrieveDatasetMode(config); + std::tie(m_attributeMode, m_attributeModeSpecificationVia) = + retrieveAttributeMode(config); + + if (auto [_, backendConfig] = getBackendConfig(config); + backendConfig.has_value()) + { + (void)_; + warnUnusedJson(backendConfig.value()); + } +} #endif JSONIOHandlerImpl::~JSONIOHandlerImpl() = default; From 7c37fcf8b6ac468e5bc390600ce0732510155a29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 22 Sep 2023 17:32:07 +0200 Subject: [PATCH 24/37] Fix after rebase: Don't erase JSON datasets when writing --- src/IO/JSON/JSONIOHandlerImpl.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index 5560c3bafe..bc4e247b6f 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -2131,6 +2131,7 @@ merge the .json files somehow (no tooling provided for this (yet)). 
#else serialImplementation(); #endif + if (unsetDirty) { m_dirty.erase(filename); From 417dfad13e96e1a5fc31b8bc6b440d57fed2cca9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 12 Oct 2023 09:48:23 +0200 Subject: [PATCH 25/37] openpmd-pipe: use short modes for test --- CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index fffcf395c7..b54071a4d3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1374,6 +1374,9 @@ if(openPMD_BUILD_TESTING) ${openPMD_RUNTIME_OUTPUT_DIRECTORY}/openpmd-pipe \ --infile ../samples/git-sample/thetaMode/data_%T.bp \ --outfile ../samples/git-sample/thetaMode/data%T.json \ + --outconfig ' \ + json.attribute.mode = \"short\" \n\ + json.dataset.mode = \"template\"' \ " WORKING_DIRECTORY ${openPMD_RUNTIME_OUTPUT_DIRECTORY} ) From 587cd32d688d3caadd178f0e1b90a6e6b55b7dd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 12 Oct 2023 10:50:28 +0200 Subject: [PATCH 26/37] Less intrusive warnings, allow disabling them --- CMakeLists.txt | 2 +- include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp | 17 +++++- src/IO/JSON/JSONIOHandlerImpl.cpp | 52 +++++++++++++------ 3 files changed, 53 insertions(+), 18 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b54071a4d3..0fbfc601f5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1376,7 +1376,7 @@ if(openPMD_BUILD_TESTING) --outfile ../samples/git-sample/thetaMode/data%T.json \ --outconfig ' \ json.attribute.mode = \"short\" \n\ - json.dataset.mode = \"template\"' \ + json.dataset.mode = \"template_no_warn\"' \ " WORKING_DIRECTORY ${openPMD_RUNTIME_OUTPUT_DIRECTORY} ) diff --git a/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp b/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp index c8d2b4db4c..83d777cf6f 100644 --- a/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp +++ b/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp @@ -292,9 +292,22 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl IOMode m_mode = IOMode::Dataset; SpecificationVia m_IOModeSpecificationVia = SpecificationVia::DefaultValue; + bool m_printedSkippedWriteWarningAlready = false; - std::pair - retrieveDatasetMode(openPMD::json::TracingJSON &config) const; + struct DatasetMode + { + IOMode m_IOMode; + SpecificationVia m_specificationVia; + bool m_skipWarnings; + + template + operator std::tuple() + { + return std::tuple{ + m_IOMode, m_specificationVia, m_skipWarnings}; + } + }; + DatasetMode retrieveDatasetMode(openPMD::json::TracingJSON &config) const; /////////////////////// // Attribute IO mode // diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index bc4e247b6f..fffb418e41 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -248,11 +248,12 @@ namespace } } // namespace -auto JSONIOHandlerImpl::retrieveDatasetMode(openPMD::json::TracingJSON &config) - const -> std::pair +auto JSONIOHandlerImpl::retrieveDatasetMode( + openPMD::json::TracingJSON &config) const -> DatasetMode { - IOMode res = m_mode; - SpecificationVia res_2 = SpecificationVia::DefaultValue; + IOMode ioMode = m_mode; + SpecificationVia specificationVia = SpecificationVia::DefaultValue; + bool skipWarnings = false; if (auto [configLocation, maybeConfig] = getBackendConfig(config); maybeConfig.has_value()) { @@ -274,13 +275,19 @@ auto JSONIOHandlerImpl::retrieveDatasetMode(openPMD::json::TracingJSON &config) auto mode = modeOption.value(); if (mode == "dataset") { - res = IOMode::Dataset; - res_2 = 
SpecificationVia::Manually; + ioMode = IOMode::Dataset; + specificationVia = SpecificationVia::Manually; } else if (mode == "template") { - res = IOMode::Template; - res_2 = SpecificationVia::Manually; + ioMode = IOMode::Template; + specificationVia = SpecificationVia::Manually; + } + else if (mode == "template_no_warn") + { + ioMode = IOMode::Template; + specificationVia = SpecificationVia::Manually; + skipWarnings = true; } else { @@ -292,7 +299,7 @@ auto JSONIOHandlerImpl::retrieveDatasetMode(openPMD::json::TracingJSON &config) } } } - return std::make_pair(res, res_2); + return DatasetMode{ioMode, specificationVia, skipWarnings}; } auto JSONIOHandlerImpl::retrieveAttributeMode( @@ -379,7 +386,9 @@ JSONIOHandlerImpl::JSONIOHandlerImpl( , m_fileFormat{format} , m_originalExtension{std::move(originalExtension)} { - std::tie(m_mode, m_IOModeSpecificationVia) = retrieveDatasetMode(config); + std::tie( + m_mode, m_IOModeSpecificationVia, m_printedSkippedWriteWarningAlready) = + retrieveDatasetMode(config); std::tie(m_attributeMode, m_attributeModeSpecificationVia) = retrieveAttributeMode(config); @@ -402,7 +411,9 @@ JSONIOHandlerImpl::JSONIOHandlerImpl( , m_fileFormat{format} , m_originalExtension{std::move(originalExtension)} { - std::tie(m_mode, m_IOModeSpecificationVia) = retrieveDatasetMode(config); + std::tie( + m_mode, m_IOModeSpecificationVia, m_printedSkippedWriteWarningAlready) = + retrieveDatasetMode(config); std::tie(m_attributeMode, m_attributeModeSpecificationVia) = retrieveAttributeMode(config); @@ -547,7 +558,13 @@ void JSONIOHandlerImpl::createDataset( parameter.options, /* considerFiles = */ false); // Retrieves mode from dataset-specific configuration, falls back to global // value if not defined - IOMode localMode = retrieveDatasetMode(config).first; + auto [localMode, _, skipWarnings] = retrieveDatasetMode(config); + (void)_; + // No use in introducing logic to skip warnings only for one particular + // dataset. If warnings are skipped, then they are skipped consistently. + // Use |= since `false` is the default value and we don't wish to reset + // the flag. + m_printedSkippedWriteWarningAlready |= skipWarnings; parameter.warnUnusedParameters( config, @@ -1182,9 +1199,14 @@ void JSONIOHandlerImpl::writeDataset( case IOMode::Dataset: break; case IOMode::Template: - std::cerr << "[JSON/TOML backend: Warning] Trying to write data to a " - "template dataset. Will skip." - << std::endl; + if (!m_printedSkippedWriteWarningAlready) + { + std::cerr + << "[JSON/TOML backend: Warning] Trying to write data to a " + "template dataset. Will skip." 
+ << std::endl; + m_printedSkippedWriteWarningAlready = true; + } return; } From 2f8857952ebe83da30148ededb1002038bf256af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 12 Oct 2023 11:29:09 +0200 Subject: [PATCH 27/37] TOML: Use short modes by default --- include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp | 2 ++ src/IO/JSON/JSONIOHandlerImpl.cpp | 34 ++++++++++++------- test/SerialIOTest.cpp | 26 ++++++++++---- test/python/unittest/API/APITest.py | 3 +- 4 files changed, 45 insertions(+), 20 deletions(-) diff --git a/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp b/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp index 83d777cf6f..4a30d79e01 100644 --- a/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp +++ b/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp @@ -180,6 +180,8 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl std::string originalExtension); #endif + void init(openPMD::json::TracingJSON config); + ~JSONIOHandlerImpl() override; void diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index fffb418e41..3b2089509a 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -386,18 +386,7 @@ JSONIOHandlerImpl::JSONIOHandlerImpl( , m_fileFormat{format} , m_originalExtension{std::move(originalExtension)} { - std::tie( - m_mode, m_IOModeSpecificationVia, m_printedSkippedWriteWarningAlready) = - retrieveDatasetMode(config); - std::tie(m_attributeMode, m_attributeModeSpecificationVia) = - retrieveAttributeMode(config); - - if (auto [_, backendConfig] = getBackendConfig(config); - backendConfig.has_value()) - { - (void)_; - warnUnusedJson(backendConfig.value()); - } + init(std::move(config)); } #if openPMD_HAVE_MPI @@ -411,6 +400,26 @@ JSONIOHandlerImpl::JSONIOHandlerImpl( , m_fileFormat{format} , m_originalExtension{std::move(originalExtension)} { + init(std::move(config)); +} +#endif + +void JSONIOHandlerImpl::init(openPMD::json::TracingJSON config) +{ + // set the defaults + switch (m_fileFormat) + { + case FileFormat::Json: + // @todo take the switch to openPMD 2.0 as a chance to switch to + // short attribute mode as a default here + m_attributeMode = AttributeMode::Long; + m_mode = IOMode::Dataset; + break; + case FileFormat::Toml: + m_attributeMode = AttributeMode::Short; + m_mode = IOMode::Template; + break; + } std::tie( m_mode, m_IOModeSpecificationVia, m_printedSkippedWriteWarningAlready) = retrieveDatasetMode(config); @@ -424,7 +433,6 @@ JSONIOHandlerImpl::JSONIOHandlerImpl( warnUnusedJson(backendConfig.value()); } } -#endif JSONIOHandlerImpl::~JSONIOHandlerImpl() = default; diff --git a/test/SerialIOTest.cpp b/test/SerialIOTest.cpp index 443db6eade..3d1970237f 100644 --- a/test/SerialIOTest.cpp +++ b/test/SerialIOTest.cpp @@ -1275,8 +1275,12 @@ inline void dtype_test( "../samples/dtype_test." + backend, Access::CREATE, activateTemplateMode.value()) - : Series("../samples/dtype_test." + backend, Access::CREATE); - + : + // test TOML long attribute mode by default + Series( + "../samples/dtype_test." + backend, + Access::CREATE, + R"({"toml":{"attribute":{"mode":"long"}}})"); char c = 'c'; s.setAttribute("char", c); unsigned char uc = 'u'; @@ -1874,7 +1878,8 @@ inline void fileBased_write_test(const std::string &backend) { Series o = Series( "../samples/subdir/serial_fileBased_write%03T." 
+ backend, - Access::CREATE); + Access::CREATE, + R"({"toml":{"dataset":{"mode":"dataset"}}})"); ParticleSpecies &e_1 = o.iterations[1].particles["e"]; @@ -7233,7 +7238,10 @@ void groupbased_read_write(std::string const &ext) std::string filename = "../samples/groupbased_read_write." + ext; { - Series write(filename, Access::CREATE); + Series write( + filename, + Access::CREATE, + R"({"toml":{"dataset":{"mode":"dataset"}}})"); auto E_x = write.iterations[0].meshes["E"]["x"]; auto E_y = write.iterations[0].meshes["E"]["y"]; E_x.resetDataset(ds); @@ -7246,7 +7254,10 @@ void groupbased_read_write(std::string const &ext) } { - Series write(filename, Access::READ_WRITE); + Series write( + filename, + Access::READ_WRITE, + R"({"toml":{"dataset":{"mode":"dataset"}}})"); // create a new iteration auto E_x = write.iterations[1].meshes["E"]["x"]; E_x.resetDataset(ds); @@ -7286,7 +7297,10 @@ void groupbased_read_write(std::string const &ext) // check that truncation works correctly { - Series write(filename, Access::CREATE); + Series write( + filename, + Access::CREATE, + R"({"toml":{"dataset":{"mode":"dataset"}}})"); // create a new iteration auto E_x = write.iterations[2].meshes["E"]["x"]; E_x.resetDataset(ds); diff --git a/test/python/unittest/API/APITest.py b/test/python/unittest/API/APITest.py index 6ff987f657..3e1c07a2d0 100644 --- a/test/python/unittest/API/APITest.py +++ b/test/python/unittest/API/APITest.py @@ -25,7 +25,8 @@ from TestUtilities.TestUtilities import generateTestFilePath tested_file_extensions = [ - ext for ext in io.file_extensions if ext != 'sst' and ext != 'ssc' + ext for ext in io.file_extensions + if ext != 'sst' and ext != 'ssc' and ext != 'toml' ] From 9951ef35039876eac5d4afaae32cdfc13d675a52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 26 Oct 2023 15:41:46 +0200 Subject: [PATCH 28/37] Python formatting --- test/python/unittest/API/APITest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/python/unittest/API/APITest.py b/test/python/unittest/API/APITest.py index 3e1c07a2d0..1a73bb0bb6 100644 --- a/test/python/unittest/API/APITest.py +++ b/test/python/unittest/API/APITest.py @@ -26,7 +26,7 @@ tested_file_extensions = [ ext for ext in io.file_extensions - if ext != 'sst' and ext != 'ssc' and ext != 'toml' + if ext != 'sst' and ext != 'ssc' and ext != 'toml' ] From 6cb3eef3df09a033804d61ab4e1842a71eabbf0c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 24 Nov 2023 12:48:52 +0000 Subject: [PATCH 29/37] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/IO/JSON/JSONIOHandlerImpl.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index 3b2089509a..02c9e0710e 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -392,7 +392,8 @@ JSONIOHandlerImpl::JSONIOHandlerImpl( #if openPMD_HAVE_MPI JSONIOHandlerImpl::JSONIOHandlerImpl( AbstractIOHandler *handler, - MPI_Comm comm,openPMD::json::TracingJSON config, + MPI_Comm comm, + openPMD::json::TracingJSON config, FileFormat format, std::string originalExtension) : AbstractIOHandlerImpl(handler) From f351140688d79d5db5df0b00b1981d90754f02be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 24 Nov 2023 16:16:31 +0100 Subject: [PATCH 30/37] Documentation --- docs/source/backends/json.rst | 36 
+++++----
 docs/source/details/backendconfig.rst |  23 ++++++++++++++---
 docs/source/details/json.json         |  10 ++++++++
 3 files changed, 60 insertions(+), 9 deletions(-)
 create mode 100644 docs/source/details/json.json

diff --git a/docs/source/backends/json.rst b/docs/source/backends/json.rst
index bbae92aaf6..0f0d9510c9 100644
--- a/docs/source/backends/json.rst
+++ b/docs/source/backends/json.rst
@@ -38,20 +38,46 @@ when working with the JSON backend.
 Datasets and groups have the same namespace, meaning that there may not be a subgroup and a dataset with the same name contained in one group.
 
-Any **openPMD dataset** is a JSON object with three keys:
+Datasets
+........
 
- * ``attributes``: Attributes associated with the dataset. May be ``null`` or not present if no attributes are associated with the dataset.
- * ``datatype``: A string describing the type of the stored data.
- * ``data`` A nested array storing the actual data in row-major manner.
+Datasets can be stored in two modes, either as actual datasets or as dataset templates.
+The mode is selected by the :ref:`JSON/TOML parameter` ``json.dataset.mode`` (resp. ``toml.dataset.mode``) with possible values ``["dataset", "template"]`` (default: ``"dataset"``).
+
+Stored as an actual dataset, an **openPMD dataset** is a JSON object with three JSON keys:
+
+ * ``datatype`` (required): A string describing the type of the stored data.
+ * ``data`` (required): A nested array storing the actual data in row-major manner. The data needs to be consistent with the fields ``datatype`` and ``extent``. Checking whether this key points to an array can be (and is internally) used to distinguish groups from datasets.
+ * ``attributes``: Attributes associated with the dataset. May be ``null`` or not present if no attributes are associated with the dataset.
+
+Stored as a **dataset template**, an openPMD dataset is represented by three JSON keys:
+
+* ``datatype`` (required): As above.
+* ``extent`` (required): A list of integers, describing the extent of the dataset.
+* ``attributes``: As above.
 
-**Attributes** are stored as a JSON object with a key for each attribute.
+This mode stores only the dataset metadata.
+Chunk load/store operations are ignored.
+
+Attributes
+..........
+
+In order to avoid name clashes, attributes are generally stored within a separate subgroup ``attributes``.
+
+Attributes can be stored in two formats.
+The format is selected by the :ref:`JSON/TOML parameter` ``json.attribute.mode`` (resp. ``toml.attribute.mode``) with possible values ``["long", "short"]`` (default: ``"long"`` in openPMD 1.*, ``"short"`` in openPMD >= 2.0).
+
+Attributes in **long format** store the datatype explicitly, by representing attributes as JSON objects.
 Every such attribute is itself a JSON object with two keys:
 
  * ``datatype``: A string describing the type of the value.
 * ``value``: The actual value of type ``datatype``.
 
+Attributes in **short format** are stored as just the simple value corresponding with the attribute.
+Since JSON/TOML values are pretty-printed into a human-readable format, byte-level type details can be lost when reading those values again later on (e.g. the distinction between different integer types).
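As an illustration of the two modes described above, a minimal write sketch (file name, mesh name and attribute name are made up for this illustration; the comments paraphrase the resulting on-disk layout):

.. code-block:: cpp

    #include <openPMD/openPMD.hpp>

    int main()
    {
        std::string config = R"(
        {
          "json": {
            "dataset": {"mode": "template"},
            "attribute": {"mode": "short"}
          }
        }
        )";
        openPMD::Series s(
            "../samples/json_modes_illustration.json",
            openPMD::Access::CREATE,
            config);
        auto iteration = s.writeIterations()[0];
        auto rho = iteration.meshes["rho"][openPMD::RecordComponent::SCALAR];
        // Template mode: the dataset is stored as
        //   {"datatype": "FLOAT", "extent": [5, 5], "attributes": {...}},
        // i.e. without a "data" key; chunk store/load calls are ignored.
        rho.resetDataset({openPMD::Datatype::FLOAT, {5, 5}});
        // Short attribute mode: stored as the plain value 1.5;
        // long mode would store {"datatype": "DOUBLE", "value": 1.5}.
        rho.setAttribute("myCustomAttribute", 1.5);
    }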
+
 TOML File Format
 ----------------
diff --git a/docs/source/details/backendconfig.rst b/docs/source/details/backendconfig.rst
index ae2a2d4f63..2bfbdad371 100644
--- a/docs/source/details/backendconfig.rst
+++ b/docs/source/details/backendconfig.rst
@@ -100,6 +100,8 @@ For JSON and ADIOS2, all datasets are resizable, independent of this option.
 Configuration Structure per Backend
 -----------------------------------
 
+Please refer to the respective backends' documentations for further information on their configuration.
+
 .. _backendconfig-adios2:
 
 ADIOS2
 ^^^^^^
@@ -193,8 +195,21 @@ Explanation of the single keys:
 .. _backendconfig-other:
 
-Other backends
-^^^^^^^^^^^^^^
+JSON/TOML
+^^^^^^^^^
 
-Do currently not read the configuration string.
-Please refer to the respective backends' documentations for further information on their configuration.
+A full configuration of the JSON backend:
+
+.. literalinclude:: json.json
+   :language: json
+
+The TOML backend is configured analogously, replacing the ``"json"`` key with ``"toml"``.
+
+All keys found under ``json.dataset`` / ``toml.dataset`` are applicable globally as well as per dataset.
+Explanation of the single keys:
+
+* ``json.dataset.mode`` / ``toml.dataset.mode``: One of ``"dataset"`` (default) or ``"template"``.
+  In "dataset" mode, the dataset will be written as an n-dimensional (recursive) array, padded with nulls (JSON) or zeroes (TOML) for missing values.
+  In "template" mode, only the dataset metadata (type, extent and attributes) are stored and no chunks can be written or read.
+* ``json.attribute.mode`` / ``toml.attribute.mode``: One of ``"long"`` (default in openPMD 1.*) or ``"short"`` (default in openPMD 2.*).
+  The long format explicitly encodes the attribute type in the dataset on disk, the short format only writes the actual attribute as a JSON/TOML value, requiring readers to recover the type.
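As a usage sketch (file and mesh names here are illustrative): the configuration is passed as the third constructor argument of the ``Series``; judging from the dataset-specific option handling shown in the backend diffs above, the same keys can presumably also be given per dataset through the ``options`` string of the ``Dataset`` constructor:

.. code-block:: cpp

    #include <openPMD/openPMD.hpp>

    int main()
    {
        // Global defaults for this Series: template datasets, short attributes.
        openPMD::Series s(
            "../samples/backendconfig_illustration.json",
            openPMD::Access::CREATE,
            R"({"json": {"dataset": {"mode": "template"},
                         "attribute": {"mode": "short"}}})");

        auto iteration = s.writeIterations()[0];
        auto E_x = iteration.meshes["E"]["x"];
        // Assumed per-dataset override back to regular "dataset" mode via the
        // Dataset options string (same keys as in the global configuration).
        E_x.resetDataset(openPMD::Dataset(
            openPMD::Datatype::DOUBLE,
            {10},
            R"({"json": {"dataset": {"mode": "dataset"}}})"));
    }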
diff --git a/docs/source/details/json.json b/docs/source/details/json.json new file mode 100644 index 0000000000..c1491f7245 --- /dev/null +++ b/docs/source/details/json.json @@ -0,0 +1,10 @@ +{ + "json": { + "dataset": { + "mode": "template" + }, + "attribute": { + "mode": "short" + } + } +} From 1cade93df780d8a02136235edfa387e740e4fa5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 9 Oct 2023 16:25:10 +0200 Subject: [PATCH 31/37] Add openPMD 2.0 standard setting --- include/openPMD/Error.hpp | 6 ++++++ include/openPMD/version.hpp | 20 ++++++++++++++++++-- src/Error.cpp | 6 ++++++ src/Series.cpp | 13 ++++++++----- src/binding/python/Error.cpp | 3 +++ src/version.cpp | 10 ++++++++++ test/CoreTest.cpp | 5 ++++- test/SerialIOTest.cpp | 1 + 8 files changed, 56 insertions(+), 8 deletions(-) diff --git a/include/openPMD/Error.hpp b/include/openPMD/Error.hpp index 3e516e16ec..d1762e7e6d 100644 --- a/include/openPMD/Error.hpp +++ b/include/openPMD/Error.hpp @@ -109,6 +109,12 @@ namespace error public: NoSuchAttribute(std::string attributeName); }; + + class IllegalInOpenPMDStandard : public Error + { + public: + IllegalInOpenPMDStandard(std::string what); + }; } // namespace error /** diff --git a/include/openPMD/version.hpp b/include/openPMD/version.hpp index c57e3ecf17..e9748b3939 100644 --- a/include/openPMD/version.hpp +++ b/include/openPMD/version.hpp @@ -37,11 +37,20 @@ * compile-time) * @{ */ -#define OPENPMD_STANDARD_MAJOR 1 -#define OPENPMD_STANDARD_MINOR 1 +#define OPENPMD_STANDARD_MAJOR 2 +#define OPENPMD_STANDARD_MINOR 0 #define OPENPMD_STANDARD_PATCH 0 /** @} */ +/** maximum supported version of the openPMD standard (read & write, + * compile-time) + * @{ + */ +#define OPENPMD_STANDARD_DEFAULT_MAJOR 1 +#define OPENPMD_STANDARD_DEFAULT_MINOR 1 +#define OPENPMD_STANDARD_DEFAULT_PATCH 0 +/** @} */ + /** minimum supported version of the openPMD standard (read, compile-time) * @{ */ @@ -79,6 +88,13 @@ std::string getVersion(); */ std::string getStandard(); +/** Return the default used version of the openPMD standard (read & write, + * run-time) + * + * @return std::string openPMD standard version (dot separated) + */ +std::string getStandardDefault(); + /** Return the minimum supported version of the openPMD standard (read, * run-time) * diff --git a/src/Error.cpp b/src/Error.cpp index f2e27a0213..dbc13f40b0 100644 --- a/src/Error.cpp +++ b/src/Error.cpp @@ -122,6 +122,12 @@ namespace error , description(std::move(description_in)) {} + IllegalInOpenPMDStandard::IllegalInOpenPMDStandard(std::string what_in) + : Error( + "Operation leads to illegal use of the openPMD standard:\n" + + std::move(what_in)) + {} + void throwReadError( AffectedObject affectedObject, Reason reason, diff --git a/src/Series.cpp b/src/Series.cpp index 32dc0b3a40..b2c5fc5667 100644 --- a/src/Series.cpp +++ b/src/Series.cpp @@ -147,9 +147,10 @@ std::string Series::basePath() const Series &Series::setBasePath(std::string const &bp) { std::string version = openPMD(); - if (version == "1.0.0" || version == "1.0.1" || version == "1.1.0") + if (version == "1.0.0" || version == "1.0.1" || version == "1.1.0" || + version == "2.0.0") throw std::runtime_error( - "Custom basePath not allowed in openPMD <=1.1.0"); + "Custom basePath not allowed in openPMD <=2.0"); setAttribute("basePath", bp); return *this; @@ -684,7 +685,7 @@ void Series::initDefaults(IterationEncoding ie, bool initAll) } } if (!containsAttribute("openPMD")) - setOpenPMD(getStandard()); + setOpenPMD(getStandardDefault()); /* * In 
Append mode, only init the rest of the defaults after checking that * the file does not yet exist to avoid overriding more than needed. @@ -1268,7 +1269,8 @@ void Series::readOneIterationFileBased(std::string const &filePath) Parameter pOpen; std::string version = openPMD(); - if (version == "1.0.0" || version == "1.0.1" || version == "1.1.0") + if (version == "1.0.0" || version == "1.0.1" || version == "1.1.0" || + version == "2.0.0") pOpen.path = auxiliary::replace_first(basePath(), "/%T/", ""); else throw error::ReadError( @@ -1420,7 +1422,8 @@ creating new iterations. Parameter pOpen; std::string version = openPMD(); - if (version == "1.0.0" || version == "1.0.1" || version == "1.1.0") + if (version == "1.0.0" || version == "1.0.1" || version == "1.1.0" || + version == "2.0.0") pOpen.path = auxiliary::replace_first(basePath(), "/%T/", ""); else throw error::ReadError( diff --git a/src/binding/python/Error.cpp b/src/binding/python/Error.cpp index 681398c579..27d9c7d9b4 100644 --- a/src/binding/python/Error.cpp +++ b/src/binding/python/Error.cpp @@ -9,6 +9,7 @@ #include "openPMD/Error.hpp" #include "openPMD/binding/python/Common.hpp" +#include void init_Error(py::module &m) { @@ -22,6 +23,8 @@ void init_Error(py::module &m) py::register_exception(m, "ErrorInternal", baseError); py::register_exception( m, "ErrorNoSuchAttribute", baseError); + py::register_exception( + m, "ErrorIllegalInOpenPMDStandard", baseError); #ifndef NDEBUG m.def("test_throw", [](std::string description) { diff --git a/src/version.cpp b/src/version.cpp index c2e8809a32..5feeccba13 100644 --- a/src/version.cpp +++ b/src/version.cpp @@ -41,6 +41,16 @@ std::string openPMD::getStandard() return standard.str(); } +std::string openPMD::getStandardDefault() +{ + std::stringstream standard; + standard << OPENPMD_STANDARD_DEFAULT_MAJOR << "." + << OPENPMD_STANDARD_DEFAULT_MINOR << "." + << OPENPMD_STANDARD_DEFAULT_PATCH; + std::string const standardstr = standard.str(); + return standardstr; +} + std::string openPMD::getStandardMinimum() { std::stringstream standardMin; diff --git a/test/CoreTest.cpp b/test/CoreTest.cpp index 084d118578..ac06aaf4c7 100644 --- a/test/CoreTest.cpp +++ b/test/CoreTest.cpp @@ -35,8 +35,11 @@ TEST_CASE("versions_test", "[core]") auto const is_dot = [](char const c) { return c == '.'; }; REQUIRE(2u == std::count_if(apiVersion.begin(), apiVersion.end(), is_dot)); + auto const standardDefault = getStandardDefault(); + REQUIRE(standardDefault == "1.1.0"); + auto const standard = getStandard(); - REQUIRE(standard == "1.1.0"); + REQUIRE(standard == "2.0.0"); auto const standardMin = getStandardMinimum(); REQUIRE(standardMin == "1.0.0"); diff --git a/test/SerialIOTest.cpp b/test/SerialIOTest.cpp index 3d1970237f..207ac209d6 100644 --- a/test/SerialIOTest.cpp +++ b/test/SerialIOTest.cpp @@ -905,6 +905,7 @@ inline void constant_scalar(std::string const &file_ending) // constant scalar Series s = Series("../samples/constant_scalar." + file_ending, Access::CREATE); + s.setOpenPMD("2.0.0"); auto rho = s.iterations[1].meshes["rho"][MeshRecordComponent::SCALAR]; REQUIRE(s.iterations[1].meshes["rho"].scalar()); rho.resetDataset(Dataset(Datatype::CHAR, {1, 2, 3})); From 047e571c6548751e93cccbf43579311655d99390 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 24 Nov 2023 17:47:57 +0100 Subject: [PATCH 32/37] Short mode in default in openPMD >= 2. 
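In practice (sketch; the file name is illustrative, and the new default only applies where json.attribute.mode / toml.attribute.mode is not set explicitly):

    openPMD::Series s("../samples/attrs.json", openPMD::Access::CREATE);
    // Declare the openPMD 2.0 standard before the file is created:
    s.setOpenPMD("2.0.0");
    s.setAttribute("info", std::string("written as a plain JSON value"));
    // With the current default of openPMD 1.1.0, the same attribute is still
    // written in the long {"datatype": ..., "value": ...} form.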
--- include/openPMD/IO/IOTask.hpp | 1 + include/openPMD/RecordComponent.tpp | 51 ++++++++++++++++++++++++++--- src/IO/JSON/JSONIOHandlerImpl.cpp | 7 ++++ src/Iteration.cpp | 1 + src/Series.cpp | 1 + 5 files changed, 57 insertions(+), 4 deletions(-) diff --git a/include/openPMD/IO/IOTask.hpp b/include/openPMD/IO/IOTask.hpp index d2fc05f379..51dfac4268 100644 --- a/include/openPMD/IO/IOTask.hpp +++ b/include/openPMD/IO/IOTask.hpp @@ -125,6 +125,7 @@ struct OPENPMDAPI_EXPORT Parameter } std::string name = ""; + std::string openPMDversion; // @todo: Maybe move this to AbstractIOHandler }; template <> diff --git a/include/openPMD/RecordComponent.tpp b/include/openPMD/RecordComponent.tpp index e8ba6006ab..7a1f466561 100644 --- a/include/openPMD/RecordComponent.tpp +++ b/include/openPMD/RecordComponent.tpp @@ -21,6 +21,8 @@ #pragma once +#include "openPMD/Datatype.hpp" +#include "openPMD/Error.hpp" #include "openPMD/RecordComponent.hpp" #include "openPMD/Span.hpp" #include "openPMD/auxiliary/Memory.hpp" @@ -93,12 +95,38 @@ inline std::shared_ptr RecordComponent::loadChunk(Offset o, Extent e) #endif } +namespace detail +{ + template + struct do_convert + { + template + static std::optional call(Attribute &attr) + { + if constexpr (std::is_convertible_v) + { + return std::make_optional(attr.get()); + } + else + { + return std::nullopt; + } + } + + static constexpr char const *errorMsg = "is_conversible"; + }; +} // namespace detail + template inline void RecordComponent::loadChunk(std::shared_ptr data, Offset o, Extent e) { Datatype dtype = determineDatatype(data); - if (dtype != getDatatype()) + /* + * For constant components, we implement type conversion, so there is + * a separate check further below. + */ + if (dtype != getDatatype() && !constant()) if (!isSameInteger(getDatatype()) && !isSameFloatingPoint(getDatatype()) && !isSameComplexFloatingPoint(getDatatype()) && @@ -160,10 +188,25 @@ RecordComponent::loadChunk(std::shared_ptr data, Offset o, Extent e) for (auto const &dimensionSize : extent) numPoints *= dimensionSize; - T value = rc.m_constantValue.get(); + std::optional val = + switchNonVectorType>( + /* from = */ getDatatype(), rc.m_constantValue); - T *raw_ptr = data.get(); - std::fill(raw_ptr, raw_ptr + numPoints, value); + if (val.has_value()) + { + T *raw_ptr = data.get(); + std::fill(raw_ptr, raw_ptr + numPoints, *val); + } + else + { + std::string const data_type_str = datatypeToString(getDatatype()); + std::string const requ_type_str = + datatypeToString(determineDatatype()); + std::string err_msg = + "Type conversion during chunk loading not possible! "; + err_msg += "Data: " + data_type_str + "; Load as: " + requ_type_str; + throw error::WrongAPIUsage(err_msg); + } } else { diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index 02c9e0710e..c1fd9095a2 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -455,6 +455,13 @@ void JSONIOHandlerImpl::createFile( access::write(m_handler->m_backendAccess), "[JSON] Creating a file in read-only mode is not possible."); + if (m_attributeModeSpecificationVia == SpecificationVia::DefaultValue) + { + m_attributeMode = parameters.openPMDversion >= "2." + ? 
AttributeMode::Short + : AttributeMode::Long; + } + if (!writable->written) { std::string name = parameters.name + m_originalExtension; diff --git a/src/Iteration.cpp b/src/Iteration.cpp index 593e38066f..7bb87ff8f3 100644 --- a/src/Iteration.cpp +++ b/src/Iteration.cpp @@ -206,6 +206,7 @@ void Iteration::flushFileBased( /* create file */ Parameter fCreate; fCreate.name = filename; + fCreate.openPMDversion = s.openPMD(); IOHandler()->enqueue(IOTask(&s.writable(), fCreate)); /* create basePath */ diff --git a/src/Series.cpp b/src/Series.cpp index b2c5fc5667..496ae20be4 100644 --- a/src/Series.cpp +++ b/src/Series.cpp @@ -925,6 +925,7 @@ void Series::flushGorVBased( } Parameter fCreate; fCreate.name = series.m_name; + fCreate.openPMDversion = openPMD(); IOHandler()->enqueue(IOTask(this, fCreate)); } From fc2d2a947837b2b139c961974d6928377a8e2b2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 4 Jul 2022 16:28:49 +0200 Subject: [PATCH 33/37] Initialize Series attributes and datasets from template --- CMakeLists.txt | 1 + include/openPMD/Iteration.hpp | 12 ++ include/openPMD/auxiliary/TemplateFile.hpp | 10 ++ src/Iteration.cpp | 54 ++++++- src/auxiliary/TemplateFile.cpp | 173 +++++++++++++++++++++ test/SerialIOTest.cpp | 7 + 6 files changed, 255 insertions(+), 2 deletions(-) create mode 100644 include/openPMD/auxiliary/TemplateFile.hpp create mode 100644 src/auxiliary/TemplateFile.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 0fbfc601f5..b9bcd1c54a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -460,6 +460,7 @@ set(CORE_SOURCE src/auxiliary/Date.cpp src/auxiliary/Filesystem.cpp src/auxiliary/JSON.cpp + src/auxiliary/TemplateFile.cpp src/backend/Attributable.cpp src/backend/BaseRecordComponent.cpp src/backend/MeshRecordComponent.cpp diff --git a/include/openPMD/Iteration.hpp b/include/openPMD/Iteration.hpp index 3ede7a6a37..9ef6de7d48 100644 --- a/include/openPMD/Iteration.hpp +++ b/include/openPMD/Iteration.hpp @@ -115,6 +115,15 @@ namespace internal * alone. */ std::optional m_overrideFilebasedFilename{}; + + enum TernaryBool + { + Undefined, + True, + False + }; + TernaryBool hasMeshes = TernaryBool::Undefined; + TernaryBool hasParticles = TernaryBool::Undefined; }; } // namespace internal /** @brief Logical compilation of data from one snapshot (e.g. a single @@ -239,6 +248,9 @@ class Iteration : public Attributable Container meshes{}; Container particles{}; // particleSpecies? + bool hasMeshes() const; + bool hasParticles() const; + virtual ~Iteration() = default; private: diff --git a/include/openPMD/auxiliary/TemplateFile.hpp b/include/openPMD/auxiliary/TemplateFile.hpp new file mode 100644 index 0000000000..95ea7e9cf3 --- /dev/null +++ b/include/openPMD/auxiliary/TemplateFile.hpp @@ -0,0 +1,10 @@ +#pragma once + +#include "openPMD/Series.hpp" + +namespace openPMD::auxiliary +{ +// @todo replace uint64_t with proper type after merging #1285 +Series &initializeFromTemplate( + Series &initializeMe, Series const &fromTemplate, uint64_t iteration); +} // namespace openPMD::auxiliary diff --git a/src/Iteration.cpp b/src/Iteration.cpp index 7bb87ff8f3..6b3118e576 100644 --- a/src/Iteration.cpp +++ b/src/Iteration.cpp @@ -36,6 +36,50 @@ namespace openPMD using internal::CloseStatus; using internal::DeferredParseAccess; +bool Iteration::hasMeshes() const +{ + /* + * Currently defined at the Series level, but might be defined at the + * Iteration level in next standard iterations. + * Hence an Iteration:: method. 
+ */ + + switch (get().hasMeshes) + { + case internal::IterationData::TernaryBool::True: + return true; + case internal::IterationData::TernaryBool::False: + return false; + case internal::IterationData::TernaryBool::Undefined: { + Series s = retrieveSeries(); + return !meshes.empty() || s.containsAttribute("meshesPath"); + }; + } + throw std::runtime_error("Unreachable!"); +} + +bool Iteration::hasParticles() const +{ + /* + * Currently defined at the Series level, but might be defined at the + * Iteration level in next standard iterations. + * Hence an Iteration:: method. + */ + + switch (get().hasParticles) + { + case internal::IterationData::TernaryBool::True: + return true; + case internal::IterationData::TernaryBool::False: + return false; + case internal::IterationData::TernaryBool::Undefined: { + Series s = retrieveSeries(); + return !particles.empty() || s.containsAttribute("particlesPath"); + }; + } + throw std::runtime_error("Unreachable!"); +} + Iteration::Iteration() : Attributable(NoInit()) { setData(std::make_shared()); @@ -319,7 +363,7 @@ void Iteration::flush(internal::FlushParams const &flushParams) * meshesPath and particlesPath are stored there */ Series s = retrieveSeries(); - if (!meshes.empty() || s.containsAttribute("meshesPath")) + if (hasMeshes()) { if (!s.containsAttribute("meshesPath")) { @@ -335,7 +379,7 @@ void Iteration::flush(internal::FlushParams const &flushParams) meshes.dirty() = false; } - if (!particles.empty() || s.containsAttribute("particlesPath")) + if (hasParticles()) { if (!s.containsAttribute("particlesPath")) { @@ -497,6 +541,12 @@ void Iteration::read_impl(std::string const &groupPath) hasMeshes = s.containsAttribute("meshesPath"); hasParticles = s.containsAttribute("particlesPath"); } + { + using TB = internal::IterationData::TernaryBool; + auto &data = get(); + data.hasMeshes = hasMeshes ? TB::True : TB::False; + data.hasParticles = hasParticles ? TB::True : TB::False; + } if (hasMeshes) { diff --git a/src/auxiliary/TemplateFile.cpp b/src/auxiliary/TemplateFile.cpp new file mode 100644 index 0000000000..8eab0727dc --- /dev/null +++ b/src/auxiliary/TemplateFile.cpp @@ -0,0 +1,173 @@ +#include "openPMD/auxiliary/TemplateFile.hpp" +#include "openPMD/DatatypeHelpers.hpp" + +#include + +namespace openPMD::auxiliary +{ +namespace +{ + // Some forward declarations + template + void initializeFromTemplate( + Container &initializeMe, Container const &fromTemplate); + + struct SetAttribute + { + template + static void + call(Attributable &object, std::string const &name, Attribute attr) + { + object.setAttribute(name, attr.get()); + } + + template + static void call(Attributable &, std::string const &name, Attribute) + { + std::cerr << "Unknown datatype for template attribute '" << name + << "'. Will skip it." << std::endl; + } + }; + + void copyAttributes( + Attributable &target, + Attributable const &source, + std::vector ignore = {}) + { + auto shouldBeIgnored = [&ignore](std::string const &attrName) { + // `ignore` is empty by default and normally has only a handful of + // entries otherwise. + // So just use linear search. 
+ for (auto const &ignored : ignore) + { + if (attrName == ignored) + { + return true; + } + } + return false; + }; + + for (auto const &attrName : source.attributes()) + { + if (shouldBeIgnored(attrName)) + { + continue; + } + auto attr = source.getAttribute(attrName); + auto dtype = attr.dtype; + switchType(dtype, target, attrName, std::move(attr)); + } + } + + void initializeFromTemplate( + BaseRecordComponent &initializeMe, + BaseRecordComponent const &fromTemplate) + { + copyAttributes(initializeMe, fromTemplate); + } + + void initializeFromTemplate( + RecordComponent &initializeMe, RecordComponent const &fromTemplate) + { + if (fromTemplate.getDatatype() != Datatype::UNDEFINED) + { + initializeMe.resetDataset( + Dataset{fromTemplate.getDatatype(), fromTemplate.getExtent()}); + } + initializeFromTemplate( + static_cast(initializeMe), + static_cast(fromTemplate)); + } + + void initializeFromTemplate( + PatchRecordComponent &initializeMe, + PatchRecordComponent const &fromTemplate) + { + if (fromTemplate.getDatatype() != Datatype::UNDEFINED) + { + initializeMe.resetDataset( + Dataset{fromTemplate.getDatatype(), fromTemplate.getExtent()}); + } + initializeFromTemplate( + static_cast(initializeMe), + static_cast(fromTemplate)); + } + + void initializeFromTemplate( + ParticleSpecies &initializeMe, ParticleSpecies const &fromTemplate) + { + if (!fromTemplate.particlePatches.empty()) + { + initializeFromTemplate( + static_cast &>( + initializeMe.particlePatches), + static_cast const &>( + fromTemplate.particlePatches)); + } + initializeFromTemplate( + static_cast &>(initializeMe), + static_cast const &>(fromTemplate)); + } + + template + void initializeFromTemplate( + Container &initializeMe, Container const &fromTemplate) + { + copyAttributes(initializeMe, fromTemplate); + for (auto const &pair : fromTemplate) + { + initializeFromTemplate(initializeMe[pair.first], pair.second); + } + } + + void initializeFromTemplate( + Iteration &initializeMe, Iteration const &fromTemplate) + { + copyAttributes(initializeMe, fromTemplate, {"snapshot"}); + if (fromTemplate.hasMeshes()) + { + initializeFromTemplate(initializeMe.meshes, fromTemplate.meshes); + } + if (fromTemplate.hasParticles()) + { + initializeFromTemplate( + initializeMe.particles, fromTemplate.particles); + } + } +} // namespace + +Series &initializeFromTemplate( + Series &initializeMe, Series const &fromTemplate, uint64_t iteration) +{ + if (!initializeMe.containsAttribute("from_template")) + { + copyAttributes( + initializeMe, + fromTemplate, + {"basePath", "iterationEncoding", "iterationFormat", "openPMD"}); + initializeMe.setAttribute("from_template", fromTemplate.name()); + } + + uint64_t sourceIteration = iteration; + if (!fromTemplate.iterations.contains(sourceIteration)) + { + if (fromTemplate.iterations.empty()) + { + std::cerr << "Template file has no iterations, will only fill in " + "global attributes." 
+ << std::endl; + return initializeMe; + } + else + { + sourceIteration = fromTemplate.iterations.begin()->first; + } + } + + initializeFromTemplate( + initializeMe.iterations[iteration], + fromTemplate.iterations.at(sourceIteration)); + return initializeMe; +} +} // namespace openPMD::auxiliary diff --git a/test/SerialIOTest.cpp b/test/SerialIOTest.cpp index 207ac209d6..784bc7a3f5 100644 --- a/test/SerialIOTest.cpp +++ b/test/SerialIOTest.cpp @@ -10,6 +10,7 @@ #include "openPMD/auxiliary/Environment.hpp" #include "openPMD/auxiliary/Filesystem.hpp" #include "openPMD/auxiliary/StringManip.hpp" +#include "openPMD/auxiliary/TemplateFile.hpp" #include "openPMD/openPMD.hpp" #include @@ -1479,6 +1480,12 @@ inline void dtype_test( if (activateTemplateMode.has_value()) { + Series out( + "../samples/dtype_test_from_template." + backend, + Access::CREATE, + activateTemplateMode.value()); + auxiliary::initializeFromTemplate(out, s, 1000); + out.flush(); return; } // same implementation types (not necessary aliases) detection From 45d0e6a7bdaf0160267559e67420adf06797627b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 10 Mar 2023 15:43:53 +0100 Subject: [PATCH 34/37] Further testing (@todo: cleanup) --- test/ParallelIOTest.cpp | 2 +- test/SerialIOTest.cpp | 42 ++++++++++++++++++++++++++++++++--------- 2 files changed, 34 insertions(+), 10 deletions(-) diff --git a/test/ParallelIOTest.cpp b/test/ParallelIOTest.cpp index a82a300b0e..6c4819a2d1 100644 --- a/test/ParallelIOTest.cpp +++ b/test/ParallelIOTest.cpp @@ -55,7 +55,7 @@ std::vector testedFileExtensions() // sst and ssc need a receiver for testing // bp4 is already tested via bp return ext == "sst" || ext == "ssc" || ext == "bp4" || - ext == "toml" || ext == "json"; + ext == "json" || ext == "toml"; }); return {allExtensions.begin(), newEnd}; } diff --git a/test/SerialIOTest.cpp b/test/SerialIOTest.cpp index 784bc7a3f5..81dd6f3303 100644 --- a/test/SerialIOTest.cpp +++ b/test/SerialIOTest.cpp @@ -2929,21 +2929,21 @@ TEST_CASE("git_hdf5_legacy_picongpu", "[serial][hdf5]") TEST_CASE("git_hdf5_sample_attribute_test", "[serial][hdf5]") { - try - { - Series o = Series("../samples/git-sample/data%T.h5", Access::READ_ONLY); - + auto verifySeries = [](Series o, bool this_is_the_original_file) { REQUIRE(o.openPMD() == "1.1.0"); REQUIRE(o.openPMDextension() == 1); REQUIRE(o.basePath() == "/data/%T/"); REQUIRE(o.meshesPath() == "fields/"); REQUIRE(o.particlesPath() == "particles/"); - REQUIRE(o.iterationEncoding() == IterationEncoding::fileBased); - REQUIRE(o.iterationFormat() == "data%T.h5"); - REQUIRE(o.name() == "data%T"); + if (this_is_the_original_file) + { + REQUIRE(o.iterationEncoding() == IterationEncoding::fileBased); + REQUIRE(o.iterationFormat() == "data%T.h5"); + REQUIRE(o.name() == "data%T"); - REQUIRE(o.iterations.size() == 5); - REQUIRE(o.iterations.count(100) == 1); + REQUIRE(o.iterations.size() == 5); + REQUIRE(o.iterations.count(100) == 1); + } Iteration &iteration_100 = o.iterations[100]; REQUIRE(iteration_100.time() == 3.2847121452090077e-14); @@ -3173,6 +3173,30 @@ TEST_CASE("git_hdf5_sample_attribute_test", "[serial][hdf5]") REQUIRE(weighting_scalar.getDatatype() == Datatype::DOUBLE); REQUIRE(weighting_scalar.getDimensionality() == 1); REQUIRE(weighting_scalar.getExtent() == e); + }; + + try + { + { + Series o = + Series("../samples/git-sample/data%T.h5", Access::READ_ONLY); + verifySeries(o, true); + + Series fromTemplate( + "../samples/initialized_from_git_sample.json", + Access::CREATE, + R"(json.mode = 
"template")"); + auxiliary::initializeFromTemplate(fromTemplate, o, 100); + fromTemplate.flush(); + } + + { + Series o( + "../samples/initialized_from_git_sample.json", + Access::READ_ONLY, + R"(json.mode = "template")"); + verifySeries(o, false); + } } catch (error::ReadError &e) { From 65dd21a56b870164ebc2b3a798851f3f6764d5a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 19 May 2022 17:29:24 +0200 Subject: [PATCH 35/37] Extend example --- examples/14_toml_template.cpp | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/examples/14_toml_template.cpp b/examples/14_toml_template.cpp index 284db9ee84..366323ee21 100644 --- a/examples/14_toml_template.cpp +++ b/examples/14_toml_template.cpp @@ -1,3 +1,4 @@ +#include #include std::string backendEnding() @@ -97,7 +98,23 @@ void read() "../samples/tomlTemplate." + backendEnding(), openPMD::Access::READ_LINEAR); read.readIterations(); // @todo change to read.parseBase() - openPMD::helper::listSeries(read); + + std::string jsonConfig = R"( +{ + "iteration_encoding": "variable_based", + "json": { + "mode": "template" + } +} +)"; + openPMD::Series cloned( + "../samples/jsonTemplate.json", openPMD::Access::CREATE, jsonConfig); + openPMD::auxiliary::initializeFromTemplate(cloned, read, 0); + // Have to define the dataset for E/z as it is not defined in the template + // @todo check that the dataset is defined only upon destruction, not at + // flushing already + cloned.writeIterations()[0].meshes["E"]["z"].resetDataset( + {openPMD::Datatype::INT}); } int main() From 510f0114be51dbdf59f9feba0934915a4538672d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 20 Mar 2023 12:19:54 +0100 Subject: [PATCH 36/37] Only opt-into tests for TOML Has the same implementation as JSON anyway, and it makes tests run into timeouts otherwise. 
--- examples/14_toml_template.cpp | 3 ++- test/SerialIOTest.cpp | 10 +++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/examples/14_toml_template.cpp b/examples/14_toml_template.cpp index 366323ee21..1d3d2f9f8d 100644 --- a/examples/14_toml_template.cpp +++ b/examples/14_toml_template.cpp @@ -103,7 +103,8 @@ void read() { "iteration_encoding": "variable_based", "json": { - "mode": "template" + "dataset": {"mode": "template"}, + "attribute": {"mode": "short"} } } )"; diff --git a/test/SerialIOTest.cpp b/test/SerialIOTest.cpp index 81dd6f3303..7c03c76f2e 100644 --- a/test/SerialIOTest.cpp +++ b/test/SerialIOTest.cpp @@ -1558,7 +1558,15 @@ TEST_CASE("dtype_test", "[serial]") { for (auto const &t : testedFileExtensions()) { - dtype_test(t); + if (t == "json") + { + dtype_test(t); + dtype_test(t, R"(json.mode = "template")"); + } + else + { + dtype_test(t); + } } dtype_test("json", R"( { From 367798c1a96b80734eebbe1edc212f1373f7cc2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 22 Dec 2023 19:17:51 +0100 Subject: [PATCH 37/37] Adapt this to changed SCALAR API --- src/auxiliary/TemplateFile.cpp | 38 ++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/src/auxiliary/TemplateFile.cpp b/src/auxiliary/TemplateFile.cpp index 8eab0727dc..59d71ec613 100644 --- a/src/auxiliary/TemplateFile.cpp +++ b/src/auxiliary/TemplateFile.cpp @@ -34,6 +34,25 @@ namespace Attributable const &source, std::vector ignore = {}) { +#if 0 // leave this in for potential future debugging + std::cout << "COPYING ATTRIBUTES FROM '" << [&source]() -> std::string { + auto vec = source.myPath().group; + if (vec.empty()) + { + return "[]"; + } + std::stringstream sstream; + auto it = vec.begin(); + sstream << "[" << *it++; + for (; it != vec.end(); ++it) + { + sstream << ", " << *it; + } + sstream << "]"; + return sstream.str(); + }() << "'" + << std::endl; +#endif auto shouldBeIgnored = [&ignore](std::string const &attrName) { // `ignore` is empty by default and normally has only a handful of // entries otherwise. @@ -94,6 +113,25 @@ namespace static_cast(fromTemplate)); } + template + void initializeFromTemplate( + BaseRecord &initializeMe, BaseRecord const &fromTemplate) + { + if (fromTemplate.scalar()) + { + initializeMe[RecordComponent::SCALAR]; + initializeFromTemplate( + static_cast(initializeMe), + static_cast(fromTemplate)); + } + else + { + initializeFromTemplate( + static_cast &>(initializeMe), + static_cast const &>(fromTemplate)); + } + } + void initializeFromTemplate( ParticleSpecies &initializeMe, ParticleSpecies const &fromTemplate) {