diff --git a/changelog/current.md b/changelog/current.md index e2265599..03976c06 100644 --- a/changelog/current.md +++ b/changelog/current.md @@ -8,9 +8,9 @@ Most of the changes are from the giant Parser refactor described below. Before g ### Parser refactor -The parser was completely refactored ([#PR414](https://github.com/biojppm/rapidyaml/pull/414)). This was a large and hard job carried out over several months, and the result is: +The parser was completely refactored ([#PR414](https://github.com/biojppm/rapidyaml/pull/414)). This was a large and hard job carried out over several months, but it brings important improvements. -- A new event-based parser engine is now in place, enabling the improvements described below. This engine uses a templated event handler, where each event is a function call, which spares branches on the event handler. The parsing code was fully rewritten, and is now much more simple (albeit longer), and much easier to work with and fix. +- The new parser is an event-based parser, based on an event dispatcher engine. This engine is templated on event handler, where each event is a function call, which spares branches on the event handler. The parsing code was fully rewritten, and is now much more simple (albeit longer), and much easier to work with and fix. - YAML standard-conformance was improved significantly. Along with many smaller fixes and additions, (too many to list here), the main changes are the following: - The parser engine can now successfully parse container keys, emitting all the events in the correct , **but** as before, the ryml tree cannot accomodate these (and this constraint is no longer enforced by the parser, but instead by `EventHandlerTree`). For an example of a handler which can accomodate key containers, see the one which is used for the test suite at `test/test_suite/test_suite_event_handler.hpp` - Anchor keys can now be terminated with colon (eg, `&anchor: key: val`), as dictated by the standard. diff --git a/src/c4/yml/event_handler_stack.hpp b/src/c4/yml/event_handler_stack.hpp index 34b0e4ea..4bb78077 100644 --- a/src/c4/yml/event_handler_stack.hpp +++ b/src/c4/yml/event_handler_stack.hpp @@ -89,6 +89,28 @@ struct EventHandlerStack #endif } + substr _stack_relocate_to_new_arena(csubstr s, csubstr prev, substr curr) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, prev.is_super(s)); + auto pos = s.str - prev.str; + substr out = {curr.str + pos, s.len}; + _RYML_CB_ASSERT(m_stack.m_callbacks, curr.is_super(out)); + return out; + } + + void _stack_relocate_to_new_arena(csubstr prev, substr curr) + { + for(state &st : m_stack) + { + if(st.line_contents.rem.is_sub(prev)) + st.line_contents.rem = _stack_relocate_to_new_arena(st.line_contents.rem, prev, curr); + if(st.line_contents.full.is_sub(prev)) + st.line_contents.full = _stack_relocate_to_new_arena(st.line_contents.full, prev, curr); + if(st.line_contents.stripped.is_sub(prev)) + st.line_contents.stripped = _stack_relocate_to_new_arena(st.line_contents.stripped, prev, curr); + } + } + protected: // undefined at the end diff --git a/src/c4/yml/event_handler_tree.hpp b/src/c4/yml/event_handler_tree.hpp index 5852e2c6..f9bf597f 100644 --- a/src/c4/yml/event_handler_tree.hpp +++ b/src/c4/yml/event_handler_tree.hpp @@ -229,6 +229,7 @@ struct EventHandlerTree : public EventHandlerStacknode_id); + _RYML_CB_CHECK(m_stack.m_callbacks, !_has_any_(VAL)); _enable_(MAP|FLOW_SL); _save_loc(); _push(); @@ -236,6 +237,7 @@ struct EventHandlerTree : public EventHandlerStacknode_id); + _RYML_CB_CHECK(m_stack.m_callbacks, !_has_any_(VAL)); _enable_(MAP|BLOCK); _save_loc(); _push(); @@ -266,6 +268,7 @@ struct EventHandlerTree : public EventHandlerStacknode_id); + _RYML_CB_CHECK(m_stack.m_callbacks, !_has_any_(VAL)); _enable_(SEQ|FLOW_SL); _save_loc(); _push(); @@ -273,6 +276,7 @@ struct EventHandlerTree : public EventHandlerStacknode_id); + _RYML_CB_CHECK(m_stack.m_callbacks, !_has_any_(VAL)); _enable_(SEQ|BLOCK); _save_loc(); _push(); @@ -515,7 +519,24 @@ struct EventHandlerTree : public EventHandlerStackalloc_arena(len); + csubstr prev = m_tree->arena(); + substr out = m_tree->alloc_arena(len); + substr curr = m_tree->arena(); + if(curr.str != prev.str) + _stack_relocate_to_new_arena(prev, curr); + return out; + } + + substr alloc_arena(size_t len, substr *relocated) + { + csubstr prev = m_tree->arena(); + if(!prev.is_super(*relocated)) + return alloc_arena(len); + substr out = alloc_arena(len); + substr curr = m_tree->arena(); + if(curr.str != prev.str) + *relocated = _stack_relocate_to_new_arena(*relocated, prev, curr); + return out; } /** @} */ @@ -675,6 +696,7 @@ struct EventHandlerTree : public EventHandlerStack_p(m_curr->node_id)->m_val.scalar.len == 0); m_tree->_p(m_curr->node_id)->m_val.scalar.str = m_curr->line_contents.rem.str; } diff --git a/src/c4/yml/parse_engine.def.hpp b/src/c4/yml/parse_engine.def.hpp index 94729f9b..c5d912c4 100644 --- a/src/c4/yml/parse_engine.def.hpp +++ b/src/c4/yml/parse_engine.def.hpp @@ -3341,7 +3341,7 @@ csubstr ParseEngine::_filter_scalar_dquot(substr s) { const size_t len = r.required_len(); _c4dbgpf("filtering dquo scalar: not enough space: needs {}, have {}", len, s.len); - substr dst = m_evt_handler->alloc_arena(len); + substr dst = m_evt_handler->alloc_arena(len, &s); _c4dbgpf("filtering dquo scalar: dst.len={}", dst.len); _RYML_CB_ASSERT(this->callbacks(), dst.len == len); FilterResult rsd = this->filter_scalar_dquoted(s, dst); @@ -3368,7 +3368,7 @@ csubstr ParseEngine::_filter_scalar_literal(substr s, size_t inden else { _c4dbgpf("filtering block literal scalar: not enough space: needs {}, have {}", r.required_len(), s.len); - substr dst = m_evt_handler->alloc_arena(r.required_len()); + substr dst = m_evt_handler->alloc_arena(r.required_len(), &s); FilterResult rsd = this->filter_scalar_block_literal(s, dst, indentation, chomp); _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, rsd.valid()); _c4dbgpf("filtering block literal scalar: success! s=[{}]~~~{}~~~", rsd.get().len, rsd.get()); @@ -3391,7 +3391,7 @@ csubstr ParseEngine::_filter_scalar_folded(substr s, size_t indent else { _c4dbgpf("filtering block folded scalar: not enough space: needs {}, have {}", r.required_len(), s.len); - substr dst = m_evt_handler->alloc_arena(r.required_len()); + substr dst = m_evt_handler->alloc_arena(r.required_len(), &s); FilterResult rsd = this->filter_scalar_block_folded(s, dst, indentation, chomp); _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, rsd.valid()); _c4dbgpf("filtering block folded scalar: success! s=[{}]~~~{}~~~", rsd.get().len, rsd.get()); @@ -4727,9 +4727,16 @@ void ParseEngine::_handle_seq_imap() _c4dbgt("seqimap: go again", 0); if(_finished_line()) { - _line_ended(); - _scan_line(); - _c4dbgnextline(); + if(C4_LIKELY(!_finished_file())) + { + _line_ended(); + _scan_line(); + _c4dbgnextline(); + } + else + { + _c4err("parse error"); + } } goto seqimap_start; diff --git a/src/c4/yml/tree.cpp b/src/c4/yml/tree.cpp index 8ccc06c6..da78c8b0 100644 --- a/src/c4/yml/tree.cpp +++ b/src/c4/yml/tree.cpp @@ -216,7 +216,8 @@ void Tree::_relocate(substr next_arena) { _RYML_CB_ASSERT(m_callbacks, next_arena.not_empty()); _RYML_CB_ASSERT(m_callbacks, next_arena.len >= m_arena.len); - memcpy(next_arena.str, m_arena.str, m_arena_pos); + if(m_arena_pos) + memcpy(next_arena.str, m_arena.str, m_arena_pos); for(NodeData *C4_RESTRICT n = m_buf, *e = m_buf + m_cap; n != e; ++n) { if(in_arena(n->m_key.scalar)) diff --git a/test/test_suite/test_suite_event_handler.hpp b/test/test_suite/test_suite_event_handler.hpp index 9addf034..c830e1d6 100644 --- a/test/test_suite/test_suite_event_handler.hpp +++ b/test/test_suite/test_suite_event_handler.hpp @@ -90,9 +90,9 @@ struct EventHandlerYamlStd : public EventHandlerStacklevel, tmp); + _disable_(_VALMASK|VAL_STYLE); // create the map. // this will push a new level, and tmp is one further begin_map_val_flow(); @@ -557,9 +567,26 @@ struct EventHandlerYamlStd : public EventHandlerStack