Skip to content

Commit

Permalink
post #414: ensure no read-after-free when filtering into the arena
Browse files Browse the repository at this point in the history
  • Loading branch information
biojppm committed May 18, 2024
1 parent 4c37688 commit 1ccfbc0
Show file tree
Hide file tree
Showing 6 changed files with 95 additions and 16 deletions.
4 changes: 2 additions & 2 deletions changelog/current.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ Most of the changes are from the giant Parser refactor described below. Before g

### Parser refactor

The parser was completely refactored ([#PR414](https://github.com/biojppm/rapidyaml/pull/414)). This was a large and hard job carried out over several months, and the result is:
The parser was completely refactored ([#PR414](https://github.com/biojppm/rapidyaml/pull/414)). This was a large and hard job carried out over several months, but it brings important improvements.

- A new event-based parser engine is now in place, enabling the improvements described below. This engine uses a templated event handler, where each event is a function call, which spares branches on the event handler. The parsing code was fully rewritten, and is now much more simple (albeit longer), and much easier to work with and fix.
- The new parser is an event-based parser, based on an event dispatcher engine. This engine is templated on event handler, where each event is a function call, which spares branches on the event handler. The parsing code was fully rewritten, and is now much more simple (albeit longer), and much easier to work with and fix.
- YAML standard-conformance was improved significantly. Along with many smaller fixes and additions, (too many to list here), the main changes are the following:
- The parser engine can now successfully parse container keys, emitting all the events in the correct , **but** as before, the ryml tree cannot accomodate these (and this constraint is no longer enforced by the parser, but instead by `EventHandlerTree`). For an example of a handler which can accomodate key containers, see the one which is used for the test suite at `test/test_suite/test_suite_event_handler.hpp`
- Anchor keys can now be terminated with colon (eg, `&anchor: key: val`), as dictated by the standard.
Expand Down
22 changes: 22 additions & 0 deletions src/c4/yml/event_handler_stack.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,28 @@ struct EventHandlerStack
#endif
}

substr _stack_relocate_to_new_arena(csubstr s, csubstr prev, substr curr)
{
_RYML_CB_ASSERT(m_stack.m_callbacks, prev.is_super(s));
auto pos = s.str - prev.str;
substr out = {curr.str + pos, s.len};
_RYML_CB_ASSERT(m_stack.m_callbacks, curr.is_super(out));
return out;
}

void _stack_relocate_to_new_arena(csubstr prev, substr curr)
{
for(state &st : m_stack)
{
if(st.line_contents.rem.is_sub(prev))
st.line_contents.rem = _stack_relocate_to_new_arena(st.line_contents.rem, prev, curr);
if(st.line_contents.full.is_sub(prev))
st.line_contents.full = _stack_relocate_to_new_arena(st.line_contents.full, prev, curr);
if(st.line_contents.stripped.is_sub(prev))
st.line_contents.stripped = _stack_relocate_to_new_arena(st.line_contents.stripped, prev, curr);
}
}

protected:

// undefined at the end
Expand Down
24 changes: 23 additions & 1 deletion src/c4/yml/event_handler_tree.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -229,13 +229,15 @@ struct EventHandlerTree : public EventHandlerStack<EventHandlerTree, EventHandle
void begin_map_val_flow()
{
_c4dbgpf("node[{}]: begin_map_val_flow", m_curr->node_id);
_RYML_CB_CHECK(m_stack.m_callbacks, !_has_any_(VAL));
_enable_(MAP|FLOW_SL);
_save_loc();
_push();
}
void begin_map_val_block()
{
_c4dbgpf("node[{}]: begin_map_val_block", m_curr->node_id);
_RYML_CB_CHECK(m_stack.m_callbacks, !_has_any_(VAL));
_enable_(MAP|BLOCK);
_save_loc();
_push();
Expand Down Expand Up @@ -266,13 +268,15 @@ struct EventHandlerTree : public EventHandlerStack<EventHandlerTree, EventHandle
void begin_seq_val_flow()
{
_c4dbgpf("node[{}]: begin_seq_val_flow", m_curr->node_id);
_RYML_CB_CHECK(m_stack.m_callbacks, !_has_any_(VAL));
_enable_(SEQ|FLOW_SL);
_save_loc();
_push();
}
void begin_seq_val_block()
{
_c4dbgpf("node[{}]: begin_seq_val_block", m_curr->node_id);
_RYML_CB_CHECK(m_stack.m_callbacks, !_has_any_(VAL));
_enable_(SEQ|BLOCK);
_save_loc();
_push();
Expand Down Expand Up @@ -515,7 +519,24 @@ struct EventHandlerTree : public EventHandlerStack<EventHandlerTree, EventHandle

substr alloc_arena(size_t len)
{
return m_tree->alloc_arena(len);
csubstr prev = m_tree->arena();
substr out = m_tree->alloc_arena(len);
substr curr = m_tree->arena();
if(curr.str != prev.str)
_stack_relocate_to_new_arena(prev, curr);
return out;
}

substr alloc_arena(size_t len, substr *relocated)
{
csubstr prev = m_tree->arena();
if(!prev.is_super(*relocated))
return alloc_arena(len);
substr out = alloc_arena(len);
substr curr = m_tree->arena();
if(curr.str != prev.str)
*relocated = _stack_relocate_to_new_arena(*relocated, prev, curr);
return out;
}

/** @} */
Expand Down Expand Up @@ -675,6 +696,7 @@ struct EventHandlerTree : public EventHandlerStack<EventHandlerTree, EventHandle

C4_ALWAYS_INLINE void _save_loc()
{
_RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->_p(m_curr->node_id)->m_val.scalar.len == 0);
m_tree->_p(m_curr->node_id)->m_val.scalar.str = m_curr->line_contents.rem.str;
}

Expand Down
19 changes: 13 additions & 6 deletions src/c4/yml/parse_engine.def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3341,7 +3341,7 @@ csubstr ParseEngine<EventHandler>::_filter_scalar_dquot(substr s)
{
const size_t len = r.required_len();
_c4dbgpf("filtering dquo scalar: not enough space: needs {}, have {}", len, s.len);
substr dst = m_evt_handler->alloc_arena(len);
substr dst = m_evt_handler->alloc_arena(len, &s);
_c4dbgpf("filtering dquo scalar: dst.len={}", dst.len);
_RYML_CB_ASSERT(this->callbacks(), dst.len == len);
FilterResult rsd = this->filter_scalar_dquoted(s, dst);
Expand All @@ -3368,7 +3368,7 @@ csubstr ParseEngine<EventHandler>::_filter_scalar_literal(substr s, size_t inden
else
{
_c4dbgpf("filtering block literal scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
substr dst = m_evt_handler->alloc_arena(r.required_len());
substr dst = m_evt_handler->alloc_arena(r.required_len(), &s);
FilterResult rsd = this->filter_scalar_block_literal(s, dst, indentation, chomp);
_RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, rsd.valid());
_c4dbgpf("filtering block literal scalar: success! s=[{}]~~~{}~~~", rsd.get().len, rsd.get());
Expand All @@ -3391,7 +3391,7 @@ csubstr ParseEngine<EventHandler>::_filter_scalar_folded(substr s, size_t indent
else
{
_c4dbgpf("filtering block folded scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
substr dst = m_evt_handler->alloc_arena(r.required_len());
substr dst = m_evt_handler->alloc_arena(r.required_len(), &s);
FilterResult rsd = this->filter_scalar_block_folded(s, dst, indentation, chomp);
_RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, rsd.valid());
_c4dbgpf("filtering block folded scalar: success! s=[{}]~~~{}~~~", rsd.get().len, rsd.get());
Expand Down Expand Up @@ -4727,9 +4727,16 @@ void ParseEngine<EventHandler>::_handle_seq_imap()
_c4dbgt("seqimap: go again", 0);
if(_finished_line())
{
_line_ended();
_scan_line();
_c4dbgnextline();
if(C4_LIKELY(!_finished_file()))
{
_line_ended();
_scan_line();
_c4dbgnextline();
}
else
{
_c4err("parse error");
}
}
goto seqimap_start;

Expand Down
3 changes: 2 additions & 1 deletion src/c4/yml/tree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,8 @@ void Tree::_relocate(substr next_arena)
{
_RYML_CB_ASSERT(m_callbacks, next_arena.not_empty());
_RYML_CB_ASSERT(m_callbacks, next_arena.len >= m_arena.len);
memcpy(next_arena.str, m_arena.str, m_arena_pos);
if(m_arena_pos)
memcpy(next_arena.str, m_arena.str, m_arena_pos);
for(NodeData *C4_RESTRICT n = m_buf, *e = m_buf + m_cap; n != e; ++n)
{
if(in_arena(n->m_key.scalar))
Expand Down
39 changes: 33 additions & 6 deletions test/test_suite/test_suite_event_handler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,9 +90,9 @@ struct EventHandlerYamlStd : public EventHandlerStack<EventHandlerYamlStd, Event
/** @name construction and resetting
* @{ */

EventHandlerYamlStd() : EventHandlerStack(), m_sink(), m_val_buffers() {}
EventHandlerYamlStd(Callbacks const& cb) : EventHandlerStack(cb), m_sink(), m_val_buffers() {}
EventHandlerYamlStd(EventSink *sink, Callbacks const& cb) : EventHandlerStack(cb), m_sink(sink), m_val_buffers()
EventHandlerYamlStd() : EventHandlerStack(), m_sink(), m_val_buffers(), m_tag_directives(), m_arena() {}
EventHandlerYamlStd(Callbacks const& cb) : EventHandlerStack(cb), m_sink(), m_val_buffers(), m_tag_directives(), m_arena() {}
EventHandlerYamlStd(EventSink *sink, Callbacks const& cb) : EventHandlerStack(cb), m_sink(sink), m_val_buffers(), m_tag_directives(), m_arena()
{
reset();
}
Expand All @@ -106,6 +106,7 @@ struct EventHandlerYamlStd : public EventHandlerStack<EventHandlerYamlStd, Event
td = {};
m_val_buffers.resize((size_t)m_stack.size());
m_arena.clear();
m_arena.reserve(1024);
}

/** @} */
Expand Down Expand Up @@ -213,6 +214,7 @@ struct EventHandlerYamlStd : public EventHandlerStack<EventHandlerYamlStd, Event

void begin_map_key_flow()
{
_RYML_CB_CHECK(m_stack.m_callbacks, !_has_any_(VAL));
_send_("+MAP {}");
_send_key_props_();
_send_('\n');
Expand All @@ -222,6 +224,7 @@ struct EventHandlerYamlStd : public EventHandlerStack<EventHandlerYamlStd, Event
}
void begin_map_key_block()
{
_RYML_CB_CHECK(m_stack.m_callbacks, !_has_any_(VAL));
_send_("+MAP");
_send_key_props_();
_send_('\n');
Expand All @@ -232,6 +235,7 @@ struct EventHandlerYamlStd : public EventHandlerStack<EventHandlerYamlStd, Event

void begin_map_val_flow()
{
_RYML_CB_CHECK(m_stack.m_callbacks, !_has_any_(VAL));
_send_("+MAP {}");
_send_val_props_();
_send_('\n');
Expand All @@ -241,6 +245,7 @@ struct EventHandlerYamlStd : public EventHandlerStack<EventHandlerYamlStd, Event
}
void begin_map_val_block()
{
_RYML_CB_CHECK(m_stack.m_callbacks, !_has_any_(VAL));
_send_("+MAP");
_send_val_props_();
_send_('\n');
Expand All @@ -264,6 +269,7 @@ struct EventHandlerYamlStd : public EventHandlerStack<EventHandlerYamlStd, Event

void begin_seq_key_flow()
{
_RYML_CB_CHECK(m_stack.m_callbacks, !_has_any_(VAL));
_send_("+SEQ []");
_send_key_props_();
_send_('\n');
Expand All @@ -273,6 +279,7 @@ struct EventHandlerYamlStd : public EventHandlerStack<EventHandlerYamlStd, Event
}
void begin_seq_key_block()
{
_RYML_CB_CHECK(m_stack.m_callbacks, !_has_any_(VAL));
_send_("+SEQ");
_send_key_props_();
_send_('\n');
Expand All @@ -283,6 +290,7 @@ struct EventHandlerYamlStd : public EventHandlerStack<EventHandlerYamlStd, Event

void begin_seq_val_flow()
{
_RYML_CB_CHECK(m_stack.m_callbacks, !_has_any_(VAL));
_send_("+SEQ []");
_send_val_props_();
_send_('\n');
Expand All @@ -292,6 +300,7 @@ struct EventHandlerYamlStd : public EventHandlerStack<EventHandlerYamlStd, Event
}
void begin_seq_val_block()
{
_RYML_CB_CHECK(m_stack.m_callbacks, !_has_any_(VAL));
_send_("+SEQ");
_send_val_props_();
_send_('\n');
Expand Down Expand Up @@ -332,6 +341,7 @@ struct EventHandlerYamlStd : public EventHandlerStack<EventHandlerYamlStd, Event
_buf_ensure_(tmp + id_type(2));
// save the current val to the temporary buffer
_buf_flush_to_(m_curr->level, tmp);
_disable_(_VALMASK|VAL_STYLE);
// create the map.
// this will push a new level, and tmp is one further
begin_map_val_flow();
Expand Down Expand Up @@ -557,9 +567,26 @@ struct EventHandlerYamlStd : public EventHandlerStack<EventHandlerYamlStd, Event

substr alloc_arena(size_t len)
{
const size_t curr = m_arena.size();
m_arena.resize(curr + len);
return to_substr(m_arena).sub(curr);
const size_t sz = m_arena.size();
csubstr prev = to_csubstr(m_arena);
m_arena.resize(sz + len);
substr out = to_substr(m_arena).sub(sz);
substr curr = to_substr(m_arena);
if(curr.str != prev.str)
_stack_relocate_to_new_arena(prev, curr);
return out;
}

substr alloc_arena(size_t len, substr *relocated)
{
csubstr prev = to_csubstr(m_arena);
if(!prev.is_super(*relocated))
return alloc_arena(len);
substr out = alloc_arena(len);
substr curr = to_substr(m_arena);
if(curr.str != prev.str)
*relocated = _stack_relocate_to_new_arena(*relocated, prev, curr);
return out;
}

/** @} */
Expand Down

0 comments on commit 1ccfbc0

Please sign in to comment.