Skip to content

Commit

Permalink
clp-s: Report exactly where parsing error occurs when parsing JSON (fixes #514). (#503)
Browse files Browse the repository at this point in the history
  • Loading branch information
gibber9809 authored Aug 12, 2024
1 parent c3cdf66 commit f05264e
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 10 deletions.
42 changes: 32 additions & 10 deletions components/core/src/clp_s/JsonParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -428,7 +428,7 @@ bool JsonParser::parse() {

if (simdjson::error_code::SUCCESS != json_file_iterator.get_error()) {
SPDLOG_ERROR(
"Encountered error - {} - while trying to parse {}",
"Encountered error - {} - while trying to parse {} after parsing 0 bytes",
simdjson::error_message(json_file_iterator.get_error()),
file_path
);
Expand All @@ -439,7 +439,8 @@ bool JsonParser::parse() {
simdjson::ondemand::document_stream::iterator json_it;

m_num_messages = 0;
size_t last_num_bytes_consumed = 0;
size_t bytes_consumed_up_to_prev_archive = 0;
size_t bytes_consumed_up_to_prev_record = 0;
while (json_file_iterator.get_json(json_it)) {
m_current_schema.clear();

Expand All @@ -450,39 +451,60 @@ bool JsonParser::parse() {
// that this isn't a valid JSON document but they get set in different situations so we
// need to check both here.
if (is_scalar_result.error() || true == is_scalar_result.value()) {
SPDLOG_ERROR("Encountered non-json-object while trying to parse {}", file_path);
SPDLOG_ERROR(
"Encountered non-json-object while trying to parse {} after parsing {} "
"bytes",
file_path,
bytes_consumed_up_to_prev_record
);
m_archive_writer->close();
return false;
}

// Some errors from simdjson are latent until trying to access invalid JSON fields.
// Instead of checking for an error every time we access a JSON field in parse_line we
// just catch simdjson_error here instead.
try {
parse_line(ref.value(), -1, "");
} catch (simdjson::simdjson_error& error) {
SPDLOG_ERROR(
"Encountered error - {} - while trying to parse {} after parsing {} bytes",
error.what(),
file_path,
bytes_consumed_up_to_prev_record
);
m_archive_writer->close();
return false;
}
parse_line(ref.value(), -1, "");
m_num_messages++;

int32_t current_schema_id = m_archive_writer->add_schema(m_current_schema);
m_current_parsed_message.set_id(current_schema_id);
m_archive_writer
->append_message(current_schema_id, m_current_schema, m_current_parsed_message);

bytes_consumed_up_to_prev_record = json_file_iterator.get_num_bytes_consumed();
if (m_archive_writer->get_data_size() >= m_target_encoded_size) {
size_t num_bytes_read = json_file_iterator.get_num_bytes_consumed();
m_archive_writer->increment_uncompressed_size(
num_bytes_read - last_num_bytes_consumed
bytes_consumed_up_to_prev_record - bytes_consumed_up_to_prev_archive
);
last_num_bytes_consumed = num_bytes_read;
bytes_consumed_up_to_prev_archive = bytes_consumed_up_to_prev_record;
split_archive();
}

m_current_parsed_message.clear();
}

m_archive_writer->increment_uncompressed_size(
json_file_iterator.get_num_bytes_read() - last_num_bytes_consumed
json_file_iterator.get_num_bytes_read() - bytes_consumed_up_to_prev_archive
);

if (simdjson::error_code::SUCCESS != json_file_iterator.get_error()) {
SPDLOG_ERROR(
"Encountered error - {} - while trying to parse {}",
"Encountered error - {} - while trying to parse {} after parsing {} bytes",
simdjson::error_message(json_file_iterator.get_error()),
file_path
file_path,
bytes_consumed_up_to_prev_record
);
m_archive_writer->close();
return false;
Expand Down
1 change: 1 addition & 0 deletions components/core/src/clp_s/JsonParser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ class JsonParser {
* @param line the JSON line
* @param parent_node_id the parent node id
* @param key the key of the node
* @throw simdjson::simdjson_error when encountering invalid fields while parsing line
*/
void parse_line(ondemand::value line, int32_t parent_node_id, std::string const& key);

Expand Down

0 comments on commit f05264e

Please sign in to comment.