Skip to content

Commit

Permalink
Merge pull request #88 from paulsengroup/feat/py-logging
Browse files Browse the repository at this point in the history
Support interfacing spdlog with Python's logging stdlib
  • Loading branch information
robomics authored Oct 18, 2024
2 parents 79df47c + e96df83 commit 778a0d4
Show file tree
Hide file tree
Showing 12 changed files with 216 additions and 24 deletions.
1 change: 1 addition & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ nanobind_add_module(
"${CMAKE_CURRENT_SOURCE_DIR}/file.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/hic_file_writer.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/hictkpy.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/logger.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/multires_file.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/pixel_selector.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/reference.cpp"
Expand Down
9 changes: 9 additions & 0 deletions src/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

#include <fmt/format.h>

#include <algorithm>
#include <cctype>
#include <cstdint>
#include <hictk/numeric_variant.hpp>
#include <stdexcept>
Expand Down Expand Up @@ -65,4 +67,11 @@ hictk::internal::NumericVariant map_dtype_to_type(std::string_view dtype) {
dtype));
}

std::string normalize_log_lvl(std::string_view lvl) {
std::string normalized_lvl{lvl};
std::transform(normalized_lvl.begin(), normalized_lvl.end(), normalized_lvl.begin(),
[](unsigned char c) { return std::tolower(c); });
return normalized_lvl;
}

} // namespace hictkpy
44 changes: 34 additions & 10 deletions src/cooler_file_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ CoolerFileWriter::CoolerFileWriter(std::filesystem::path path_, const Chromosome
throw std::runtime_error(
fmt::format(FMT_STRING("unable to create .cool file \"{}\": file already exists"), path()));
}

SPDLOG_INFO(FMT_STRING("using \"{}\" folder to store temporary file(s)"), _tmpdir());
}

const std::filesystem::path &CoolerFileWriter::path() const noexcept { return _path; }
Expand Down Expand Up @@ -87,37 +89,56 @@ void CoolerFileWriter::add_pixels(const nb::object &df) {

auto clr = _w->create_cell<N>(cell_id, std::move(attrs),
hictk::cooler::DEFAULT_HDF5_CACHE_SIZE * 4, 1);

SPDLOG_INFO(FMT_STRING("adding {} pixels of type {} to file \"{}\"..."), pixels.size(),
dtype_str, clr.uri());
clr.append_pixels(pixels.begin(), pixels.end());

clr.flush();
},
var);
}

void CoolerFileWriter::serialize([[maybe_unused]] const std::string &log_lvl_str) {
void CoolerFileWriter::finalize([[maybe_unused]] std::string_view log_lvl_str,
std::size_t chunk_size, std::size_t update_freq) {
if (_finalized) {
throw std::runtime_error(
fmt::format(FMT_STRING("serialize() was already called on file \"{}\""), _path));
fmt::format(FMT_STRING("finalize() was already called on file \"{}\""), _path));
}

if (chunk_size == 0) {
throw std::runtime_error("chunk_size must be greater than 0");
}

assert(_w.has_value());
// NOLINTBEGIN(*-unchecked-optional-access)
#ifndef _WIN32
// TODO fixme
// There is something very odd going on when trying to call most spdlog functions from within
// Python bindings on recent versions of Windows.
// Possibly related to https://github.com/gabime/spdlog/issues/3212
const auto log_lvl = spdlog::level::from_str(log_lvl_str);
const auto log_lvl = spdlog::level::from_str(normalize_log_lvl(log_lvl_str));
const auto previous_lvl = spdlog::default_logger()->level();
spdlog::default_logger()->set_level(log_lvl);

SPDLOG_INFO(FMT_STRING("finalizing file \"{}\"..."), _path);
#endif
std::visit(
[&](const auto &num) {
using N = hictk::remove_cvref_t<decltype(num)>;
_w->aggregate<N>(_path.string(), false, _compression_lvl);
},
_w->open("0").pixel_variant());
try {
std::visit(
[&](const auto &num) {
using N = hictk::remove_cvref_t<decltype(num)>;
_w->aggregate<N>(_path.string(), false, _compression_lvl, chunk_size, update_freq);
},
_w->open("0").pixel_variant());
} catch (...) {
#ifndef _WIN32
spdlog::default_logger()->set_level(previous_lvl);
#endif
throw;
}

_finalized = true;
SPDLOG_INFO(FMT_STRING("merged {} cooler(s) into file \"{}\""), _w->cells().size(), _path);
#ifndef _WIN32
spdlog::default_logger()->set_level(previous_lvl);
#endif
Expand Down Expand Up @@ -173,7 +194,10 @@ void CoolerFileWriter::bind(nb::module_ &m) {
"Add pixels from a pandas DataFrame containing pixels in COO or BG2 format (i.e. "
"either with columns=[bin1_id, bin2_id, count] or with columns=[chrom1, start1, end1, "
"chrom2, start2, end2, count].");
writer.def("finalize", &hictkpy::CoolerFileWriter::serialize, nb::arg("log_lvl") = "warn",
// NOLINTBEGIN(*-avoid-magic-numbers)
writer.def("finalize", &hictkpy::CoolerFileWriter::finalize, nb::arg("log_lvl") = "WARN",
nb::arg("chunk_size") = 500'000, nb::arg("update_frequency") = 10'000'000,
"Write interactions to file.");
// NOLINTEND(*-avoid-magic-numbers)
}
} // namespace hictkpy
29 changes: 22 additions & 7 deletions src/hic_file_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <utility>
#include <vector>

#include "hictkpy/common.hpp"
#include "hictkpy/nanobind.hpp"
#include "hictkpy/pixel.hpp"
#include "hictkpy/reference.hpp"
Expand All @@ -33,7 +34,9 @@ HiCFileWriter::HiCFileWriter(const std::filesystem::path &path_, const Chromosom
std::uint32_t compression_lvl, bool skip_all_vs_all_matrix)
: _tmpdir(tmpdir, true),
_w(path_.string(), chromosome_dict_to_reference(chromosomes), resolutions_, assembly,
n_threads, chunk_size, _tmpdir(), compression_lvl, skip_all_vs_all_matrix) {}
n_threads, chunk_size, _tmpdir(), compression_lvl, skip_all_vs_all_matrix) {
SPDLOG_INFO(FMT_STRING("using \"{}\" folder to store temporary file(s)"), _tmpdir());
}

HiCFileWriter::HiCFileWriter(const std::filesystem::path &path_, const ChromosomeDict &chromosomes,
std::uint32_t resolution, std::string_view assembly,
Expand All @@ -43,22 +46,33 @@ HiCFileWriter::HiCFileWriter(const std::filesystem::path &path_, const Chromosom
: HiCFileWriter(path_, chromosomes, std::vector<std::uint32_t>{resolution}, assembly, n_threads,
chunk_size, tmpdir, compression_lvl, skip_all_vs_all_matrix) {}

void HiCFileWriter::serialize([[maybe_unused]] const std::string &log_lvl_str) {
void HiCFileWriter::finalize([[maybe_unused]] std::string_view log_lvl_str) {
if (_finalized) {
throw std::runtime_error(
fmt::format(FMT_STRING("finalize was already called on file \"{}\""), _w.path()));
fmt::format(FMT_STRING("finalize() was already called on file \"{}\""), _w.path()));
}

#ifndef _WIN32
// TODO fixme
// There is something very odd going on when trying to call most spdlog functions from within
// Python bindings on recent versions of Windows.
// Possibly related to https://github.com/gabime/spdlog/issues/3212
const auto log_lvl = spdlog::level::from_str(log_lvl_str);
const auto log_lvl = spdlog::level::from_str(normalize_log_lvl(log_lvl_str));
const auto previous_lvl = spdlog::default_logger()->level();
spdlog::default_logger()->set_level(log_lvl);

SPDLOG_INFO(FMT_STRING("finalizing file \"{}\"..."), _w.path());
#endif
_w.serialize();
_finalized = true;
try {
_w.serialize();
_finalized = true;
} catch (...) {
#ifndef _WIN32
spdlog::default_logger()->set_level(previous_lvl);
#endif
throw;
}
SPDLOG_INFO(FMT_STRING("successfully finalized \"{}\"!"), _w.path());
#ifndef _WIN32
spdlog::default_logger()->set_level(previous_lvl);
#endif
Expand All @@ -84,6 +98,7 @@ void HiCFileWriter::add_pixels(const nb::object &df) {
const auto pixels =
coo_format ? coo_df_to_thin_pixels<float>(df, false)
: bg2_df_to_thin_pixels<float>(_w.bins(_w.resolutions().front()), df, false);
SPDLOG_INFO(FMT_STRING("adding {} pixels to file \"{}\"..."), pixels.size(), _w.path());
_w.add_pixels(_w.resolutions().front(), pixels.begin(), pixels.end());
}

Expand Down Expand Up @@ -133,7 +148,7 @@ void HiCFileWriter::bind(nb::module_ &m) {
"Add pixels from a pandas DataFrame containing pixels in COO or BG2 format (i.e. "
"either with columns=[bin1_id, bin2_id, count] or with columns=[chrom1, start1, end1, "
"chrom2, start2, end2, count].");
writer.def("finalize", &hictkpy::HiCFileWriter::serialize, nb::arg("log_lvl") = "warn",
writer.def("finalize", &hictkpy::HiCFileWriter::finalize, nb::arg("log_lvl") = "WARN",
"Write interactions to file.");
}

Expand Down
12 changes: 12 additions & 0 deletions src/hictkpy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
// SPDX-License-Identifier: MIT

#include <arrow/python/api.h>
#include <spdlog/spdlog.h>

#include <cstdint>
#include <hictk/version.hpp>
Expand All @@ -11,6 +12,7 @@
#include "hictkpy/cooler_file_writer.hpp"
#include "hictkpy/file.hpp"
#include "hictkpy/hic_file_writer.hpp"
#include "hictkpy/logger.hpp"
#include "hictkpy/multires_file.hpp"
#include "hictkpy/nanobind.hpp"
#include "hictkpy/pixel.hpp"
Expand All @@ -20,7 +22,17 @@
namespace nb = nanobind;
namespace hictkpy {

[[nodiscard]] static hictkpy::Logger init_logger() {
hictkpy::Logger logger{spdlog::level::debug};
#ifndef _WIN32
spdlog::set_default_logger(logger.get_logger());
#endif
return logger;
}

NB_MODULE(_hictkpy, m) {
[[maybe_unused]] const auto logger = init_logger();

if (arrow::py::import_pyarrow() == -1) {
throw std::runtime_error("failed to initialize pyarrow runtime");
}
Expand Down
3 changes: 3 additions & 0 deletions src/include/hictkpy/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <hictk/common.hpp>
#include <hictk/numeric_variant.hpp>
#include <stdexcept>
#include <string>
#include <string_view>
#include <type_traits>

Expand Down Expand Up @@ -60,4 +61,6 @@ template <typename T>

[[nodiscard]] hictk::internal::NumericVariant map_dtype_to_type(std::string_view dtype);

[[nodiscard]] std::string normalize_log_lvl(std::string_view lvl);

} // namespace hictkpy
2 changes: 1 addition & 1 deletion src/include/hictkpy/cooler_file_writer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ class CoolerFileWriter {

void add_pixels(const nanobind::object& df);

void serialize(const std::string& log_lvl_str = "warn");
void finalize(std::string_view log_lvl_str, std::size_t chunk_size, std::size_t update_frequency);

[[nodiscard]] std::string repr() const;
static void bind(nanobind::module_& m);
Expand Down
2 changes: 1 addition & 1 deletion src/include/hictkpy/hic_file_writer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ class HiCFileWriter {

void add_pixels(const nanobind::object& df);

void serialize(const std::string& log_lvl_str = "warn");
void finalize(std::string_view log_lvl_str);

[[nodiscard]] std::string repr() const;

Expand Down
32 changes: 32 additions & 0 deletions src/include/hictkpy/logger.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// Copyright (C) 2024 Roberto Rossini <roberros@uio.no>
//
// SPDX-License-Identifier: MIT

#pragma once

#include <spdlog/spdlog-inl.h>

#include <memory>
#include <string_view>

#include "hictkpy/nanobind.hpp"

namespace hictkpy {

class Logger {
nanobind::object _py_logger{};
std::shared_ptr<spdlog::logger> _logger{};

public:
explicit Logger(spdlog::level::level_enum level_ = spdlog::level::warn);
explicit Logger(std::string_view level_);

[[nodiscard]] std::shared_ptr<spdlog::logger> get_logger();

private:
[[nodiscard]] static nanobind::object init_py_logger();
[[nodiscard]] static std::shared_ptr<spdlog::logger> init_cpp_logger(
spdlog::level::level_enum level_, nanobind::object py_logger);
};

} // namespace hictkpy
75 changes: 75 additions & 0 deletions src/logger.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
// Copyright (C) 2024 Roberto Rossini <roberros@uio.no>
//
// SPDX-License-Identifier: MIT

#include "hictkpy/logger.hpp"

#include <spdlog/sinks/callback_sink.h>
#include <spdlog/spdlog.h>

#include "hictkpy/common.hpp"
#include "hictkpy/nanobind.hpp"

namespace nb = nanobind;

namespace hictkpy {

[[nodiscard]] static std::int32_t to_py_lvl(spdlog::level::level_enum level) {
// https://docs.python.org/3/library/logging.html#logging-levels
// NOLINTBEGIN(*-avoid-magic-numbers)
switch (level) {
case spdlog::level::trace:
[[fallthrough]];
case spdlog::level::debug:
return 10;
case spdlog::level::info:
return 20;
case spdlog::level::warn:
return 30;
case spdlog::level::err:
return 40;
case spdlog::level::critical:
[[fallthrough]];
case spdlog::level::off:
return 50;
default:
unreachable_code();
}
// NOLINTEND(*-avoid-magic-numbers)
}

Logger::Logger(spdlog::level::level_enum level_)
: _py_logger(init_py_logger()), _logger(init_cpp_logger(level_, _py_logger)) {}

Logger::Logger(std::string_view level_) : Logger(spdlog::level::from_str(std::string{level_})) {}

nb::object Logger::init_py_logger() {
const auto logging = nb::module_::import_("logging");
return logging.attr("getLogger")("hictkpy");
}

std::shared_ptr<spdlog::logger> Logger::get_logger() { return _logger; }

std::shared_ptr<spdlog::logger> Logger::init_cpp_logger(
[[maybe_unused]] spdlog::level::level_enum level_, [[maybe_unused]] nb::object py_logger) {
#ifndef _WIN32
auto sink = std::make_shared<spdlog::sinks::callback_sink_mt>(
// NOLINTNEXTLINE(*-unnecessary-value-param)
[logger = py_logger](const spdlog::details::log_msg& msg) {
logger.attr("log")(to_py_lvl(msg.level),
std::string_view{msg.payload.data(), msg.payload.size()});
});

sink->set_pattern("%v");
sink->set_level(level_);

auto logger = std::make_shared<spdlog::logger>("hictkpy", std::move(sink));
logger->set_level(level_);

return logger;
#else
return nullptr;
#endif
}

} // namespace hictkpy
Loading

0 comments on commit 778a0d4

Please sign in to comment.